diff options
Diffstat (limited to 'fs')
194 files changed, 39861 insertions, 4851 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index fd01d90cada5..57997fa14e69 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h | |||
| @@ -51,4 +51,4 @@ int v9fs_dir_release(struct inode *inode, struct file *filp); | |||
| 51 | int v9fs_file_open(struct inode *inode, struct file *file); | 51 | int v9fs_file_open(struct inode *inode, struct file *file); |
| 52 | void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); | 52 | void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); |
| 53 | void v9fs_dentry_release(struct dentry *); | 53 | void v9fs_dentry_release(struct dentry *); |
| 54 | int v9fs_uflags2omode(int uflags); | 54 | int v9fs_uflags2omode(int uflags, int extended); |
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 0d55affe37d4..52944d2249a4 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
| @@ -59,7 +59,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) | |||
| 59 | 59 | ||
| 60 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); | 60 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); |
| 61 | v9ses = v9fs_inode2v9ses(inode); | 61 | v9ses = v9fs_inode2v9ses(inode); |
| 62 | omode = v9fs_uflags2omode(file->f_flags); | 62 | omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses)); |
| 63 | fid = file->private_data; | 63 | fid = file->private_data; |
| 64 | if (!fid) { | 64 | if (!fid) { |
| 65 | fid = v9fs_fid_clone(file->f_path.dentry); | 65 | fid = v9fs_fid_clone(file->f_path.dentry); |
| @@ -75,6 +75,8 @@ int v9fs_file_open(struct inode *inode, struct file *file) | |||
| 75 | inode->i_size = 0; | 75 | inode->i_size = 0; |
| 76 | inode->i_blocks = 0; | 76 | inode->i_blocks = 0; |
| 77 | } | 77 | } |
| 78 | if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) | ||
| 79 | generic_file_llseek(file, 0, SEEK_END); | ||
| 78 | } | 80 | } |
| 79 | 81 | ||
| 80 | file->private_data = fid; | 82 | file->private_data = fid; |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 40fa807bd929..c95295c65045 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
| @@ -132,10 +132,10 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) | |||
| 132 | /** | 132 | /** |
| 133 | * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits | 133 | * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits |
| 134 | * @uflags: flags to convert | 134 | * @uflags: flags to convert |
| 135 | * | 135 | * @extended: if .u extensions are active |
| 136 | */ | 136 | */ |
| 137 | 137 | ||
| 138 | int v9fs_uflags2omode(int uflags) | 138 | int v9fs_uflags2omode(int uflags, int extended) |
| 139 | { | 139 | { |
| 140 | int ret; | 140 | int ret; |
| 141 | 141 | ||
| @@ -155,14 +155,16 @@ int v9fs_uflags2omode(int uflags) | |||
| 155 | break; | 155 | break; |
| 156 | } | 156 | } |
| 157 | 157 | ||
| 158 | if (uflags & O_EXCL) | ||
| 159 | ret |= P9_OEXCL; | ||
| 160 | |||
| 161 | if (uflags & O_TRUNC) | 158 | if (uflags & O_TRUNC) |
| 162 | ret |= P9_OTRUNC; | 159 | ret |= P9_OTRUNC; |
| 163 | 160 | ||
| 164 | if (uflags & O_APPEND) | 161 | if (extended) { |
| 165 | ret |= P9_OAPPEND; | 162 | if (uflags & O_EXCL) |
| 163 | ret |= P9_OEXCL; | ||
| 164 | |||
| 165 | if (uflags & O_APPEND) | ||
| 166 | ret |= P9_OAPPEND; | ||
| 167 | } | ||
| 166 | 168 | ||
| 167 | return ret; | 169 | return ret; |
| 168 | } | 170 | } |
| @@ -506,7 +508,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 506 | flags = O_RDWR; | 508 | flags = O_RDWR; |
| 507 | 509 | ||
| 508 | fid = v9fs_create(v9ses, dir, dentry, NULL, perm, | 510 | fid = v9fs_create(v9ses, dir, dentry, NULL, perm, |
| 509 | v9fs_uflags2omode(flags)); | 511 | v9fs_uflags2omode(flags, v9fs_extended(v9ses))); |
| 510 | if (IS_ERR(fid)) { | 512 | if (IS_ERR(fid)) { |
| 511 | err = PTR_ERR(fid); | 513 | err = PTR_ERR(fid); |
| 512 | fid = NULL; | 514 | fid = NULL; |
diff --git a/fs/Kconfig b/fs/Kconfig index cf12c403b8c7..17216ba99c85 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -830,7 +830,7 @@ config NTFS_FS | |||
| 830 | from the project web site. | 830 | from the project web site. |
| 831 | 831 | ||
| 832 | For more information see <file:Documentation/filesystems/ntfs.txt> | 832 | For more information see <file:Documentation/filesystems/ntfs.txt> |
| 833 | and <http://linux-ntfs.sourceforge.net/>. | 833 | and <http://www.linux-ntfs.org/>. |
| 834 | 834 | ||
| 835 | To compile this file system support as a module, choose M here: the | 835 | To compile this file system support as a module, choose M here: the |
| 836 | module will be called ntfs. | 836 | module will be called ntfs. |
| @@ -930,7 +930,7 @@ config PROC_KCORE | |||
| 930 | 930 | ||
| 931 | config PROC_VMCORE | 931 | config PROC_VMCORE |
| 932 | bool "/proc/vmcore support (EXPERIMENTAL)" | 932 | bool "/proc/vmcore support (EXPERIMENTAL)" |
| 933 | depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP | 933 | depends on PROC_FS && CRASH_DUMP |
| 934 | default y | 934 | default y |
| 935 | help | 935 | help |
| 936 | Exports the dump image of crashed kernel in ELF format. | 936 | Exports the dump image of crashed kernel in ELF format. |
| @@ -1375,6 +1375,9 @@ config JFFS2_CMODE_FAVOURLZO | |||
| 1375 | 1375 | ||
| 1376 | endchoice | 1376 | endchoice |
| 1377 | 1377 | ||
| 1378 | # UBIFS File system configuration | ||
| 1379 | source "fs/ubifs/Kconfig" | ||
| 1380 | |||
| 1378 | config CRAMFS | 1381 | config CRAMFS |
| 1379 | tristate "Compressed ROM file system support (cramfs)" | 1382 | tristate "Compressed ROM file system support (cramfs)" |
| 1380 | depends on BLOCK | 1383 | depends on BLOCK |
| @@ -1544,10 +1547,6 @@ config UFS_FS | |||
| 1544 | The recently released UFS2 variant (used in FreeBSD 5.x) is | 1547 | The recently released UFS2 variant (used in FreeBSD 5.x) is |
| 1545 | READ-ONLY supported. | 1548 | READ-ONLY supported. |
| 1546 | 1549 | ||
| 1547 | If you only intend to mount files from some other Unix over the | ||
| 1548 | network using NFS, you don't need the UFS file system support (but | ||
| 1549 | you need NFS file system support obviously). | ||
| 1550 | |||
| 1551 | Note that this option is generally not needed for floppies, since a | 1550 | Note that this option is generally not needed for floppies, since a |
| 1552 | good portable way to transport files and directories between unixes | 1551 | good portable way to transport files and directories between unixes |
| 1553 | (and even other operating systems) is given by the tar program ("man | 1552 | (and even other operating systems) is given by the tar program ("man |
| @@ -1587,6 +1586,7 @@ menuconfig NETWORK_FILESYSTEMS | |||
| 1587 | Say Y here to get to see options for network filesystems and | 1586 | Say Y here to get to see options for network filesystems and |
| 1588 | filesystem-related networking code, such as NFS daemon and | 1587 | filesystem-related networking code, such as NFS daemon and |
| 1589 | RPCSEC security modules. | 1588 | RPCSEC security modules. |
| 1589 | |||
| 1590 | This option alone does not add any kernel code. | 1590 | This option alone does not add any kernel code. |
| 1591 | 1591 | ||
| 1592 | If you say N, all options in this submenu will be skipped and | 1592 | If you say N, all options in this submenu will be skipped and |
| @@ -1595,76 +1595,92 @@ menuconfig NETWORK_FILESYSTEMS | |||
| 1595 | if NETWORK_FILESYSTEMS | 1595 | if NETWORK_FILESYSTEMS |
| 1596 | 1596 | ||
| 1597 | config NFS_FS | 1597 | config NFS_FS |
| 1598 | tristate "NFS file system support" | 1598 | tristate "NFS client support" |
| 1599 | depends on INET | 1599 | depends on INET |
| 1600 | select LOCKD | 1600 | select LOCKD |
| 1601 | select SUNRPC | 1601 | select SUNRPC |
| 1602 | select NFS_ACL_SUPPORT if NFS_V3_ACL | 1602 | select NFS_ACL_SUPPORT if NFS_V3_ACL |
| 1603 | help | 1603 | help |
| 1604 | If you are connected to some other (usually local) Unix computer | 1604 | Choose Y here if you want to access files residing on other |
| 1605 | (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing | 1605 | computers using Sun's Network File System protocol. To compile |
| 1606 | on that computer (the NFS server) using the Network File Sharing | 1606 | this file system support as a module, choose M here: the module |
| 1607 | protocol, say Y. "Mounting files" means that the client can access | 1607 | will be called nfs. |
| 1608 | the files with usual UNIX commands as if they were sitting on the | ||
| 1609 | client's hard disk. For this to work, the server must run the | ||
| 1610 | programs nfsd and mountd (but does not need to have NFS file system | ||
| 1611 | support enabled in its kernel). NFS is explained in the Network | ||
| 1612 | Administrator's Guide, available from | ||
| 1613 | <http://www.tldp.org/docs.html#guide>, on its man page: "man | ||
| 1614 | nfs", and in the NFS-HOWTO. | ||
| 1615 | 1608 | ||
| 1616 | A superior but less widely used alternative to NFS is provided by | 1609 | To mount file systems exported by NFS servers, you also need to |
| 1617 | the Coda file system; see "Coda file system support" below. | 1610 | install the user space mount.nfs command which can be found in |
| 1611 | the Linux nfs-utils package, available from http://linux-nfs.org/. | ||
| 1612 | Information about using the mount command is available in the | ||
| 1613 | mount(8) man page. More detail about the Linux NFS client | ||
| 1614 | implementation is available via the nfs(5) man page. | ||
| 1618 | 1615 | ||
| 1619 | If you say Y here, you should have said Y to TCP/IP networking also. | 1616 | Below you can choose which versions of the NFS protocol are |
| 1620 | This option would enlarge your kernel by about 27 KB. | 1617 | available in the kernel to mount NFS servers. Support for NFS |
| 1621 | 1618 | version 2 (RFC 1094) is always available when NFS_FS is selected. | |
| 1622 | To compile this file system support as a module, choose M here: the | ||
| 1623 | module will be called nfs. | ||
| 1624 | 1619 | ||
| 1625 | If you are configuring a diskless machine which will mount its root | 1620 | To configure a system which mounts its root file system via NFS |
| 1626 | file system over NFS at boot time, say Y here and to "Kernel | 1621 | at boot time, say Y here, select "Kernel level IP |
| 1627 | level IP autoconfiguration" above and to "Root file system on NFS" | 1622 | autoconfiguration" in the NETWORK menu, and select "Root file |
| 1628 | below. You cannot compile this driver as a module in this case. | 1623 | system on NFS" below. You cannot compile this file system as a |
| 1629 | There are two packages designed for booting diskless machines over | 1624 | module in this case. |
| 1630 | the net: netboot, available from | ||
| 1631 | <http://ftp1.sourceforge.net/netboot/>, and Etherboot, | ||
| 1632 | available from <http://ftp1.sourceforge.net/etherboot/>. | ||
| 1633 | 1625 | ||
| 1634 | If you don't know what all this is about, say N. | 1626 | If unsure, say N. |
| 1635 | 1627 | ||
| 1636 | config NFS_V3 | 1628 | config NFS_V3 |
| 1637 | bool "Provide NFSv3 client support" | 1629 | bool "NFS client support for NFS version 3" |
| 1638 | depends on NFS_FS | 1630 | depends on NFS_FS |
| 1639 | help | 1631 | help |
| 1640 | Say Y here if you want your NFS client to be able to speak version | 1632 | This option enables support for version 3 of the NFS protocol |
| 1641 | 3 of the NFS protocol. | 1633 | (RFC 1813) in the kernel's NFS client. |
| 1642 | 1634 | ||
| 1643 | If unsure, say Y. | 1635 | If unsure, say Y. |
| 1644 | 1636 | ||
| 1645 | config NFS_V3_ACL | 1637 | config NFS_V3_ACL |
| 1646 | bool "Provide client support for the NFSv3 ACL protocol extension" | 1638 | bool "NFS client support for the NFSv3 ACL protocol extension" |
| 1647 | depends on NFS_V3 | 1639 | depends on NFS_V3 |
| 1648 | help | 1640 | help |
| 1649 | Implement the NFSv3 ACL protocol extension for manipulating POSIX | 1641 | Some NFS servers support an auxiliary NFSv3 ACL protocol that |
| 1650 | Access Control Lists. The server should also be compiled with | 1642 | Sun added to Solaris but never became an official part of the |
| 1651 | the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. | 1643 | NFS version 3 protocol. This protocol extension allows |
| 1644 | applications on NFS clients to manipulate POSIX Access Control | ||
| 1645 | Lists on files residing on NFS servers. NFS servers enforce | ||
| 1646 | ACLs on local files whether this protocol is available or not. | ||
| 1647 | |||
| 1648 | Choose Y here if your NFS server supports the Solaris NFSv3 ACL | ||
| 1649 | protocol extension and you want your NFS client to allow | ||
| 1650 | applications to access and modify ACLs on files on the server. | ||
| 1651 | |||
| 1652 | Most NFS servers don't support the Solaris NFSv3 ACL protocol | ||
| 1653 | extension. You can choose N here or specify the "noacl" mount | ||
| 1654 | option to prevent your NFS client from trying to use the NFSv3 | ||
| 1655 | ACL protocol. | ||
| 1652 | 1656 | ||
| 1653 | If unsure, say N. | 1657 | If unsure, say N. |
| 1654 | 1658 | ||
| 1655 | config NFS_V4 | 1659 | config NFS_V4 |
| 1656 | bool "Provide NFSv4 client support (EXPERIMENTAL)" | 1660 | bool "NFS client support for NFS version 4 (EXPERIMENTAL)" |
| 1657 | depends on NFS_FS && EXPERIMENTAL | 1661 | depends on NFS_FS && EXPERIMENTAL |
| 1658 | select RPCSEC_GSS_KRB5 | 1662 | select RPCSEC_GSS_KRB5 |
| 1659 | help | 1663 | help |
| 1660 | Say Y here if you want your NFS client to be able to speak the newer | 1664 | This option enables support for version 4 of the NFS protocol |
| 1661 | version 4 of the NFS protocol. | 1665 | (RFC 3530) in the kernel's NFS client. |
| 1662 | 1666 | ||
| 1663 | Note: Requires auxiliary userspace daemons which may be found on | 1667 | To mount NFS servers using NFSv4, you also need to install user |
| 1664 | http://www.citi.umich.edu/projects/nfsv4/ | 1668 | space programs which can be found in the Linux nfs-utils package, |
| 1669 | available from http://linux-nfs.org/. | ||
| 1665 | 1670 | ||
| 1666 | If unsure, say N. | 1671 | If unsure, say N. |
| 1667 | 1672 | ||
| 1673 | config ROOT_NFS | ||
| 1674 | bool "Root file system on NFS" | ||
| 1675 | depends on NFS_FS=y && IP_PNP | ||
| 1676 | help | ||
| 1677 | If you want your system to mount its root file system via NFS, | ||
| 1678 | choose Y here. This is common practice for managing systems | ||
| 1679 | without local permanent storage. For details, read | ||
| 1680 | <file:Documentation/filesystems/nfsroot.txt>. | ||
| 1681 | |||
| 1682 | Most people say N here. | ||
| 1683 | |||
| 1668 | config NFSD | 1684 | config NFSD |
| 1669 | tristate "NFS server support" | 1685 | tristate "NFS server support" |
| 1670 | depends on INET | 1686 | depends on INET |
| @@ -1746,20 +1762,6 @@ config NFSD_V4 | |||
| 1746 | 1762 | ||
| 1747 | If unsure, say N. | 1763 | If unsure, say N. |
| 1748 | 1764 | ||
| 1749 | config ROOT_NFS | ||
| 1750 | bool "Root file system on NFS" | ||
| 1751 | depends on NFS_FS=y && IP_PNP | ||
| 1752 | help | ||
| 1753 | If you want your Linux box to mount its whole root file system (the | ||
| 1754 | one containing the directory /) from some other computer over the | ||
| 1755 | net via NFS (presumably because your box doesn't have a hard disk), | ||
| 1756 | say Y. Read <file:Documentation/filesystems/nfsroot.txt> for | ||
| 1757 | details. It is likely that in this case, you also want to say Y to | ||
| 1758 | "Kernel level IP autoconfiguration" so that your box can discover | ||
| 1759 | its network address at boot time. | ||
| 1760 | |||
| 1761 | Most people say N here. | ||
| 1762 | |||
| 1763 | config LOCKD | 1765 | config LOCKD |
| 1764 | tristate | 1766 | tristate |
| 1765 | 1767 | ||
| @@ -1800,27 +1802,6 @@ config SUNRPC_XPRT_RDMA | |||
| 1800 | 1802 | ||
| 1801 | If unsure, say N. | 1803 | If unsure, say N. |
| 1802 | 1804 | ||
| 1803 | config SUNRPC_BIND34 | ||
| 1804 | bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" | ||
| 1805 | depends on SUNRPC && EXPERIMENTAL | ||
| 1806 | default n | ||
| 1807 | help | ||
| 1808 | RPC requests over IPv6 networks require support for larger | ||
| 1809 | addresses when performing an RPC bind. Sun added support for | ||
| 1810 | IPv6 addressing by creating two new versions of the rpcbind | ||
| 1811 | protocol (RFC 1833). | ||
| 1812 | |||
| 1813 | This option enables support in the kernel RPC client for | ||
| 1814 | querying rpcbind servers via versions 3 and 4 of the rpcbind | ||
| 1815 | protocol. The kernel automatically falls back to version 2 | ||
| 1816 | if a remote rpcbind service does not support versions 3 or 4. | ||
| 1817 | By themselves, these new versions do not provide support for | ||
| 1818 | RPC over IPv6, but the new protocol versions are necessary to | ||
| 1819 | support it. | ||
| 1820 | |||
| 1821 | If unsure, say N to get traditional behavior (version 2 rpcbind | ||
| 1822 | requests only). | ||
| 1823 | |||
| 1824 | config RPCSEC_GSS_KRB5 | 1805 | config RPCSEC_GSS_KRB5 |
| 1825 | tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" | 1806 | tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" |
| 1826 | depends on SUNRPC && EXPERIMENTAL | 1807 | depends on SUNRPC && EXPERIMENTAL |
diff --git a/fs/Makefile b/fs/Makefile index 1e7a11bd4da1..3b2178b4bb66 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -19,6 +19,7 @@ else | |||
| 19 | obj-y += no-block.o | 19 | obj-y += no-block.o |
| 20 | endif | 20 | endif |
| 21 | 21 | ||
| 22 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o | ||
| 22 | obj-$(CONFIG_INOTIFY) += inotify.o | 23 | obj-$(CONFIG_INOTIFY) += inotify.o |
| 23 | obj-$(CONFIG_INOTIFY_USER) += inotify_user.o | 24 | obj-$(CONFIG_INOTIFY_USER) += inotify_user.o |
| 24 | obj-$(CONFIG_EPOLL) += eventpoll.o | 25 | obj-$(CONFIG_EPOLL) += eventpoll.o |
| @@ -100,6 +101,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs/ | |||
| 100 | obj-$(CONFIG_UFS_FS) += ufs/ | 101 | obj-$(CONFIG_UFS_FS) += ufs/ |
| 101 | obj-$(CONFIG_EFS_FS) += efs/ | 102 | obj-$(CONFIG_EFS_FS) += efs/ |
| 102 | obj-$(CONFIG_JFFS2_FS) += jffs2/ | 103 | obj-$(CONFIG_JFFS2_FS) += jffs2/ |
| 104 | obj-$(CONFIG_UBIFS_FS) += ubifs/ | ||
| 103 | obj-$(CONFIG_AFFS_FS) += affs/ | 105 | obj-$(CONFIG_AFFS_FS) += affs/ |
| 104 | obj-$(CONFIG_ROMFS_FS) += romfs/ | 106 | obj-$(CONFIG_ROMFS_FS) += romfs/ |
| 105 | obj-$(CONFIG_QNX4FS_FS) += qnx4/ | 107 | obj-$(CONFIG_QNX4FS_FS) += qnx4/ |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0fa95b198e6e..d48ff5f370f4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
| 17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
| 18 | #include <linux/mman.h> | 18 | #include <linux/mman.h> |
| 19 | #include <linux/a.out.h> | ||
| 20 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
| 21 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
| 22 | #include <linux/binfmts.h> | 21 | #include <linux/binfmts.h> |
| @@ -548,7 +547,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 548 | struct { | 547 | struct { |
| 549 | struct elfhdr elf_ex; | 548 | struct elfhdr elf_ex; |
| 550 | struct elfhdr interp_elf_ex; | 549 | struct elfhdr interp_elf_ex; |
| 551 | struct exec interp_ex; | ||
| 552 | } *loc; | 550 | } *loc; |
| 553 | 551 | ||
| 554 | loc = kmalloc(sizeof(*loc), GFP_KERNEL); | 552 | loc = kmalloc(sizeof(*loc), GFP_KERNEL); |
| @@ -680,7 +678,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 680 | } | 678 | } |
| 681 | 679 | ||
| 682 | /* Get the exec headers */ | 680 | /* Get the exec headers */ |
| 683 | loc->interp_ex = *((struct exec *)bprm->buf); | ||
| 684 | loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); | 681 | loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); |
| 685 | break; | 682 | break; |
| 686 | } | 683 | } |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c new file mode 100644 index 000000000000..63e2ee63058d --- /dev/null +++ b/fs/bio-integrity.c | |||
| @@ -0,0 +1,719 @@ | |||
| 1 | /* | ||
| 2 | * bio-integrity.c - bio data integrity extensions | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007, 2008 Oracle Corporation | ||
| 5 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License version | ||
| 9 | * 2 as published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program; see the file COPYING. If not, write to | ||
| 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
| 19 | * USA. | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/blkdev.h> | ||
| 24 | #include <linux/mempool.h> | ||
| 25 | #include <linux/bio.h> | ||
| 26 | #include <linux/workqueue.h> | ||
| 27 | |||
| 28 | static struct kmem_cache *bio_integrity_slab __read_mostly; | ||
| 29 | static struct workqueue_struct *kintegrityd_wq; | ||
| 30 | |||
| 31 | /** | ||
| 32 | * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio | ||
| 33 | * @bio: bio to attach integrity metadata to | ||
| 34 | * @gfp_mask: Memory allocation mask | ||
| 35 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
| 36 | * @bs: bio_set to allocate from | ||
| 37 | * | ||
| 38 | * Description: This function prepares a bio for attaching integrity | ||
| 39 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
| 40 | * integrity metadata that can be attached. | ||
| 41 | */ | ||
| 42 | struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | ||
| 43 | gfp_t gfp_mask, | ||
| 44 | unsigned int nr_vecs, | ||
| 45 | struct bio_set *bs) | ||
| 46 | { | ||
| 47 | struct bio_integrity_payload *bip; | ||
| 48 | struct bio_vec *iv; | ||
| 49 | unsigned long idx; | ||
| 50 | |||
| 51 | BUG_ON(bio == NULL); | ||
| 52 | |||
| 53 | bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); | ||
| 54 | if (unlikely(bip == NULL)) { | ||
| 55 | printk(KERN_ERR "%s: could not alloc bip\n", __func__); | ||
| 56 | return NULL; | ||
| 57 | } | ||
| 58 | |||
| 59 | memset(bip, 0, sizeof(*bip)); | ||
| 60 | |||
| 61 | iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs); | ||
| 62 | if (unlikely(iv == NULL)) { | ||
| 63 | printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); | ||
| 64 | mempool_free(bip, bs->bio_integrity_pool); | ||
| 65 | return NULL; | ||
| 66 | } | ||
| 67 | |||
| 68 | bip->bip_pool = idx; | ||
| 69 | bip->bip_vec = iv; | ||
| 70 | bip->bip_bio = bio; | ||
| 71 | bio->bi_integrity = bip; | ||
| 72 | |||
| 73 | return bip; | ||
| 74 | } | ||
| 75 | EXPORT_SYMBOL(bio_integrity_alloc_bioset); | ||
| 76 | |||
| 77 | /** | ||
| 78 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio | ||
| 79 | * @bio: bio to attach integrity metadata to | ||
| 80 | * @gfp_mask: Memory allocation mask | ||
| 81 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
| 82 | * | ||
| 83 | * Description: This function prepares a bio for attaching integrity | ||
| 84 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
| 85 | * integrity metadata that can be attached. | ||
| 86 | */ | ||
| 87 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, | ||
| 88 | gfp_t gfp_mask, | ||
| 89 | unsigned int nr_vecs) | ||
| 90 | { | ||
| 91 | return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); | ||
| 92 | } | ||
| 93 | EXPORT_SYMBOL(bio_integrity_alloc); | ||
| 94 | |||
| 95 | /** | ||
| 96 | * bio_integrity_free - Free bio integrity payload | ||
| 97 | * @bio: bio containing bip to be freed | ||
| 98 | * @bs: bio_set this bio was allocated from | ||
| 99 | * | ||
| 100 | * Description: Used to free the integrity portion of a bio. Usually | ||
| 101 | * called from bio_free(). | ||
| 102 | */ | ||
| 103 | void bio_integrity_free(struct bio *bio, struct bio_set *bs) | ||
| 104 | { | ||
| 105 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 106 | |||
| 107 | BUG_ON(bip == NULL); | ||
| 108 | |||
| 109 | /* A cloned bio doesn't own the integrity metadata */ | ||
| 110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | ||
| 111 | kfree(bip->bip_buf); | ||
| 112 | |||
| 113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | ||
| 114 | mempool_free(bip, bs->bio_integrity_pool); | ||
| 115 | |||
| 116 | bio->bi_integrity = NULL; | ||
| 117 | } | ||
| 118 | EXPORT_SYMBOL(bio_integrity_free); | ||
| 119 | |||
| 120 | /** | ||
| 121 | * bio_integrity_add_page - Attach integrity metadata | ||
| 122 | * @bio: bio to update | ||
| 123 | * @page: page containing integrity metadata | ||
| 124 | * @len: number of bytes of integrity metadata in page | ||
| 125 | * @offset: start offset within page | ||
| 126 | * | ||
| 127 | * Description: Attach a page containing integrity metadata to bio. | ||
| 128 | */ | ||
| 129 | int bio_integrity_add_page(struct bio *bio, struct page *page, | ||
| 130 | unsigned int len, unsigned int offset) | ||
| 131 | { | ||
| 132 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 133 | struct bio_vec *iv; | ||
| 134 | |||
| 135 | if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { | ||
| 136 | printk(KERN_ERR "%s: bip_vec full\n", __func__); | ||
| 137 | return 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | iv = bip_vec_idx(bip, bip->bip_vcnt); | ||
| 141 | BUG_ON(iv == NULL); | ||
| 142 | BUG_ON(iv->bv_page != NULL); | ||
| 143 | |||
| 144 | iv->bv_page = page; | ||
| 145 | iv->bv_len = len; | ||
| 146 | iv->bv_offset = offset; | ||
| 147 | bip->bip_vcnt++; | ||
| 148 | |||
| 149 | return len; | ||
| 150 | } | ||
| 151 | EXPORT_SYMBOL(bio_integrity_add_page); | ||
| 152 | |||
| 153 | /** | ||
| 154 | * bio_integrity_enabled - Check whether integrity can be passed | ||
| 155 | * @bio: bio to check | ||
| 156 | * | ||
| 157 | * Description: Determines whether bio_integrity_prep() can be called | ||
| 158 | * on this bio or not. bio data direction and target device must be | ||
| 159 | * set prior to calling. The functions honors the write_generate and | ||
| 160 | * read_verify flags in sysfs. | ||
| 161 | */ | ||
| 162 | int bio_integrity_enabled(struct bio *bio) | ||
| 163 | { | ||
| 164 | /* Already protected? */ | ||
| 165 | if (bio_integrity(bio)) | ||
| 166 | return 0; | ||
| 167 | |||
| 168 | return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio)); | ||
| 169 | } | ||
| 170 | EXPORT_SYMBOL(bio_integrity_enabled); | ||
| 171 | |||
| 172 | /** | ||
| 173 | * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto | ||
| 174 | * @bi: blk_integrity profile for device | ||
| 175 | * @sectors: Number of 512 sectors to convert | ||
| 176 | * | ||
| 177 | * Description: The block layer calculates everything in 512 byte | ||
| 178 | * sectors but integrity metadata is done in terms of the hardware | ||
| 179 | * sector size of the storage device. Convert the block layer sectors | ||
| 180 | * to physical sectors. | ||
| 181 | */ | ||
| 182 | static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, | ||
| 183 | unsigned int sectors) | ||
| 184 | { | ||
| 185 | /* At this point there are only 512b or 4096b DIF/EPP devices */ | ||
| 186 | if (bi->sector_size == 4096) | ||
| 187 | return sectors >>= 3; | ||
| 188 | |||
| 189 | return sectors; | ||
| 190 | } | ||
| 191 | |||
| 192 | /** | ||
| 193 | * bio_integrity_tag_size - Retrieve integrity tag space | ||
| 194 | * @bio: bio to inspect | ||
| 195 | * | ||
| 196 | * Description: Returns the maximum number of tag bytes that can be | ||
| 197 | * attached to this bio. Filesystems can use this to determine how | ||
| 198 | * much metadata to attach to an I/O. | ||
| 199 | */ | ||
| 200 | unsigned int bio_integrity_tag_size(struct bio *bio) | ||
| 201 | { | ||
| 202 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 203 | |||
| 204 | BUG_ON(bio->bi_size == 0); | ||
| 205 | |||
| 206 | return bi->tag_size * (bio->bi_size / bi->sector_size); | ||
| 207 | } | ||
| 208 | EXPORT_SYMBOL(bio_integrity_tag_size); | ||
| 209 | |||
| 210 | int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set) | ||
| 211 | { | ||
| 212 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 213 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 214 | unsigned int nr_sectors; | ||
| 215 | |||
| 216 | BUG_ON(bip->bip_buf == NULL); | ||
| 217 | |||
| 218 | if (bi->tag_size == 0) | ||
| 219 | return -1; | ||
| 220 | |||
| 221 | nr_sectors = bio_integrity_hw_sectors(bi, | ||
| 222 | DIV_ROUND_UP(len, bi->tag_size)); | ||
| 223 | |||
| 224 | if (nr_sectors * bi->tuple_size > bip->bip_size) { | ||
| 225 | printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", | ||
| 226 | __func__, nr_sectors * bi->tuple_size, bip->bip_size); | ||
| 227 | return -1; | ||
| 228 | } | ||
| 229 | |||
| 230 | if (set) | ||
| 231 | bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors); | ||
| 232 | else | ||
| 233 | bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors); | ||
| 234 | |||
| 235 | return 0; | ||
| 236 | } | ||
| 237 | |||
| 238 | /** | ||
| 239 | * bio_integrity_set_tag - Attach a tag buffer to a bio | ||
| 240 | * @bio: bio to attach buffer to | ||
| 241 | * @tag_buf: Pointer to a buffer containing tag data | ||
| 242 | * @len: Length of the included buffer | ||
| 243 | * | ||
| 244 | * Description: Use this function to tag a bio by leveraging the extra | ||
| 245 | * space provided by devices formatted with integrity protection. The | ||
| 246 | * size of the integrity buffer must be <= to the size reported by | ||
| 247 | * bio_integrity_tag_size(). | ||
| 248 | */ | ||
| 249 | int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len) | ||
| 250 | { | ||
| 251 | BUG_ON(bio_data_dir(bio) != WRITE); | ||
| 252 | |||
| 253 | return bio_integrity_tag(bio, tag_buf, len, 1); | ||
| 254 | } | ||
| 255 | EXPORT_SYMBOL(bio_integrity_set_tag); | ||
| 256 | |||
| 257 | /** | ||
| 258 | * bio_integrity_get_tag - Retrieve a tag buffer from a bio | ||
| 259 | * @bio: bio to retrieve buffer from | ||
| 260 | * @tag_buf: Pointer to a buffer for the tag data | ||
| 261 | * @len: Length of the target buffer | ||
| 262 | * | ||
| 263 | * Description: Use this function to retrieve the tag buffer from a | ||
| 264 | * completed I/O. The size of the integrity buffer must be <= to the | ||
| 265 | * size reported by bio_integrity_tag_size(). | ||
| 266 | */ | ||
| 267 | int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) | ||
| 268 | { | ||
| 269 | BUG_ON(bio_data_dir(bio) != READ); | ||
| 270 | |||
| 271 | return bio_integrity_tag(bio, tag_buf, len, 0); | ||
| 272 | } | ||
| 273 | EXPORT_SYMBOL(bio_integrity_get_tag); | ||
| 274 | |||
| 275 | /** | ||
| 276 | * bio_integrity_generate - Generate integrity metadata for a bio | ||
| 277 | * @bio: bio to generate integrity metadata for | ||
| 278 | * | ||
| 279 | * Description: Generates integrity metadata for a bio by calling the | ||
| 280 | * block device's generation callback function. The bio must have a | ||
| 281 | * bip attached with enough room to accommodate the generated | ||
| 282 | * integrity metadata. | ||
| 283 | */ | ||
| 284 | static void bio_integrity_generate(struct bio *bio) | ||
| 285 | { | ||
| 286 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 287 | struct blk_integrity_exchg bix; | ||
| 288 | struct bio_vec *bv; | ||
| 289 | sector_t sector = bio->bi_sector; | ||
| 290 | unsigned int i, sectors, total; | ||
| 291 | void *prot_buf = bio->bi_integrity->bip_buf; | ||
| 292 | |||
| 293 | total = 0; | ||
| 294 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | ||
| 295 | bix.sector_size = bi->sector_size; | ||
| 296 | |||
| 297 | bio_for_each_segment(bv, bio, i) { | ||
| 298 | void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); | ||
| 299 | bix.data_buf = kaddr + bv->bv_offset; | ||
| 300 | bix.data_size = bv->bv_len; | ||
| 301 | bix.prot_buf = prot_buf; | ||
| 302 | bix.sector = sector; | ||
| 303 | |||
| 304 | bi->generate_fn(&bix); | ||
| 305 | |||
| 306 | sectors = bv->bv_len / bi->sector_size; | ||
| 307 | sector += sectors; | ||
| 308 | prot_buf += sectors * bi->tuple_size; | ||
| 309 | total += sectors * bi->tuple_size; | ||
| 310 | BUG_ON(total > bio->bi_integrity->bip_size); | ||
| 311 | |||
| 312 | kunmap_atomic(kaddr, KM_USER0); | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 316 | /** | ||
| 317 | * bio_integrity_prep - Prepare bio for integrity I/O | ||
| 318 | * @bio: bio to prepare | ||
| 319 | * | ||
| 320 | * Description: Allocates a buffer for integrity metadata, maps the | ||
| 321 | * pages and attaches them to a bio. The bio must have data | ||
| 322 | * direction, target device and start sector set priot to calling. In | ||
| 323 | * the WRITE case, integrity metadata will be generated using the | ||
| 324 | * block device's integrity function. In the READ case, the buffer | ||
| 325 | * will be prepared for DMA and a suitable end_io handler set up. | ||
| 326 | */ | ||
| 327 | int bio_integrity_prep(struct bio *bio) | ||
| 328 | { | ||
| 329 | struct bio_integrity_payload *bip; | ||
| 330 | struct blk_integrity *bi; | ||
| 331 | struct request_queue *q; | ||
| 332 | void *buf; | ||
| 333 | unsigned long start, end; | ||
| 334 | unsigned int len, nr_pages; | ||
| 335 | unsigned int bytes, offset, i; | ||
| 336 | unsigned int sectors; | ||
| 337 | |||
| 338 | bi = bdev_get_integrity(bio->bi_bdev); | ||
| 339 | q = bdev_get_queue(bio->bi_bdev); | ||
| 340 | BUG_ON(bi == NULL); | ||
| 341 | BUG_ON(bio_integrity(bio)); | ||
| 342 | |||
| 343 | sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio)); | ||
| 344 | |||
| 345 | /* Allocate kernel buffer for protection data */ | ||
| 346 | len = sectors * blk_integrity_tuple_size(bi); | ||
| 347 | buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); | ||
| 348 | if (unlikely(buf == NULL)) { | ||
| 349 | printk(KERN_ERR "could not allocate integrity buffer\n"); | ||
| 350 | return -EIO; | ||
| 351 | } | ||
| 352 | |||
| 353 | end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 354 | start = ((unsigned long) buf) >> PAGE_SHIFT; | ||
| 355 | nr_pages = end - start; | ||
| 356 | |||
| 357 | /* Allocate bio integrity payload and integrity vectors */ | ||
| 358 | bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages); | ||
| 359 | if (unlikely(bip == NULL)) { | ||
| 360 | printk(KERN_ERR "could not allocate data integrity bioset\n"); | ||
| 361 | kfree(buf); | ||
| 362 | return -EIO; | ||
| 363 | } | ||
| 364 | |||
| 365 | bip->bip_buf = buf; | ||
| 366 | bip->bip_size = len; | ||
| 367 | bip->bip_sector = bio->bi_sector; | ||
| 368 | |||
| 369 | /* Map it */ | ||
| 370 | offset = offset_in_page(buf); | ||
| 371 | for (i = 0 ; i < nr_pages ; i++) { | ||
| 372 | int ret; | ||
| 373 | bytes = PAGE_SIZE - offset; | ||
| 374 | |||
| 375 | if (len <= 0) | ||
| 376 | break; | ||
| 377 | |||
| 378 | if (bytes > len) | ||
| 379 | bytes = len; | ||
| 380 | |||
| 381 | ret = bio_integrity_add_page(bio, virt_to_page(buf), | ||
| 382 | bytes, offset); | ||
| 383 | |||
| 384 | if (ret == 0) | ||
| 385 | return 0; | ||
| 386 | |||
| 387 | if (ret < bytes) | ||
| 388 | break; | ||
| 389 | |||
| 390 | buf += bytes; | ||
| 391 | len -= bytes; | ||
| 392 | offset = 0; | ||
| 393 | } | ||
| 394 | |||
| 395 | /* Install custom I/O completion handler if read verify is enabled */ | ||
| 396 | if (bio_data_dir(bio) == READ) { | ||
| 397 | bip->bip_end_io = bio->bi_end_io; | ||
| 398 | bio->bi_end_io = bio_integrity_endio; | ||
| 399 | } | ||
| 400 | |||
| 401 | /* Auto-generate integrity metadata if this is a write */ | ||
| 402 | if (bio_data_dir(bio) == WRITE) | ||
| 403 | bio_integrity_generate(bio); | ||
| 404 | |||
| 405 | return 0; | ||
| 406 | } | ||
| 407 | EXPORT_SYMBOL(bio_integrity_prep); | ||
| 408 | |||
| 409 | /** | ||
| 410 | * bio_integrity_verify - Verify integrity metadata for a bio | ||
| 411 | * @bio: bio to verify | ||
| 412 | * | ||
| 413 | * Description: This function is called to verify the integrity of a | ||
| 414 | * bio. The data in the bio io_vec is compared to the integrity | ||
| 415 | * metadata returned by the HBA. | ||
| 416 | */ | ||
| 417 | static int bio_integrity_verify(struct bio *bio) | ||
| 418 | { | ||
| 419 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 420 | struct blk_integrity_exchg bix; | ||
| 421 | struct bio_vec *bv; | ||
| 422 | sector_t sector = bio->bi_integrity->bip_sector; | ||
| 423 | unsigned int i, sectors, total, ret; | ||
| 424 | void *prot_buf = bio->bi_integrity->bip_buf; | ||
| 425 | |||
| 426 | ret = total = 0; | ||
| 427 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | ||
| 428 | bix.sector_size = bi->sector_size; | ||
| 429 | |||
| 430 | bio_for_each_segment(bv, bio, i) { | ||
| 431 | void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); | ||
| 432 | bix.data_buf = kaddr + bv->bv_offset; | ||
| 433 | bix.data_size = bv->bv_len; | ||
| 434 | bix.prot_buf = prot_buf; | ||
| 435 | bix.sector = sector; | ||
| 436 | |||
| 437 | ret = bi->verify_fn(&bix); | ||
| 438 | |||
| 439 | if (ret) { | ||
| 440 | kunmap_atomic(kaddr, KM_USER0); | ||
| 441 | break; | ||
| 442 | } | ||
| 443 | |||
| 444 | sectors = bv->bv_len / bi->sector_size; | ||
| 445 | sector += sectors; | ||
| 446 | prot_buf += sectors * bi->tuple_size; | ||
| 447 | total += sectors * bi->tuple_size; | ||
| 448 | BUG_ON(total > bio->bi_integrity->bip_size); | ||
| 449 | |||
| 450 | kunmap_atomic(kaddr, KM_USER0); | ||
| 451 | } | ||
| 452 | |||
| 453 | return ret; | ||
| 454 | } | ||
| 455 | |||
| 456 | /** | ||
| 457 | * bio_integrity_verify_fn - Integrity I/O completion worker | ||
| 458 | * @work: Work struct stored in bio to be verified | ||
| 459 | * | ||
| 460 | * Description: This workqueue function is called to complete a READ | ||
| 461 | * request. The function verifies the transferred integrity metadata | ||
| 462 | * and then calls the original bio end_io function. | ||
| 463 | */ | ||
| 464 | static void bio_integrity_verify_fn(struct work_struct *work) | ||
| 465 | { | ||
| 466 | struct bio_integrity_payload *bip = | ||
| 467 | container_of(work, struct bio_integrity_payload, bip_work); | ||
| 468 | struct bio *bio = bip->bip_bio; | ||
| 469 | int error = bip->bip_error; | ||
| 470 | |||
| 471 | if (bio_integrity_verify(bio)) { | ||
| 472 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 473 | error = -EIO; | ||
| 474 | } | ||
| 475 | |||
| 476 | /* Restore original bio completion handler */ | ||
| 477 | bio->bi_end_io = bip->bip_end_io; | ||
| 478 | |||
| 479 | if (bio->bi_end_io) | ||
| 480 | bio->bi_end_io(bio, error); | ||
| 481 | } | ||
| 482 | |||
| 483 | /** | ||
| 484 | * bio_integrity_endio - Integrity I/O completion function | ||
| 485 | * @bio: Protected bio | ||
| 486 | * @error: Pointer to errno | ||
| 487 | * | ||
| 488 | * Description: Completion for integrity I/O | ||
| 489 | * | ||
| 490 | * Normally I/O completion is done in interrupt context. However, | ||
| 491 | * verifying I/O integrity is a time-consuming task which must be run | ||
| 492 | * in process context. This function postpones completion | ||
| 493 | * accordingly. | ||
| 494 | */ | ||
| 495 | void bio_integrity_endio(struct bio *bio, int error) | ||
| 496 | { | ||
| 497 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 498 | |||
| 499 | BUG_ON(bip->bip_bio != bio); | ||
| 500 | |||
| 501 | bip->bip_error = error; | ||
| 502 | INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); | ||
| 503 | queue_work(kintegrityd_wq, &bip->bip_work); | ||
| 504 | } | ||
| 505 | EXPORT_SYMBOL(bio_integrity_endio); | ||
| 506 | |||
| 507 | /** | ||
| 508 | * bio_integrity_mark_head - Advance bip_vec skip bytes | ||
| 509 | * @bip: Integrity vector to advance | ||
| 510 | * @skip: Number of bytes to advance it | ||
| 511 | */ | ||
| 512 | void bio_integrity_mark_head(struct bio_integrity_payload *bip, | ||
| 513 | unsigned int skip) | ||
| 514 | { | ||
| 515 | struct bio_vec *iv; | ||
| 516 | unsigned int i; | ||
| 517 | |||
| 518 | bip_for_each_vec(iv, bip, i) { | ||
| 519 | if (skip == 0) { | ||
| 520 | bip->bip_idx = i; | ||
| 521 | return; | ||
| 522 | } else if (skip >= iv->bv_len) { | ||
| 523 | skip -= iv->bv_len; | ||
| 524 | } else { /* skip < iv->bv_len) */ | ||
| 525 | iv->bv_offset += skip; | ||
| 526 | iv->bv_len -= skip; | ||
| 527 | bip->bip_idx = i; | ||
| 528 | return; | ||
| 529 | } | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 533 | /** | ||
| 534 | * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long | ||
| 535 | * @bip: Integrity vector to truncate | ||
| 536 | * @len: New length of integrity vector | ||
| 537 | */ | ||
| 538 | void bio_integrity_mark_tail(struct bio_integrity_payload *bip, | ||
| 539 | unsigned int len) | ||
| 540 | { | ||
| 541 | struct bio_vec *iv; | ||
| 542 | unsigned int i; | ||
| 543 | |||
| 544 | bip_for_each_vec(iv, bip, i) { | ||
| 545 | if (len == 0) { | ||
| 546 | bip->bip_vcnt = i; | ||
| 547 | return; | ||
| 548 | } else if (len >= iv->bv_len) { | ||
| 549 | len -= iv->bv_len; | ||
| 550 | } else { /* len < iv->bv_len) */ | ||
| 551 | iv->bv_len = len; | ||
| 552 | len = 0; | ||
| 553 | } | ||
| 554 | } | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * bio_integrity_advance - Advance integrity vector | ||
| 559 | * @bio: bio whose integrity vector to update | ||
| 560 | * @bytes_done: number of data bytes that have been completed | ||
| 561 | * | ||
| 562 | * Description: This function calculates how many integrity bytes the | ||
| 563 | * number of completed data bytes correspond to and advances the | ||
| 564 | * integrity vector accordingly. | ||
| 565 | */ | ||
| 566 | void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) | ||
| 567 | { | ||
| 568 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 569 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 570 | unsigned int nr_sectors; | ||
| 571 | |||
| 572 | BUG_ON(bip == NULL); | ||
| 573 | BUG_ON(bi == NULL); | ||
| 574 | |||
| 575 | nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9); | ||
| 576 | bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size); | ||
| 577 | } | ||
| 578 | EXPORT_SYMBOL(bio_integrity_advance); | ||
| 579 | |||
| 580 | /** | ||
| 581 | * bio_integrity_trim - Trim integrity vector | ||
| 582 | * @bio: bio whose integrity vector to update | ||
| 583 | * @offset: offset to first data sector | ||
| 584 | * @sectors: number of data sectors | ||
| 585 | * | ||
| 586 | * Description: Used to trim the integrity vector in a cloned bio. | ||
| 587 | * The ivec will be advanced corresponding to 'offset' data sectors | ||
| 588 | * and the length will be truncated corresponding to 'len' data | ||
| 589 | * sectors. | ||
| 590 | */ | ||
| 591 | void bio_integrity_trim(struct bio *bio, unsigned int offset, | ||
| 592 | unsigned int sectors) | ||
| 593 | { | ||
| 594 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 595 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | ||
| 596 | unsigned int nr_sectors; | ||
| 597 | |||
| 598 | BUG_ON(bip == NULL); | ||
| 599 | BUG_ON(bi == NULL); | ||
| 600 | BUG_ON(!bio_flagged(bio, BIO_CLONED)); | ||
| 601 | |||
| 602 | nr_sectors = bio_integrity_hw_sectors(bi, sectors); | ||
| 603 | bip->bip_sector = bip->bip_sector + offset; | ||
| 604 | bio_integrity_mark_head(bip, offset * bi->tuple_size); | ||
| 605 | bio_integrity_mark_tail(bip, sectors * bi->tuple_size); | ||
| 606 | } | ||
| 607 | EXPORT_SYMBOL(bio_integrity_trim); | ||
| 608 | |||
| 609 | /** | ||
| 610 | * bio_integrity_split - Split integrity metadata | ||
| 611 | * @bio: Protected bio | ||
| 612 | * @bp: Resulting bio_pair | ||
| 613 | * @sectors: Offset | ||
| 614 | * | ||
| 615 | * Description: Splits an integrity page into a bio_pair. | ||
| 616 | */ | ||
| 617 | void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) | ||
| 618 | { | ||
| 619 | struct blk_integrity *bi; | ||
| 620 | struct bio_integrity_payload *bip = bio->bi_integrity; | ||
| 621 | unsigned int nr_sectors; | ||
| 622 | |||
| 623 | if (bio_integrity(bio) == 0) | ||
| 624 | return; | ||
| 625 | |||
| 626 | bi = bdev_get_integrity(bio->bi_bdev); | ||
| 627 | BUG_ON(bi == NULL); | ||
| 628 | BUG_ON(bip->bip_vcnt != 1); | ||
| 629 | |||
| 630 | nr_sectors = bio_integrity_hw_sectors(bi, sectors); | ||
| 631 | |||
| 632 | bp->bio1.bi_integrity = &bp->bip1; | ||
| 633 | bp->bio2.bi_integrity = &bp->bip2; | ||
| 634 | |||
| 635 | bp->iv1 = bip->bip_vec[0]; | ||
| 636 | bp->iv2 = bip->bip_vec[0]; | ||
| 637 | |||
| 638 | bp->bip1.bip_vec = &bp->iv1; | ||
| 639 | bp->bip2.bip_vec = &bp->iv2; | ||
| 640 | |||
| 641 | bp->iv1.bv_len = sectors * bi->tuple_size; | ||
| 642 | bp->iv2.bv_offset += sectors * bi->tuple_size; | ||
| 643 | bp->iv2.bv_len -= sectors * bi->tuple_size; | ||
| 644 | |||
| 645 | bp->bip1.bip_sector = bio->bi_integrity->bip_sector; | ||
| 646 | bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors; | ||
| 647 | |||
| 648 | bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1; | ||
| 649 | bp->bip1.bip_idx = bp->bip2.bip_idx = 0; | ||
| 650 | } | ||
| 651 | EXPORT_SYMBOL(bio_integrity_split); | ||
| 652 | |||
| 653 | /** | ||
| 654 | * bio_integrity_clone - Callback for cloning bios with integrity metadata | ||
| 655 | * @bio: New bio | ||
| 656 | * @bio_src: Original bio | ||
| 657 | * @bs: bio_set to allocate bip from | ||
| 658 | * | ||
| 659 | * Description: Called to allocate a bip when cloning a bio | ||
| 660 | */ | ||
| 661 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | ||
| 662 | struct bio_set *bs) | ||
| 663 | { | ||
| 664 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; | ||
| 665 | struct bio_integrity_payload *bip; | ||
| 666 | |||
| 667 | BUG_ON(bip_src == NULL); | ||
| 668 | |||
| 669 | bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs); | ||
| 670 | |||
| 671 | if (bip == NULL) | ||
| 672 | return -EIO; | ||
| 673 | |||
| 674 | memcpy(bip->bip_vec, bip_src->bip_vec, | ||
| 675 | bip_src->bip_vcnt * sizeof(struct bio_vec)); | ||
| 676 | |||
| 677 | bip->bip_sector = bip_src->bip_sector; | ||
| 678 | bip->bip_vcnt = bip_src->bip_vcnt; | ||
| 679 | bip->bip_idx = bip_src->bip_idx; | ||
| 680 | |||
| 681 | return 0; | ||
| 682 | } | ||
| 683 | EXPORT_SYMBOL(bio_integrity_clone); | ||
| 684 | |||
| 685 | int bioset_integrity_create(struct bio_set *bs, int pool_size) | ||
| 686 | { | ||
| 687 | bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, | ||
| 688 | bio_integrity_slab); | ||
| 689 | if (!bs->bio_integrity_pool) | ||
| 690 | return -1; | ||
| 691 | |||
| 692 | return 0; | ||
| 693 | } | ||
| 694 | EXPORT_SYMBOL(bioset_integrity_create); | ||
| 695 | |||
| 696 | void bioset_integrity_free(struct bio_set *bs) | ||
| 697 | { | ||
| 698 | if (bs->bio_integrity_pool) | ||
| 699 | mempool_destroy(bs->bio_integrity_pool); | ||
| 700 | } | ||
| 701 | EXPORT_SYMBOL(bioset_integrity_free); | ||
| 702 | |||
| 703 | void __init bio_integrity_init_slab(void) | ||
| 704 | { | ||
| 705 | bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, | ||
| 706 | SLAB_HWCACHE_ALIGN|SLAB_PANIC); | ||
| 707 | } | ||
| 708 | EXPORT_SYMBOL(bio_integrity_init_slab); | ||
| 709 | |||
| 710 | static int __init integrity_init(void) | ||
| 711 | { | ||
| 712 | kintegrityd_wq = create_workqueue("kintegrityd"); | ||
| 713 | |||
| 714 | if (!kintegrityd_wq) | ||
| 715 | panic("Failed to create kintegrityd\n"); | ||
| 716 | |||
| 717 | return 0; | ||
| 718 | } | ||
| 719 | subsys_initcall(integrity_init); | ||
| @@ -28,25 +28,10 @@ | |||
| 28 | #include <linux/blktrace_api.h> | 28 | #include <linux/blktrace_api.h> |
| 29 | #include <scsi/sg.h> /* for struct sg_iovec */ | 29 | #include <scsi/sg.h> /* for struct sg_iovec */ |
| 30 | 30 | ||
| 31 | #define BIO_POOL_SIZE 2 | ||
| 32 | |||
| 33 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
| 34 | 32 | ||
| 35 | #define BIOVEC_NR_POOLS 6 | ||
| 36 | |||
| 37 | /* | ||
| 38 | * a small number of entries is fine, not going to be performance critical. | ||
| 39 | * basically we just need to survive | ||
| 40 | */ | ||
| 41 | #define BIO_SPLIT_ENTRIES 2 | ||
| 42 | mempool_t *bio_split_pool __read_mostly; | 33 | mempool_t *bio_split_pool __read_mostly; |
| 43 | 34 | ||
| 44 | struct biovec_slab { | ||
| 45 | int nr_vecs; | ||
| 46 | char *name; | ||
| 47 | struct kmem_cache *slab; | ||
| 48 | }; | ||
| 49 | |||
| 50 | /* | 35 | /* |
| 51 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
| 52 | * break badly! cannot be bigger than what you can fit into an | 37 | * break badly! cannot be bigger than what you can fit into an |
| @@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { | |||
| 60 | #undef BV | 45 | #undef BV |
| 61 | 46 | ||
| 62 | /* | 47 | /* |
| 63 | * bio_set is used to allow other portions of the IO system to | ||
| 64 | * allocate their own private memory pools for bio and iovec structures. | ||
| 65 | * These memory pools in turn all allocate from the bio_slab | ||
| 66 | * and the bvec_slabs[]. | ||
| 67 | */ | ||
| 68 | struct bio_set { | ||
| 69 | mempool_t *bio_pool; | ||
| 70 | mempool_t *bvec_pools[BIOVEC_NR_POOLS]; | ||
| 71 | }; | ||
| 72 | |||
| 73 | /* | ||
| 74 | * fs_bio_set is the bio_set containing bio and iovec memory pools used by | 48 | * fs_bio_set is the bio_set containing bio and iovec memory pools used by |
| 75 | * IO code that does not need private memory pools. | 49 | * IO code that does not need private memory pools. |
| 76 | */ | 50 | */ |
| 77 | static struct bio_set *fs_bio_set; | 51 | struct bio_set *fs_bio_set; |
| 52 | |||
| 53 | unsigned int bvec_nr_vecs(unsigned short idx) | ||
| 54 | { | ||
| 55 | return bvec_slabs[idx].nr_vecs; | ||
| 56 | } | ||
| 78 | 57 | ||
| 79 | static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) | 58 | struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) |
| 80 | { | 59 | { |
| 81 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
| 82 | 61 | ||
| @@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set) | |||
| 117 | mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); | 96 | mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); |
| 118 | } | 97 | } |
| 119 | 98 | ||
| 99 | if (bio_integrity(bio)) | ||
| 100 | bio_integrity_free(bio, bio_set); | ||
| 101 | |||
| 120 | mempool_free(bio, bio_set->bio_pool); | 102 | mempool_free(bio, bio_set->bio_pool); |
| 121 | } | 103 | } |
| 122 | 104 | ||
| @@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | |||
| 275 | { | 257 | { |
| 276 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 258 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); |
| 277 | 259 | ||
| 278 | if (b) { | 260 | if (!b) |
| 279 | b->bi_destructor = bio_fs_destructor; | 261 | return NULL; |
| 280 | __bio_clone(b, bio); | 262 | |
| 263 | b->bi_destructor = bio_fs_destructor; | ||
| 264 | __bio_clone(b, bio); | ||
| 265 | |||
| 266 | if (bio_integrity(bio)) { | ||
| 267 | int ret; | ||
| 268 | |||
| 269 | ret = bio_integrity_clone(b, bio, fs_bio_set); | ||
| 270 | |||
| 271 | if (ret < 0) | ||
| 272 | return NULL; | ||
| 281 | } | 273 | } |
| 282 | 274 | ||
| 283 | return b; | 275 | return b; |
| @@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
| 333 | if (page == prev->bv_page && | 325 | if (page == prev->bv_page && |
| 334 | offset == prev->bv_offset + prev->bv_len) { | 326 | offset == prev->bv_offset + prev->bv_len) { |
| 335 | prev->bv_len += len; | 327 | prev->bv_len += len; |
| 336 | if (q->merge_bvec_fn && | 328 | |
| 337 | q->merge_bvec_fn(q, bio, prev) < len) { | 329 | if (q->merge_bvec_fn) { |
| 338 | prev->bv_len -= len; | 330 | struct bvec_merge_data bvm = { |
| 339 | return 0; | 331 | .bi_bdev = bio->bi_bdev, |
| 332 | .bi_sector = bio->bi_sector, | ||
| 333 | .bi_size = bio->bi_size, | ||
| 334 | .bi_rw = bio->bi_rw, | ||
| 335 | }; | ||
| 336 | |||
| 337 | if (q->merge_bvec_fn(q, &bvm, prev) < len) { | ||
| 338 | prev->bv_len -= len; | ||
| 339 | return 0; | ||
| 340 | } | ||
| 340 | } | 341 | } |
| 341 | 342 | ||
| 342 | goto done; | 343 | goto done; |
| @@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
| 377 | * queue to get further control | 378 | * queue to get further control |
| 378 | */ | 379 | */ |
| 379 | if (q->merge_bvec_fn) { | 380 | if (q->merge_bvec_fn) { |
| 381 | struct bvec_merge_data bvm = { | ||
| 382 | .bi_bdev = bio->bi_bdev, | ||
| 383 | .bi_sector = bio->bi_sector, | ||
| 384 | .bi_size = bio->bi_size, | ||
| 385 | .bi_rw = bio->bi_rw, | ||
| 386 | }; | ||
| 387 | |||
| 380 | /* | 388 | /* |
| 381 | * merge_bvec_fn() returns number of bytes it can accept | 389 | * merge_bvec_fn() returns number of bytes it can accept |
| 382 | * at this offset | 390 | * at this offset |
| 383 | */ | 391 | */ |
| 384 | if (q->merge_bvec_fn(q, bio, bvec) < len) { | 392 | if (q->merge_bvec_fn(q, &bvm, bvec) < len) { |
| 385 | bvec->bv_page = NULL; | 393 | bvec->bv_page = NULL; |
| 386 | bvec->bv_len = 0; | 394 | bvec->bv_len = 0; |
| 387 | bvec->bv_offset = 0; | 395 | bvec->bv_offset = 0; |
| @@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
| 1249 | bp->bio1.bi_private = bi; | 1257 | bp->bio1.bi_private = bi; |
| 1250 | bp->bio2.bi_private = pool; | 1258 | bp->bio2.bi_private = pool; |
| 1251 | 1259 | ||
| 1260 | if (bio_integrity(bi)) | ||
| 1261 | bio_integrity_split(bi, bp, first_sectors); | ||
| 1262 | |||
| 1252 | return bp; | 1263 | return bp; |
| 1253 | } | 1264 | } |
| 1254 | 1265 | ||
| @@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs) | |||
| 1290 | if (bs->bio_pool) | 1301 | if (bs->bio_pool) |
| 1291 | mempool_destroy(bs->bio_pool); | 1302 | mempool_destroy(bs->bio_pool); |
| 1292 | 1303 | ||
| 1304 | bioset_integrity_free(bs); | ||
| 1293 | biovec_free_pools(bs); | 1305 | biovec_free_pools(bs); |
| 1294 | 1306 | ||
| 1295 | kfree(bs); | 1307 | kfree(bs); |
| @@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) | |||
| 1306 | if (!bs->bio_pool) | 1318 | if (!bs->bio_pool) |
| 1307 | goto bad; | 1319 | goto bad; |
| 1308 | 1320 | ||
| 1321 | if (bioset_integrity_create(bs, bio_pool_size)) | ||
| 1322 | goto bad; | ||
| 1323 | |||
| 1309 | if (!biovec_create_pools(bs, bvec_pool_size)) | 1324 | if (!biovec_create_pools(bs, bvec_pool_size)) |
| 1310 | return bs; | 1325 | return bs; |
| 1311 | 1326 | ||
| @@ -1332,6 +1347,7 @@ static int __init init_bio(void) | |||
| 1332 | { | 1347 | { |
| 1333 | bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 1348 | bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
| 1334 | 1349 | ||
| 1350 | bio_integrity_init_slab(); | ||
| 1335 | biovec_init_slabs(); | 1351 | biovec_init_slabs(); |
| 1336 | 1352 | ||
| 1337 | fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); | 1353 | fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 470c10ceb0fb..10d8a0aa871a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -931,8 +931,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
| 931 | struct gendisk *disk; | 931 | struct gendisk *disk; |
| 932 | int ret; | 932 | int ret; |
| 933 | int part; | 933 | int part; |
| 934 | int perm = 0; | ||
| 934 | 935 | ||
| 935 | ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); | 936 | if (file->f_mode & FMODE_READ) |
| 937 | perm |= MAY_READ; | ||
| 938 | if (file->f_mode & FMODE_WRITE) | ||
| 939 | perm |= MAY_WRITE; | ||
| 940 | /* | ||
| 941 | * hooks: /n/, see "layering violations". | ||
| 942 | */ | ||
| 943 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); | ||
| 936 | if (ret != 0) | 944 | if (ret != 0) |
| 937 | return ret; | 945 | return ret; |
| 938 | 946 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index a073f3f4f013..d48caee12e2a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -821,7 +821,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
| 821 | * contents - it is a noop if I/O is still in | 821 | * contents - it is a noop if I/O is still in |
| 822 | * flight on potentially older contents. | 822 | * flight on potentially older contents. |
| 823 | */ | 823 | */ |
| 824 | ll_rw_block(SWRITE, 1, &bh); | 824 | ll_rw_block(SWRITE_SYNC, 1, &bh); |
| 825 | brelse(bh); | 825 | brelse(bh); |
| 826 | spin_lock(lock); | 826 | spin_lock(lock); |
| 827 | } | 827 | } |
| @@ -1464,7 +1464,7 @@ static void invalidate_bh_lru(void *arg) | |||
| 1464 | 1464 | ||
| 1465 | void invalidate_bh_lrus(void) | 1465 | void invalidate_bh_lrus(void) |
| 1466 | { | 1466 | { |
| 1467 | on_each_cpu(invalidate_bh_lru, NULL, 1, 1); | 1467 | on_each_cpu(invalidate_bh_lru, NULL, 1); |
| 1468 | } | 1468 | } |
| 1469 | EXPORT_SYMBOL_GPL(invalidate_bh_lrus); | 1469 | EXPORT_SYMBOL_GPL(invalidate_bh_lrus); |
| 1470 | 1470 | ||
| @@ -1691,11 +1691,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
| 1691 | */ | 1691 | */ |
| 1692 | clear_buffer_dirty(bh); | 1692 | clear_buffer_dirty(bh); |
| 1693 | set_buffer_uptodate(bh); | 1693 | set_buffer_uptodate(bh); |
| 1694 | } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { | 1694 | } else if ((!buffer_mapped(bh) || buffer_delay(bh)) && |
| 1695 | buffer_dirty(bh)) { | ||
| 1695 | WARN_ON(bh->b_size != blocksize); | 1696 | WARN_ON(bh->b_size != blocksize); |
| 1696 | err = get_block(inode, block, bh, 1); | 1697 | err = get_block(inode, block, bh, 1); |
| 1697 | if (err) | 1698 | if (err) |
| 1698 | goto recover; | 1699 | goto recover; |
| 1700 | clear_buffer_delay(bh); | ||
| 1699 | if (buffer_new(bh)) { | 1701 | if (buffer_new(bh)) { |
| 1700 | /* blockdev mappings never come here */ | 1702 | /* blockdev mappings never come here */ |
| 1701 | clear_buffer_new(bh); | 1703 | clear_buffer_new(bh); |
| @@ -1774,7 +1776,8 @@ recover: | |||
| 1774 | bh = head; | 1776 | bh = head; |
| 1775 | /* Recovery: lock and submit the mapped buffers */ | 1777 | /* Recovery: lock and submit the mapped buffers */ |
| 1776 | do { | 1778 | do { |
| 1777 | if (buffer_mapped(bh) && buffer_dirty(bh)) { | 1779 | if (buffer_mapped(bh) && buffer_dirty(bh) && |
| 1780 | !buffer_delay(bh)) { | ||
| 1778 | lock_buffer(bh); | 1781 | lock_buffer(bh); |
| 1779 | mark_buffer_async_write(bh); | 1782 | mark_buffer_async_write(bh); |
| 1780 | } else { | 1783 | } else { |
| @@ -2061,6 +2064,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
| 2061 | struct page *page, void *fsdata) | 2064 | struct page *page, void *fsdata) |
| 2062 | { | 2065 | { |
| 2063 | struct inode *inode = mapping->host; | 2066 | struct inode *inode = mapping->host; |
| 2067 | int i_size_changed = 0; | ||
| 2064 | 2068 | ||
| 2065 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 2069 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
| 2066 | 2070 | ||
| @@ -2073,12 +2077,21 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
| 2073 | */ | 2077 | */ |
| 2074 | if (pos+copied > inode->i_size) { | 2078 | if (pos+copied > inode->i_size) { |
| 2075 | i_size_write(inode, pos+copied); | 2079 | i_size_write(inode, pos+copied); |
| 2076 | mark_inode_dirty(inode); | 2080 | i_size_changed = 1; |
| 2077 | } | 2081 | } |
| 2078 | 2082 | ||
| 2079 | unlock_page(page); | 2083 | unlock_page(page); |
| 2080 | page_cache_release(page); | 2084 | page_cache_release(page); |
| 2081 | 2085 | ||
| 2086 | /* | ||
| 2087 | * Don't mark the inode dirty under page lock. First, it unnecessarily | ||
| 2088 | * makes the holding time of page lock longer. Second, it forces lock | ||
| 2089 | * ordering of page lock and transaction start for journaling | ||
| 2090 | * filesystems. | ||
| 2091 | */ | ||
| 2092 | if (i_size_changed) | ||
| 2093 | mark_inode_dirty(inode); | ||
| 2094 | |||
| 2082 | return copied; | 2095 | return copied; |
| 2083 | } | 2096 | } |
| 2084 | EXPORT_SYMBOL(generic_write_end); | 2097 | EXPORT_SYMBOL(generic_write_end); |
| @@ -2940,16 +2953,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
| 2940 | for (i = 0; i < nr; i++) { | 2953 | for (i = 0; i < nr; i++) { |
| 2941 | struct buffer_head *bh = bhs[i]; | 2954 | struct buffer_head *bh = bhs[i]; |
| 2942 | 2955 | ||
| 2943 | if (rw == SWRITE) | 2956 | if (rw == SWRITE || rw == SWRITE_SYNC) |
| 2944 | lock_buffer(bh); | 2957 | lock_buffer(bh); |
| 2945 | else if (test_set_buffer_locked(bh)) | 2958 | else if (test_set_buffer_locked(bh)) |
| 2946 | continue; | 2959 | continue; |
| 2947 | 2960 | ||
| 2948 | if (rw == WRITE || rw == SWRITE) { | 2961 | if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { |
| 2949 | if (test_clear_buffer_dirty(bh)) { | 2962 | if (test_clear_buffer_dirty(bh)) { |
| 2950 | bh->b_end_io = end_buffer_write_sync; | 2963 | bh->b_end_io = end_buffer_write_sync; |
| 2951 | get_bh(bh); | 2964 | get_bh(bh); |
| 2952 | submit_bh(WRITE, bh); | 2965 | if (rw == SWRITE_SYNC) |
| 2966 | submit_bh(WRITE_SYNC, bh); | ||
| 2967 | else | ||
| 2968 | submit_bh(WRITE, bh); | ||
| 2953 | continue; | 2969 | continue; |
| 2954 | } | 2970 | } |
| 2955 | } else { | 2971 | } else { |
| @@ -2978,7 +2994,7 @@ int sync_dirty_buffer(struct buffer_head *bh) | |||
| 2978 | if (test_clear_buffer_dirty(bh)) { | 2994 | if (test_clear_buffer_dirty(bh)) { |
| 2979 | get_bh(bh); | 2995 | get_bh(bh); |
| 2980 | bh->b_end_io = end_buffer_write_sync; | 2996 | bh->b_end_io = end_buffer_write_sync; |
| 2981 | ret = submit_bh(WRITE, bh); | 2997 | ret = submit_bh(WRITE_SYNC, bh); |
| 2982 | wait_on_buffer(bh); | 2998 | wait_on_buffer(bh); |
| 2983 | if (buffer_eopnotsupp(bh)) { | 2999 | if (buffer_eopnotsupp(bh)) { |
| 2984 | clear_buffer_eopnotsupp(bh); | 3000 | clear_buffer_eopnotsupp(bh); |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 68e510b88457..3cb7cda3d780 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
| @@ -373,6 +373,8 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
| 373 | return -ENXIO; | 373 | return -ENXIO; |
| 374 | new = container_of(kobj, struct cdev, kobj); | 374 | new = container_of(kobj, struct cdev, kobj); |
| 375 | spin_lock(&cdev_lock); | 375 | spin_lock(&cdev_lock); |
| 376 | /* Check i_cdev again in case somebody beat us to it while | ||
| 377 | we dropped the lock. */ | ||
| 376 | p = inode->i_cdev; | 378 | p = inode->i_cdev; |
| 377 | if (!p) { | 379 | if (!p) { |
| 378 | inode->i_cdev = p = new; | 380 | inode->i_cdev = p = new; |
| @@ -392,11 +394,8 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
| 392 | cdev_put(p); | 394 | cdev_put(p); |
| 393 | return -ENXIO; | 395 | return -ENXIO; |
| 394 | } | 396 | } |
| 395 | if (filp->f_op->open) { | 397 | if (filp->f_op->open) |
| 396 | lock_kernel(); | ||
| 397 | ret = filp->f_op->open(inode,filp); | 398 | ret = filp->f_op->open(inode,filp); |
| 398 | unlock_kernel(); | ||
| 399 | } | ||
| 400 | if (ret) | 399 | if (ret) |
| 401 | cdev_put(p); | 400 | cdev_put(p); |
| 402 | return ret; | 401 | return ret; |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 28e3d5c5fcac..1f3465201fdf 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
| @@ -2,6 +2,11 @@ Version 1.53 | |||
| 2 | ------------ | 2 | ------------ |
| 3 | DFS support added (Microsoft Distributed File System client support needed | 3 | DFS support added (Microsoft Distributed File System client support needed |
| 4 | for referrals which enable a hierarchical name space among servers). | 4 | for referrals which enable a hierarchical name space among servers). |
| 5 | Disable temporary caching of mode bits to servers which do not support | ||
| 6 | storing of mode (e.g. Windows servers, when client mounts without cifsacl | ||
| 7 | mount option) and add new "dynperm" mount option to enable temporary caching | ||
| 8 | of mode (enable old behavior). Fix hang on mount caused when server crashes | ||
| 9 | tcp session during negotiate protocol. | ||
| 5 | 10 | ||
| 6 | Version 1.52 | 11 | Version 1.52 |
| 7 | ------------ | 12 | ------------ |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 34902cff5400..0e9fc2ba90ee 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
| @@ -34,11 +34,11 @@ | |||
| 34 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { | 34 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { |
| 35 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, | 35 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, |
| 36 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, | 36 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, |
| 37 | {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, | 37 | {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"}, |
| 38 | {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, | 38 | {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"}, |
| 39 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"}, | 39 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"}, |
| 40 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"}, | 40 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"}, |
| 41 | {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"} } | 41 | {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} } |
| 42 | ; | 42 | ; |
| 43 | 43 | ||
| 44 | 44 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5df93fd6303f..22857c639df5 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -97,9 +97,6 @@ cifs_read_super(struct super_block *sb, void *data, | |||
| 97 | { | 97 | { |
| 98 | struct inode *inode; | 98 | struct inode *inode; |
| 99 | struct cifs_sb_info *cifs_sb; | 99 | struct cifs_sb_info *cifs_sb; |
| 100 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
| 101 | int len; | ||
| 102 | #endif | ||
| 103 | int rc = 0; | 100 | int rc = 0; |
| 104 | 101 | ||
| 105 | /* BB should we make this contingent on mount parm? */ | 102 | /* BB should we make this contingent on mount parm? */ |
| @@ -117,15 +114,17 @@ cifs_read_super(struct super_block *sb, void *data, | |||
| 117 | * complex operation (mount), and in case of fail | 114 | * complex operation (mount), and in case of fail |
| 118 | * just exit instead of doing mount and attempting | 115 | * just exit instead of doing mount and attempting |
| 119 | * undo it if this copy fails?*/ | 116 | * undo it if this copy fails?*/ |
| 120 | len = strlen(data); | 117 | if (data) { |
| 121 | cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); | 118 | int len = strlen(data); |
| 122 | if (cifs_sb->mountdata == NULL) { | 119 | cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); |
| 123 | kfree(sb->s_fs_info); | 120 | if (cifs_sb->mountdata == NULL) { |
| 124 | sb->s_fs_info = NULL; | 121 | kfree(sb->s_fs_info); |
| 125 | return -ENOMEM; | 122 | sb->s_fs_info = NULL; |
| 123 | return -ENOMEM; | ||
| 124 | } | ||
| 125 | strncpy(cifs_sb->mountdata, data, len + 1); | ||
| 126 | cifs_sb->mountdata[len] = '\0'; | ||
| 126 | } | 127 | } |
| 127 | strncpy(cifs_sb->mountdata, data, len + 1); | ||
| 128 | cifs_sb->mountdata[len] = '\0'; | ||
| 129 | #endif | 128 | #endif |
| 130 | 129 | ||
| 131 | rc = cifs_mount(sb, cifs_sb, data, devname); | 130 | rc = cifs_mount(sb, cifs_sb, data, devname); |
| @@ -613,7 +612,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
| 613 | if (retval < 0) | 612 | if (retval < 0) |
| 614 | return (loff_t)retval; | 613 | return (loff_t)retval; |
| 615 | } | 614 | } |
| 616 | return remote_llseek(file, offset, origin); | 615 | return generic_file_llseek_unlocked(file, offset, origin); |
| 617 | } | 616 | } |
| 618 | 617 | ||
| 619 | struct file_system_type cifs_fs_type = { | 618 | struct file_system_type cifs_fs_type = { |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 08914053242b..9cfcf326ead3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
| @@ -333,7 +333,6 @@ struct cifsFileInfo { | |||
| 333 | bool messageMode:1; /* for pipes: message vs byte mode */ | 333 | bool messageMode:1; /* for pipes: message vs byte mode */ |
| 334 | atomic_t wrtPending; /* handle in use - defer close */ | 334 | atomic_t wrtPending; /* handle in use - defer close */ |
| 335 | struct semaphore fh_sem; /* prevents reopen race after dead ses*/ | 335 | struct semaphore fh_sem; /* prevents reopen race after dead ses*/ |
| 336 | char *search_resume_name; /* BB removeme BB */ | ||
| 337 | struct cifs_search_info srch_inf; | 336 | struct cifs_search_info srch_inf; |
| 338 | }; | 337 | }; |
| 339 | 338 | ||
| @@ -626,7 +625,7 @@ GLOBAL_EXTERN atomic_t tcpSesAllocCount; | |||
| 626 | GLOBAL_EXTERN atomic_t tcpSesReconnectCount; | 625 | GLOBAL_EXTERN atomic_t tcpSesReconnectCount; |
| 627 | GLOBAL_EXTERN atomic_t tconInfoReconnectCount; | 626 | GLOBAL_EXTERN atomic_t tconInfoReconnectCount; |
| 628 | 627 | ||
| 629 | /* Various Debug counters to remove someday (BB) */ | 628 | /* Various Debug counters */ |
| 630 | GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */ | 629 | GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */ |
| 631 | #ifdef CONFIG_CIFS_STATS2 | 630 | #ifdef CONFIG_CIFS_STATS2 |
| 632 | GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */ | 631 | GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */ |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 65d58b4e6a61..0f327c224da3 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
| @@ -79,6 +79,19 @@ | |||
| 79 | #define TRANS2_GET_DFS_REFERRAL 0x10 | 79 | #define TRANS2_GET_DFS_REFERRAL 0x10 |
| 80 | #define TRANS2_REPORT_DFS_INCOSISTENCY 0x11 | 80 | #define TRANS2_REPORT_DFS_INCOSISTENCY 0x11 |
| 81 | 81 | ||
| 82 | /* SMB Transact (Named Pipe) subcommand codes */ | ||
| 83 | #define TRANS_SET_NMPIPE_STATE 0x0001 | ||
| 84 | #define TRANS_RAW_READ_NMPIPE 0x0011 | ||
| 85 | #define TRANS_QUERY_NMPIPE_STATE 0x0021 | ||
| 86 | #define TRANS_QUERY_NMPIPE_INFO 0x0022 | ||
| 87 | #define TRANS_PEEK_NMPIPE 0x0023 | ||
| 88 | #define TRANS_TRANSACT_NMPIPE 0x0026 | ||
| 89 | #define TRANS_RAW_WRITE_NMPIPE 0x0031 | ||
| 90 | #define TRANS_READ_NMPIPE 0x0036 | ||
| 91 | #define TRANS_WRITE_NMPIPE 0x0037 | ||
| 92 | #define TRANS_WAIT_NMPIPE 0x0053 | ||
| 93 | #define TRANS_CALL_NMPIPE 0x0054 | ||
| 94 | |||
| 82 | /* NT Transact subcommand codes */ | 95 | /* NT Transact subcommand codes */ |
| 83 | #define NT_TRANSACT_CREATE 0x01 | 96 | #define NT_TRANSACT_CREATE 0x01 |
| 84 | #define NT_TRANSACT_IOCTL 0x02 | 97 | #define NT_TRANSACT_IOCTL 0x02 |
| @@ -328,12 +341,13 @@ | |||
| 328 | #define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ | 341 | #define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ |
| 329 | #define CREATE_NO_EA_KNOWLEDGE 0x00000200 | 342 | #define CREATE_NO_EA_KNOWLEDGE 0x00000200 |
| 330 | #define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete | 343 | #define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete |
| 331 | open for recovery flag - should | 344 | "open for recovery" flag - should |
| 332 | be zero */ | 345 | be zero in any case */ |
| 346 | #define CREATE_OPEN_FOR_RECOVERY 0x00000400 | ||
| 333 | #define CREATE_RANDOM_ACCESS 0x00000800 | 347 | #define CREATE_RANDOM_ACCESS 0x00000800 |
| 334 | #define CREATE_DELETE_ON_CLOSE 0x00001000 | 348 | #define CREATE_DELETE_ON_CLOSE 0x00001000 |
| 335 | #define CREATE_OPEN_BY_ID 0x00002000 | 349 | #define CREATE_OPEN_BY_ID 0x00002000 |
| 336 | #define CREATE_OPEN_BACKUP_INTN 0x00004000 | 350 | #define CREATE_OPEN_BACKUP_INTENT 0x00004000 |
| 337 | #define CREATE_NO_COMPRESSION 0x00008000 | 351 | #define CREATE_NO_COMPRESSION 0x00008000 |
| 338 | #define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */ | 352 | #define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */ |
| 339 | #define OPEN_REPARSE_POINT 0x00200000 | 353 | #define OPEN_REPARSE_POINT 0x00200000 |
| @@ -722,7 +736,6 @@ typedef struct smb_com_tconx_rsp_ext { | |||
| 722 | #define SMB_CSC_CACHE_AUTO_REINT 0x0004 | 736 | #define SMB_CSC_CACHE_AUTO_REINT 0x0004 |
| 723 | #define SMB_CSC_CACHE_VDO 0x0008 | 737 | #define SMB_CSC_CACHE_VDO 0x0008 |
| 724 | #define SMB_CSC_NO_CACHING 0x000C | 738 | #define SMB_CSC_NO_CACHING 0x000C |
| 725 | |||
| 726 | #define SMB_UNIQUE_FILE_NAME 0x0010 | 739 | #define SMB_UNIQUE_FILE_NAME 0x0010 |
| 727 | #define SMB_EXTENDED_SIGNATURES 0x0020 | 740 | #define SMB_EXTENDED_SIGNATURES 0x0020 |
| 728 | 741 | ||
| @@ -806,7 +819,7 @@ typedef struct smb_com_findclose_req { | |||
| 806 | #define ICOUNT_MASK 0x00FF | 819 | #define ICOUNT_MASK 0x00FF |
| 807 | #define PIPE_READ_MODE 0x0100 | 820 | #define PIPE_READ_MODE 0x0100 |
| 808 | #define NAMED_PIPE_TYPE 0x0400 | 821 | #define NAMED_PIPE_TYPE 0x0400 |
| 809 | #define PIPE_END_POINT 0x0800 | 822 | #define PIPE_END_POINT 0x4000 |
| 810 | #define BLOCKING_NAMED_PIPE 0x8000 | 823 | #define BLOCKING_NAMED_PIPE 0x8000 |
| 811 | 824 | ||
| 812 | typedef struct smb_com_open_req { /* also handles create */ | 825 | typedef struct smb_com_open_req { /* also handles create */ |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index fb655b4593c6..4511b708f0f3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
| @@ -1728,7 +1728,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | |||
| 1728 | { | 1728 | { |
| 1729 | int rc = 0; | 1729 | int rc = 0; |
| 1730 | LOCK_REQ *pSMB = NULL; | 1730 | LOCK_REQ *pSMB = NULL; |
| 1731 | LOCK_RSP *pSMBr = NULL; | 1731 | /* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */ |
| 1732 | int bytes_returned; | 1732 | int bytes_returned; |
| 1733 | int timeout = 0; | 1733 | int timeout = 0; |
| 1734 | __u16 count; | 1734 | __u16 count; |
| @@ -1739,8 +1739,6 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | |||
| 1739 | if (rc) | 1739 | if (rc) |
| 1740 | return rc; | 1740 | return rc; |
| 1741 | 1741 | ||
| 1742 | pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */ | ||
| 1743 | |||
| 1744 | if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { | 1742 | if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { |
| 1745 | timeout = CIFS_ASYNC_OP; /* no response expected */ | 1743 | timeout = CIFS_ASYNC_OP; /* no response expected */ |
| 1746 | pSMB->Timeout = 0; | 1744 | pSMB->Timeout = 0; |
| @@ -1774,7 +1772,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | |||
| 1774 | 1772 | ||
| 1775 | if (waitFlag) { | 1773 | if (waitFlag) { |
| 1776 | rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, | 1774 | rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, |
| 1777 | (struct smb_hdr *) pSMBr, &bytes_returned); | 1775 | (struct smb_hdr *) pSMB, &bytes_returned); |
| 1778 | cifs_small_buf_release(pSMB); | 1776 | cifs_small_buf_release(pSMB); |
| 1779 | } else { | 1777 | } else { |
| 1780 | rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, | 1778 | rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 023434f72c15..e8fa46c7cff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
| @@ -653,6 +653,7 @@ multi_t2_fnd: | |||
| 653 | spin_lock(&GlobalMid_Lock); | 653 | spin_lock(&GlobalMid_Lock); |
| 654 | server->tcpStatus = CifsExiting; | 654 | server->tcpStatus = CifsExiting; |
| 655 | spin_unlock(&GlobalMid_Lock); | 655 | spin_unlock(&GlobalMid_Lock); |
| 656 | wake_up_all(&server->response_q); | ||
| 656 | 657 | ||
| 657 | /* don't exit until kthread_stop is called */ | 658 | /* don't exit until kthread_stop is called */ |
| 658 | set_current_state(TASK_UNINTERRUPTIBLE); | 659 | set_current_state(TASK_UNINTERRUPTIBLE); |
| @@ -2120,6 +2121,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
| 2120 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; | 2121 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; |
| 2121 | } | 2122 | } |
| 2122 | 2123 | ||
| 2124 | if ((volume_info.cifs_acl) && (volume_info.dynperm)) | ||
| 2125 | cERROR(1, ("mount option dynperm ignored if cifsacl " | ||
| 2126 | "mount option supported")); | ||
| 2127 | |||
| 2123 | tcon = | 2128 | tcon = |
| 2124 | find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, | 2129 | find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, |
| 2125 | volume_info.username); | 2130 | volume_info.username); |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f0b5b5f3dd2e..fb69c1fa85c9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
| @@ -260,7 +260,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
| 260 | buf, inode->i_sb, xid, | 260 | buf, inode->i_sb, xid, |
| 261 | &fileHandle); | 261 | &fileHandle); |
| 262 | if (newinode) { | 262 | if (newinode) { |
| 263 | newinode->i_mode = mode; | 263 | if (cifs_sb->mnt_cifs_flags & |
| 264 | CIFS_MOUNT_DYNPERM) | ||
| 265 | newinode->i_mode = mode; | ||
| 264 | if ((oplock & CIFS_CREATE_ACTION) && | 266 | if ((oplock & CIFS_CREATE_ACTION) && |
| 265 | (cifs_sb->mnt_cifs_flags & | 267 | (cifs_sb->mnt_cifs_flags & |
| 266 | CIFS_MOUNT_SET_UID)) { | 268 | CIFS_MOUNT_SET_UID)) { |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8636cec2642c..0aac824371a5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -546,7 +546,6 @@ int cifs_close(struct inode *inode, struct file *file) | |||
| 546 | msleep(timeout); | 546 | msleep(timeout); |
| 547 | timeout *= 8; | 547 | timeout *= 8; |
| 548 | } | 548 | } |
| 549 | kfree(pSMBFile->search_resume_name); | ||
| 550 | kfree(file->private_data); | 549 | kfree(file->private_data); |
| 551 | file->private_data = NULL; | 550 | file->private_data = NULL; |
| 552 | } else | 551 | } else |
| @@ -605,12 +604,6 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
| 605 | else | 604 | else |
| 606 | cifs_buf_release(ptmp); | 605 | cifs_buf_release(ptmp); |
| 607 | } | 606 | } |
| 608 | ptmp = pCFileStruct->search_resume_name; | ||
| 609 | if (ptmp) { | ||
| 610 | cFYI(1, ("closedir free resume name")); | ||
| 611 | pCFileStruct->search_resume_name = NULL; | ||
| 612 | kfree(ptmp); | ||
| 613 | } | ||
| 614 | kfree(file->private_data); | 607 | kfree(file->private_data); |
| 615 | file->private_data = NULL; | 608 | file->private_data = NULL; |
| 616 | } | 609 | } |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 129dbfe4dca7..2e904bd111c8 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
| @@ -219,15 +219,15 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
| 219 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, | 219 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, |
| 220 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 220 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
| 221 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 221 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 222 | if (rc) { | 222 | if (rc == -EREMOTE && !is_dfs_referral) { |
| 223 | if (rc == -EREMOTE && !is_dfs_referral) { | 223 | is_dfs_referral = true; |
| 224 | is_dfs_referral = true; | 224 | cFYI(DBG2, ("DFS ref")); |
| 225 | cFYI(DBG2, ("DFS ref")); | 225 | /* for DFS, server does not give us real inode data */ |
| 226 | /* for DFS, server does not give us real inode data */ | 226 | fill_fake_finddataunix(&find_data, sb); |
| 227 | fill_fake_finddataunix(&find_data, sb); | 227 | rc = 0; |
| 228 | rc = 0; | 228 | } else if (rc) |
| 229 | } | 229 | goto cgiiu_exit; |
| 230 | } | 230 | |
| 231 | num_of_bytes = le64_to_cpu(find_data.NumOfBytes); | 231 | num_of_bytes = le64_to_cpu(find_data.NumOfBytes); |
| 232 | end_of_file = le64_to_cpu(find_data.EndOfFile); | 232 | end_of_file = le64_to_cpu(find_data.EndOfFile); |
| 233 | 233 | ||
| @@ -236,7 +236,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
| 236 | *pinode = new_inode(sb); | 236 | *pinode = new_inode(sb); |
| 237 | if (*pinode == NULL) { | 237 | if (*pinode == NULL) { |
| 238 | rc = -ENOMEM; | 238 | rc = -ENOMEM; |
| 239 | goto cgiiu_exit; | 239 | goto cgiiu_exit; |
| 240 | } | 240 | } |
| 241 | /* Is an i_ino of zero legal? */ | 241 | /* Is an i_ino of zero legal? */ |
| 242 | /* note ino incremented to unique num in new_inode */ | 242 | /* note ino incremented to unique num in new_inode */ |
| @@ -418,6 +418,7 @@ int cifs_get_inode_info(struct inode **pinode, | |||
| 418 | char *buf = NULL; | 418 | char *buf = NULL; |
| 419 | bool adjustTZ = false; | 419 | bool adjustTZ = false; |
| 420 | bool is_dfs_referral = false; | 420 | bool is_dfs_referral = false; |
| 421 | umode_t default_mode; | ||
| 421 | 422 | ||
| 422 | pTcon = cifs_sb->tcon; | 423 | pTcon = cifs_sb->tcon; |
| 423 | cFYI(1, ("Getting info on %s", full_path)); | 424 | cFYI(1, ("Getting info on %s", full_path)); |
| @@ -530,47 +531,42 @@ int cifs_get_inode_info(struct inode **pinode, | |||
| 530 | inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; | 531 | inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; |
| 531 | } | 532 | } |
| 532 | 533 | ||
| 533 | /* set default mode. will override for dirs below */ | 534 | /* get default inode mode */ |
| 534 | if (atomic_read(&cifsInfo->inUse) == 0) | 535 | if (attr & ATTR_DIRECTORY) |
| 535 | /* new inode, can safely set these fields */ | 536 | default_mode = cifs_sb->mnt_dir_mode; |
| 536 | inode->i_mode = cifs_sb->mnt_file_mode; | 537 | else |
| 537 | else /* since we set the inode type below we need to mask off | 538 | default_mode = cifs_sb->mnt_file_mode; |
| 538 | to avoid strange results if type changes and both | 539 | |
| 539 | get orred in */ | 540 | /* set permission bits */ |
| 540 | inode->i_mode &= ~S_IFMT; | 541 | if (atomic_read(&cifsInfo->inUse) == 0 || |
| 541 | /* if (attr & ATTR_REPARSE) */ | 542 | (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) |
| 542 | /* We no longer handle these as symlinks because we could not | 543 | inode->i_mode = default_mode; |
| 543 | follow them due to the absolute path with drive letter */ | 544 | else { |
| 544 | if (attr & ATTR_DIRECTORY) { | 545 | /* just reenable write bits if !ATTR_READONLY */ |
| 545 | /* override default perms since we do not do byte range locking | 546 | if ((inode->i_mode & S_IWUGO) == 0 && |
| 546 | on dirs */ | 547 | (attr & ATTR_READONLY) == 0) |
| 547 | inode->i_mode = cifs_sb->mnt_dir_mode; | 548 | inode->i_mode |= (S_IWUGO & default_mode); |
| 548 | inode->i_mode |= S_IFDIR; | 549 | inode->i_mode &= ~S_IFMT; |
| 549 | } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && | 550 | } |
| 550 | (cifsInfo->cifsAttrs & ATTR_SYSTEM) && | 551 | /* clear write bits if ATTR_READONLY is set */ |
| 551 | /* No need to le64 convert size of zero */ | 552 | if (attr & ATTR_READONLY) |
| 552 | (pfindData->EndOfFile == 0)) { | 553 | inode->i_mode &= ~S_IWUGO; |
| 553 | inode->i_mode = cifs_sb->mnt_file_mode; | 554 | |
| 554 | inode->i_mode |= S_IFIFO; | 555 | /* set inode type */ |
| 555 | /* BB Finish for SFU style symlinks and devices */ | 556 | if ((attr & ATTR_SYSTEM) && |
| 556 | } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && | 557 | (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { |
| 557 | (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { | 558 | /* no need to fix endianness on 0 */ |
| 558 | if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), | 559 | if (pfindData->EndOfFile == 0) |
| 559 | full_path, cifs_sb, xid)) | 560 | inode->i_mode |= S_IFIFO; |
| 560 | cFYI(1, ("Unrecognized sfu inode type")); | 561 | else if (decode_sfu_inode(inode, |
| 561 | 562 | le64_to_cpu(pfindData->EndOfFile), | |
| 562 | cFYI(1, ("sfu mode 0%o", inode->i_mode)); | 563 | full_path, cifs_sb, xid)) |
| 564 | cFYI(1, ("unknown SFU file type\n")); | ||
| 563 | } else { | 565 | } else { |
| 564 | inode->i_mode |= S_IFREG; | 566 | if (attr & ATTR_DIRECTORY) |
| 565 | /* treat dos attribute of read-only as read-only mode eg 555 */ | 567 | inode->i_mode |= S_IFDIR; |
| 566 | if (cifsInfo->cifsAttrs & ATTR_READONLY) | 568 | else |
| 567 | inode->i_mode &= ~(S_IWUGO); | 569 | inode->i_mode |= S_IFREG; |
| 568 | else if ((inode->i_mode & S_IWUGO) == 0) | ||
| 569 | /* the ATTR_READONLY flag may have been */ | ||
| 570 | /* changed on server -- set any w bits */ | ||
| 571 | /* allowed by mnt_file_mode */ | ||
| 572 | inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); | ||
| 573 | /* BB add code to validate if device or weird share or device type? */ | ||
| 574 | } | 570 | } |
| 575 | 571 | ||
| 576 | spin_lock(&inode->i_lock); | 572 | spin_lock(&inode->i_lock); |
| @@ -1019,8 +1015,11 @@ mkdir_get_info: | |||
| 1019 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1015 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 1020 | } | 1016 | } |
| 1021 | if (direntry->d_inode) { | 1017 | if (direntry->d_inode) { |
| 1022 | direntry->d_inode->i_mode = mode; | 1018 | if (cifs_sb->mnt_cifs_flags & |
| 1023 | direntry->d_inode->i_mode |= S_IFDIR; | 1019 | CIFS_MOUNT_DYNPERM) |
| 1020 | direntry->d_inode->i_mode = | ||
| 1021 | (mode | S_IFDIR); | ||
| 1022 | |||
| 1024 | if (cifs_sb->mnt_cifs_flags & | 1023 | if (cifs_sb->mnt_cifs_flags & |
| 1025 | CIFS_MOUNT_SET_UID) { | 1024 | CIFS_MOUNT_SET_UID) { |
| 1026 | direntry->d_inode->i_uid = | 1025 | direntry->d_inode->i_uid = |
| @@ -1547,13 +1546,26 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
| 1547 | } else | 1546 | } else |
| 1548 | goto cifs_setattr_exit; | 1547 | goto cifs_setattr_exit; |
| 1549 | } | 1548 | } |
| 1550 | if (attrs->ia_valid & ATTR_UID) { | 1549 | |
| 1551 | cFYI(1, ("UID changed to %d", attrs->ia_uid)); | 1550 | /* |
| 1552 | uid = attrs->ia_uid; | 1551 | * Without unix extensions we can't send ownership changes to the |
| 1553 | } | 1552 | * server, so silently ignore them. This is consistent with how |
| 1554 | if (attrs->ia_valid & ATTR_GID) { | 1553 | * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With |
| 1555 | cFYI(1, ("GID changed to %d", attrs->ia_gid)); | 1554 | * CIFSACL support + proper Windows to Unix idmapping, we may be |
| 1556 | gid = attrs->ia_gid; | 1555 | * able to support this in the future. |
| 1556 | */ | ||
| 1557 | if (!pTcon->unix_ext && | ||
| 1558 | !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { | ||
| 1559 | attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); | ||
| 1560 | } else { | ||
| 1561 | if (attrs->ia_valid & ATTR_UID) { | ||
| 1562 | cFYI(1, ("UID changed to %d", attrs->ia_uid)); | ||
| 1563 | uid = attrs->ia_uid; | ||
| 1564 | } | ||
| 1565 | if (attrs->ia_valid & ATTR_GID) { | ||
| 1566 | cFYI(1, ("GID changed to %d", attrs->ia_gid)); | ||
| 1567 | gid = attrs->ia_gid; | ||
| 1568 | } | ||
| 1557 | } | 1569 | } |
| 1558 | 1570 | ||
| 1559 | time_buf.Attributes = 0; | 1571 | time_buf.Attributes = 0; |
| @@ -1563,7 +1575,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
| 1563 | attrs->ia_valid &= ~ATTR_MODE; | 1575 | attrs->ia_valid &= ~ATTR_MODE; |
| 1564 | 1576 | ||
| 1565 | if (attrs->ia_valid & ATTR_MODE) { | 1577 | if (attrs->ia_valid & ATTR_MODE) { |
| 1566 | cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); | 1578 | cFYI(1, ("Mode changed to 0%o", attrs->ia_mode)); |
| 1567 | mode = attrs->ia_mode; | 1579 | mode = attrs->ia_mode; |
| 1568 | } | 1580 | } |
| 1569 | 1581 | ||
| @@ -1578,18 +1590,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
| 1578 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 1590 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
| 1579 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) | 1591 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) |
| 1580 | rc = mode_to_acl(inode, full_path, mode); | 1592 | rc = mode_to_acl(inode, full_path, mode); |
| 1581 | else if ((mode & S_IWUGO) == 0) { | 1593 | else |
| 1582 | #else | ||
| 1583 | if ((mode & S_IWUGO) == 0) { | ||
| 1584 | #endif | 1594 | #endif |
| 1585 | /* not writeable */ | 1595 | if (((mode & S_IWUGO) == 0) && |
| 1586 | if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { | 1596 | (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { |
| 1587 | set_dosattr = true; | 1597 | set_dosattr = true; |
| 1588 | time_buf.Attributes = | 1598 | time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | |
| 1589 | cpu_to_le32(cifsInode->cifsAttrs | | 1599 | ATTR_READONLY); |
| 1590 | ATTR_READONLY); | 1600 | /* fix up mode if we're not using dynperm */ |
| 1591 | } | 1601 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) |
| 1592 | } else if (cifsInode->cifsAttrs & ATTR_READONLY) { | 1602 | attrs->ia_mode = inode->i_mode & ~S_IWUGO; |
| 1603 | } else if ((mode & S_IWUGO) && | ||
| 1604 | (cifsInode->cifsAttrs & ATTR_READONLY)) { | ||
| 1593 | /* If file is readonly on server, we would | 1605 | /* If file is readonly on server, we would |
| 1594 | not be able to write to it - so if any write | 1606 | not be able to write to it - so if any write |
| 1595 | bit is enabled for user or group or other we | 1607 | bit is enabled for user or group or other we |
| @@ -1600,6 +1612,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
| 1600 | /* Windows ignores set to zero */ | 1612 | /* Windows ignores set to zero */ |
| 1601 | if (time_buf.Attributes == 0) | 1613 | if (time_buf.Attributes == 0) |
| 1602 | time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); | 1614 | time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); |
| 1615 | |||
| 1616 | /* reset local inode permissions to normal */ | ||
| 1617 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { | ||
| 1618 | attrs->ia_mode &= ~(S_IALLUGO); | ||
| 1619 | if (S_ISDIR(inode->i_mode)) | ||
| 1620 | attrs->ia_mode |= | ||
| 1621 | cifs_sb->mnt_dir_mode; | ||
| 1622 | else | ||
| 1623 | attrs->ia_mode |= | ||
| 1624 | cifs_sb->mnt_file_mode; | ||
| 1625 | } | ||
| 1626 | } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { | ||
| 1627 | /* ignore mode change - ATTR_READONLY hasn't changed */ | ||
| 1628 | attrs->ia_valid &= ~ATTR_MODE; | ||
| 1603 | } | 1629 | } |
| 1604 | } | 1630 | } |
| 1605 | 1631 | ||
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1d69b8014e0b..4b17f8fe3157 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
| @@ -519,8 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
| 519 | pnotify = (struct file_notify_information *) | 519 | pnotify = (struct file_notify_information *) |
| 520 | ((char *)&pSMBr->hdr.Protocol + data_offset); | 520 | ((char *)&pSMBr->hdr.Protocol + data_offset); |
| 521 | cFYI(1, ("dnotify on %s Action: 0x%x", | 521 | cFYI(1, ("dnotify on %s Action: 0x%x", |
| 522 | pnotify->FileName, | 522 | pnotify->FileName, pnotify->Action)); |
| 523 | pnotify->Action)); /* BB removeme BB */ | ||
| 524 | /* cifs_dump_mem("Rcvd notify Data: ",buf, | 523 | /* cifs_dump_mem("Rcvd notify Data: ",buf, |
| 525 | sizeof(struct smb_hdr)+60); */ | 524 | sizeof(struct smb_hdr)+60); */ |
| 526 | return true; | 525 | return true; |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 713c25110197..83f306954883 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
| @@ -132,6 +132,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, | |||
| 132 | __u32 attr; | 132 | __u32 attr; |
| 133 | __u64 allocation_size; | 133 | __u64 allocation_size; |
| 134 | __u64 end_of_file; | 134 | __u64 end_of_file; |
| 135 | umode_t default_mode; | ||
| 135 | 136 | ||
| 136 | /* save mtime and size */ | 137 | /* save mtime and size */ |
| 137 | local_mtime = tmp_inode->i_mtime; | 138 | local_mtime = tmp_inode->i_mtime; |
| @@ -187,48 +188,54 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, | |||
| 187 | if (atomic_read(&cifsInfo->inUse) == 0) { | 188 | if (atomic_read(&cifsInfo->inUse) == 0) { |
| 188 | tmp_inode->i_uid = cifs_sb->mnt_uid; | 189 | tmp_inode->i_uid = cifs_sb->mnt_uid; |
| 189 | tmp_inode->i_gid = cifs_sb->mnt_gid; | 190 | tmp_inode->i_gid = cifs_sb->mnt_gid; |
| 190 | /* set default mode. will override for dirs below */ | 191 | } |
| 191 | tmp_inode->i_mode = cifs_sb->mnt_file_mode; | 192 | |
| 192 | } else { | 193 | if (attr & ATTR_DIRECTORY) |
| 193 | /* mask off the type bits since it gets set | 194 | default_mode = cifs_sb->mnt_dir_mode; |
| 194 | below and we do not want to get two type | 195 | else |
| 195 | bits set */ | 196 | default_mode = cifs_sb->mnt_file_mode; |
| 197 | |||
| 198 | /* set initial permissions */ | ||
| 199 | if ((atomic_read(&cifsInfo->inUse) == 0) || | ||
| 200 | (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) | ||
| 201 | tmp_inode->i_mode = default_mode; | ||
| 202 | else { | ||
| 203 | /* just reenable write bits if !ATTR_READONLY */ | ||
| 204 | if ((tmp_inode->i_mode & S_IWUGO) == 0 && | ||
| 205 | (attr & ATTR_READONLY) == 0) | ||
| 206 | tmp_inode->i_mode |= (S_IWUGO & default_mode); | ||
| 207 | |||
| 196 | tmp_inode->i_mode &= ~S_IFMT; | 208 | tmp_inode->i_mode &= ~S_IFMT; |
| 197 | } | 209 | } |
| 198 | 210 | ||
| 199 | if (attr & ATTR_DIRECTORY) { | 211 | /* clear write bits if ATTR_READONLY is set */ |
| 200 | *pobject_type = DT_DIR; | 212 | if (attr & ATTR_READONLY) |
| 201 | /* override default perms since we do not lock dirs */ | 213 | tmp_inode->i_mode &= ~S_IWUGO; |
| 202 | if (atomic_read(&cifsInfo->inUse) == 0) | 214 | |
| 203 | tmp_inode->i_mode = cifs_sb->mnt_dir_mode; | 215 | /* set inode type */ |
| 204 | tmp_inode->i_mode |= S_IFDIR; | 216 | if ((attr & ATTR_SYSTEM) && |
| 205 | } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && | 217 | (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { |
| 206 | (attr & ATTR_SYSTEM)) { | ||
| 207 | if (end_of_file == 0) { | 218 | if (end_of_file == 0) { |
| 208 | *pobject_type = DT_FIFO; | ||
| 209 | tmp_inode->i_mode |= S_IFIFO; | 219 | tmp_inode->i_mode |= S_IFIFO; |
| 220 | *pobject_type = DT_FIFO; | ||
| 210 | } else { | 221 | } else { |
| 211 | /* rather than get the type here, we mark the | 222 | /* |
| 212 | inode as needing revalidate and get the real type | 223 | * trying to get the type can be slow, so just call |
| 213 | (blk vs chr vs. symlink) later ie in lookup */ | 224 | * this a regular file for now, and mark for reval |
| 214 | *pobject_type = DT_REG; | 225 | */ |
| 215 | tmp_inode->i_mode |= S_IFREG; | 226 | tmp_inode->i_mode |= S_IFREG; |
| 227 | *pobject_type = DT_REG; | ||
| 216 | cifsInfo->time = 0; | 228 | cifsInfo->time = 0; |
| 217 | } | 229 | } |
| 218 | /* we no longer mark these because we could not follow them */ | ||
| 219 | /* } else if (attr & ATTR_REPARSE) { | ||
| 220 | *pobject_type = DT_LNK; | ||
| 221 | tmp_inode->i_mode |= S_IFLNK; */ | ||
| 222 | } else { | 230 | } else { |
| 223 | *pobject_type = DT_REG; | 231 | if (attr & ATTR_DIRECTORY) { |
| 224 | tmp_inode->i_mode |= S_IFREG; | 232 | tmp_inode->i_mode |= S_IFDIR; |
| 225 | if (attr & ATTR_READONLY) | 233 | *pobject_type = DT_DIR; |
| 226 | tmp_inode->i_mode &= ~(S_IWUGO); | 234 | } else { |
| 227 | else if ((tmp_inode->i_mode & S_IWUGO) == 0) | 235 | tmp_inode->i_mode |= S_IFREG; |
| 228 | /* the ATTR_READONLY flag may have been changed on */ | 236 | *pobject_type = DT_REG; |
| 229 | /* server -- set any w bits allowed by mnt_file_mode */ | 237 | } |
| 230 | tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); | 238 | } |
| 231 | } /* could add code here - to validate if device or weird share type? */ | ||
| 232 | 239 | ||
| 233 | /* can not fill in nlink here as in qpathinfo version and Unx search */ | 240 | /* can not fill in nlink here as in qpathinfo version and Unx search */ |
| 234 | if (atomic_read(&cifsInfo->inUse) == 0) | 241 | if (atomic_read(&cifsInfo->inUse) == 0) |
| @@ -675,8 +682,6 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
| 675 | cifsFile->invalidHandle = true; | 682 | cifsFile->invalidHandle = true; |
| 676 | CIFSFindClose(xid, pTcon, cifsFile->netfid); | 683 | CIFSFindClose(xid, pTcon, cifsFile->netfid); |
| 677 | } | 684 | } |
| 678 | kfree(cifsFile->search_resume_name); | ||
| 679 | cifsFile->search_resume_name = NULL; | ||
| 680 | if (cifsFile->srch_inf.ntwrk_buf_start) { | 685 | if (cifsFile->srch_inf.ntwrk_buf_start) { |
| 681 | cFYI(1, ("freeing SMB ff cache buf on search rewind")); | 686 | cFYI(1, ("freeing SMB ff cache buf on search rewind")); |
| 682 | if (cifsFile->srch_inf.smallBuf) | 687 | if (cifsFile->srch_inf.smallBuf) |
| @@ -1043,9 +1048,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
| 1043 | } /* else { | 1048 | } /* else { |
| 1044 | cifsFile->invalidHandle = true; | 1049 | cifsFile->invalidHandle = true; |
| 1045 | CIFSFindClose(xid, pTcon, cifsFile->netfid); | 1050 | CIFSFindClose(xid, pTcon, cifsFile->netfid); |
| 1046 | } | 1051 | } */ |
| 1047 | kfree(cifsFile->search_resume_name); | ||
| 1048 | cifsFile->search_resume_name = NULL; */ | ||
| 1049 | 1052 | ||
| 1050 | rc = find_cifs_entry(xid, pTcon, file, | 1053 | rc = find_cifs_entry(xid, pTcon, file, |
| 1051 | ¤t_entry, &num_to_fill); | 1054 | ¤t_entry, &num_to_fill); |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 97dba0d92348..c54eaab71a19 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
| @@ -69,9 +69,11 @@ | |||
| 69 | #include <linux/capi.h> | 69 | #include <linux/capi.h> |
| 70 | #include <linux/gigaset_dev.h> | 70 | #include <linux/gigaset_dev.h> |
| 71 | 71 | ||
| 72 | #ifdef CONFIG_BLOCK | ||
| 72 | #include <scsi/scsi.h> | 73 | #include <scsi/scsi.h> |
| 73 | #include <scsi/scsi_ioctl.h> | 74 | #include <scsi/scsi_ioctl.h> |
| 74 | #include <scsi/sg.h> | 75 | #include <scsi/sg.h> |
| 76 | #endif | ||
| 75 | 77 | ||
| 76 | #include <asm/uaccess.h> | 78 | #include <asm/uaccess.h> |
| 77 | #include <linux/ethtool.h> | 79 | #include <linux/ethtool.h> |
| @@ -2024,6 +2026,7 @@ COMPATIBLE_IOCTL(GIO_UNISCRNMAP) | |||
| 2024 | COMPATIBLE_IOCTL(PIO_UNISCRNMAP) | 2026 | COMPATIBLE_IOCTL(PIO_UNISCRNMAP) |
| 2025 | COMPATIBLE_IOCTL(PIO_FONTRESET) | 2027 | COMPATIBLE_IOCTL(PIO_FONTRESET) |
| 2026 | COMPATIBLE_IOCTL(PIO_UNIMAPCLR) | 2028 | COMPATIBLE_IOCTL(PIO_UNIMAPCLR) |
| 2029 | #ifdef CONFIG_BLOCK | ||
| 2027 | /* Big S */ | 2030 | /* Big S */ |
| 2028 | COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) | 2031 | COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) |
| 2029 | COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK) | 2032 | COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK) |
| @@ -2033,6 +2036,7 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_GET_BUS_NUMBER) | |||
| 2033 | COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND) | 2036 | COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND) |
| 2034 | COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) | 2037 | COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) |
| 2035 | COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) | 2038 | COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) |
| 2039 | #endif | ||
| 2036 | /* Big T */ | 2040 | /* Big T */ |
| 2037 | COMPATIBLE_IOCTL(TUNSETNOCSUM) | 2041 | COMPATIBLE_IOCTL(TUNSETNOCSUM) |
| 2038 | COMPATIBLE_IOCTL(TUNSETDEBUG) | 2042 | COMPATIBLE_IOCTL(TUNSETDEBUG) |
| @@ -2103,6 +2107,7 @@ COMPATIBLE_IOCTL(SIOCGIFVLAN) | |||
| 2103 | COMPATIBLE_IOCTL(SIOCSIFVLAN) | 2107 | COMPATIBLE_IOCTL(SIOCSIFVLAN) |
| 2104 | COMPATIBLE_IOCTL(SIOCBRADDBR) | 2108 | COMPATIBLE_IOCTL(SIOCBRADDBR) |
| 2105 | COMPATIBLE_IOCTL(SIOCBRDELBR) | 2109 | COMPATIBLE_IOCTL(SIOCBRDELBR) |
| 2110 | #ifdef CONFIG_BLOCK | ||
| 2106 | /* SG stuff */ | 2111 | /* SG stuff */ |
| 2107 | COMPATIBLE_IOCTL(SG_SET_TIMEOUT) | 2112 | COMPATIBLE_IOCTL(SG_SET_TIMEOUT) |
| 2108 | COMPATIBLE_IOCTL(SG_GET_TIMEOUT) | 2113 | COMPATIBLE_IOCTL(SG_GET_TIMEOUT) |
| @@ -2127,6 +2132,7 @@ COMPATIBLE_IOCTL(SG_SCSI_RESET) | |||
| 2127 | COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE) | 2132 | COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE) |
| 2128 | COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN) | 2133 | COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN) |
| 2129 | COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN) | 2134 | COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN) |
| 2135 | #endif | ||
| 2130 | /* PPP stuff */ | 2136 | /* PPP stuff */ |
| 2131 | COMPATIBLE_IOCTL(PPPIOCGFLAGS) | 2137 | COMPATIBLE_IOCTL(PPPIOCGFLAGS) |
| 2132 | COMPATIBLE_IOCTL(PPPIOCSFLAGS) | 2138 | COMPATIBLE_IOCTL(PPPIOCSFLAGS) |
diff --git a/fs/dcache.c b/fs/dcache.c index 3ee588d5f585..6068c25b393c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/syscalls.h> | 17 | #include <linux/syscalls.h> |
| 18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
| 19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
| 20 | #include <linux/fdtable.h> | ||
| 20 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
| 21 | #include <linux/fsnotify.h> | 22 | #include <linux/fsnotify.h> |
| 22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
| @@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry) | |||
| 106 | /* | 107 | /* |
| 107 | * Release the dentry's inode, using the filesystem | 108 | * Release the dentry's inode, using the filesystem |
| 108 | * d_iput() operation if defined. | 109 | * d_iput() operation if defined. |
| 109 | * Called with dcache_lock and per dentry lock held, drops both. | ||
| 110 | */ | 110 | */ |
| 111 | static void dentry_iput(struct dentry * dentry) | 111 | static void dentry_iput(struct dentry * dentry) |
| 112 | __releases(dentry->d_lock) | ||
| 113 | __releases(dcache_lock) | ||
| 112 | { | 114 | { |
| 113 | struct inode *inode = dentry->d_inode; | 115 | struct inode *inode = dentry->d_inode; |
| 114 | if (inode) { | 116 | if (inode) { |
| @@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry) | |||
| 132 | * d_kill - kill dentry and return parent | 134 | * d_kill - kill dentry and return parent |
| 133 | * @dentry: dentry to kill | 135 | * @dentry: dentry to kill |
| 134 | * | 136 | * |
| 135 | * Called with dcache_lock and d_lock, releases both. The dentry must | 137 | * The dentry must already be unhashed and removed from the LRU. |
| 136 | * already be unhashed and removed from the LRU. | ||
| 137 | * | 138 | * |
| 138 | * If this is the root of the dentry tree, return NULL. | 139 | * If this is the root of the dentry tree, return NULL. |
| 139 | */ | 140 | */ |
| 140 | static struct dentry *d_kill(struct dentry *dentry) | 141 | static struct dentry *d_kill(struct dentry *dentry) |
| 142 | __releases(dentry->d_lock) | ||
| 143 | __releases(dcache_lock) | ||
| 141 | { | 144 | { |
| 142 | struct dentry *parent; | 145 | struct dentry *parent; |
| 143 | 146 | ||
| @@ -383,11 +386,11 @@ restart: | |||
| 383 | * Try to prune ancestors as well. This is necessary to prevent | 386 | * Try to prune ancestors as well. This is necessary to prevent |
| 384 | * quadratic behavior of shrink_dcache_parent(), but is also expected | 387 | * quadratic behavior of shrink_dcache_parent(), but is also expected |
| 385 | * to be beneficial in reducing dentry cache fragmentation. | 388 | * to be beneficial in reducing dentry cache fragmentation. |
| 386 | * | ||
| 387 | * Called with dcache_lock, drops it and then regains. | ||
| 388 | * Called with dentry->d_lock held, drops it. | ||
| 389 | */ | 389 | */ |
| 390 | static void prune_one_dentry(struct dentry * dentry) | 390 | static void prune_one_dentry(struct dentry * dentry) |
| 391 | __releases(dentry->d_lock) | ||
| 392 | __releases(dcache_lock) | ||
| 393 | __acquires(dcache_lock) | ||
| 391 | { | 394 | { |
| 392 | __d_drop(dentry); | 395 | __d_drop(dentry); |
| 393 | dentry = d_kill(dentry); | 396 | dentry = d_kill(dentry); |
| @@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2) | |||
| 1604 | * | 1607 | * |
| 1605 | * Note: If ever the locking in lock_rename() changes, then please | 1608 | * Note: If ever the locking in lock_rename() changes, then please |
| 1606 | * remember to update this too... | 1609 | * remember to update this too... |
| 1607 | * | ||
| 1608 | * On return, dcache_lock will have been unlocked. | ||
| 1609 | */ | 1610 | */ |
| 1610 | static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) | 1611 | static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) |
| 1612 | __releases(dcache_lock) | ||
| 1611 | { | 1613 | { |
| 1612 | struct mutex *m1 = NULL, *m2 = NULL; | 1614 | struct mutex *m1 = NULL, *m2 = NULL; |
| 1613 | struct dentry *ret; | 1615 | struct dentry *ret; |
| @@ -1743,11 +1745,9 @@ out_nolock: | |||
| 1743 | shouldnt_be_hashed: | 1745 | shouldnt_be_hashed: |
| 1744 | spin_unlock(&dcache_lock); | 1746 | spin_unlock(&dcache_lock); |
| 1745 | BUG(); | 1747 | BUG(); |
| 1746 | goto shouldnt_be_hashed; | ||
| 1747 | } | 1748 | } |
| 1748 | 1749 | ||
| 1749 | static int prepend(char **buffer, int *buflen, const char *str, | 1750 | static int prepend(char **buffer, int *buflen, const char *str, int namelen) |
| 1750 | int namelen) | ||
| 1751 | { | 1751 | { |
| 1752 | *buflen -= namelen; | 1752 | *buflen -= namelen; |
| 1753 | if (*buflen < 0) | 1753 | if (*buflen < 0) |
| @@ -1757,8 +1757,13 @@ static int prepend(char **buffer, int *buflen, const char *str, | |||
| 1757 | return 0; | 1757 | return 0; |
| 1758 | } | 1758 | } |
| 1759 | 1759 | ||
| 1760 | static int prepend_name(char **buffer, int *buflen, struct qstr *name) | ||
| 1761 | { | ||
| 1762 | return prepend(buffer, buflen, name->name, name->len); | ||
| 1763 | } | ||
| 1764 | |||
| 1760 | /** | 1765 | /** |
| 1761 | * d_path - return the path of a dentry | 1766 | * __d_path - return the path of a dentry |
| 1762 | * @path: the dentry/vfsmount to report | 1767 | * @path: the dentry/vfsmount to report |
| 1763 | * @root: root vfsmnt/dentry (may be modified by this function) | 1768 | * @root: root vfsmnt/dentry (may be modified by this function) |
| 1764 | * @buffer: buffer to return value in | 1769 | * @buffer: buffer to return value in |
| @@ -1779,9 +1784,10 @@ char *__d_path(const struct path *path, struct path *root, | |||
| 1779 | { | 1784 | { |
| 1780 | struct dentry *dentry = path->dentry; | 1785 | struct dentry *dentry = path->dentry; |
| 1781 | struct vfsmount *vfsmnt = path->mnt; | 1786 | struct vfsmount *vfsmnt = path->mnt; |
| 1782 | char * end = buffer+buflen; | 1787 | char *end = buffer + buflen; |
| 1783 | char * retval; | 1788 | char *retval; |
| 1784 | 1789 | ||
| 1790 | spin_lock(&vfsmount_lock); | ||
| 1785 | prepend(&end, &buflen, "\0", 1); | 1791 | prepend(&end, &buflen, "\0", 1); |
| 1786 | if (!IS_ROOT(dentry) && d_unhashed(dentry) && | 1792 | if (!IS_ROOT(dentry) && d_unhashed(dentry) && |
| 1787 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) | 1793 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) |
| @@ -1800,38 +1806,37 @@ char *__d_path(const struct path *path, struct path *root, | |||
| 1800 | break; | 1806 | break; |
| 1801 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { | 1807 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { |
| 1802 | /* Global root? */ | 1808 | /* Global root? */ |
| 1803 | spin_lock(&vfsmount_lock); | ||
| 1804 | if (vfsmnt->mnt_parent == vfsmnt) { | 1809 | if (vfsmnt->mnt_parent == vfsmnt) { |
| 1805 | spin_unlock(&vfsmount_lock); | ||
| 1806 | goto global_root; | 1810 | goto global_root; |
| 1807 | } | 1811 | } |
| 1808 | dentry = vfsmnt->mnt_mountpoint; | 1812 | dentry = vfsmnt->mnt_mountpoint; |
| 1809 | vfsmnt = vfsmnt->mnt_parent; | 1813 | vfsmnt = vfsmnt->mnt_parent; |
| 1810 | spin_unlock(&vfsmount_lock); | ||
| 1811 | continue; | 1814 | continue; |
| 1812 | } | 1815 | } |
| 1813 | parent = dentry->d_parent; | 1816 | parent = dentry->d_parent; |
| 1814 | prefetch(parent); | 1817 | prefetch(parent); |
| 1815 | if ((prepend(&end, &buflen, dentry->d_name.name, | 1818 | if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || |
| 1816 | dentry->d_name.len) != 0) || | ||
| 1817 | (prepend(&end, &buflen, "/", 1) != 0)) | 1819 | (prepend(&end, &buflen, "/", 1) != 0)) |
| 1818 | goto Elong; | 1820 | goto Elong; |
| 1819 | retval = end; | 1821 | retval = end; |
| 1820 | dentry = parent; | 1822 | dentry = parent; |
| 1821 | } | 1823 | } |
| 1822 | 1824 | ||
| 1825 | out: | ||
| 1826 | spin_unlock(&vfsmount_lock); | ||
| 1823 | return retval; | 1827 | return retval; |
| 1824 | 1828 | ||
| 1825 | global_root: | 1829 | global_root: |
| 1826 | retval += 1; /* hit the slash */ | 1830 | retval += 1; /* hit the slash */ |
| 1827 | if (prepend(&retval, &buflen, dentry->d_name.name, | 1831 | if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) |
| 1828 | dentry->d_name.len) != 0) | ||
| 1829 | goto Elong; | 1832 | goto Elong; |
| 1830 | root->mnt = vfsmnt; | 1833 | root->mnt = vfsmnt; |
| 1831 | root->dentry = dentry; | 1834 | root->dentry = dentry; |
| 1832 | return retval; | 1835 | goto out; |
| 1836 | |||
| 1833 | Elong: | 1837 | Elong: |
| 1834 | return ERR_PTR(-ENAMETOOLONG); | 1838 | retval = ERR_PTR(-ENAMETOOLONG); |
| 1839 | goto out; | ||
| 1835 | } | 1840 | } |
| 1836 | 1841 | ||
| 1837 | /** | 1842 | /** |
| @@ -1845,9 +1850,9 @@ Elong: | |||
| 1845 | * | 1850 | * |
| 1846 | * Returns the buffer or an error code if the path was too long. | 1851 | * Returns the buffer or an error code if the path was too long. |
| 1847 | * | 1852 | * |
| 1848 | * "buflen" should be positive. Caller holds the dcache_lock. | 1853 | * "buflen" should be positive. |
| 1849 | */ | 1854 | */ |
| 1850 | char *d_path(struct path *path, char *buf, int buflen) | 1855 | char *d_path(const struct path *path, char *buf, int buflen) |
| 1851 | { | 1856 | { |
| 1852 | char *res; | 1857 | char *res; |
| 1853 | struct path root; | 1858 | struct path root; |
| @@ -1915,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) | |||
| 1915 | retval = end-1; | 1920 | retval = end-1; |
| 1916 | *retval = '/'; | 1921 | *retval = '/'; |
| 1917 | 1922 | ||
| 1918 | for (;;) { | 1923 | while (!IS_ROOT(dentry)) { |
| 1919 | struct dentry *parent; | 1924 | struct dentry *parent = dentry->d_parent; |
| 1920 | if (IS_ROOT(dentry)) | ||
| 1921 | break; | ||
| 1922 | 1925 | ||
| 1923 | parent = dentry->d_parent; | ||
| 1924 | prefetch(parent); | 1926 | prefetch(parent); |
| 1925 | 1927 | if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || | |
| 1926 | if ((prepend(&end, &buflen, dentry->d_name.name, | ||
| 1927 | dentry->d_name.len) != 0) || | ||
| 1928 | (prepend(&end, &buflen, "/", 1) != 0)) | 1928 | (prepend(&end, &buflen, "/", 1) != 0)) |
| 1929 | goto Elong; | 1929 | goto Elong; |
| 1930 | 1930 | ||
| @@ -1975,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size) | |||
| 1975 | error = -ENOENT; | 1975 | error = -ENOENT; |
| 1976 | /* Has the current directory has been unlinked? */ | 1976 | /* Has the current directory has been unlinked? */ |
| 1977 | spin_lock(&dcache_lock); | 1977 | spin_lock(&dcache_lock); |
| 1978 | if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { | 1978 | if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { |
| 1979 | unsigned long len; | 1979 | unsigned long len; |
| 1980 | struct path tmp = root; | 1980 | struct path tmp = root; |
| 1981 | char * cwd; | 1981 | char * cwd; |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index ebbcf38fd33b..f976f303c196 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/poll.h> | 15 | #include <linux/poll.h> |
| 16 | #include <linux/signal.h> | 16 | #include <linux/signal.h> |
| 17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 18 | #include <linux/smp_lock.h> | ||
| 18 | #include <linux/dlm.h> | 19 | #include <linux/dlm.h> |
| 19 | #include <linux/dlm_device.h> | 20 | #include <linux/dlm_device.h> |
| 20 | 21 | ||
| @@ -618,13 +619,17 @@ static int device_open(struct inode *inode, struct file *file) | |||
| 618 | struct dlm_user_proc *proc; | 619 | struct dlm_user_proc *proc; |
| 619 | struct dlm_ls *ls; | 620 | struct dlm_ls *ls; |
| 620 | 621 | ||
| 622 | lock_kernel(); | ||
| 621 | ls = dlm_find_lockspace_device(iminor(inode)); | 623 | ls = dlm_find_lockspace_device(iminor(inode)); |
| 622 | if (!ls) | 624 | if (!ls) { |
| 625 | unlock_kernel(); | ||
| 623 | return -ENOENT; | 626 | return -ENOENT; |
| 627 | } | ||
| 624 | 628 | ||
| 625 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); | 629 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); |
| 626 | if (!proc) { | 630 | if (!proc) { |
| 627 | dlm_put_lockspace(ls); | 631 | dlm_put_lockspace(ls); |
| 632 | unlock_kernel(); | ||
| 628 | return -ENOMEM; | 633 | return -ENOMEM; |
| 629 | } | 634 | } |
| 630 | 635 | ||
| @@ -636,6 +641,7 @@ static int device_open(struct inode *inode, struct file *file) | |||
| 636 | spin_lock_init(&proc->locks_spin); | 641 | spin_lock_init(&proc->locks_spin); |
| 637 | init_waitqueue_head(&proc->wait); | 642 | init_waitqueue_head(&proc->wait); |
| 638 | file->private_data = proc; | 643 | file->private_data = proc; |
| 644 | unlock_kernel(); | ||
| 639 | 645 | ||
| 640 | return 0; | 646 | return 0; |
| 641 | } | 647 | } |
| @@ -870,6 +876,7 @@ static unsigned int device_poll(struct file *file, poll_table *wait) | |||
| 870 | 876 | ||
| 871 | static int ctl_device_open(struct inode *inode, struct file *file) | 877 | static int ctl_device_open(struct inode *inode, struct file *file) |
| 872 | { | 878 | { |
| 879 | cycle_kernel_lock(); | ||
| 873 | file->private_data = NULL; | 880 | file->private_data = NULL; |
| 874 | return 0; | 881 | return 0; |
| 875 | } | 882 | } |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2258b8f654a6..24749bf0668f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
| 31 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
| 32 | #include <linux/fs_stack.h> | 32 | #include <linux/fs_stack.h> |
| 33 | #include <linux/smp_lock.h> | ||
| 33 | #include "ecryptfs_kernel.h" | 34 | #include "ecryptfs_kernel.h" |
| 34 | 35 | ||
| 35 | /** | 36 | /** |
| @@ -277,9 +278,11 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) | |||
| 277 | int rc = 0; | 278 | int rc = 0; |
| 278 | struct file *lower_file = NULL; | 279 | struct file *lower_file = NULL; |
| 279 | 280 | ||
| 281 | lock_kernel(); | ||
| 280 | lower_file = ecryptfs_file_to_lower(file); | 282 | lower_file = ecryptfs_file_to_lower(file); |
| 281 | if (lower_file->f_op && lower_file->f_op->fasync) | 283 | if (lower_file->f_op && lower_file->f_op->fasync) |
| 282 | rc = lower_file->f_op->fasync(fd, lower_file, flag); | 284 | rc = lower_file->f_op->fasync(fd, lower_file, flag); |
| 285 | unlock_kernel(); | ||
| 283 | return rc; | 286 | return rc; |
| 284 | } | 287 | } |
| 285 | 288 | ||
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 50c994a249a5..09a4522f65e6 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
| @@ -575,13 +575,11 @@ int ecryptfs_init_ecryptfs_miscdev(void) | |||
| 575 | int rc; | 575 | int rc; |
| 576 | 576 | ||
| 577 | atomic_set(&ecryptfs_num_miscdev_opens, 0); | 577 | atomic_set(&ecryptfs_num_miscdev_opens, 0); |
| 578 | mutex_lock(&ecryptfs_daemon_hash_mux); | ||
| 579 | rc = misc_register(&ecryptfs_miscdev); | 578 | rc = misc_register(&ecryptfs_miscdev); |
| 580 | if (rc) | 579 | if (rc) |
| 581 | printk(KERN_ERR "%s: Failed to register miscellaneous device " | 580 | printk(KERN_ERR "%s: Failed to register miscellaneous device " |
| 582 | "for communications with userspace daemons; rc = [%d]\n", | 581 | "for communications with userspace daemons; rc = [%d]\n", |
| 583 | __func__, rc); | 582 | __func__, rc); |
| 584 | mutex_unlock(&ecryptfs_daemon_hash_mux); | ||
| 585 | return rc; | 583 | return rc; |
| 586 | } | 584 | } |
| 587 | 585 | ||
| @@ -26,7 +26,6 @@ | |||
| 26 | #include <linux/file.h> | 26 | #include <linux/file.h> |
| 27 | #include <linux/fdtable.h> | 27 | #include <linux/fdtable.h> |
| 28 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
| 29 | #include <linux/a.out.h> | ||
| 30 | #include <linux/stat.h> | 29 | #include <linux/stat.h> |
| 31 | #include <linux/fcntl.h> | 30 | #include <linux/fcntl.h> |
| 32 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
| @@ -61,6 +60,11 @@ | |||
| 61 | #include <linux/kmod.h> | 60 | #include <linux/kmod.h> |
| 62 | #endif | 61 | #endif |
| 63 | 62 | ||
| 63 | #ifdef __alpha__ | ||
| 64 | /* for /sbin/loader handling in search_binary_handler() */ | ||
| 65 | #include <linux/a.out.h> | ||
| 66 | #endif | ||
| 67 | |||
| 64 | int core_uses_pid; | 68 | int core_uses_pid; |
| 65 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | 69 | char core_pattern[CORENAME_MAX_SIZE] = "core"; |
| 66 | int suid_dumpable = 0; | 70 | int suid_dumpable = 0; |
| @@ -606,7 +610,7 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
| 606 | bprm->exec -= stack_shift; | 610 | bprm->exec -= stack_shift; |
| 607 | 611 | ||
| 608 | down_write(&mm->mmap_sem); | 612 | down_write(&mm->mmap_sem); |
| 609 | vm_flags = vma->vm_flags; | 613 | vm_flags = VM_STACK_FLAGS; |
| 610 | 614 | ||
| 611 | /* | 615 | /* |
| 612 | * Adjust stack execute permissions; explicitly enable for | 616 | * Adjust stack execute permissions; explicitly enable for |
| @@ -1155,7 +1159,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
| 1155 | { | 1159 | { |
| 1156 | int try,retval; | 1160 | int try,retval; |
| 1157 | struct linux_binfmt *fmt; | 1161 | struct linux_binfmt *fmt; |
| 1158 | #if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT) | 1162 | #ifdef __alpha__ |
| 1159 | /* handle /sbin/loader.. */ | 1163 | /* handle /sbin/loader.. */ |
| 1160 | { | 1164 | { |
| 1161 | struct exec * eh = (struct exec *) bprm->buf; | 1165 | struct exec * eh = (struct exec *) bprm->buf; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index fe3119a71ada..2845425077e8 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -2875,8 +2875,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, | |||
| 2875 | blk++; | 2875 | blk++; |
| 2876 | } | 2876 | } |
| 2877 | out: | 2877 | out: |
| 2878 | if (len == towrite) | 2878 | if (len == towrite) { |
| 2879 | mutex_unlock(&inode->i_mutex); | ||
| 2879 | return err; | 2880 | return err; |
| 2881 | } | ||
| 2880 | if (inode->i_size < off+len-towrite) { | 2882 | if (inode->i_size < off+len-towrite) { |
| 2881 | i_size_write(inode, off+len-towrite); | 2883 | i_size_write(inode, off+len-towrite); |
| 2882 | EXT3_I(inode)->i_disksize = inode->i_size; | 2884 | EXT3_I(inode)->i_disksize = inode->i_size; |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9cc80b9cc8d8..495ab21b9832 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, | |||
| 47 | ext4_group_t block_group) | 47 | ext4_group_t block_group) |
| 48 | { | 48 | { |
| 49 | ext4_group_t actual_group; | 49 | ext4_group_t actual_group; |
| 50 | ext4_get_group_no_and_offset(sb, block, &actual_group, 0); | 50 | ext4_get_group_no_and_offset(sb, block, &actual_group, NULL); |
| 51 | if (actual_group == block_group) | 51 | if (actual_group == block_group) |
| 52 | return 1; | 52 | return 1; |
| 53 | return 0; | 53 | return 0; |
| @@ -121,12 +121,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
| 121 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); | 121 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); |
| 122 | } | 122 | } |
| 123 | } else { /* For META_BG_BLOCK_GROUPS */ | 123 | } else { /* For META_BG_BLOCK_GROUPS */ |
| 124 | int group_rel = (block_group - | 124 | bit_max += ext4_bg_num_gdb(sb, block_group); |
| 125 | le32_to_cpu(sbi->s_es->s_first_meta_bg)) % | ||
| 126 | EXT4_DESC_PER_BLOCK(sb); | ||
| 127 | if (group_rel == 0 || group_rel == 1 || | ||
| 128 | (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1)) | ||
| 129 | bit_max += 1; | ||
| 130 | } | 125 | } |
| 131 | 126 | ||
| 132 | if (block_group == sbi->s_groups_count - 1) { | 127 | if (block_group == sbi->s_groups_count - 1) { |
| @@ -295,7 +290,7 @@ err_out: | |||
| 295 | return 0; | 290 | return 0; |
| 296 | } | 291 | } |
| 297 | /** | 292 | /** |
| 298 | * read_block_bitmap() | 293 | * ext4_read_block_bitmap() |
| 299 | * @sb: super block | 294 | * @sb: super block |
| 300 | * @block_group: given block group | 295 | * @block_group: given block group |
| 301 | * | 296 | * |
| @@ -305,7 +300,7 @@ err_out: | |||
| 305 | * Return buffer_head on success or NULL in case of failure. | 300 | * Return buffer_head on success or NULL in case of failure. |
| 306 | */ | 301 | */ |
| 307 | struct buffer_head * | 302 | struct buffer_head * |
| 308 | read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
| 309 | { | 304 | { |
| 310 | struct ext4_group_desc * desc; | 305 | struct ext4_group_desc * desc; |
| 311 | struct buffer_head * bh = NULL; | 306 | struct buffer_head * bh = NULL; |
| @@ -409,8 +404,7 @@ restart: | |||
| 409 | prev = rsv; | 404 | prev = rsv; |
| 410 | } | 405 | } |
| 411 | printk("Window map complete.\n"); | 406 | printk("Window map complete.\n"); |
| 412 | if (bad) | 407 | BUG_ON(bad); |
| 413 | BUG(); | ||
| 414 | } | 408 | } |
| 415 | #define rsv_window_dump(root, verbose) \ | 409 | #define rsv_window_dump(root, verbose) \ |
| 416 | __rsv_window_dump((root), (verbose), __func__) | 410 | __rsv_window_dump((root), (verbose), __func__) |
| @@ -694,7 +688,7 @@ do_more: | |||
| 694 | count -= overflow; | 688 | count -= overflow; |
| 695 | } | 689 | } |
| 696 | brelse(bitmap_bh); | 690 | brelse(bitmap_bh); |
| 697 | bitmap_bh = read_block_bitmap(sb, block_group); | 691 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
| 698 | if (!bitmap_bh) | 692 | if (!bitmap_bh) |
| 699 | goto error_return; | 693 | goto error_return; |
| 700 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 694 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); |
| @@ -810,6 +804,13 @@ do_more: | |||
| 810 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 804 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
| 811 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 805 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
| 812 | 806 | ||
| 807 | if (sbi->s_log_groups_per_flex) { | ||
| 808 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
| 809 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 810 | sbi->s_flex_groups[flex_group].free_blocks += count; | ||
| 811 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 812 | } | ||
| 813 | |||
| 813 | /* We dirtied the bitmap block */ | 814 | /* We dirtied the bitmap block */ |
| 814 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 815 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
| 815 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | 816 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
| @@ -1598,23 +1599,35 @@ out: | |||
| 1598 | 1599 | ||
| 1599 | /** | 1600 | /** |
| 1600 | * ext4_has_free_blocks() | 1601 | * ext4_has_free_blocks() |
| 1601 | * @sbi: in-core super block structure. | 1602 | * @sbi: in-core super block structure. |
| 1603 | * @nblocks: number of neeed blocks | ||
| 1602 | * | 1604 | * |
| 1603 | * Check if filesystem has at least 1 free block available for allocation. | 1605 | * Check if filesystem has free blocks available for allocation. |
| 1606 | * Return the number of blocks avaible for allocation for this request | ||
| 1607 | * On success, return nblocks | ||
| 1604 | */ | 1608 | */ |
| 1605 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi) | 1609 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
| 1610 | ext4_fsblk_t nblocks) | ||
| 1606 | { | 1611 | { |
| 1607 | ext4_fsblk_t free_blocks, root_blocks; | 1612 | ext4_fsblk_t free_blocks; |
| 1613 | ext4_fsblk_t root_blocks = 0; | ||
| 1608 | 1614 | ||
| 1609 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 1615 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); |
| 1610 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 1616 | |
| 1611 | if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && | 1617 | if (!capable(CAP_SYS_RESOURCE) && |
| 1612 | sbi->s_resuid != current->fsuid && | 1618 | sbi->s_resuid != current->fsuid && |
| 1613 | (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { | 1619 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
| 1614 | return 0; | 1620 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
| 1615 | } | 1621 | #ifdef CONFIG_SMP |
| 1616 | return 1; | 1622 | if (free_blocks - root_blocks < FBC_BATCH) |
| 1617 | } | 1623 | free_blocks = |
| 1624 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | ||
| 1625 | #endif | ||
| 1626 | if (free_blocks - root_blocks < nblocks) | ||
| 1627 | return free_blocks - root_blocks; | ||
| 1628 | return nblocks; | ||
| 1629 | } | ||
| 1630 | |||
| 1618 | 1631 | ||
| 1619 | /** | 1632 | /** |
| 1620 | * ext4_should_retry_alloc() | 1633 | * ext4_should_retry_alloc() |
| @@ -1630,7 +1643,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi) | |||
| 1630 | */ | 1643 | */ |
| 1631 | int ext4_should_retry_alloc(struct super_block *sb, int *retries) | 1644 | int ext4_should_retry_alloc(struct super_block *sb, int *retries) |
| 1632 | { | 1645 | { |
| 1633 | if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) | 1646 | if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) |
| 1634 | return 0; | 1647 | return 0; |
| 1635 | 1648 | ||
| 1636 | jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); | 1649 | jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); |
| @@ -1639,20 +1652,24 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
| 1639 | } | 1652 | } |
| 1640 | 1653 | ||
| 1641 | /** | 1654 | /** |
| 1642 | * ext4_new_blocks_old() -- core block(s) allocation function | 1655 | * ext4_old_new_blocks() -- core block bitmap based block allocation function |
| 1656 | * | ||
| 1643 | * @handle: handle to this transaction | 1657 | * @handle: handle to this transaction |
| 1644 | * @inode: file inode | 1658 | * @inode: file inode |
| 1645 | * @goal: given target block(filesystem wide) | 1659 | * @goal: given target block(filesystem wide) |
| 1646 | * @count: target number of blocks to allocate | 1660 | * @count: target number of blocks to allocate |
| 1647 | * @errp: error code | 1661 | * @errp: error code |
| 1648 | * | 1662 | * |
| 1649 | * ext4_new_blocks uses a goal block to assist allocation. It tries to | 1663 | * ext4_old_new_blocks uses a goal block to assist allocation and look up |
| 1650 | * allocate block(s) from the block group contains the goal block first. If that | 1664 | * the block bitmap directly to do block allocation. It tries to |
| 1651 | * fails, it will try to allocate block(s) from other block groups without | 1665 | * allocate block(s) from the block group contains the goal block first. If |
| 1652 | * any specific goal block. | 1666 | * that fails, it will try to allocate block(s) from other block groups |
| 1667 | * without any specific goal block. | ||
| 1668 | * | ||
| 1669 | * This function is called when -o nomballoc mount option is enabled | ||
| 1653 | * | 1670 | * |
| 1654 | */ | 1671 | */ |
| 1655 | ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | 1672 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, |
| 1656 | ext4_fsblk_t goal, unsigned long *count, int *errp) | 1673 | ext4_fsblk_t goal, unsigned long *count, int *errp) |
| 1657 | { | 1674 | { |
| 1658 | struct buffer_head *bitmap_bh = NULL; | 1675 | struct buffer_head *bitmap_bh = NULL; |
| @@ -1676,13 +1693,26 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | |||
| 1676 | ext4_group_t ngroups; | 1693 | ext4_group_t ngroups; |
| 1677 | unsigned long num = *count; | 1694 | unsigned long num = *count; |
| 1678 | 1695 | ||
| 1679 | *errp = -ENOSPC; | ||
| 1680 | sb = inode->i_sb; | 1696 | sb = inode->i_sb; |
| 1681 | if (!sb) { | 1697 | if (!sb) { |
| 1698 | *errp = -ENODEV; | ||
| 1682 | printk("ext4_new_block: nonexistent device"); | 1699 | printk("ext4_new_block: nonexistent device"); |
| 1683 | return 0; | 1700 | return 0; |
| 1684 | } | 1701 | } |
| 1685 | 1702 | ||
| 1703 | sbi = EXT4_SB(sb); | ||
| 1704 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
| 1705 | /* | ||
| 1706 | * With delalloc we already reserved the blocks | ||
| 1707 | */ | ||
| 1708 | *count = ext4_has_free_blocks(sbi, *count); | ||
| 1709 | } | ||
| 1710 | if (*count == 0) { | ||
| 1711 | *errp = -ENOSPC; | ||
| 1712 | return 0; /*return with ENOSPC error */ | ||
| 1713 | } | ||
| 1714 | num = *count; | ||
| 1715 | |||
| 1686 | /* | 1716 | /* |
| 1687 | * Check quota for allocation of this block. | 1717 | * Check quota for allocation of this block. |
| 1688 | */ | 1718 | */ |
| @@ -1706,11 +1736,6 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | |||
| 1706 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | 1736 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) |
| 1707 | my_rsv = &block_i->rsv_window_node; | 1737 | my_rsv = &block_i->rsv_window_node; |
| 1708 | 1738 | ||
| 1709 | if (!ext4_has_free_blocks(sbi)) { | ||
| 1710 | *errp = -ENOSPC; | ||
| 1711 | goto out; | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | /* | 1739 | /* |
| 1715 | * First, test whether the goal block is free. | 1740 | * First, test whether the goal block is free. |
| 1716 | */ | 1741 | */ |
| @@ -1734,7 +1759,7 @@ retry_alloc: | |||
| 1734 | my_rsv = NULL; | 1759 | my_rsv = NULL; |
| 1735 | 1760 | ||
| 1736 | if (free_blocks > 0) { | 1761 | if (free_blocks > 0) { |
| 1737 | bitmap_bh = read_block_bitmap(sb, group_no); | 1762 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); |
| 1738 | if (!bitmap_bh) | 1763 | if (!bitmap_bh) |
| 1739 | goto io_error; | 1764 | goto io_error; |
| 1740 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | 1765 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, |
| @@ -1770,7 +1795,7 @@ retry_alloc: | |||
| 1770 | continue; | 1795 | continue; |
| 1771 | 1796 | ||
| 1772 | brelse(bitmap_bh); | 1797 | brelse(bitmap_bh); |
| 1773 | bitmap_bh = read_block_bitmap(sb, group_no); | 1798 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); |
| 1774 | if (!bitmap_bh) | 1799 | if (!bitmap_bh) |
| 1775 | goto io_error; | 1800 | goto io_error; |
| 1776 | /* | 1801 | /* |
| @@ -1882,7 +1907,15 @@ allocated: | |||
| 1882 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | 1907 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); |
| 1883 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | 1908 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); |
| 1884 | spin_unlock(sb_bgl_lock(sbi, group_no)); | 1909 | spin_unlock(sb_bgl_lock(sbi, group_no)); |
| 1885 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | 1910 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) |
| 1911 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
| 1912 | |||
| 1913 | if (sbi->s_log_groups_per_flex) { | ||
| 1914 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
| 1915 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 1916 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
| 1917 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 1918 | } | ||
| 1886 | 1919 | ||
| 1887 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | 1920 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); |
| 1888 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | 1921 | err = ext4_journal_dirty_metadata(handle, gdp_bh); |
| @@ -1915,46 +1948,104 @@ out: | |||
| 1915 | return 0; | 1948 | return 0; |
| 1916 | } | 1949 | } |
| 1917 | 1950 | ||
| 1918 | ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, | 1951 | #define EXT4_META_BLOCK 0x1 |
| 1919 | ext4_fsblk_t goal, int *errp) | 1952 | |
| 1953 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | ||
| 1954 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
| 1955 | unsigned long *count, int *errp, int flags) | ||
| 1920 | { | 1956 | { |
| 1921 | struct ext4_allocation_request ar; | 1957 | struct ext4_allocation_request ar; |
| 1922 | ext4_fsblk_t ret; | 1958 | ext4_fsblk_t ret; |
| 1923 | 1959 | ||
| 1924 | if (!test_opt(inode->i_sb, MBALLOC)) { | 1960 | if (!test_opt(inode->i_sb, MBALLOC)) { |
| 1925 | unsigned long count = 1; | 1961 | return ext4_old_new_blocks(handle, inode, goal, count, errp); |
| 1926 | ret = ext4_new_blocks_old(handle, inode, goal, &count, errp); | ||
| 1927 | return ret; | ||
| 1928 | } | 1962 | } |
| 1929 | 1963 | ||
| 1930 | memset(&ar, 0, sizeof(ar)); | 1964 | memset(&ar, 0, sizeof(ar)); |
| 1965 | /* Fill with neighbour allocated blocks */ | ||
| 1966 | |||
| 1931 | ar.inode = inode; | 1967 | ar.inode = inode; |
| 1932 | ar.goal = goal; | 1968 | ar.goal = goal; |
| 1933 | ar.len = 1; | 1969 | ar.len = *count; |
| 1970 | ar.logical = iblock; | ||
| 1971 | |||
| 1972 | if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK)) | ||
| 1973 | /* enable in-core preallocation for data block allocation */ | ||
| 1974 | ar.flags = EXT4_MB_HINT_DATA; | ||
| 1975 | else | ||
| 1976 | /* disable in-core preallocation for non-regular files */ | ||
| 1977 | ar.flags = 0; | ||
| 1978 | |||
| 1934 | ret = ext4_mb_new_blocks(handle, &ar, errp); | 1979 | ret = ext4_mb_new_blocks(handle, &ar, errp); |
| 1980 | *count = ar.len; | ||
| 1935 | return ret; | 1981 | return ret; |
| 1936 | } | 1982 | } |
| 1937 | 1983 | ||
| 1938 | ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1984 | /* |
| 1985 | * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks | ||
| 1986 | * | ||
| 1987 | * @handle: handle to this transaction | ||
| 1988 | * @inode: file inode | ||
| 1989 | * @goal: given target block(filesystem wide) | ||
| 1990 | * @count: total number of blocks need | ||
| 1991 | * @errp: error code | ||
| 1992 | * | ||
| 1993 | * Return 1st allocated block numberon success, *count stores total account | ||
| 1994 | * error stores in errp pointer | ||
| 1995 | */ | ||
| 1996 | ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | ||
| 1939 | ext4_fsblk_t goal, unsigned long *count, int *errp) | 1997 | ext4_fsblk_t goal, unsigned long *count, int *errp) |
| 1940 | { | 1998 | { |
| 1941 | struct ext4_allocation_request ar; | ||
| 1942 | ext4_fsblk_t ret; | 1999 | ext4_fsblk_t ret; |
| 1943 | 2000 | ret = do_blk_alloc(handle, inode, 0, goal, | |
| 1944 | if (!test_opt(inode->i_sb, MBALLOC)) { | 2001 | count, errp, EXT4_META_BLOCK); |
| 1945 | ret = ext4_new_blocks_old(handle, inode, goal, count, errp); | 2002 | /* |
| 1946 | return ret; | 2003 | * Account for the allocated meta blocks |
| 2004 | */ | ||
| 2005 | if (!(*errp)) { | ||
| 2006 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 2007 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | ||
| 2008 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1947 | } | 2009 | } |
| 1948 | |||
| 1949 | memset(&ar, 0, sizeof(ar)); | ||
| 1950 | ar.inode = inode; | ||
| 1951 | ar.goal = goal; | ||
| 1952 | ar.len = *count; | ||
| 1953 | ret = ext4_mb_new_blocks(handle, &ar, errp); | ||
| 1954 | *count = ar.len; | ||
| 1955 | return ret; | 2010 | return ret; |
| 1956 | } | 2011 | } |
| 1957 | 2012 | ||
| 2013 | /* | ||
| 2014 | * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks | ||
| 2015 | * | ||
| 2016 | * @handle: handle to this transaction | ||
| 2017 | * @inode: file inode | ||
| 2018 | * @goal: given target block(filesystem wide) | ||
| 2019 | * @errp: error code | ||
| 2020 | * | ||
| 2021 | * Return allocated block number on success | ||
| 2022 | */ | ||
| 2023 | ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, | ||
| 2024 | ext4_fsblk_t goal, int *errp) | ||
| 2025 | { | ||
| 2026 | unsigned long count = 1; | ||
| 2027 | return ext4_new_meta_blocks(handle, inode, goal, &count, errp); | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | /* | ||
| 2031 | * ext4_new_blocks() -- allocate data blocks | ||
| 2032 | * | ||
| 2033 | * @handle: handle to this transaction | ||
| 2034 | * @inode: file inode | ||
| 2035 | * @goal: given target block(filesystem wide) | ||
| 2036 | * @count: total number of blocks need | ||
| 2037 | * @errp: error code | ||
| 2038 | * | ||
| 2039 | * Return 1st allocated block numberon success, *count stores total account | ||
| 2040 | * error stores in errp pointer | ||
| 2041 | */ | ||
| 2042 | |||
| 2043 | ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | ||
| 2044 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
| 2045 | unsigned long *count, int *errp) | ||
| 2046 | { | ||
| 2047 | return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0); | ||
| 2048 | } | ||
| 1958 | 2049 | ||
| 1959 | /** | 2050 | /** |
| 1960 | * ext4_count_free_blocks() -- count filesystem free blocks | 2051 | * ext4_count_free_blocks() -- count filesystem free blocks |
| @@ -1986,7 +2077,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
| 1986 | continue; | 2077 | continue; |
| 1987 | desc_count += le16_to_cpu(gdp->bg_free_blocks_count); | 2078 | desc_count += le16_to_cpu(gdp->bg_free_blocks_count); |
| 1988 | brelse(bitmap_bh); | 2079 | brelse(bitmap_bh); |
| 1989 | bitmap_bh = read_block_bitmap(sb, i); | 2080 | bitmap_bh = ext4_read_block_bitmap(sb, i); |
| 1990 | if (bitmap_bh == NULL) | 2081 | if (bitmap_bh == NULL) |
| 1991 | continue; | 2082 | continue; |
| 1992 | 2083 | ||
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2bf0331ea194..d3d23d73c08b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
| @@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp, | |||
| 129 | struct buffer_head *bh = NULL; | 129 | struct buffer_head *bh = NULL; |
| 130 | 130 | ||
| 131 | map_bh.b_state = 0; | 131 | map_bh.b_state = 0; |
| 132 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); | 132 | err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, |
| 133 | 0, 0, 0); | ||
| 133 | if (err > 0) { | 134 | if (err > 0) { |
| 134 | pgoff_t index = map_bh.b_blocknr >> | 135 | pgoff_t index = map_bh.b_blocknr >> |
| 135 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 136 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| @@ -272,7 +273,7 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
| 272 | 273 | ||
| 273 | while (n) { | 274 | while (n) { |
| 274 | /* Do the node's children first */ | 275 | /* Do the node's children first */ |
| 275 | if ((n)->rb_left) { | 276 | if (n->rb_left) { |
| 276 | n = n->rb_left; | 277 | n = n->rb_left; |
| 277 | continue; | 278 | continue; |
| 278 | } | 279 | } |
| @@ -301,24 +302,18 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
| 301 | parent->rb_right = NULL; | 302 | parent->rb_right = NULL; |
| 302 | n = parent; | 303 | n = parent; |
| 303 | } | 304 | } |
| 304 | root->rb_node = NULL; | ||
| 305 | } | 305 | } |
| 306 | 306 | ||
| 307 | 307 | ||
| 308 | static struct dir_private_info *create_dir_info(loff_t pos) | 308 | static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) |
| 309 | { | 309 | { |
| 310 | struct dir_private_info *p; | 310 | struct dir_private_info *p; |
| 311 | 311 | ||
| 312 | p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); | 312 | p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); |
| 313 | if (!p) | 313 | if (!p) |
| 314 | return NULL; | 314 | return NULL; |
| 315 | p->root.rb_node = NULL; | ||
| 316 | p->curr_node = NULL; | ||
| 317 | p->extra_fname = NULL; | ||
| 318 | p->last_pos = 0; | ||
| 319 | p->curr_hash = pos2maj_hash(pos); | 315 | p->curr_hash = pos2maj_hash(pos); |
| 320 | p->curr_minor_hash = pos2min_hash(pos); | 316 | p->curr_minor_hash = pos2min_hash(pos); |
| 321 | p->next_hash = 0; | ||
| 322 | return p; | 317 | return p; |
| 323 | } | 318 | } |
| 324 | 319 | ||
| @@ -433,7 +428,7 @@ static int ext4_dx_readdir(struct file * filp, | |||
| 433 | int ret; | 428 | int ret; |
| 434 | 429 | ||
| 435 | if (!info) { | 430 | if (!info) { |
| 436 | info = create_dir_info(filp->f_pos); | 431 | info = ext4_htree_create_dir_info(filp->f_pos); |
| 437 | if (!info) | 432 | if (!info) |
| 438 | return -ENOMEM; | 433 | return -ENOMEM; |
| 439 | filp->private_data = info; | 434 | filp->private_data = info; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8158083f7ac0..303e41cf7b14 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include "ext4_i.h" | 22 | #include "ext4_i.h" |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * The second extended filesystem constants/structures | 25 | * The fourth extended filesystem constants/structures |
| 26 | */ | 26 | */ |
| 27 | 27 | ||
| 28 | /* | 28 | /* |
| @@ -45,7 +45,7 @@ | |||
| 45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
| 46 | do { \ | 46 | do { \ |
| 47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
| 48 | __FILE__, __LINE__, __FUNCTION__); \ | 48 | __FILE__, __LINE__, __func__); \ |
| 49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk (KERN_DEBUG f, ## a); \ |
| 50 | } while (0) | 50 | } while (0) |
| 51 | #else | 51 | #else |
| @@ -74,6 +74,9 @@ | |||
| 74 | #define EXT4_MB_HINT_GOAL_ONLY 256 | 74 | #define EXT4_MB_HINT_GOAL_ONLY 256 |
| 75 | /* goal is meaningful */ | 75 | /* goal is meaningful */ |
| 76 | #define EXT4_MB_HINT_TRY_GOAL 512 | 76 | #define EXT4_MB_HINT_TRY_GOAL 512 |
| 77 | /* blocks already pre-reserved by delayed allocation */ | ||
| 78 | #define EXT4_MB_DELALLOC_RESERVED 1024 | ||
| 79 | |||
| 77 | 80 | ||
| 78 | struct ext4_allocation_request { | 81 | struct ext4_allocation_request { |
| 79 | /* target inode for block we're allocating */ | 82 | /* target inode for block we're allocating */ |
| @@ -170,6 +173,15 @@ struct ext4_group_desc | |||
| 170 | __u32 bg_reserved2[3]; | 173 | __u32 bg_reserved2[3]; |
| 171 | }; | 174 | }; |
| 172 | 175 | ||
| 176 | /* | ||
| 177 | * Structure of a flex block group info | ||
| 178 | */ | ||
| 179 | |||
| 180 | struct flex_groups { | ||
| 181 | __u32 free_inodes; | ||
| 182 | __u32 free_blocks; | ||
| 183 | }; | ||
| 184 | |||
| 173 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ | 185 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ |
| 174 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ | 186 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ |
| 175 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ | 187 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ |
| @@ -527,6 +539,7 @@ do { \ | |||
| 527 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
| 528 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
| 529 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | 541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ |
| 542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | ||
| 530 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
| 531 | #ifndef _LINUX_EXT2_FS_H | 544 | #ifndef _LINUX_EXT2_FS_H |
| 532 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 545 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
| @@ -647,7 +660,10 @@ struct ext4_super_block { | |||
| 647 | __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ | 660 | __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ |
| 648 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 661 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
| 649 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 662 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
| 650 | __u32 s_reserved[163]; /* Padding to the end of the block */ | 663 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
| 664 | __u8 s_reserved_char_pad2; | ||
| 665 | __le16 s_reserved_pad; | ||
| 666 | __u32 s_reserved[162]; /* Padding to the end of the block */ | ||
| 651 | }; | 667 | }; |
| 652 | 668 | ||
| 653 | #ifdef __KERNEL__ | 669 | #ifdef __KERNEL__ |
| @@ -958,12 +974,17 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, | |||
| 958 | extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); | 974 | extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); |
| 959 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | 975 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, |
| 960 | ext4_group_t group); | 976 | ext4_group_t group); |
| 961 | extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, | 977 | extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, |
| 962 | ext4_fsblk_t goal, int *errp); | 978 | ext4_fsblk_t goal, int *errp); |
| 963 | extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, | 979 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
| 964 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 980 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
| 965 | extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | 981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
| 982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
| 983 | unsigned long *count, int *errp); | ||
| 984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
| 966 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 985 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
| 986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | ||
| 987 | ext4_fsblk_t nblocks); | ||
| 967 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, |
| 968 | ext4_fsblk_t block, unsigned long count, int metadata); | 989 | ext4_fsblk_t block, unsigned long count, int metadata); |
| 969 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, |
| @@ -1016,9 +1037,14 @@ extern int __init init_ext4_mballoc(void); | |||
| 1016 | extern void exit_ext4_mballoc(void); | 1037 | extern void exit_ext4_mballoc(void); |
| 1017 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
| 1018 | unsigned long, unsigned long, int, unsigned long *); | 1039 | unsigned long, unsigned long, int, unsigned long *); |
| 1040 | extern int ext4_mb_add_more_groupinfo(struct super_block *sb, | ||
| 1041 | ext4_group_t i, struct ext4_group_desc *desc); | ||
| 1042 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
| 1043 | ext4_grpblk_t add); | ||
| 1019 | 1044 | ||
| 1020 | 1045 | ||
| 1021 | /* inode.c */ | 1046 | /* inode.c */ |
| 1047 | void ext4_da_release_space(struct inode *inode, int used, int to_free); | ||
| 1022 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | 1048 | int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, |
| 1023 | struct buffer_head *bh, ext4_fsblk_t blocknr); | 1049 | struct buffer_head *bh, ext4_fsblk_t blocknr); |
| 1024 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 1050 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, |
| @@ -1033,19 +1059,23 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 1033 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1059 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
| 1034 | extern int ext4_write_inode (struct inode *, int); | 1060 | extern int ext4_write_inode (struct inode *, int); |
| 1035 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1061 | extern int ext4_setattr (struct dentry *, struct iattr *); |
| 1062 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 1063 | struct kstat *stat); | ||
| 1036 | extern void ext4_delete_inode (struct inode *); | 1064 | extern void ext4_delete_inode (struct inode *); |
| 1037 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1065 | extern int ext4_sync_inode (handle_t *, struct inode *); |
| 1038 | extern void ext4_discard_reservation (struct inode *); | 1066 | extern void ext4_discard_reservation (struct inode *); |
| 1039 | extern void ext4_dirty_inode(struct inode *); | 1067 | extern void ext4_dirty_inode(struct inode *); |
| 1040 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1068 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
| 1041 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1069 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
| 1070 | extern int ext4_can_truncate(struct inode *inode); | ||
| 1042 | extern void ext4_truncate (struct inode *); | 1071 | extern void ext4_truncate (struct inode *); |
| 1043 | extern void ext4_set_inode_flags(struct inode *); | 1072 | extern void ext4_set_inode_flags(struct inode *); |
| 1044 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1073 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
| 1045 | extern void ext4_set_aops(struct inode *inode); | 1074 | extern void ext4_set_aops(struct inode *inode); |
| 1046 | extern int ext4_writepage_trans_blocks(struct inode *); | 1075 | extern int ext4_writepage_trans_blocks(struct inode *); |
| 1047 | extern int ext4_block_truncate_page(handle_t *handle, struct page *page, | 1076 | extern int ext4_block_truncate_page(handle_t *handle, |
| 1048 | struct address_space *mapping, loff_t from); | 1077 | struct address_space *mapping, loff_t from); |
| 1078 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | ||
| 1049 | 1079 | ||
| 1050 | /* ioctl.c */ | 1080 | /* ioctl.c */ |
| 1051 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1081 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
| @@ -1159,10 +1189,21 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | |||
| 1159 | } | 1189 | } |
| 1160 | 1190 | ||
| 1161 | 1191 | ||
| 1192 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, | ||
| 1193 | ext4_group_t block_group) | ||
| 1194 | { | ||
| 1195 | return block_group >> sbi->s_log_groups_per_flex; | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) | ||
| 1199 | { | ||
| 1200 | return 1 << sbi->s_log_groups_per_flex; | ||
| 1201 | } | ||
| 1202 | |||
| 1162 | #define ext4_std_error(sb, errno) \ | 1203 | #define ext4_std_error(sb, errno) \ |
| 1163 | do { \ | 1204 | do { \ |
| 1164 | if ((errno)) \ | 1205 | if ((errno)) \ |
| 1165 | __ext4_std_error((sb), __FUNCTION__, (errno)); \ | 1206 | __ext4_std_error((sb), __func__, (errno)); \ |
| 1166 | } while (0) | 1207 | } while (0) |
| 1167 | 1208 | ||
| 1168 | /* | 1209 | /* |
| @@ -1191,7 +1232,7 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 1191 | ext4_lblk_t iblock, | 1232 | ext4_lblk_t iblock, |
| 1192 | unsigned long max_blocks, struct buffer_head *bh_result, | 1233 | unsigned long max_blocks, struct buffer_head *bh_result, |
| 1193 | int create, int extend_disksize); | 1234 | int create, int extend_disksize); |
| 1194 | extern void ext4_ext_truncate(struct inode *, struct page *); | 1235 | extern void ext4_ext_truncate(struct inode *); |
| 1195 | extern void ext4_ext_init(struct super_block *); | 1236 | extern void ext4_ext_init(struct super_block *); |
| 1196 | extern void ext4_ext_release(struct super_block *); | 1237 | extern void ext4_ext_release(struct super_block *); |
| 1197 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 1238 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, |
| @@ -1199,7 +1240,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | |||
| 1199 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, | 1240 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, |
| 1200 | sector_t block, unsigned long max_blocks, | 1241 | sector_t block, unsigned long max_blocks, |
| 1201 | struct buffer_head *bh, int create, | 1242 | struct buffer_head *bh, int create, |
| 1202 | int extend_disksize); | 1243 | int extend_disksize, int flag); |
| 1203 | #endif /* __KERNEL__ */ | 1244 | #endif /* __KERNEL__ */ |
| 1204 | 1245 | ||
| 1205 | #endif /* _EXT4_H */ | 1246 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 75333b595fab..6c166c0a54b7 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
| @@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) | |||
| 212 | (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); | 212 | (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | ||
| 215 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
| 216 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
| 217 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index 26a4ae255d79..ef7409f0e7e4 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
| @@ -79,7 +79,7 @@ struct ext4_ext_cache { | |||
| 79 | }; | 79 | }; |
| 80 | 80 | ||
| 81 | /* | 81 | /* |
| 82 | * third extended file system inode data in memory | 82 | * fourth extended file system inode data in memory |
| 83 | */ | 83 | */ |
| 84 | struct ext4_inode_info { | 84 | struct ext4_inode_info { |
| 85 | __le32 i_data[15]; /* unconverted */ | 85 | __le32 i_data[15]; /* unconverted */ |
| @@ -150,6 +150,7 @@ struct ext4_inode_info { | |||
| 150 | */ | 150 | */ |
| 151 | struct rw_semaphore i_data_sem; | 151 | struct rw_semaphore i_data_sem; |
| 152 | struct inode vfs_inode; | 152 | struct inode vfs_inode; |
| 153 | struct jbd2_inode jinode; | ||
| 153 | 154 | ||
| 154 | unsigned long i_ext_generation; | 155 | unsigned long i_ext_generation; |
| 155 | struct ext4_ext_cache i_cached_extent; | 156 | struct ext4_ext_cache i_cached_extent; |
| @@ -162,6 +163,13 @@ struct ext4_inode_info { | |||
| 162 | /* mballoc */ | 163 | /* mballoc */ |
| 163 | struct list_head i_prealloc_list; | 164 | struct list_head i_prealloc_list; |
| 164 | spinlock_t i_prealloc_lock; | 165 | spinlock_t i_prealloc_lock; |
| 166 | |||
| 167 | /* allocation reservation info for delalloc */ | ||
| 168 | unsigned long i_reserved_data_blocks; | ||
| 169 | unsigned long i_reserved_meta_blocks; | ||
| 170 | unsigned long i_allocated_meta_blocks; | ||
| 171 | unsigned short i_delalloc_reserved_flag; | ||
| 172 | spinlock_t i_block_reservation_lock; | ||
| 165 | }; | 173 | }; |
| 166 | 174 | ||
| 167 | #endif /* _EXT4_I */ | 175 | #endif /* _EXT4_I */ |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 9255a7d28b24..eb8bc3afe6e9 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
| @@ -142,19 +142,17 @@ int __ext4_journal_dirty_metadata(const char *where, | |||
| 142 | handle_t *handle, struct buffer_head *bh); | 142 | handle_t *handle, struct buffer_head *bh); |
| 143 | 143 | ||
| 144 | #define ext4_journal_get_undo_access(handle, bh) \ | 144 | #define ext4_journal_get_undo_access(handle, bh) \ |
| 145 | __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) | 145 | __ext4_journal_get_undo_access(__func__, (handle), (bh)) |
| 146 | #define ext4_journal_get_write_access(handle, bh) \ | 146 | #define ext4_journal_get_write_access(handle, bh) \ |
| 147 | __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) | 147 | __ext4_journal_get_write_access(__func__, (handle), (bh)) |
| 148 | #define ext4_journal_revoke(handle, blocknr, bh) \ | 148 | #define ext4_journal_revoke(handle, blocknr, bh) \ |
| 149 | __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) | 149 | __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) |
| 150 | #define ext4_journal_get_create_access(handle, bh) \ | 150 | #define ext4_journal_get_create_access(handle, bh) \ |
| 151 | __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) | 151 | __ext4_journal_get_create_access(__func__, (handle), (bh)) |
| 152 | #define ext4_journal_dirty_metadata(handle, bh) \ | 152 | #define ext4_journal_dirty_metadata(handle, bh) \ |
| 153 | __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) | 153 | __ext4_journal_dirty_metadata(__func__, (handle), (bh)) |
| 154 | #define ext4_journal_forget(handle, bh) \ | 154 | #define ext4_journal_forget(handle, bh) \ |
| 155 | __ext4_journal_forget(__FUNCTION__, (handle), (bh)) | 155 | __ext4_journal_forget(__func__, (handle), (bh)) |
| 156 | |||
| 157 | int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); | ||
| 158 | 156 | ||
| 159 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); | 157 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); |
| 160 | int __ext4_journal_stop(const char *where, handle_t *handle); | 158 | int __ext4_journal_stop(const char *where, handle_t *handle); |
| @@ -165,7 +163,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) | |||
| 165 | } | 163 | } |
| 166 | 164 | ||
| 167 | #define ext4_journal_stop(handle) \ | 165 | #define ext4_journal_stop(handle) \ |
| 168 | __ext4_journal_stop(__FUNCTION__, (handle)) | 166 | __ext4_journal_stop(__func__, (handle)) |
| 169 | 167 | ||
| 170 | static inline handle_t *ext4_journal_current_handle(void) | 168 | static inline handle_t *ext4_journal_current_handle(void) |
| 171 | { | 169 | { |
| @@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal) | |||
| 192 | return jbd2_journal_force_commit(journal); | 190 | return jbd2_journal_force_commit(journal); |
| 193 | } | 191 | } |
| 194 | 192 | ||
| 193 | static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) | ||
| 194 | { | ||
| 195 | return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); | ||
| 196 | } | ||
| 197 | |||
| 195 | /* super.c */ | 198 | /* super.c */ |
| 196 | int ext4_force_commit(struct super_block *sb); | 199 | int ext4_force_commit(struct super_block *sb); |
| 197 | 200 | ||
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 5802e69f2191..6300226d5531 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include <linux/rbtree.h> | 25 | #include <linux/rbtree.h> |
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * third extended-fs super-block data in memory | 28 | * fourth extended-fs super-block data in memory |
| 29 | */ | 29 | */ |
| 30 | struct ext4_sb_info { | 30 | struct ext4_sb_info { |
| 31 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | 31 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ |
| @@ -143,6 +143,9 @@ struct ext4_sb_info { | |||
| 143 | 143 | ||
| 144 | /* locality groups */ | 144 | /* locality groups */ |
| 145 | struct ext4_locality_group *s_locality_groups; | 145 | struct ext4_locality_group *s_locality_groups; |
| 146 | |||
| 147 | unsigned int s_log_groups_per_flex; | ||
| 148 | struct flex_groups *s_flex_groups; | ||
| 146 | }; | 149 | }; |
| 147 | 150 | ||
| 148 | #endif /* _EXT4_SB */ | 151 | #endif /* _EXT4_SB */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 47929c4e3dae..42c4c0c892ed 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | |||
| 92 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 92 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) | 95 | static int ext4_ext_journal_restart(handle_t *handle, int needed) |
| 96 | { | 96 | { |
| 97 | int err; | 97 | int err; |
| 98 | 98 | ||
| 99 | if (handle->h_buffer_credits > needed) | 99 | if (handle->h_buffer_credits > needed) |
| 100 | return handle; | 100 | return 0; |
| 101 | if (!ext4_journal_extend(handle, needed)) | 101 | err = ext4_journal_extend(handle, needed); |
| 102 | return handle; | 102 | if (err) |
| 103 | err = ext4_journal_restart(handle, needed); | 103 | return err; |
| 104 | 104 | return ext4_journal_restart(handle, needed); | |
| 105 | return handle; | ||
| 106 | } | 105 | } |
| 107 | 106 | ||
| 108 | /* | 107 | /* |
| @@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
| 180 | return bg_start + colour + block; | 179 | return bg_start + colour + block; |
| 181 | } | 180 | } |
| 182 | 181 | ||
| 182 | /* | ||
| 183 | * Allocation for a meta data block | ||
| 184 | */ | ||
| 183 | static ext4_fsblk_t | 185 | static ext4_fsblk_t |
| 184 | ext4_ext_new_block(handle_t *handle, struct inode *inode, | 186 | ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, |
| 185 | struct ext4_ext_path *path, | 187 | struct ext4_ext_path *path, |
| 186 | struct ext4_extent *ex, int *err) | 188 | struct ext4_extent *ex, int *err) |
| 187 | { | 189 | { |
| 188 | ext4_fsblk_t goal, newblock; | 190 | ext4_fsblk_t goal, newblock; |
| 189 | 191 | ||
| 190 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); | 192 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); |
| 191 | newblock = ext4_new_block(handle, inode, goal, err); | 193 | newblock = ext4_new_meta_block(handle, inode, goal, err); |
| 192 | return newblock; | 194 | return newblock; |
| 193 | } | 195 | } |
| 194 | 196 | ||
| @@ -246,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode) | |||
| 246 | return size; | 248 | return size; |
| 247 | } | 249 | } |
| 248 | 250 | ||
| 251 | /* | ||
| 252 | * Calculate the number of metadata blocks needed | ||
| 253 | * to allocate @blocks | ||
| 254 | * Worse case is one block per extent | ||
| 255 | */ | ||
| 256 | int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | ||
| 257 | { | ||
| 258 | int lcap, icap, rcap, leafs, idxs, num; | ||
| 259 | int newextents = blocks; | ||
| 260 | |||
| 261 | rcap = ext4_ext_space_root_idx(inode); | ||
| 262 | lcap = ext4_ext_space_block(inode); | ||
| 263 | icap = ext4_ext_space_block_idx(inode); | ||
| 264 | |||
| 265 | /* number of new leaf blocks needed */ | ||
| 266 | num = leafs = (newextents + lcap - 1) / lcap; | ||
| 267 | |||
| 268 | /* | ||
| 269 | * Worse case, we need separate index block(s) | ||
| 270 | * to link all new leaf blocks | ||
| 271 | */ | ||
| 272 | idxs = (leafs + icap - 1) / icap; | ||
| 273 | do { | ||
| 274 | num += idxs; | ||
| 275 | idxs = (idxs + icap - 1) / icap; | ||
| 276 | } while (idxs > rcap); | ||
| 277 | |||
| 278 | return num; | ||
| 279 | } | ||
| 280 | |||
| 249 | static int | 281 | static int |
| 250 | ext4_ext_max_entries(struct inode *inode, int depth) | 282 | ext4_ext_max_entries(struct inode *inode, int depth) |
| 251 | { | 283 | { |
| @@ -524,6 +556,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
| 524 | alloc = 1; | 556 | alloc = 1; |
| 525 | } | 557 | } |
| 526 | path[0].p_hdr = eh; | 558 | path[0].p_hdr = eh; |
| 559 | path[0].p_bh = NULL; | ||
| 527 | 560 | ||
| 528 | i = depth; | 561 | i = depth; |
| 529 | /* walk through the tree */ | 562 | /* walk through the tree */ |
| @@ -552,12 +585,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
| 552 | } | 585 | } |
| 553 | 586 | ||
| 554 | path[ppos].p_depth = i; | 587 | path[ppos].p_depth = i; |
| 555 | path[ppos].p_hdr = eh; | ||
| 556 | path[ppos].p_ext = NULL; | 588 | path[ppos].p_ext = NULL; |
| 557 | path[ppos].p_idx = NULL; | 589 | path[ppos].p_idx = NULL; |
| 558 | 590 | ||
| 559 | /* find extent */ | 591 | /* find extent */ |
| 560 | ext4_ext_binsearch(inode, path + ppos, block); | 592 | ext4_ext_binsearch(inode, path + ppos, block); |
| 593 | /* if not an empty leaf */ | ||
| 594 | if (path[ppos].p_ext) | ||
| 595 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | ||
| 561 | 596 | ||
| 562 | ext4_ext_show_path(inode, path); | 597 | ext4_ext_show_path(inode, path); |
| 563 | 598 | ||
| @@ -688,7 +723,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
| 688 | /* allocate all needed blocks */ | 723 | /* allocate all needed blocks */ |
| 689 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); | 724 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); |
| 690 | for (a = 0; a < depth - at; a++) { | 725 | for (a = 0; a < depth - at; a++) { |
| 691 | newblock = ext4_ext_new_block(handle, inode, path, newext, &err); | 726 | newblock = ext4_ext_new_meta_block(handle, inode, path, |
| 727 | newext, &err); | ||
| 692 | if (newblock == 0) | 728 | if (newblock == 0) |
| 693 | goto cleanup; | 729 | goto cleanup; |
| 694 | ablocks[a] = newblock; | 730 | ablocks[a] = newblock; |
| @@ -884,7 +920,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
| 884 | ext4_fsblk_t newblock; | 920 | ext4_fsblk_t newblock; |
| 885 | int err = 0; | 921 | int err = 0; |
| 886 | 922 | ||
| 887 | newblock = ext4_ext_new_block(handle, inode, path, newext, &err); | 923 | newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); |
| 888 | if (newblock == 0) | 924 | if (newblock == 0) |
| 889 | return err; | 925 | return err; |
| 890 | 926 | ||
| @@ -981,6 +1017,8 @@ repeat: | |||
| 981 | /* if we found index with free entry, then use that | 1017 | /* if we found index with free entry, then use that |
| 982 | * entry: create all needed subtree and add new leaf */ | 1018 | * entry: create all needed subtree and add new leaf */ |
| 983 | err = ext4_ext_split(handle, inode, path, newext, i); | 1019 | err = ext4_ext_split(handle, inode, path, newext, i); |
| 1020 | if (err) | ||
| 1021 | goto out; | ||
| 984 | 1022 | ||
| 985 | /* refill path */ | 1023 | /* refill path */ |
| 986 | ext4_ext_drop_refs(path); | 1024 | ext4_ext_drop_refs(path); |
| @@ -1883,11 +1921,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 1883 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 1921 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
| 1884 | #endif | 1922 | #endif |
| 1885 | 1923 | ||
| 1886 | handle = ext4_ext_journal_restart(handle, credits); | 1924 | err = ext4_ext_journal_restart(handle, credits); |
| 1887 | if (IS_ERR(handle)) { | 1925 | if (err) |
| 1888 | err = PTR_ERR(handle); | ||
| 1889 | goto out; | 1926 | goto out; |
| 1890 | } | ||
| 1891 | 1927 | ||
| 1892 | err = ext4_ext_get_access(handle, inode, path + depth); | 1928 | err = ext4_ext_get_access(handle, inode, path + depth); |
| 1893 | if (err) | 1929 | if (err) |
| @@ -2529,6 +2565,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2529 | int err = 0, depth, ret; | 2565 | int err = 0, depth, ret; |
| 2530 | unsigned long allocated = 0; | 2566 | unsigned long allocated = 0; |
| 2531 | struct ext4_allocation_request ar; | 2567 | struct ext4_allocation_request ar; |
| 2568 | loff_t disksize; | ||
| 2532 | 2569 | ||
| 2533 | __clear_bit(BH_New, &bh_result->b_state); | 2570 | __clear_bit(BH_New, &bh_result->b_state); |
| 2534 | ext_debug("blocks %u/%lu requested for inode %u\n", | 2571 | ext_debug("blocks %u/%lu requested for inode %u\n", |
| @@ -2616,8 +2653,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2616 | */ | 2653 | */ |
| 2617 | if (allocated > max_blocks) | 2654 | if (allocated > max_blocks) |
| 2618 | allocated = max_blocks; | 2655 | allocated = max_blocks; |
| 2619 | /* mark the buffer unwritten */ | 2656 | set_buffer_unwritten(bh_result); |
| 2620 | __set_bit(BH_Unwritten, &bh_result->b_state); | ||
| 2621 | goto out2; | 2657 | goto out2; |
| 2622 | } | 2658 | } |
| 2623 | 2659 | ||
| @@ -2716,14 +2752,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2716 | goto out2; | 2752 | goto out2; |
| 2717 | } | 2753 | } |
| 2718 | 2754 | ||
| 2719 | if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) | ||
| 2720 | EXT4_I(inode)->i_disksize = inode->i_size; | ||
| 2721 | |||
| 2722 | /* previous routine could use block we allocated */ | 2755 | /* previous routine could use block we allocated */ |
| 2723 | newblock = ext_pblock(&newex); | 2756 | newblock = ext_pblock(&newex); |
| 2724 | allocated = ext4_ext_get_actual_len(&newex); | 2757 | allocated = ext4_ext_get_actual_len(&newex); |
| 2725 | outnew: | 2758 | outnew: |
| 2726 | __set_bit(BH_New, &bh_result->b_state); | 2759 | if (extend_disksize) { |
| 2760 | disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits; | ||
| 2761 | if (disksize > i_size_read(inode)) | ||
| 2762 | disksize = i_size_read(inode); | ||
| 2763 | if (disksize > EXT4_I(inode)->i_disksize) | ||
| 2764 | EXT4_I(inode)->i_disksize = disksize; | ||
| 2765 | } | ||
| 2766 | |||
| 2767 | set_buffer_new(bh_result); | ||
| 2727 | 2768 | ||
| 2728 | /* Cache only when it is _not_ an uninitialized extent */ | 2769 | /* Cache only when it is _not_ an uninitialized extent */ |
| 2729 | if (create != EXT4_CREATE_UNINITIALIZED_EXT) | 2770 | if (create != EXT4_CREATE_UNINITIALIZED_EXT) |
| @@ -2733,7 +2774,7 @@ out: | |||
| 2733 | if (allocated > max_blocks) | 2774 | if (allocated > max_blocks) |
| 2734 | allocated = max_blocks; | 2775 | allocated = max_blocks; |
| 2735 | ext4_ext_show_leaf(inode, path); | 2776 | ext4_ext_show_leaf(inode, path); |
| 2736 | __set_bit(BH_Mapped, &bh_result->b_state); | 2777 | set_buffer_mapped(bh_result); |
| 2737 | bh_result->b_bdev = inode->i_sb->s_bdev; | 2778 | bh_result->b_bdev = inode->i_sb->s_bdev; |
| 2738 | bh_result->b_blocknr = newblock; | 2779 | bh_result->b_blocknr = newblock; |
| 2739 | out2: | 2780 | out2: |
| @@ -2744,7 +2785,7 @@ out2: | |||
| 2744 | return err ? err : allocated; | 2785 | return err ? err : allocated; |
| 2745 | } | 2786 | } |
| 2746 | 2787 | ||
| 2747 | void ext4_ext_truncate(struct inode * inode, struct page *page) | 2788 | void ext4_ext_truncate(struct inode *inode) |
| 2748 | { | 2789 | { |
| 2749 | struct address_space *mapping = inode->i_mapping; | 2790 | struct address_space *mapping = inode->i_mapping; |
| 2750 | struct super_block *sb = inode->i_sb; | 2791 | struct super_block *sb = inode->i_sb; |
| @@ -2757,18 +2798,14 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2757 | */ | 2798 | */ |
| 2758 | err = ext4_writepage_trans_blocks(inode) + 3; | 2799 | err = ext4_writepage_trans_blocks(inode) + 3; |
| 2759 | handle = ext4_journal_start(inode, err); | 2800 | handle = ext4_journal_start(inode, err); |
| 2760 | if (IS_ERR(handle)) { | 2801 | if (IS_ERR(handle)) |
| 2761 | if (page) { | ||
| 2762 | clear_highpage(page); | ||
| 2763 | flush_dcache_page(page); | ||
| 2764 | unlock_page(page); | ||
| 2765 | page_cache_release(page); | ||
| 2766 | } | ||
| 2767 | return; | 2802 | return; |
| 2768 | } | ||
| 2769 | 2803 | ||
| 2770 | if (page) | 2804 | if (inode->i_size & (sb->s_blocksize - 1)) |
| 2771 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 2805 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
| 2806 | |||
| 2807 | if (ext4_orphan_add(handle, inode)) | ||
| 2808 | goto out_stop; | ||
| 2772 | 2809 | ||
| 2773 | down_write(&EXT4_I(inode)->i_data_sem); | 2810 | down_write(&EXT4_I(inode)->i_data_sem); |
| 2774 | ext4_ext_invalidate_cache(inode); | 2811 | ext4_ext_invalidate_cache(inode); |
| @@ -2780,8 +2817,6 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2780 | * Probably we need not scan at all, | 2817 | * Probably we need not scan at all, |
| 2781 | * because page truncation is enough. | 2818 | * because page truncation is enough. |
| 2782 | */ | 2819 | */ |
| 2783 | if (ext4_orphan_add(handle, inode)) | ||
| 2784 | goto out_stop; | ||
| 2785 | 2820 | ||
| 2786 | /* we have to know where to truncate from in crash case */ | 2821 | /* we have to know where to truncate from in crash case */ |
| 2787 | EXT4_I(inode)->i_disksize = inode->i_size; | 2822 | EXT4_I(inode)->i_disksize = inode->i_size; |
| @@ -2798,6 +2833,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2798 | handle->h_sync = 1; | 2833 | handle->h_sync = 1; |
| 2799 | 2834 | ||
| 2800 | out_stop: | 2835 | out_stop: |
| 2836 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2801 | /* | 2837 | /* |
| 2802 | * If this was a simple ftruncate() and the file will remain alive, | 2838 | * If this was a simple ftruncate() and the file will remain alive, |
| 2803 | * then we need to clear up the orphan record which we created above. | 2839 | * then we need to clear up the orphan record which we created above. |
| @@ -2808,7 +2844,6 @@ out_stop: | |||
| 2808 | if (inode->i_nlink) | 2844 | if (inode->i_nlink) |
| 2809 | ext4_orphan_del(handle, inode); | 2845 | ext4_orphan_del(handle, inode); |
| 2810 | 2846 | ||
| 2811 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2812 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 2847 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
| 2813 | ext4_mark_inode_dirty(handle, inode); | 2848 | ext4_mark_inode_dirty(handle, inode); |
| 2814 | ext4_journal_stop(handle); | 2849 | ext4_journal_stop(handle); |
| @@ -2911,7 +2946,7 @@ retry: | |||
| 2911 | } | 2946 | } |
| 2912 | ret = ext4_get_blocks_wrap(handle, inode, block, | 2947 | ret = ext4_get_blocks_wrap(handle, inode, block, |
| 2913 | max_blocks, &map_bh, | 2948 | max_blocks, &map_bh, |
| 2914 | EXT4_CREATE_UNINITIALIZED_EXT, 0); | 2949 | EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); |
| 2915 | if (ret <= 0) { | 2950 | if (ret <= 0) { |
| 2916 | #ifdef EXT4FS_DEBUG | 2951 | #ifdef EXT4FS_DEBUG |
| 2917 | WARN_ON(ret <= 0); | 2952 | WARN_ON(ret <= 0); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4159be6366ab..430eb7978db4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -123,6 +123,23 @@ force_commit: | |||
| 123 | return ret; | 123 | return ret; |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | static struct vm_operations_struct ext4_file_vm_ops = { | ||
| 127 | .fault = filemap_fault, | ||
| 128 | .page_mkwrite = ext4_page_mkwrite, | ||
| 129 | }; | ||
| 130 | |||
| 131 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 132 | { | ||
| 133 | struct address_space *mapping = file->f_mapping; | ||
| 134 | |||
| 135 | if (!mapping->a_ops->readpage) | ||
| 136 | return -ENOEXEC; | ||
| 137 | file_accessed(file); | ||
| 138 | vma->vm_ops = &ext4_file_vm_ops; | ||
| 139 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
| 140 | return 0; | ||
| 141 | } | ||
| 142 | |||
| 126 | const struct file_operations ext4_file_operations = { | 143 | const struct file_operations ext4_file_operations = { |
| 127 | .llseek = generic_file_llseek, | 144 | .llseek = generic_file_llseek, |
| 128 | .read = do_sync_read, | 145 | .read = do_sync_read, |
| @@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = { | |||
| 133 | #ifdef CONFIG_COMPAT | 150 | #ifdef CONFIG_COMPAT |
| 134 | .compat_ioctl = ext4_compat_ioctl, | 151 | .compat_ioctl = ext4_compat_ioctl, |
| 135 | #endif | 152 | #endif |
| 136 | .mmap = generic_file_mmap, | 153 | .mmap = ext4_file_mmap, |
| 137 | .open = generic_file_open, | 154 | .open = generic_file_open, |
| 138 | .release = ext4_release_file, | 155 | .release = ext4_release_file, |
| 139 | .fsync = ext4_sync_file, | 156 | .fsync = ext4_sync_file, |
| @@ -144,6 +161,7 @@ const struct file_operations ext4_file_operations = { | |||
| 144 | const struct inode_operations ext4_file_inode_operations = { | 161 | const struct inode_operations ext4_file_inode_operations = { |
| 145 | .truncate = ext4_truncate, | 162 | .truncate = ext4_truncate, |
| 146 | .setattr = ext4_setattr, | 163 | .setattr = ext4_setattr, |
| 164 | .getattr = ext4_getattr, | ||
| 147 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 165 | #ifdef CONFIG_EXT4DEV_FS_XATTR |
| 148 | .setxattr = generic_setxattr, | 166 | .setxattr = generic_setxattr, |
| 149 | .getxattr = generic_getxattr, | 167 | .getxattr = generic_getxattr, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 1c8ba48d4f8d..a45c3737ad31 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
| 29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
| 30 | #include <linux/blkdev.h> | ||
| 30 | #include "ext4.h" | 31 | #include "ext4.h" |
| 31 | #include "ext4_jbd2.h" | 32 | #include "ext4_jbd2.h" |
| 32 | 33 | ||
| @@ -45,6 +46,7 @@ | |||
| 45 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) |
| 46 | { | 47 | { |
| 47 | struct inode *inode = dentry->d_inode; | 48 | struct inode *inode = dentry->d_inode; |
| 49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
| 48 | int ret = 0; | 50 | int ret = 0; |
| 49 | 51 | ||
| 50 | J_ASSERT(ext4_journal_current_handle() == NULL); | 52 | J_ASSERT(ext4_journal_current_handle() == NULL); |
| @@ -85,6 +87,8 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
| 85 | .nr_to_write = 0, /* sys_fsync did this */ | 87 | .nr_to_write = 0, /* sys_fsync did this */ |
| 86 | }; | 88 | }; |
| 87 | ret = sync_inode(inode, &wbc); | 89 | ret = sync_inode(inode, &wbc); |
| 90 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
| 91 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
| 88 | } | 92 | } |
| 89 | out: | 93 | out: |
| 90 | return ret; | 94 | return ret; |
diff --git a/fs/ext4/group.h b/fs/ext4/group.h index 7eb0604e7eea..c2c0a8d06d0e 100644 --- a/fs/ext4/group.h +++ b/fs/ext4/group.h | |||
| @@ -13,7 +13,7 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | |||
| 13 | struct ext4_group_desc *gdp); | 13 | struct ext4_group_desc *gdp); |
| 14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | 14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, |
| 15 | struct ext4_group_desc *gdp); | 15 | struct ext4_group_desc *gdp); |
| 16 | struct buffer_head *read_block_bitmap(struct super_block *sb, | 16 | struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, |
| 17 | ext4_group_t block_group); | 17 | ext4_group_t block_group); |
| 18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | 18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, |
| 19 | struct buffer_head *bh, | 19 | struct buffer_head *bh, |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c6efbab0c801..a92eb305344f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
| 157 | struct ext4_super_block * es; | 157 | struct ext4_super_block * es; |
| 158 | struct ext4_sb_info *sbi; | 158 | struct ext4_sb_info *sbi; |
| 159 | int fatal = 0, err; | 159 | int fatal = 0, err; |
| 160 | ext4_group_t flex_group; | ||
| 160 | 161 | ||
| 161 | if (atomic_read(&inode->i_count) > 1) { | 162 | if (atomic_read(&inode->i_count) > 1) { |
| 162 | printk ("ext4_free_inode: inode has count=%d\n", | 163 | printk ("ext4_free_inode: inode has count=%d\n", |
| @@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
| 232 | if (is_directory) | 233 | if (is_directory) |
| 233 | percpu_counter_dec(&sbi->s_dirs_counter); | 234 | percpu_counter_dec(&sbi->s_dirs_counter); |
| 234 | 235 | ||
| 236 | if (sbi->s_log_groups_per_flex) { | ||
| 237 | flex_group = ext4_flex_group(sbi, block_group); | ||
| 238 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 239 | sbi->s_flex_groups[flex_group].free_inodes++; | ||
| 240 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 241 | } | ||
| 235 | } | 242 | } |
| 236 | BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); | 243 | BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); |
| 237 | err = ext4_journal_dirty_metadata(handle, bh2); | 244 | err = ext4_journal_dirty_metadata(handle, bh2); |
| @@ -286,6 +293,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
| 286 | return ret; | 293 | return ret; |
| 287 | } | 294 | } |
| 288 | 295 | ||
| 296 | #define free_block_ratio 10 | ||
| 297 | |||
| 298 | static int find_group_flex(struct super_block *sb, struct inode *parent, | ||
| 299 | ext4_group_t *best_group) | ||
| 300 | { | ||
| 301 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 302 | struct ext4_group_desc *desc; | ||
| 303 | struct buffer_head *bh; | ||
| 304 | struct flex_groups *flex_group = sbi->s_flex_groups; | ||
| 305 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | ||
| 306 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | ||
| 307 | ext4_group_t ngroups = sbi->s_groups_count; | ||
| 308 | int flex_size = ext4_flex_bg_size(sbi); | ||
| 309 | ext4_group_t best_flex = parent_fbg_group; | ||
| 310 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | ||
| 311 | int flexbg_free_blocks; | ||
| 312 | int flex_freeb_ratio; | ||
| 313 | ext4_group_t n_fbg_groups; | ||
| 314 | ext4_group_t i; | ||
| 315 | |||
| 316 | n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> | ||
| 317 | sbi->s_log_groups_per_flex; | ||
| 318 | |||
| 319 | find_close_to_parent: | ||
| 320 | flexbg_free_blocks = flex_group[best_flex].free_blocks; | ||
| 321 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
| 322 | if (flex_group[best_flex].free_inodes && | ||
| 323 | flex_freeb_ratio > free_block_ratio) | ||
| 324 | goto found_flexbg; | ||
| 325 | |||
| 326 | if (best_flex && best_flex == parent_fbg_group) { | ||
| 327 | best_flex--; | ||
| 328 | goto find_close_to_parent; | ||
| 329 | } | ||
| 330 | |||
| 331 | for (i = 0; i < n_fbg_groups; i++) { | ||
| 332 | if (i == parent_fbg_group || i == parent_fbg_group - 1) | ||
| 333 | continue; | ||
| 334 | |||
| 335 | flexbg_free_blocks = flex_group[i].free_blocks; | ||
| 336 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
| 337 | |||
| 338 | if (flex_freeb_ratio > free_block_ratio && | ||
| 339 | flex_group[i].free_inodes) { | ||
| 340 | best_flex = i; | ||
| 341 | goto found_flexbg; | ||
| 342 | } | ||
| 343 | |||
| 344 | if (best_flex < 0 || | ||
| 345 | (flex_group[i].free_blocks > | ||
| 346 | flex_group[best_flex].free_blocks && | ||
| 347 | flex_group[i].free_inodes)) | ||
| 348 | best_flex = i; | ||
| 349 | } | ||
| 350 | |||
| 351 | if (!flex_group[best_flex].free_inodes || | ||
| 352 | !flex_group[best_flex].free_blocks) | ||
| 353 | return -1; | ||
| 354 | |||
| 355 | found_flexbg: | ||
| 356 | for (i = best_flex * flex_size; i < ngroups && | ||
| 357 | i < (best_flex + 1) * flex_size; i++) { | ||
| 358 | desc = ext4_get_group_desc(sb, i, &bh); | ||
| 359 | if (le16_to_cpu(desc->bg_free_inodes_count)) { | ||
| 360 | *best_group = i; | ||
| 361 | goto out; | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | return -1; | ||
| 366 | out: | ||
| 367 | return 0; | ||
| 368 | } | ||
| 369 | |||
| 289 | /* | 370 | /* |
| 290 | * Orlov's allocator for directories. | 371 | * Orlov's allocator for directories. |
| 291 | * | 372 | * |
| @@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 501 | struct inode *ret; | 582 | struct inode *ret; |
| 502 | ext4_group_t i; | 583 | ext4_group_t i; |
| 503 | int free = 0; | 584 | int free = 0; |
| 585 | ext4_group_t flex_group; | ||
| 504 | 586 | ||
| 505 | /* Cannot create files in a deleted directory */ | 587 | /* Cannot create files in a deleted directory */ |
| 506 | if (!dir || !dir->i_nlink) | 588 | if (!dir || !dir->i_nlink) |
| @@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 514 | 596 | ||
| 515 | sbi = EXT4_SB(sb); | 597 | sbi = EXT4_SB(sb); |
| 516 | es = sbi->s_es; | 598 | es = sbi->s_es; |
| 599 | |||
| 600 | if (sbi->s_log_groups_per_flex) { | ||
| 601 | ret2 = find_group_flex(sb, dir, &group); | ||
| 602 | goto got_group; | ||
| 603 | } | ||
| 604 | |||
| 517 | if (S_ISDIR(mode)) { | 605 | if (S_ISDIR(mode)) { |
| 518 | if (test_opt (sb, OLDALLOC)) | 606 | if (test_opt (sb, OLDALLOC)) |
| 519 | ret2 = find_group_dir(sb, dir, &group); | 607 | ret2 = find_group_dir(sb, dir, &group); |
| @@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 522 | } else | 610 | } else |
| 523 | ret2 = find_group_other(sb, dir, &group); | 611 | ret2 = find_group_other(sb, dir, &group); |
| 524 | 612 | ||
| 613 | got_group: | ||
| 525 | err = -ENOSPC; | 614 | err = -ENOSPC; |
| 526 | if (ret2 == -1) | 615 | if (ret2 == -1) |
| 527 | goto out; | 616 | goto out; |
| @@ -600,7 +689,7 @@ got: | |||
| 600 | /* We may have to initialize the block bitmap if it isn't already */ | 689 | /* We may have to initialize the block bitmap if it isn't already */ |
| 601 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && | 690 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && |
| 602 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 691 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 603 | struct buffer_head *block_bh = read_block_bitmap(sb, group); | 692 | struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group); |
| 604 | 693 | ||
| 605 | BUFFER_TRACE(block_bh, "get block bitmap access"); | 694 | BUFFER_TRACE(block_bh, "get block bitmap access"); |
| 606 | err = ext4_journal_get_write_access(handle, block_bh); | 695 | err = ext4_journal_get_write_access(handle, block_bh); |
| @@ -676,6 +765,13 @@ got: | |||
| 676 | percpu_counter_inc(&sbi->s_dirs_counter); | 765 | percpu_counter_inc(&sbi->s_dirs_counter); |
| 677 | sb->s_dirt = 1; | 766 | sb->s_dirt = 1; |
| 678 | 767 | ||
| 768 | if (sbi->s_log_groups_per_flex) { | ||
| 769 | flex_group = ext4_flex_group(sbi, group); | ||
| 770 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 771 | sbi->s_flex_groups[flex_group].free_inodes--; | ||
| 772 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 773 | } | ||
| 774 | |||
| 679 | inode->i_uid = current->fsuid; | 775 | inode->i_uid = current->fsuid; |
| 680 | if (test_opt (sb, GRPID)) | 776 | if (test_opt (sb, GRPID)) |
| 681 | inode->i_gid = dir->i_gid; | 777 | inode->i_gid = dir->i_gid; |
| @@ -740,14 +836,10 @@ got: | |||
| 740 | goto fail_free_drop; | 836 | goto fail_free_drop; |
| 741 | 837 | ||
| 742 | if (test_opt(sb, EXTENTS)) { | 838 | if (test_opt(sb, EXTENTS)) { |
| 743 | /* set extent flag only for diretory, file and normal symlink*/ | 839 | /* set extent flag only for directory, file and normal symlink*/ |
| 744 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { | 840 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { |
| 745 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; | 841 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; |
| 746 | ext4_ext_tree_init(handle, inode); | 842 | ext4_ext_tree_init(handle, inode); |
| 747 | err = ext4_update_incompat_feature(handle, sb, | ||
| 748 | EXT4_FEATURE_INCOMPAT_EXTENTS); | ||
| 749 | if (err) | ||
| 750 | goto fail_free_drop; | ||
| 751 | } | 843 | } |
| 752 | } | 844 | } |
| 753 | 845 | ||
| @@ -817,6 +909,14 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | |||
| 817 | if (IS_ERR(inode)) | 909 | if (IS_ERR(inode)) |
| 818 | goto iget_failed; | 910 | goto iget_failed; |
| 819 | 911 | ||
| 912 | /* | ||
| 913 | * If the orphans has i_nlinks > 0 then it should be able to be | ||
| 914 | * truncated, otherwise it won't be removed from the orphan list | ||
| 915 | * during processing and an infinite loop will result. | ||
| 916 | */ | ||
| 917 | if (inode->i_nlink && !ext4_can_truncate(inode)) | ||
| 918 | goto bad_orphan; | ||
| 919 | |||
| 820 | if (NEXT_ORPHAN(inode) > max_ino) | 920 | if (NEXT_ORPHAN(inode) > max_ino) |
| 821 | goto bad_orphan; | 921 | goto bad_orphan; |
| 822 | brelse(bitmap_bh); | 922 | brelse(bitmap_bh); |
| @@ -838,6 +938,7 @@ bad_orphan: | |||
| 838 | printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", | 938 | printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", |
| 839 | NEXT_ORPHAN(inode)); | 939 | NEXT_ORPHAN(inode)); |
| 840 | printk(KERN_NOTICE "max_ino=%lu\n", max_ino); | 940 | printk(KERN_NOTICE "max_ino=%lu\n", max_ino); |
| 941 | printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); | ||
| 841 | /* Avoid freeing blocks if we got a bad deleted inode */ | 942 | /* Avoid freeing blocks if we got a bad deleted inode */ |
| 842 | if (inode->i_nlink == 0) | 943 | if (inode->i_nlink == 0) |
| 843 | inode->i_blocks = 0; | 944 | inode->i_blocks = 0; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8d9707746413..8ca2763df091 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -32,12 +32,23 @@ | |||
| 32 | #include <linux/string.h> | 32 | #include <linux/string.h> |
| 33 | #include <linux/buffer_head.h> | 33 | #include <linux/buffer_head.h> |
| 34 | #include <linux/writeback.h> | 34 | #include <linux/writeback.h> |
| 35 | #include <linux/pagevec.h> | ||
| 35 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
| 36 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
| 37 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
| 38 | #include "ext4_jbd2.h" | 39 | #include "ext4_jbd2.h" |
| 39 | #include "xattr.h" | 40 | #include "xattr.h" |
| 40 | #include "acl.h" | 41 | #include "acl.h" |
| 42 | #include "ext4_extents.h" | ||
| 43 | |||
| 44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | ||
| 45 | loff_t new_size) | ||
| 46 | { | ||
| 47 | return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, | ||
| 48 | new_size); | ||
| 49 | } | ||
| 50 | |||
| 51 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | ||
| 41 | 52 | ||
| 42 | /* | 53 | /* |
| 43 | * Test whether an inode is a fast symlink. | 54 | * Test whether an inode is a fast symlink. |
| @@ -181,6 +192,8 @@ void ext4_delete_inode (struct inode * inode) | |||
| 181 | { | 192 | { |
| 182 | handle_t *handle; | 193 | handle_t *handle; |
| 183 | 194 | ||
| 195 | if (ext4_should_order_data(inode)) | ||
| 196 | ext4_begin_ordered_truncate(inode, 0); | ||
| 184 | truncate_inode_pages(&inode->i_data, 0); | 197 | truncate_inode_pages(&inode->i_data, 0); |
| 185 | 198 | ||
| 186 | if (is_bad_inode(inode)) | 199 | if (is_bad_inode(inode)) |
| @@ -508,11 +521,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks, | |||
| 508 | * direct blocks | 521 | * direct blocks |
| 509 | */ | 522 | */ |
| 510 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | 523 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, |
| 511 | ext4_fsblk_t goal, int indirect_blks, int blks, | 524 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
| 512 | ext4_fsblk_t new_blocks[4], int *err) | 525 | int indirect_blks, int blks, |
| 526 | ext4_fsblk_t new_blocks[4], int *err) | ||
| 513 | { | 527 | { |
| 514 | int target, i; | 528 | int target, i; |
| 515 | unsigned long count = 0; | 529 | unsigned long count = 0, blk_allocated = 0; |
| 516 | int index = 0; | 530 | int index = 0; |
| 517 | ext4_fsblk_t current_block = 0; | 531 | ext4_fsblk_t current_block = 0; |
| 518 | int ret = 0; | 532 | int ret = 0; |
| @@ -525,12 +539,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 525 | * the first direct block of this branch. That's the | 539 | * the first direct block of this branch. That's the |
| 526 | * minimum number of blocks need to allocate(required) | 540 | * minimum number of blocks need to allocate(required) |
| 527 | */ | 541 | */ |
| 528 | target = blks + indirect_blks; | 542 | /* first we try to allocate the indirect blocks */ |
| 529 | 543 | target = indirect_blks; | |
| 530 | while (1) { | 544 | while (target > 0) { |
| 531 | count = target; | 545 | count = target; |
| 532 | /* allocating blocks for indirect blocks and direct blocks */ | 546 | /* allocating blocks for indirect blocks and direct blocks */ |
| 533 | current_block = ext4_new_blocks(handle,inode,goal,&count,err); | 547 | current_block = ext4_new_meta_blocks(handle, inode, |
| 548 | goal, &count, err); | ||
| 534 | if (*err) | 549 | if (*err) |
| 535 | goto failed_out; | 550 | goto failed_out; |
| 536 | 551 | ||
| @@ -540,16 +555,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 540 | new_blocks[index++] = current_block++; | 555 | new_blocks[index++] = current_block++; |
| 541 | count--; | 556 | count--; |
| 542 | } | 557 | } |
| 543 | 558 | if (count > 0) { | |
| 544 | if (count > 0) | 559 | /* |
| 560 | * save the new block number | ||
| 561 | * for the first direct block | ||
| 562 | */ | ||
| 563 | new_blocks[index] = current_block; | ||
| 564 | printk(KERN_INFO "%s returned more blocks than " | ||
| 565 | "requested\n", __func__); | ||
| 566 | WARN_ON(1); | ||
| 545 | break; | 567 | break; |
| 568 | } | ||
| 546 | } | 569 | } |
| 547 | 570 | ||
| 548 | /* save the new block number for the first direct block */ | 571 | target = blks - count ; |
| 549 | new_blocks[index] = current_block; | 572 | blk_allocated = count; |
| 550 | 573 | if (!target) | |
| 574 | goto allocated; | ||
| 575 | /* Now allocate data blocks */ | ||
| 576 | count = target; | ||
| 577 | /* allocating blocks for data blocks */ | ||
| 578 | current_block = ext4_new_blocks(handle, inode, iblock, | ||
| 579 | goal, &count, err); | ||
| 580 | if (*err && (target == blks)) { | ||
| 581 | /* | ||
| 582 | * if the allocation failed and we didn't allocate | ||
| 583 | * any blocks before | ||
| 584 | */ | ||
| 585 | goto failed_out; | ||
| 586 | } | ||
| 587 | if (!*err) { | ||
| 588 | if (target == blks) { | ||
| 589 | /* | ||
| 590 | * save the new block number | ||
| 591 | * for the first direct block | ||
| 592 | */ | ||
| 593 | new_blocks[index] = current_block; | ||
| 594 | } | ||
| 595 | blk_allocated += count; | ||
| 596 | } | ||
| 597 | allocated: | ||
| 551 | /* total number of blocks allocated for direct blocks */ | 598 | /* total number of blocks allocated for direct blocks */ |
| 552 | ret = count; | 599 | ret = blk_allocated; |
| 553 | *err = 0; | 600 | *err = 0; |
| 554 | return ret; | 601 | return ret; |
| 555 | failed_out: | 602 | failed_out: |
| @@ -584,8 +631,9 @@ failed_out: | |||
| 584 | * as described above and return 0. | 631 | * as described above and return 0. |
| 585 | */ | 632 | */ |
| 586 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | 633 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, |
| 587 | int indirect_blks, int *blks, ext4_fsblk_t goal, | 634 | ext4_lblk_t iblock, int indirect_blks, |
| 588 | ext4_lblk_t *offsets, Indirect *branch) | 635 | int *blks, ext4_fsblk_t goal, |
| 636 | ext4_lblk_t *offsets, Indirect *branch) | ||
| 589 | { | 637 | { |
| 590 | int blocksize = inode->i_sb->s_blocksize; | 638 | int blocksize = inode->i_sb->s_blocksize; |
| 591 | int i, n = 0; | 639 | int i, n = 0; |
| @@ -595,7 +643,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 595 | ext4_fsblk_t new_blocks[4]; | 643 | ext4_fsblk_t new_blocks[4]; |
| 596 | ext4_fsblk_t current_block; | 644 | ext4_fsblk_t current_block; |
| 597 | 645 | ||
| 598 | num = ext4_alloc_blocks(handle, inode, goal, indirect_blks, | 646 | num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, |
| 599 | *blks, new_blocks, &err); | 647 | *blks, new_blocks, &err); |
| 600 | if (err) | 648 | if (err) |
| 601 | return err; | 649 | return err; |
| @@ -799,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 799 | struct ext4_inode_info *ei = EXT4_I(inode); | 847 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 800 | int count = 0; | 848 | int count = 0; |
| 801 | ext4_fsblk_t first_block = 0; | 849 | ext4_fsblk_t first_block = 0; |
| 850 | loff_t disksize; | ||
| 802 | 851 | ||
| 803 | 852 | ||
| 804 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 853 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
| @@ -855,8 +904,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 855 | /* | 904 | /* |
| 856 | * Block out ext4_truncate while we alter the tree | 905 | * Block out ext4_truncate while we alter the tree |
| 857 | */ | 906 | */ |
| 858 | err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal, | 907 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, |
| 859 | offsets + (partial - chain), partial); | 908 | &count, goal, |
| 909 | offsets + (partial - chain), partial); | ||
| 860 | 910 | ||
| 861 | /* | 911 | /* |
| 862 | * The ext4_splice_branch call will free and forget any buffers | 912 | * The ext4_splice_branch call will free and forget any buffers |
| @@ -873,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 873 | * protect it if you're about to implement concurrent | 923 | * protect it if you're about to implement concurrent |
| 874 | * ext4_get_block() -bzzz | 924 | * ext4_get_block() -bzzz |
| 875 | */ | 925 | */ |
| 876 | if (!err && extend_disksize && inode->i_size > ei->i_disksize) | 926 | if (!err && extend_disksize) { |
| 877 | ei->i_disksize = inode->i_size; | 927 | disksize = ((loff_t) iblock + count) << inode->i_blkbits; |
| 928 | if (disksize > i_size_read(inode)) | ||
| 929 | disksize = i_size_read(inode); | ||
| 930 | if (disksize > ei->i_disksize) | ||
| 931 | ei->i_disksize = disksize; | ||
| 932 | } | ||
| 878 | if (err) | 933 | if (err) |
| 879 | goto cleanup; | 934 | goto cleanup; |
| 880 | 935 | ||
| @@ -934,7 +989,7 @@ out: | |||
| 934 | */ | 989 | */ |
| 935 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 990 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, |
| 936 | unsigned long max_blocks, struct buffer_head *bh, | 991 | unsigned long max_blocks, struct buffer_head *bh, |
| 937 | int create, int extend_disksize) | 992 | int create, int extend_disksize, int flag) |
| 938 | { | 993 | { |
| 939 | int retval; | 994 | int retval; |
| 940 | 995 | ||
| @@ -975,6 +1030,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
| 975 | * with create == 1 flag. | 1030 | * with create == 1 flag. |
| 976 | */ | 1031 | */ |
| 977 | down_write((&EXT4_I(inode)->i_data_sem)); | 1032 | down_write((&EXT4_I(inode)->i_data_sem)); |
| 1033 | |||
| 1034 | /* | ||
| 1035 | * if the caller is from delayed allocation writeout path | ||
| 1036 | * we have already reserved fs blocks for allocation | ||
| 1037 | * let the underlying get_block() function know to | ||
| 1038 | * avoid double accounting | ||
| 1039 | */ | ||
| 1040 | if (flag) | ||
| 1041 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | ||
| 978 | /* | 1042 | /* |
| 979 | * We need to check for EXT4 here because migrate | 1043 | * We need to check for EXT4 here because migrate |
| 980 | * could have changed the inode type in between | 1044 | * could have changed the inode type in between |
| @@ -996,6 +1060,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
| 996 | ~EXT4_EXT_MIGRATE; | 1060 | ~EXT4_EXT_MIGRATE; |
| 997 | } | 1061 | } |
| 998 | } | 1062 | } |
| 1063 | |||
| 1064 | if (flag) { | ||
| 1065 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | ||
| 1066 | /* | ||
| 1067 | * Update reserved blocks/metadata blocks | ||
| 1068 | * after successful block allocation | ||
| 1069 | * which were deferred till now | ||
| 1070 | */ | ||
| 1071 | if ((retval > 0) && buffer_delay(bh)) | ||
| 1072 | ext4_da_release_space(inode, retval, 0); | ||
| 1073 | } | ||
| 1074 | |||
| 999 | up_write((&EXT4_I(inode)->i_data_sem)); | 1075 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 1000 | return retval; | 1076 | return retval; |
| 1001 | } | 1077 | } |
| @@ -1021,7 +1097,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, | |||
| 1021 | } | 1097 | } |
| 1022 | 1098 | ||
| 1023 | ret = ext4_get_blocks_wrap(handle, inode, iblock, | 1099 | ret = ext4_get_blocks_wrap(handle, inode, iblock, |
| 1024 | max_blocks, bh_result, create, 0); | 1100 | max_blocks, bh_result, create, 0, 0); |
| 1025 | if (ret > 0) { | 1101 | if (ret > 0) { |
| 1026 | bh_result->b_size = (ret << inode->i_blkbits); | 1102 | bh_result->b_size = (ret << inode->i_blkbits); |
| 1027 | ret = 0; | 1103 | ret = 0; |
| @@ -1047,7 +1123,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
| 1047 | dummy.b_blocknr = -1000; | 1123 | dummy.b_blocknr = -1000; |
| 1048 | buffer_trace_init(&dummy.b_history); | 1124 | buffer_trace_init(&dummy.b_history); |
| 1049 | err = ext4_get_blocks_wrap(handle, inode, block, 1, | 1125 | err = ext4_get_blocks_wrap(handle, inode, block, 1, |
| 1050 | &dummy, create, 1); | 1126 | &dummy, create, 1, 0); |
| 1051 | /* | 1127 | /* |
| 1052 | * ext4_get_blocks_handle() returns number of blocks | 1128 | * ext4_get_blocks_handle() returns number of blocks |
| 1053 | * mapped. 0 in case of a HOLE. | 1129 | * mapped. 0 in case of a HOLE. |
| @@ -1203,19 +1279,20 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
| 1203 | to = from + len; | 1279 | to = from + len; |
| 1204 | 1280 | ||
| 1205 | retry: | 1281 | retry: |
| 1206 | page = __grab_cache_page(mapping, index); | ||
| 1207 | if (!page) | ||
| 1208 | return -ENOMEM; | ||
| 1209 | *pagep = page; | ||
| 1210 | |||
| 1211 | handle = ext4_journal_start(inode, needed_blocks); | 1282 | handle = ext4_journal_start(inode, needed_blocks); |
| 1212 | if (IS_ERR(handle)) { | 1283 | if (IS_ERR(handle)) { |
| 1213 | unlock_page(page); | ||
| 1214 | page_cache_release(page); | ||
| 1215 | ret = PTR_ERR(handle); | 1284 | ret = PTR_ERR(handle); |
| 1216 | goto out; | 1285 | goto out; |
| 1217 | } | 1286 | } |
| 1218 | 1287 | ||
| 1288 | page = __grab_cache_page(mapping, index); | ||
| 1289 | if (!page) { | ||
| 1290 | ext4_journal_stop(handle); | ||
| 1291 | ret = -ENOMEM; | ||
| 1292 | goto out; | ||
| 1293 | } | ||
| 1294 | *pagep = page; | ||
| 1295 | |||
| 1219 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1296 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
| 1220 | ext4_get_block); | 1297 | ext4_get_block); |
| 1221 | 1298 | ||
| @@ -1225,8 +1302,8 @@ retry: | |||
| 1225 | } | 1302 | } |
| 1226 | 1303 | ||
| 1227 | if (ret) { | 1304 | if (ret) { |
| 1228 | ext4_journal_stop(handle); | ||
| 1229 | unlock_page(page); | 1305 | unlock_page(page); |
| 1306 | ext4_journal_stop(handle); | ||
| 1230 | page_cache_release(page); | 1307 | page_cache_release(page); |
| 1231 | } | 1308 | } |
| 1232 | 1309 | ||
| @@ -1236,15 +1313,6 @@ out: | |||
| 1236 | return ret; | 1313 | return ret; |
| 1237 | } | 1314 | } |
| 1238 | 1315 | ||
| 1239 | int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | ||
| 1240 | { | ||
| 1241 | int err = jbd2_journal_dirty_data(handle, bh); | ||
| 1242 | if (err) | ||
| 1243 | ext4_journal_abort_handle(__func__, __func__, | ||
| 1244 | bh, handle, err); | ||
| 1245 | return err; | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | /* For write_end() in data=journal mode */ | 1316 | /* For write_end() in data=journal mode */ |
| 1249 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) | 1317 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) |
| 1250 | { | 1318 | { |
| @@ -1255,29 +1323,6 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
| 1255 | } | 1323 | } |
| 1256 | 1324 | ||
| 1257 | /* | 1325 | /* |
| 1258 | * Generic write_end handler for ordered and writeback ext4 journal modes. | ||
| 1259 | * We can't use generic_write_end, because that unlocks the page and we need to | ||
| 1260 | * unlock the page after ext4_journal_stop, but ext4_journal_stop must run | ||
| 1261 | * after block_write_end. | ||
| 1262 | */ | ||
| 1263 | static int ext4_generic_write_end(struct file *file, | ||
| 1264 | struct address_space *mapping, | ||
| 1265 | loff_t pos, unsigned len, unsigned copied, | ||
| 1266 | struct page *page, void *fsdata) | ||
| 1267 | { | ||
| 1268 | struct inode *inode = file->f_mapping->host; | ||
| 1269 | |||
| 1270 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
| 1271 | |||
| 1272 | if (pos+copied > inode->i_size) { | ||
| 1273 | i_size_write(inode, pos+copied); | ||
| 1274 | mark_inode_dirty(inode); | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | return copied; | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | /* | ||
| 1281 | * We need to pick up the new inode size which generic_commit_write gave us | 1326 | * We need to pick up the new inode size which generic_commit_write gave us |
| 1282 | * `file' can be NULL - eg, when called from page_symlink(). | 1327 | * `file' can be NULL - eg, when called from page_symlink(). |
| 1283 | * | 1328 | * |
| @@ -1290,15 +1335,14 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1290 | struct page *page, void *fsdata) | 1335 | struct page *page, void *fsdata) |
| 1291 | { | 1336 | { |
| 1292 | handle_t *handle = ext4_journal_current_handle(); | 1337 | handle_t *handle = ext4_journal_current_handle(); |
| 1293 | struct inode *inode = file->f_mapping->host; | 1338 | struct inode *inode = mapping->host; |
| 1294 | unsigned from, to; | 1339 | unsigned from, to; |
| 1295 | int ret = 0, ret2; | 1340 | int ret = 0, ret2; |
| 1296 | 1341 | ||
| 1297 | from = pos & (PAGE_CACHE_SIZE - 1); | 1342 | from = pos & (PAGE_CACHE_SIZE - 1); |
| 1298 | to = from + len; | 1343 | to = from + len; |
| 1299 | 1344 | ||
| 1300 | ret = walk_page_buffers(handle, page_buffers(page), | 1345 | ret = ext4_jbd2_file_inode(handle, inode); |
| 1301 | from, to, NULL, ext4_journal_dirty_data); | ||
| 1302 | 1346 | ||
| 1303 | if (ret == 0) { | 1347 | if (ret == 0) { |
| 1304 | /* | 1348 | /* |
| @@ -1311,7 +1355,7 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1311 | new_i_size = pos + copied; | 1355 | new_i_size = pos + copied; |
| 1312 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1356 | if (new_i_size > EXT4_I(inode)->i_disksize) |
| 1313 | EXT4_I(inode)->i_disksize = new_i_size; | 1357 | EXT4_I(inode)->i_disksize = new_i_size; |
| 1314 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1358 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 1315 | page, fsdata); | 1359 | page, fsdata); |
| 1316 | copied = ret2; | 1360 | copied = ret2; |
| 1317 | if (ret2 < 0) | 1361 | if (ret2 < 0) |
| @@ -1320,8 +1364,6 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1320 | ret2 = ext4_journal_stop(handle); | 1364 | ret2 = ext4_journal_stop(handle); |
| 1321 | if (!ret) | 1365 | if (!ret) |
| 1322 | ret = ret2; | 1366 | ret = ret2; |
| 1323 | unlock_page(page); | ||
| 1324 | page_cache_release(page); | ||
| 1325 | 1367 | ||
| 1326 | return ret ? ret : copied; | 1368 | return ret ? ret : copied; |
| 1327 | } | 1369 | } |
| @@ -1332,7 +1374,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1332 | struct page *page, void *fsdata) | 1374 | struct page *page, void *fsdata) |
| 1333 | { | 1375 | { |
| 1334 | handle_t *handle = ext4_journal_current_handle(); | 1376 | handle_t *handle = ext4_journal_current_handle(); |
| 1335 | struct inode *inode = file->f_mapping->host; | 1377 | struct inode *inode = mapping->host; |
| 1336 | int ret = 0, ret2; | 1378 | int ret = 0, ret2; |
| 1337 | loff_t new_i_size; | 1379 | loff_t new_i_size; |
| 1338 | 1380 | ||
| @@ -1340,7 +1382,7 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1340 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1382 | if (new_i_size > EXT4_I(inode)->i_disksize) |
| 1341 | EXT4_I(inode)->i_disksize = new_i_size; | 1383 | EXT4_I(inode)->i_disksize = new_i_size; |
| 1342 | 1384 | ||
| 1343 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, | 1385 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 1344 | page, fsdata); | 1386 | page, fsdata); |
| 1345 | copied = ret2; | 1387 | copied = ret2; |
| 1346 | if (ret2 < 0) | 1388 | if (ret2 < 0) |
| @@ -1349,8 +1391,6 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1349 | ret2 = ext4_journal_stop(handle); | 1391 | ret2 = ext4_journal_stop(handle); |
| 1350 | if (!ret) | 1392 | if (!ret) |
| 1351 | ret = ret2; | 1393 | ret = ret2; |
| 1352 | unlock_page(page); | ||
| 1353 | page_cache_release(page); | ||
| 1354 | 1394 | ||
| 1355 | return ret ? ret : copied; | 1395 | return ret ? ret : copied; |
| 1356 | } | 1396 | } |
| @@ -1389,14 +1429,965 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1389 | ret = ret2; | 1429 | ret = ret2; |
| 1390 | } | 1430 | } |
| 1391 | 1431 | ||
| 1432 | unlock_page(page); | ||
| 1392 | ret2 = ext4_journal_stop(handle); | 1433 | ret2 = ext4_journal_stop(handle); |
| 1393 | if (!ret) | 1434 | if (!ret) |
| 1394 | ret = ret2; | 1435 | ret = ret2; |
| 1395 | unlock_page(page); | ||
| 1396 | page_cache_release(page); | 1436 | page_cache_release(page); |
| 1397 | 1437 | ||
| 1398 | return ret ? ret : copied; | 1438 | return ret ? ret : copied; |
| 1399 | } | 1439 | } |
| 1440 | /* | ||
| 1441 | * Calculate the number of metadata blocks need to reserve | ||
| 1442 | * to allocate @blocks for non extent file based file | ||
| 1443 | */ | ||
| 1444 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | ||
| 1445 | { | ||
| 1446 | int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
| 1447 | int ind_blks, dind_blks, tind_blks; | ||
| 1448 | |||
| 1449 | /* number of new indirect blocks needed */ | ||
| 1450 | ind_blks = (blocks + icap - 1) / icap; | ||
| 1451 | |||
| 1452 | dind_blks = (ind_blks + icap - 1) / icap; | ||
| 1453 | |||
| 1454 | tind_blks = 1; | ||
| 1455 | |||
| 1456 | return ind_blks + dind_blks + tind_blks; | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | /* | ||
| 1460 | * Calculate the number of metadata blocks need to reserve | ||
| 1461 | * to allocate given number of blocks | ||
| 1462 | */ | ||
| 1463 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | ||
| 1464 | { | ||
| 1465 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | ||
| 1466 | return ext4_ext_calc_metadata_amount(inode, blocks); | ||
| 1467 | |||
| 1468 | return ext4_indirect_calc_metadata_amount(inode, blocks); | ||
| 1469 | } | ||
| 1470 | |||
| 1471 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | ||
| 1472 | { | ||
| 1473 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1474 | unsigned long md_needed, mdblocks, total = 0; | ||
| 1475 | |||
| 1476 | /* | ||
| 1477 | * recalculate the amount of metadata blocks to reserve | ||
| 1478 | * in order to allocate nrblocks | ||
| 1479 | * worse case is one extent per block | ||
| 1480 | */ | ||
| 1481 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1482 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | ||
| 1483 | mdblocks = ext4_calc_metadata_amount(inode, total); | ||
| 1484 | BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1485 | |||
| 1486 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | ||
| 1487 | total = md_needed + nrblocks; | ||
| 1488 | |||
| 1489 | if (ext4_has_free_blocks(sbi, total) < total) { | ||
| 1490 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1491 | return -ENOSPC; | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | /* reduce fs free blocks counter */ | ||
| 1495 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
| 1496 | |||
| 1497 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | ||
| 1498 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | ||
| 1499 | |||
| 1500 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1501 | return 0; /* success */ | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | void ext4_da_release_space(struct inode *inode, int used, int to_free) | ||
| 1505 | { | ||
| 1506 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1507 | int total, mdb, mdb_free, release; | ||
| 1508 | |||
| 1509 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1510 | /* recalculate the number of metablocks still need to be reserved */ | ||
| 1511 | total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; | ||
| 1512 | mdb = ext4_calc_metadata_amount(inode, total); | ||
| 1513 | |||
| 1514 | /* figure out how many metablocks to release */ | ||
| 1515 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1516 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | ||
| 1517 | |||
| 1518 | /* Account for allocated meta_blocks */ | ||
| 1519 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
| 1520 | |||
| 1521 | release = to_free + mdb_free; | ||
| 1522 | |||
| 1523 | /* update fs free blocks counter for truncate case */ | ||
| 1524 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | ||
| 1525 | |||
| 1526 | /* update per-inode reservations */ | ||
| 1527 | BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); | ||
| 1528 | EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); | ||
| 1529 | |||
| 1530 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1531 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
| 1532 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
| 1533 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | static void ext4_da_page_release_reservation(struct page *page, | ||
| 1537 | unsigned long offset) | ||
| 1538 | { | ||
| 1539 | int to_release = 0; | ||
| 1540 | struct buffer_head *head, *bh; | ||
| 1541 | unsigned int curr_off = 0; | ||
| 1542 | |||
| 1543 | head = page_buffers(page); | ||
| 1544 | bh = head; | ||
| 1545 | do { | ||
| 1546 | unsigned int next_off = curr_off + bh->b_size; | ||
| 1547 | |||
| 1548 | if ((offset <= curr_off) && (buffer_delay(bh))) { | ||
| 1549 | to_release++; | ||
| 1550 | clear_buffer_delay(bh); | ||
| 1551 | } | ||
| 1552 | curr_off = next_off; | ||
| 1553 | } while ((bh = bh->b_this_page) != head); | ||
| 1554 | ext4_da_release_space(page->mapping->host, 0, to_release); | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | /* | ||
| 1558 | * Delayed allocation stuff | ||
| 1559 | */ | ||
| 1560 | |||
| 1561 | struct mpage_da_data { | ||
| 1562 | struct inode *inode; | ||
| 1563 | struct buffer_head lbh; /* extent of blocks */ | ||
| 1564 | unsigned long first_page, next_page; /* extent of pages */ | ||
| 1565 | get_block_t *get_block; | ||
| 1566 | struct writeback_control *wbc; | ||
| 1567 | }; | ||
| 1568 | |||
| 1569 | /* | ||
| 1570 | * mpage_da_submit_io - walks through extent of pages and try to write | ||
| 1571 | * them with __mpage_writepage() | ||
| 1572 | * | ||
| 1573 | * @mpd->inode: inode | ||
| 1574 | * @mpd->first_page: first page of the extent | ||
| 1575 | * @mpd->next_page: page after the last page of the extent | ||
| 1576 | * @mpd->get_block: the filesystem's block mapper function | ||
| 1577 | * | ||
| 1578 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
| 1579 | * to be allocated. this may be wrong if allocation failed. | ||
| 1580 | * | ||
| 1581 | * As pages are already locked by write_cache_pages(), we can't use it | ||
| 1582 | */ | ||
| 1583 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | ||
| 1584 | { | ||
| 1585 | struct address_space *mapping = mpd->inode->i_mapping; | ||
| 1586 | struct mpage_data mpd_pp = { | ||
| 1587 | .bio = NULL, | ||
| 1588 | .last_block_in_bio = 0, | ||
| 1589 | .get_block = mpd->get_block, | ||
| 1590 | .use_writepage = 1, | ||
| 1591 | }; | ||
| 1592 | int ret = 0, err, nr_pages, i; | ||
| 1593 | unsigned long index, end; | ||
| 1594 | struct pagevec pvec; | ||
| 1595 | |||
| 1596 | BUG_ON(mpd->next_page <= mpd->first_page); | ||
| 1597 | |||
| 1598 | pagevec_init(&pvec, 0); | ||
| 1599 | index = mpd->first_page; | ||
| 1600 | end = mpd->next_page - 1; | ||
| 1601 | |||
| 1602 | while (index <= end) { | ||
| 1603 | /* XXX: optimize tail */ | ||
| 1604 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
| 1605 | if (nr_pages == 0) | ||
| 1606 | break; | ||
| 1607 | for (i = 0; i < nr_pages; i++) { | ||
| 1608 | struct page *page = pvec.pages[i]; | ||
| 1609 | |||
| 1610 | index = page->index; | ||
| 1611 | if (index > end) | ||
| 1612 | break; | ||
| 1613 | index++; | ||
| 1614 | |||
| 1615 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | ||
| 1616 | |||
| 1617 | /* | ||
| 1618 | * In error case, we have to continue because | ||
| 1619 | * remaining pages are still locked | ||
| 1620 | * XXX: unlock and re-dirty them? | ||
| 1621 | */ | ||
| 1622 | if (ret == 0) | ||
| 1623 | ret = err; | ||
| 1624 | } | ||
| 1625 | pagevec_release(&pvec); | ||
| 1626 | } | ||
| 1627 | if (mpd_pp.bio) | ||
| 1628 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
| 1629 | |||
| 1630 | return ret; | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | /* | ||
| 1634 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | ||
| 1635 | * | ||
| 1636 | * @mpd->inode - inode to walk through | ||
| 1637 | * @exbh->b_blocknr - first block on a disk | ||
| 1638 | * @exbh->b_size - amount of space in bytes | ||
| 1639 | * @logical - first logical block to start assignment with | ||
| 1640 | * | ||
| 1641 | * the function goes through all passed space and put actual disk | ||
| 1642 | * block numbers into buffer heads, dropping BH_Delay | ||
| 1643 | */ | ||
| 1644 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | ||
| 1645 | struct buffer_head *exbh) | ||
| 1646 | { | ||
| 1647 | struct inode *inode = mpd->inode; | ||
| 1648 | struct address_space *mapping = inode->i_mapping; | ||
| 1649 | int blocks = exbh->b_size >> inode->i_blkbits; | ||
| 1650 | sector_t pblock = exbh->b_blocknr, cur_logical; | ||
| 1651 | struct buffer_head *head, *bh; | ||
| 1652 | unsigned long index, end; | ||
| 1653 | struct pagevec pvec; | ||
| 1654 | int nr_pages, i; | ||
| 1655 | |||
| 1656 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1657 | end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1658 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1659 | |||
| 1660 | pagevec_init(&pvec, 0); | ||
| 1661 | |||
| 1662 | while (index <= end) { | ||
| 1663 | /* XXX: optimize tail */ | ||
| 1664 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
| 1665 | if (nr_pages == 0) | ||
| 1666 | break; | ||
| 1667 | for (i = 0; i < nr_pages; i++) { | ||
| 1668 | struct page *page = pvec.pages[i]; | ||
| 1669 | |||
| 1670 | index = page->index; | ||
| 1671 | if (index > end) | ||
| 1672 | break; | ||
| 1673 | index++; | ||
| 1674 | |||
| 1675 | BUG_ON(!PageLocked(page)); | ||
| 1676 | BUG_ON(PageWriteback(page)); | ||
| 1677 | BUG_ON(!page_has_buffers(page)); | ||
| 1678 | |||
| 1679 | bh = page_buffers(page); | ||
| 1680 | head = bh; | ||
| 1681 | |||
| 1682 | /* skip blocks out of the range */ | ||
| 1683 | do { | ||
| 1684 | if (cur_logical >= logical) | ||
| 1685 | break; | ||
| 1686 | cur_logical++; | ||
| 1687 | } while ((bh = bh->b_this_page) != head); | ||
| 1688 | |||
| 1689 | do { | ||
| 1690 | if (cur_logical >= logical + blocks) | ||
| 1691 | break; | ||
| 1692 | if (buffer_delay(bh)) { | ||
| 1693 | bh->b_blocknr = pblock; | ||
| 1694 | clear_buffer_delay(bh); | ||
| 1695 | } else if (buffer_mapped(bh)) | ||
| 1696 | BUG_ON(bh->b_blocknr != pblock); | ||
| 1697 | |||
| 1698 | cur_logical++; | ||
| 1699 | pblock++; | ||
| 1700 | } while ((bh = bh->b_this_page) != head); | ||
| 1701 | } | ||
| 1702 | pagevec_release(&pvec); | ||
| 1703 | } | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | |||
| 1707 | /* | ||
| 1708 | * __unmap_underlying_blocks - just a helper function to unmap | ||
| 1709 | * set of blocks described by @bh | ||
| 1710 | */ | ||
| 1711 | static inline void __unmap_underlying_blocks(struct inode *inode, | ||
| 1712 | struct buffer_head *bh) | ||
| 1713 | { | ||
| 1714 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
| 1715 | int blocks, i; | ||
| 1716 | |||
| 1717 | blocks = bh->b_size >> inode->i_blkbits; | ||
| 1718 | for (i = 0; i < blocks; i++) | ||
| 1719 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | ||
| 1720 | } | ||
| 1721 | |||
| 1722 | /* | ||
| 1723 | * mpage_da_map_blocks - go through given space | ||
| 1724 | * | ||
| 1725 | * @mpd->lbh - bh describing space | ||
| 1726 | * @mpd->get_block - the filesystem's block mapper function | ||
| 1727 | * | ||
| 1728 | * The function skips space we know is already mapped to disk blocks. | ||
| 1729 | * | ||
| 1730 | * The function ignores errors ->get_block() returns, thus real | ||
| 1731 | * error handling is postponed to __mpage_writepage() | ||
| 1732 | */ | ||
| 1733 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | ||
| 1734 | { | ||
| 1735 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1736 | int err = 0, remain = lbh->b_size; | ||
| 1737 | sector_t next = lbh->b_blocknr; | ||
| 1738 | struct buffer_head new; | ||
| 1739 | |||
| 1740 | /* | ||
| 1741 | * We consider only non-mapped and non-allocated blocks | ||
| 1742 | */ | ||
| 1743 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | ||
| 1744 | return; | ||
| 1745 | |||
| 1746 | while (remain) { | ||
| 1747 | new.b_state = lbh->b_state; | ||
| 1748 | new.b_blocknr = 0; | ||
| 1749 | new.b_size = remain; | ||
| 1750 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
| 1751 | if (err) { | ||
| 1752 | /* | ||
| 1753 | * Rather than implement own error handling | ||
| 1754 | * here, we just leave remaining blocks | ||
| 1755 | * unallocated and try again with ->writepage() | ||
| 1756 | */ | ||
| 1757 | break; | ||
| 1758 | } | ||
| 1759 | BUG_ON(new.b_size == 0); | ||
| 1760 | |||
| 1761 | if (buffer_new(&new)) | ||
| 1762 | __unmap_underlying_blocks(mpd->inode, &new); | ||
| 1763 | |||
| 1764 | /* | ||
| 1765 | * If blocks are delayed marked, we need to | ||
| 1766 | * put actual blocknr and drop delayed bit | ||
| 1767 | */ | ||
| 1768 | if (buffer_delay(lbh)) | ||
| 1769 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
| 1770 | |||
| 1771 | /* go for the remaining blocks */ | ||
| 1772 | next += new.b_size >> mpd->inode->i_blkbits; | ||
| 1773 | remain -= new.b_size; | ||
| 1774 | } | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | ||
| 1778 | |||
| 1779 | /* | ||
| 1780 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
| 1781 | * | ||
| 1782 | * @mpd->lbh - extent of blocks | ||
| 1783 | * @logical - logical number of the block in the file | ||
| 1784 | * @bh - bh of the block (used to access block's state) | ||
| 1785 | * | ||
| 1786 | * the function is used to collect contig. blocks in same state | ||
| 1787 | */ | ||
| 1788 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | ||
| 1789 | sector_t logical, struct buffer_head *bh) | ||
| 1790 | { | ||
| 1791 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1792 | sector_t next; | ||
| 1793 | |||
| 1794 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | ||
| 1795 | |||
| 1796 | /* | ||
| 1797 | * First block in the extent | ||
| 1798 | */ | ||
| 1799 | if (lbh->b_size == 0) { | ||
| 1800 | lbh->b_blocknr = logical; | ||
| 1801 | lbh->b_size = bh->b_size; | ||
| 1802 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
| 1803 | return; | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | /* | ||
| 1807 | * Can we merge the block to our big extent? | ||
| 1808 | */ | ||
| 1809 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | ||
| 1810 | lbh->b_size += bh->b_size; | ||
| 1811 | return; | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | /* | ||
| 1815 | * We couldn't merge the block to our extent, so we | ||
| 1816 | * need to flush current extent and start new one | ||
| 1817 | */ | ||
| 1818 | mpage_da_map_blocks(mpd); | ||
| 1819 | |||
| 1820 | /* | ||
| 1821 | * Now start a new extent | ||
| 1822 | */ | ||
| 1823 | lbh->b_size = bh->b_size; | ||
| 1824 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
| 1825 | lbh->b_blocknr = logical; | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | /* | ||
| 1829 | * __mpage_da_writepage - finds extent of pages and blocks | ||
| 1830 | * | ||
| 1831 | * @page: page to consider | ||
| 1832 | * @wbc: not used, we just follow rules | ||
| 1833 | * @data: context | ||
| 1834 | * | ||
| 1835 | * The function finds extents of pages and scan them for all blocks. | ||
| 1836 | */ | ||
| 1837 | static int __mpage_da_writepage(struct page *page, | ||
| 1838 | struct writeback_control *wbc, void *data) | ||
| 1839 | { | ||
| 1840 | struct mpage_da_data *mpd = data; | ||
| 1841 | struct inode *inode = mpd->inode; | ||
| 1842 | struct buffer_head *bh, *head, fake; | ||
| 1843 | sector_t logical; | ||
| 1844 | |||
| 1845 | /* | ||
| 1846 | * Can we merge this page to current extent? | ||
| 1847 | */ | ||
| 1848 | if (mpd->next_page != page->index) { | ||
| 1849 | /* | ||
| 1850 | * Nope, we can't. So, we map non-allocated blocks | ||
| 1851 | * and start IO on them using __mpage_writepage() | ||
| 1852 | */ | ||
| 1853 | if (mpd->next_page != mpd->first_page) { | ||
| 1854 | mpage_da_map_blocks(mpd); | ||
| 1855 | mpage_da_submit_io(mpd); | ||
| 1856 | } | ||
| 1857 | |||
| 1858 | /* | ||
| 1859 | * Start next extent of pages ... | ||
| 1860 | */ | ||
| 1861 | mpd->first_page = page->index; | ||
| 1862 | |||
| 1863 | /* | ||
| 1864 | * ... and blocks | ||
| 1865 | */ | ||
| 1866 | mpd->lbh.b_size = 0; | ||
| 1867 | mpd->lbh.b_state = 0; | ||
| 1868 | mpd->lbh.b_blocknr = 0; | ||
| 1869 | } | ||
| 1870 | |||
| 1871 | mpd->next_page = page->index + 1; | ||
| 1872 | logical = (sector_t) page->index << | ||
| 1873 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1874 | |||
| 1875 | if (!page_has_buffers(page)) { | ||
| 1876 | /* | ||
| 1877 | * There is no attached buffer heads yet (mmap?) | ||
| 1878 | * we treat the page asfull of dirty blocks | ||
| 1879 | */ | ||
| 1880 | bh = &fake; | ||
| 1881 | bh->b_size = PAGE_CACHE_SIZE; | ||
| 1882 | bh->b_state = 0; | ||
| 1883 | set_buffer_dirty(bh); | ||
| 1884 | set_buffer_uptodate(bh); | ||
| 1885 | mpage_add_bh_to_extent(mpd, logical, bh); | ||
| 1886 | } else { | ||
| 1887 | /* | ||
| 1888 | * Page with regular buffer heads, just add all dirty ones | ||
| 1889 | */ | ||
| 1890 | head = page_buffers(page); | ||
| 1891 | bh = head; | ||
| 1892 | do { | ||
| 1893 | BUG_ON(buffer_locked(bh)); | ||
| 1894 | if (buffer_dirty(bh)) | ||
| 1895 | mpage_add_bh_to_extent(mpd, logical, bh); | ||
| 1896 | logical++; | ||
| 1897 | } while ((bh = bh->b_this_page) != head); | ||
| 1898 | } | ||
| 1899 | |||
| 1900 | return 0; | ||
| 1901 | } | ||
| 1902 | |||
| 1903 | /* | ||
| 1904 | * mpage_da_writepages - walk the list of dirty pages of the given | ||
| 1905 | * address space, allocates non-allocated blocks, maps newly-allocated | ||
| 1906 | * blocks to existing bhs and issue IO them | ||
| 1907 | * | ||
| 1908 | * @mapping: address space structure to write | ||
| 1909 | * @wbc: subtract the number of written pages from *@wbc->nr_to_write | ||
| 1910 | * @get_block: the filesystem's block mapper function. | ||
| 1911 | * | ||
| 1912 | * This is a library function, which implements the writepages() | ||
| 1913 | * address_space_operation. | ||
| 1914 | * | ||
| 1915 | * In order to avoid duplication of logic that deals with partial pages, | ||
| 1916 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
| 1917 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
| 1918 | * | ||
| 1919 | * It's important that we call __mpage_writepage() only once for each | ||
| 1920 | * involved page, otherwise we'd have to implement more complicated logic | ||
| 1921 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
| 1922 | * | ||
| 1923 | * See comments to mpage_writepages() | ||
| 1924 | */ | ||
| 1925 | static int mpage_da_writepages(struct address_space *mapping, | ||
| 1926 | struct writeback_control *wbc, | ||
| 1927 | get_block_t get_block) | ||
| 1928 | { | ||
| 1929 | struct mpage_da_data mpd; | ||
| 1930 | int ret; | ||
| 1931 | |||
| 1932 | if (!get_block) | ||
| 1933 | return generic_writepages(mapping, wbc); | ||
| 1934 | |||
| 1935 | mpd.wbc = wbc; | ||
| 1936 | mpd.inode = mapping->host; | ||
| 1937 | mpd.lbh.b_size = 0; | ||
| 1938 | mpd.lbh.b_state = 0; | ||
| 1939 | mpd.lbh.b_blocknr = 0; | ||
| 1940 | mpd.first_page = 0; | ||
| 1941 | mpd.next_page = 0; | ||
| 1942 | mpd.get_block = get_block; | ||
| 1943 | |||
| 1944 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | ||
| 1945 | |||
| 1946 | /* | ||
| 1947 | * Handle last extent of pages | ||
| 1948 | */ | ||
| 1949 | if (mpd.next_page != mpd.first_page) { | ||
| 1950 | mpage_da_map_blocks(&mpd); | ||
| 1951 | mpage_da_submit_io(&mpd); | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | return ret; | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | /* | ||
| 1958 | * this is a special callback for ->write_begin() only | ||
| 1959 | * it's intention is to return mapped block or reserve space | ||
| 1960 | */ | ||
| 1961 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | ||
| 1962 | struct buffer_head *bh_result, int create) | ||
| 1963 | { | ||
| 1964 | int ret = 0; | ||
| 1965 | |||
| 1966 | BUG_ON(create == 0); | ||
| 1967 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | ||
| 1968 | |||
| 1969 | /* | ||
| 1970 | * first, we need to know whether the block is allocated already | ||
| 1971 | * preallocated blocks are unmapped but should treated | ||
| 1972 | * the same as allocated blocks. | ||
| 1973 | */ | ||
| 1974 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); | ||
| 1975 | if ((ret == 0) && !buffer_delay(bh_result)) { | ||
| 1976 | /* the block isn't (pre)allocated yet, let's reserve space */ | ||
| 1977 | /* | ||
| 1978 | * XXX: __block_prepare_write() unmaps passed block, | ||
| 1979 | * is it OK? | ||
| 1980 | */ | ||
| 1981 | ret = ext4_da_reserve_space(inode, 1); | ||
| 1982 | if (ret) | ||
| 1983 | /* not enough space to reserve */ | ||
| 1984 | return ret; | ||
| 1985 | |||
| 1986 | map_bh(bh_result, inode->i_sb, 0); | ||
| 1987 | set_buffer_new(bh_result); | ||
| 1988 | set_buffer_delay(bh_result); | ||
| 1989 | } else if (ret > 0) { | ||
| 1990 | bh_result->b_size = (ret << inode->i_blkbits); | ||
| 1991 | ret = 0; | ||
| 1992 | } | ||
| 1993 | |||
| 1994 | return ret; | ||
| 1995 | } | ||
| 1996 | #define EXT4_DELALLOC_RSVED 1 | ||
| 1997 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
| 1998 | struct buffer_head *bh_result, int create) | ||
| 1999 | { | ||
| 2000 | int ret; | ||
| 2001 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
| 2002 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
| 2003 | handle_t *handle = NULL; | ||
| 2004 | |||
| 2005 | handle = ext4_journal_current_handle(); | ||
| 2006 | if (!handle) { | ||
| 2007 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
| 2008 | bh_result, 0, 0, 0); | ||
| 2009 | BUG_ON(!ret); | ||
| 2010 | } else { | ||
| 2011 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
| 2012 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
| 2013 | } | ||
| 2014 | |||
| 2015 | if (ret > 0) { | ||
| 2016 | bh_result->b_size = (ret << inode->i_blkbits); | ||
| 2017 | |||
| 2018 | /* | ||
| 2019 | * Update on-disk size along with block allocation | ||
| 2020 | * we don't use 'extend_disksize' as size may change | ||
| 2021 | * within already allocated block -bzzz | ||
| 2022 | */ | ||
| 2023 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
| 2024 | if (disksize > i_size_read(inode)) | ||
| 2025 | disksize = i_size_read(inode); | ||
| 2026 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
| 2027 | /* | ||
| 2028 | * XXX: replace with spinlock if seen contended -bzzz | ||
| 2029 | */ | ||
| 2030 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 2031 | if (disksize > EXT4_I(inode)->i_disksize) | ||
| 2032 | EXT4_I(inode)->i_disksize = disksize; | ||
| 2033 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2034 | |||
| 2035 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
| 2036 | ret = ext4_mark_inode_dirty(handle, inode); | ||
| 2037 | return ret; | ||
| 2038 | } | ||
| 2039 | } | ||
| 2040 | ret = 0; | ||
| 2041 | } | ||
| 2042 | return ret; | ||
| 2043 | } | ||
| 2044 | |||
| 2045 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
| 2046 | { | ||
| 2047 | /* | ||
| 2048 | * unmapped buffer is possible for holes. | ||
| 2049 | * delay buffer is possible with delayed allocation | ||
| 2050 | */ | ||
| 2051 | return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); | ||
| 2052 | } | ||
| 2053 | |||
| 2054 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | ||
| 2055 | struct buffer_head *bh_result, int create) | ||
| 2056 | { | ||
| 2057 | int ret = 0; | ||
| 2058 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
| 2059 | |||
| 2060 | /* | ||
| 2061 | * we don't want to do block allocation in writepage | ||
| 2062 | * so call get_block_wrap with create = 0 | ||
| 2063 | */ | ||
| 2064 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, | ||
| 2065 | bh_result, 0, 0, 0); | ||
| 2066 | if (ret > 0) { | ||
| 2067 | bh_result->b_size = (ret << inode->i_blkbits); | ||
| 2068 | ret = 0; | ||
| 2069 | } | ||
| 2070 | return ret; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | /* | ||
| 2074 | * get called vi ext4_da_writepages after taking page lock (have journal handle) | ||
| 2075 | * get called via journal_submit_inode_data_buffers (no journal handle) | ||
| 2076 | * get called via shrink_page_list via pdflush (no journal handle) | ||
| 2077 | * or grab_page_cache when doing write_begin (have journal handle) | ||
| 2078 | */ | ||
| 2079 | static int ext4_da_writepage(struct page *page, | ||
| 2080 | struct writeback_control *wbc) | ||
| 2081 | { | ||
| 2082 | int ret = 0; | ||
| 2083 | loff_t size; | ||
| 2084 | unsigned long len; | ||
| 2085 | struct buffer_head *page_bufs; | ||
| 2086 | struct inode *inode = page->mapping->host; | ||
| 2087 | |||
| 2088 | size = i_size_read(inode); | ||
| 2089 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
| 2090 | len = size & ~PAGE_CACHE_MASK; | ||
| 2091 | else | ||
| 2092 | len = PAGE_CACHE_SIZE; | ||
| 2093 | |||
| 2094 | if (page_has_buffers(page)) { | ||
| 2095 | page_bufs = page_buffers(page); | ||
| 2096 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
| 2097 | ext4_bh_unmapped_or_delay)) { | ||
| 2098 | /* | ||
| 2099 | * We don't want to do block allocation | ||
| 2100 | * So redirty the page and return | ||
| 2101 | * We may reach here when we do a journal commit | ||
| 2102 | * via journal_submit_inode_data_buffers. | ||
| 2103 | * If we don't have mapping block we just ignore | ||
| 2104 | * them. We can also reach here via shrink_page_list | ||
| 2105 | */ | ||
| 2106 | redirty_page_for_writepage(wbc, page); | ||
| 2107 | unlock_page(page); | ||
| 2108 | return 0; | ||
| 2109 | } | ||
| 2110 | } else { | ||
| 2111 | /* | ||
| 2112 | * The test for page_has_buffers() is subtle: | ||
| 2113 | * We know the page is dirty but it lost buffers. That means | ||
| 2114 | * that at some moment in time after write_begin()/write_end() | ||
| 2115 | * has been called all buffers have been clean and thus they | ||
| 2116 | * must have been written at least once. So they are all | ||
| 2117 | * mapped and we can happily proceed with mapping them | ||
| 2118 | * and writing the page. | ||
| 2119 | * | ||
| 2120 | * Try to initialize the buffer_heads and check whether | ||
| 2121 | * all are mapped and non delay. We don't want to | ||
| 2122 | * do block allocation here. | ||
| 2123 | */ | ||
| 2124 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | ||
| 2125 | ext4_normal_get_block_write); | ||
| 2126 | if (!ret) { | ||
| 2127 | page_bufs = page_buffers(page); | ||
| 2128 | /* check whether all are mapped and non delay */ | ||
| 2129 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
| 2130 | ext4_bh_unmapped_or_delay)) { | ||
| 2131 | redirty_page_for_writepage(wbc, page); | ||
| 2132 | unlock_page(page); | ||
| 2133 | return 0; | ||
| 2134 | } | ||
| 2135 | } else { | ||
| 2136 | /* | ||
| 2137 | * We can't do block allocation here | ||
| 2138 | * so just redity the page and unlock | ||
| 2139 | * and return | ||
| 2140 | */ | ||
| 2141 | redirty_page_for_writepage(wbc, page); | ||
| 2142 | unlock_page(page); | ||
| 2143 | return 0; | ||
| 2144 | } | ||
| 2145 | } | ||
| 2146 | |||
| 2147 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | ||
| 2148 | ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); | ||
| 2149 | else | ||
| 2150 | ret = block_write_full_page(page, | ||
| 2151 | ext4_normal_get_block_write, | ||
| 2152 | wbc); | ||
| 2153 | |||
| 2154 | return ret; | ||
| 2155 | } | ||
| 2156 | |||
| 2157 | /* | ||
| 2158 | * For now just follow the DIO way to estimate the max credits | ||
| 2159 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | ||
| 2160 | * todo: need to calculate the max credits need for | ||
| 2161 | * extent based files, currently the DIO credits is based on | ||
| 2162 | * indirect-blocks mapping way. | ||
| 2163 | * | ||
| 2164 | * Probably should have a generic way to calculate credits | ||
| 2165 | * for DIO, writepages, and truncate | ||
| 2166 | */ | ||
| 2167 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | ||
| 2168 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | ||
| 2169 | |||
| 2170 | static int ext4_da_writepages(struct address_space *mapping, | ||
| 2171 | struct writeback_control *wbc) | ||
| 2172 | { | ||
| 2173 | struct inode *inode = mapping->host; | ||
| 2174 | handle_t *handle = NULL; | ||
| 2175 | int needed_blocks; | ||
| 2176 | int ret = 0; | ||
| 2177 | long to_write; | ||
| 2178 | loff_t range_start = 0; | ||
| 2179 | |||
| 2180 | /* | ||
| 2181 | * No pages to write? This is mainly a kludge to avoid starting | ||
| 2182 | * a transaction for special inodes like journal inode on last iput() | ||
| 2183 | * because that could violate lock ordering on umount | ||
| 2184 | */ | ||
| 2185 | if (!mapping->nrpages) | ||
| 2186 | return 0; | ||
| 2187 | |||
| 2188 | /* | ||
| 2189 | * Estimate the worse case needed credits to write out | ||
| 2190 | * EXT4_MAX_BUF_BLOCKS pages | ||
| 2191 | */ | ||
| 2192 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | ||
| 2193 | |||
| 2194 | to_write = wbc->nr_to_write; | ||
| 2195 | if (!wbc->range_cyclic) { | ||
| 2196 | /* | ||
| 2197 | * If range_cyclic is not set force range_cont | ||
| 2198 | * and save the old writeback_index | ||
| 2199 | */ | ||
| 2200 | wbc->range_cont = 1; | ||
| 2201 | range_start = wbc->range_start; | ||
| 2202 | } | ||
| 2203 | |||
| 2204 | while (!ret && to_write) { | ||
| 2205 | /* start a new transaction*/ | ||
| 2206 | handle = ext4_journal_start(inode, needed_blocks); | ||
| 2207 | if (IS_ERR(handle)) { | ||
| 2208 | ret = PTR_ERR(handle); | ||
| 2209 | goto out_writepages; | ||
| 2210 | } | ||
| 2211 | if (ext4_should_order_data(inode)) { | ||
| 2212 | /* | ||
| 2213 | * With ordered mode we need to add | ||
| 2214 | * the inode to the journal handle | ||
| 2215 | * when we do block allocation. | ||
| 2216 | */ | ||
| 2217 | ret = ext4_jbd2_file_inode(handle, inode); | ||
| 2218 | if (ret) { | ||
| 2219 | ext4_journal_stop(handle); | ||
| 2220 | goto out_writepages; | ||
| 2221 | } | ||
| 2222 | |||
| 2223 | } | ||
| 2224 | /* | ||
| 2225 | * set the max dirty pages could be write at a time | ||
| 2226 | * to fit into the reserved transaction credits | ||
| 2227 | */ | ||
| 2228 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
| 2229 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
| 2230 | |||
| 2231 | to_write -= wbc->nr_to_write; | ||
| 2232 | ret = mpage_da_writepages(mapping, wbc, | ||
| 2233 | ext4_da_get_block_write); | ||
| 2234 | ext4_journal_stop(handle); | ||
| 2235 | if (wbc->nr_to_write) { | ||
| 2236 | /* | ||
| 2237 | * There is no more writeout needed | ||
| 2238 | * or we requested for a noblocking writeout | ||
| 2239 | * and we found the device congested | ||
| 2240 | */ | ||
| 2241 | to_write += wbc->nr_to_write; | ||
| 2242 | break; | ||
| 2243 | } | ||
| 2244 | wbc->nr_to_write = to_write; | ||
| 2245 | } | ||
| 2246 | |||
| 2247 | out_writepages: | ||
| 2248 | wbc->nr_to_write = to_write; | ||
| 2249 | if (range_start) | ||
| 2250 | wbc->range_start = range_start; | ||
| 2251 | return ret; | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | ||
| 2255 | loff_t pos, unsigned len, unsigned flags, | ||
| 2256 | struct page **pagep, void **fsdata) | ||
| 2257 | { | ||
| 2258 | int ret, retries = 0; | ||
| 2259 | struct page *page; | ||
| 2260 | pgoff_t index; | ||
| 2261 | unsigned from, to; | ||
| 2262 | struct inode *inode = mapping->host; | ||
| 2263 | handle_t *handle; | ||
| 2264 | |||
| 2265 | index = pos >> PAGE_CACHE_SHIFT; | ||
| 2266 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2267 | to = from + len; | ||
| 2268 | |||
| 2269 | retry: | ||
| 2270 | /* | ||
| 2271 | * With delayed allocation, we don't log the i_disksize update | ||
| 2272 | * if there is delayed block allocation. But we still need | ||
| 2273 | * to journalling the i_disksize update if writes to the end | ||
| 2274 | * of file which has an already mapped buffer. | ||
| 2275 | */ | ||
| 2276 | handle = ext4_journal_start(inode, 1); | ||
| 2277 | if (IS_ERR(handle)) { | ||
| 2278 | ret = PTR_ERR(handle); | ||
| 2279 | goto out; | ||
| 2280 | } | ||
| 2281 | |||
| 2282 | page = __grab_cache_page(mapping, index); | ||
| 2283 | if (!page) | ||
| 2284 | return -ENOMEM; | ||
| 2285 | *pagep = page; | ||
| 2286 | |||
| 2287 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | ||
| 2288 | ext4_da_get_block_prep); | ||
| 2289 | if (ret < 0) { | ||
| 2290 | unlock_page(page); | ||
| 2291 | ext4_journal_stop(handle); | ||
| 2292 | page_cache_release(page); | ||
| 2293 | } | ||
| 2294 | |||
| 2295 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
| 2296 | goto retry; | ||
| 2297 | out: | ||
| 2298 | return ret; | ||
| 2299 | } | ||
| 2300 | |||
| 2301 | /* | ||
| 2302 | * Check if we should update i_disksize | ||
| 2303 | * when write to the end of file but not require block allocation | ||
| 2304 | */ | ||
| 2305 | static int ext4_da_should_update_i_disksize(struct page *page, | ||
| 2306 | unsigned long offset) | ||
| 2307 | { | ||
| 2308 | struct buffer_head *bh; | ||
| 2309 | struct inode *inode = page->mapping->host; | ||
| 2310 | unsigned int idx; | ||
| 2311 | int i; | ||
| 2312 | |||
| 2313 | bh = page_buffers(page); | ||
| 2314 | idx = offset >> inode->i_blkbits; | ||
| 2315 | |||
| 2316 | for (i=0; i < idx; i++) | ||
| 2317 | bh = bh->b_this_page; | ||
| 2318 | |||
| 2319 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | ||
| 2320 | return 0; | ||
| 2321 | return 1; | ||
| 2322 | } | ||
| 2323 | |||
| 2324 | static int ext4_da_write_end(struct file *file, | ||
| 2325 | struct address_space *mapping, | ||
| 2326 | loff_t pos, unsigned len, unsigned copied, | ||
| 2327 | struct page *page, void *fsdata) | ||
| 2328 | { | ||
| 2329 | struct inode *inode = mapping->host; | ||
| 2330 | int ret = 0, ret2; | ||
| 2331 | handle_t *handle = ext4_journal_current_handle(); | ||
| 2332 | loff_t new_i_size; | ||
| 2333 | unsigned long start, end; | ||
| 2334 | |||
| 2335 | start = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2336 | end = start + copied -1; | ||
| 2337 | |||
| 2338 | /* | ||
| 2339 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
| 2340 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
| 2341 | * into that. | ||
| 2342 | */ | ||
| 2343 | |||
| 2344 | new_i_size = pos + copied; | ||
| 2345 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
| 2346 | if (ext4_da_should_update_i_disksize(page, end)) { | ||
| 2347 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 2348 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
| 2349 | /* | ||
| 2350 | * Updating i_disksize when extending file | ||
| 2351 | * without needing block allocation | ||
| 2352 | */ | ||
| 2353 | if (ext4_should_order_data(inode)) | ||
| 2354 | ret = ext4_jbd2_file_inode(handle, | ||
| 2355 | inode); | ||
| 2356 | |||
| 2357 | EXT4_I(inode)->i_disksize = new_i_size; | ||
| 2358 | } | ||
| 2359 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2360 | } | ||
| 2361 | } | ||
| 2362 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
| 2363 | page, fsdata); | ||
| 2364 | copied = ret2; | ||
| 2365 | if (ret2 < 0) | ||
| 2366 | ret = ret2; | ||
| 2367 | ret2 = ext4_journal_stop(handle); | ||
| 2368 | if (!ret) | ||
| 2369 | ret = ret2; | ||
| 2370 | |||
| 2371 | return ret ? ret : copied; | ||
| 2372 | } | ||
| 2373 | |||
| 2374 | static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | ||
| 2375 | { | ||
| 2376 | /* | ||
| 2377 | * Drop reserved blocks | ||
| 2378 | */ | ||
| 2379 | BUG_ON(!PageLocked(page)); | ||
| 2380 | if (!page_has_buffers(page)) | ||
| 2381 | goto out; | ||
| 2382 | |||
| 2383 | ext4_da_page_release_reservation(page, offset); | ||
| 2384 | |||
| 2385 | out: | ||
| 2386 | ext4_invalidatepage(page, offset); | ||
| 2387 | |||
| 2388 | return; | ||
| 2389 | } | ||
| 2390 | |||
| 1400 | 2391 | ||
| 1401 | /* | 2392 | /* |
| 1402 | * bmap() is special. It gets used by applications such as lilo and by | 2393 | * bmap() is special. It gets used by applications such as lilo and by |
| @@ -1418,6 +2409,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 1418 | journal_t *journal; | 2409 | journal_t *journal; |
| 1419 | int err; | 2410 | int err; |
| 1420 | 2411 | ||
| 2412 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | ||
| 2413 | test_opt(inode->i_sb, DELALLOC)) { | ||
| 2414 | /* | ||
| 2415 | * With delalloc we want to sync the file | ||
| 2416 | * so that we can make sure we allocate | ||
| 2417 | * blocks for file | ||
| 2418 | */ | ||
| 2419 | filemap_write_and_wait(mapping); | ||
| 2420 | } | ||
| 2421 | |||
| 1421 | if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 2422 | if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { |
| 1422 | /* | 2423 | /* |
| 1423 | * This is a REALLY heavyweight approach, but the use of | 2424 | * This is a REALLY heavyweight approach, but the use of |
| @@ -1462,21 +2463,17 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
| 1462 | return 0; | 2463 | return 0; |
| 1463 | } | 2464 | } |
| 1464 | 2465 | ||
| 1465 | static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | ||
| 1466 | { | ||
| 1467 | if (buffer_mapped(bh)) | ||
| 1468 | return ext4_journal_dirty_data(handle, bh); | ||
| 1469 | return 0; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | /* | 2466 | /* |
| 1473 | * Note that we always start a transaction even if we're not journalling | 2467 | * Note that we don't need to start a transaction unless we're journaling data |
| 1474 | * data. This is to preserve ordering: any hole instantiation within | 2468 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
| 1475 | * __block_write_full_page -> ext4_get_block() should be journalled | 2469 | * need to file the inode to the transaction's list in ordered mode because if |
| 1476 | * along with the data so we don't crash and then get metadata which | 2470 | * we are writing back data added by write(), the inode is already there and if |
| 1477 | * refers to old data. | 2471 | * we are writing back data modified via mmap(), noone guarantees in which |
| 2472 | * transaction the data will hit the disk. In case we are journaling data, we | ||
| 2473 | * cannot start transaction directly because transaction start ranks above page | ||
| 2474 | * lock so we have to do some magic. | ||
| 1478 | * | 2475 | * |
| 1479 | * In all journalling modes block_write_full_page() will start the I/O. | 2476 | * In all journaling modes block_write_full_page() will start the I/O. |
| 1480 | * | 2477 | * |
| 1481 | * Problem: | 2478 | * Problem: |
| 1482 | * | 2479 | * |
| @@ -1518,105 +2515,103 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | |||
| 1518 | * disastrous. Any write() or metadata operation will sync the fs for | 2515 | * disastrous. Any write() or metadata operation will sync the fs for |
| 1519 | * us. | 2516 | * us. |
| 1520 | * | 2517 | * |
| 1521 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | ||
| 1522 | * we don't need to open a transaction here. | ||
| 1523 | */ | 2518 | */ |
| 1524 | static int ext4_ordered_writepage(struct page *page, | 2519 | static int __ext4_normal_writepage(struct page *page, |
| 1525 | struct writeback_control *wbc) | 2520 | struct writeback_control *wbc) |
| 1526 | { | 2521 | { |
| 1527 | struct inode *inode = page->mapping->host; | 2522 | struct inode *inode = page->mapping->host; |
| 1528 | struct buffer_head *page_bufs; | ||
| 1529 | handle_t *handle = NULL; | ||
| 1530 | int ret = 0; | ||
| 1531 | int err; | ||
| 1532 | |||
| 1533 | J_ASSERT(PageLocked(page)); | ||
| 1534 | |||
| 1535 | /* | ||
| 1536 | * We give up here if we're reentered, because it might be for a | ||
| 1537 | * different filesystem. | ||
| 1538 | */ | ||
| 1539 | if (ext4_journal_current_handle()) | ||
| 1540 | goto out_fail; | ||
| 1541 | 2523 | ||
| 1542 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2524 | if (test_opt(inode->i_sb, NOBH)) |
| 2525 | return nobh_writepage(page, | ||
| 2526 | ext4_normal_get_block_write, wbc); | ||
| 2527 | else | ||
| 2528 | return block_write_full_page(page, | ||
| 2529 | ext4_normal_get_block_write, | ||
| 2530 | wbc); | ||
| 2531 | } | ||
| 1543 | 2532 | ||
| 1544 | if (IS_ERR(handle)) { | 2533 | static int ext4_normal_writepage(struct page *page, |
| 1545 | ret = PTR_ERR(handle); | 2534 | struct writeback_control *wbc) |
| 1546 | goto out_fail; | 2535 | { |
| 1547 | } | 2536 | struct inode *inode = page->mapping->host; |
| 2537 | loff_t size = i_size_read(inode); | ||
| 2538 | loff_t len; | ||
| 1548 | 2539 | ||
| 1549 | if (!page_has_buffers(page)) { | 2540 | J_ASSERT(PageLocked(page)); |
| 1550 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 2541 | if (page->index == size >> PAGE_CACHE_SHIFT) |
| 1551 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 2542 | len = size & ~PAGE_CACHE_MASK; |
| 2543 | else | ||
| 2544 | len = PAGE_CACHE_SIZE; | ||
| 2545 | |||
| 2546 | if (page_has_buffers(page)) { | ||
| 2547 | /* if page has buffers it should all be mapped | ||
| 2548 | * and allocated. If there are not buffers attached | ||
| 2549 | * to the page we know the page is dirty but it lost | ||
| 2550 | * buffers. That means that at some moment in time | ||
| 2551 | * after write_begin() / write_end() has been called | ||
| 2552 | * all buffers have been clean and thus they must have been | ||
| 2553 | * written at least once. So they are all mapped and we can | ||
| 2554 | * happily proceed with mapping them and writing the page. | ||
| 2555 | */ | ||
| 2556 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 2557 | ext4_bh_unmapped_or_delay)); | ||
| 1552 | } | 2558 | } |
| 1553 | page_bufs = page_buffers(page); | ||
| 1554 | walk_page_buffers(handle, page_bufs, 0, | ||
| 1555 | PAGE_CACHE_SIZE, NULL, bget_one); | ||
| 1556 | |||
| 1557 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
| 1558 | 2559 | ||
| 1559 | /* | 2560 | if (!ext4_journal_current_handle()) |
| 1560 | * The page can become unlocked at any point now, and | 2561 | return __ext4_normal_writepage(page, wbc); |
| 1561 | * truncate can then come in and change things. So we | ||
| 1562 | * can't touch *page from now on. But *page_bufs is | ||
| 1563 | * safe due to elevated refcount. | ||
| 1564 | */ | ||
| 1565 | 2562 | ||
| 1566 | /* | ||
| 1567 | * And attach them to the current transaction. But only if | ||
| 1568 | * block_write_full_page() succeeded. Otherwise they are unmapped, | ||
| 1569 | * and generally junk. | ||
| 1570 | */ | ||
| 1571 | if (ret == 0) { | ||
| 1572 | err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, | ||
| 1573 | NULL, jbd2_journal_dirty_data_fn); | ||
| 1574 | if (!ret) | ||
| 1575 | ret = err; | ||
| 1576 | } | ||
| 1577 | walk_page_buffers(handle, page_bufs, 0, | ||
| 1578 | PAGE_CACHE_SIZE, NULL, bput_one); | ||
| 1579 | err = ext4_journal_stop(handle); | ||
| 1580 | if (!ret) | ||
| 1581 | ret = err; | ||
| 1582 | return ret; | ||
| 1583 | |||
| 1584 | out_fail: | ||
| 1585 | redirty_page_for_writepage(wbc, page); | 2563 | redirty_page_for_writepage(wbc, page); |
| 1586 | unlock_page(page); | 2564 | unlock_page(page); |
| 1587 | return ret; | 2565 | return 0; |
| 1588 | } | 2566 | } |
| 1589 | 2567 | ||
| 1590 | static int ext4_writeback_writepage(struct page *page, | 2568 | static int __ext4_journalled_writepage(struct page *page, |
| 1591 | struct writeback_control *wbc) | 2569 | struct writeback_control *wbc) |
| 1592 | { | 2570 | { |
| 1593 | struct inode *inode = page->mapping->host; | 2571 | struct address_space *mapping = page->mapping; |
| 2572 | struct inode *inode = mapping->host; | ||
| 2573 | struct buffer_head *page_bufs; | ||
| 1594 | handle_t *handle = NULL; | 2574 | handle_t *handle = NULL; |
| 1595 | int ret = 0; | 2575 | int ret = 0; |
| 1596 | int err; | 2576 | int err; |
| 1597 | 2577 | ||
| 1598 | if (ext4_journal_current_handle()) | 2578 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
| 1599 | goto out_fail; | 2579 | ext4_normal_get_block_write); |
| 2580 | if (ret != 0) | ||
| 2581 | goto out_unlock; | ||
| 2582 | |||
| 2583 | page_bufs = page_buffers(page); | ||
| 2584 | walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, | ||
| 2585 | bget_one); | ||
| 2586 | /* As soon as we unlock the page, it can go away, but we have | ||
| 2587 | * references to buffers so we are safe */ | ||
| 2588 | unlock_page(page); | ||
| 1600 | 2589 | ||
| 1601 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2590 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
| 1602 | if (IS_ERR(handle)) { | 2591 | if (IS_ERR(handle)) { |
| 1603 | ret = PTR_ERR(handle); | 2592 | ret = PTR_ERR(handle); |
| 1604 | goto out_fail; | 2593 | goto out; |
| 1605 | } | 2594 | } |
| 1606 | 2595 | ||
| 1607 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2596 | ret = walk_page_buffers(handle, page_bufs, 0, |
| 1608 | ret = nobh_writepage(page, ext4_get_block, wbc); | 2597 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); |
| 1609 | else | ||
| 1610 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
| 1611 | 2598 | ||
| 2599 | err = walk_page_buffers(handle, page_bufs, 0, | ||
| 2600 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
| 2601 | if (ret == 0) | ||
| 2602 | ret = err; | ||
| 1612 | err = ext4_journal_stop(handle); | 2603 | err = ext4_journal_stop(handle); |
| 1613 | if (!ret) | 2604 | if (!ret) |
| 1614 | ret = err; | 2605 | ret = err; |
| 1615 | return ret; | ||
| 1616 | 2606 | ||
| 1617 | out_fail: | 2607 | walk_page_buffers(handle, page_bufs, 0, |
| 1618 | redirty_page_for_writepage(wbc, page); | 2608 | PAGE_CACHE_SIZE, NULL, bput_one); |
| 2609 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
| 2610 | goto out; | ||
| 2611 | |||
| 2612 | out_unlock: | ||
| 1619 | unlock_page(page); | 2613 | unlock_page(page); |
| 2614 | out: | ||
| 1620 | return ret; | 2615 | return ret; |
| 1621 | } | 2616 | } |
| 1622 | 2617 | ||
| @@ -1624,59 +2619,53 @@ static int ext4_journalled_writepage(struct page *page, | |||
| 1624 | struct writeback_control *wbc) | 2619 | struct writeback_control *wbc) |
| 1625 | { | 2620 | { |
| 1626 | struct inode *inode = page->mapping->host; | 2621 | struct inode *inode = page->mapping->host; |
| 1627 | handle_t *handle = NULL; | 2622 | loff_t size = i_size_read(inode); |
| 1628 | int ret = 0; | 2623 | loff_t len; |
| 1629 | int err; | ||
| 1630 | 2624 | ||
| 1631 | if (ext4_journal_current_handle()) | 2625 | J_ASSERT(PageLocked(page)); |
| 1632 | goto no_write; | 2626 | if (page->index == size >> PAGE_CACHE_SHIFT) |
| 2627 | len = size & ~PAGE_CACHE_MASK; | ||
| 2628 | else | ||
| 2629 | len = PAGE_CACHE_SIZE; | ||
| 2630 | |||
| 2631 | if (page_has_buffers(page)) { | ||
| 2632 | /* if page has buffers it should all be mapped | ||
| 2633 | * and allocated. If there are not buffers attached | ||
| 2634 | * to the page we know the page is dirty but it lost | ||
| 2635 | * buffers. That means that at some moment in time | ||
| 2636 | * after write_begin() / write_end() has been called | ||
| 2637 | * all buffers have been clean and thus they must have been | ||
| 2638 | * written at least once. So they are all mapped and we can | ||
| 2639 | * happily proceed with mapping them and writing the page. | ||
| 2640 | */ | ||
| 2641 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 2642 | ext4_bh_unmapped_or_delay)); | ||
| 2643 | } | ||
| 1633 | 2644 | ||
| 1634 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2645 | if (ext4_journal_current_handle()) |
| 1635 | if (IS_ERR(handle)) { | ||
| 1636 | ret = PTR_ERR(handle); | ||
| 1637 | goto no_write; | 2646 | goto no_write; |
| 1638 | } | ||
| 1639 | 2647 | ||
| 1640 | if (!page_has_buffers(page) || PageChecked(page)) { | 2648 | if (PageChecked(page)) { |
| 1641 | /* | 2649 | /* |
| 1642 | * It's mmapped pagecache. Add buffers and journal it. There | 2650 | * It's mmapped pagecache. Add buffers and journal it. There |
| 1643 | * doesn't seem much point in redirtying the page here. | 2651 | * doesn't seem much point in redirtying the page here. |
| 1644 | */ | 2652 | */ |
| 1645 | ClearPageChecked(page); | 2653 | ClearPageChecked(page); |
| 1646 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2654 | return __ext4_journalled_writepage(page, wbc); |
| 1647 | ext4_get_block); | ||
| 1648 | if (ret != 0) { | ||
| 1649 | ext4_journal_stop(handle); | ||
| 1650 | goto out_unlock; | ||
| 1651 | } | ||
| 1652 | ret = walk_page_buffers(handle, page_buffers(page), 0, | ||
| 1653 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); | ||
| 1654 | |||
| 1655 | err = walk_page_buffers(handle, page_buffers(page), 0, | ||
| 1656 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
| 1657 | if (ret == 0) | ||
| 1658 | ret = err; | ||
| 1659 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
| 1660 | unlock_page(page); | ||
| 1661 | } else { | 2655 | } else { |
| 1662 | /* | 2656 | /* |
| 1663 | * It may be a page full of checkpoint-mode buffers. We don't | 2657 | * It may be a page full of checkpoint-mode buffers. We don't |
| 1664 | * really know unless we go poke around in the buffer_heads. | 2658 | * really know unless we go poke around in the buffer_heads. |
| 1665 | * But block_write_full_page will do the right thing. | 2659 | * But block_write_full_page will do the right thing. |
| 1666 | */ | 2660 | */ |
| 1667 | ret = block_write_full_page(page, ext4_get_block, wbc); | 2661 | return block_write_full_page(page, |
| 2662 | ext4_normal_get_block_write, | ||
| 2663 | wbc); | ||
| 1668 | } | 2664 | } |
| 1669 | err = ext4_journal_stop(handle); | ||
| 1670 | if (!ret) | ||
| 1671 | ret = err; | ||
| 1672 | out: | ||
| 1673 | return ret; | ||
| 1674 | |||
| 1675 | no_write: | 2665 | no_write: |
| 1676 | redirty_page_for_writepage(wbc, page); | 2666 | redirty_page_for_writepage(wbc, page); |
| 1677 | out_unlock: | ||
| 1678 | unlock_page(page); | 2667 | unlock_page(page); |
| 1679 | goto out; | 2668 | return 0; |
| 1680 | } | 2669 | } |
| 1681 | 2670 | ||
| 1682 | static int ext4_readpage(struct file *file, struct page *page) | 2671 | static int ext4_readpage(struct file *file, struct page *page) |
| @@ -1819,7 +2808,7 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
| 1819 | static const struct address_space_operations ext4_ordered_aops = { | 2808 | static const struct address_space_operations ext4_ordered_aops = { |
| 1820 | .readpage = ext4_readpage, | 2809 | .readpage = ext4_readpage, |
| 1821 | .readpages = ext4_readpages, | 2810 | .readpages = ext4_readpages, |
| 1822 | .writepage = ext4_ordered_writepage, | 2811 | .writepage = ext4_normal_writepage, |
| 1823 | .sync_page = block_sync_page, | 2812 | .sync_page = block_sync_page, |
| 1824 | .write_begin = ext4_write_begin, | 2813 | .write_begin = ext4_write_begin, |
| 1825 | .write_end = ext4_ordered_write_end, | 2814 | .write_end = ext4_ordered_write_end, |
| @@ -1833,7 +2822,7 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
| 1833 | static const struct address_space_operations ext4_writeback_aops = { | 2822 | static const struct address_space_operations ext4_writeback_aops = { |
| 1834 | .readpage = ext4_readpage, | 2823 | .readpage = ext4_readpage, |
| 1835 | .readpages = ext4_readpages, | 2824 | .readpages = ext4_readpages, |
| 1836 | .writepage = ext4_writeback_writepage, | 2825 | .writepage = ext4_normal_writepage, |
| 1837 | .sync_page = block_sync_page, | 2826 | .sync_page = block_sync_page, |
| 1838 | .write_begin = ext4_write_begin, | 2827 | .write_begin = ext4_write_begin, |
| 1839 | .write_end = ext4_writeback_write_end, | 2828 | .write_end = ext4_writeback_write_end, |
| @@ -1857,10 +2846,31 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
| 1857 | .releasepage = ext4_releasepage, | 2846 | .releasepage = ext4_releasepage, |
| 1858 | }; | 2847 | }; |
| 1859 | 2848 | ||
| 2849 | static const struct address_space_operations ext4_da_aops = { | ||
| 2850 | .readpage = ext4_readpage, | ||
| 2851 | .readpages = ext4_readpages, | ||
| 2852 | .writepage = ext4_da_writepage, | ||
| 2853 | .writepages = ext4_da_writepages, | ||
| 2854 | .sync_page = block_sync_page, | ||
| 2855 | .write_begin = ext4_da_write_begin, | ||
| 2856 | .write_end = ext4_da_write_end, | ||
| 2857 | .bmap = ext4_bmap, | ||
| 2858 | .invalidatepage = ext4_da_invalidatepage, | ||
| 2859 | .releasepage = ext4_releasepage, | ||
| 2860 | .direct_IO = ext4_direct_IO, | ||
| 2861 | .migratepage = buffer_migrate_page, | ||
| 2862 | }; | ||
| 2863 | |||
| 1860 | void ext4_set_aops(struct inode *inode) | 2864 | void ext4_set_aops(struct inode *inode) |
| 1861 | { | 2865 | { |
| 1862 | if (ext4_should_order_data(inode)) | 2866 | if (ext4_should_order_data(inode) && |
| 2867 | test_opt(inode->i_sb, DELALLOC)) | ||
| 2868 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
| 2869 | else if (ext4_should_order_data(inode)) | ||
| 1863 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 2870 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
| 2871 | else if (ext4_should_writeback_data(inode) && | ||
| 2872 | test_opt(inode->i_sb, DELALLOC)) | ||
| 2873 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
| 1864 | else if (ext4_should_writeback_data(inode)) | 2874 | else if (ext4_should_writeback_data(inode)) |
| 1865 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 2875 | inode->i_mapping->a_ops = &ext4_writeback_aops; |
| 1866 | else | 2876 | else |
| @@ -1873,7 +2883,7 @@ void ext4_set_aops(struct inode *inode) | |||
| 1873 | * This required during truncate. We need to physically zero the tail end | 2883 | * This required during truncate. We need to physically zero the tail end |
| 1874 | * of that block so it doesn't yield old data if the file is later grown. | 2884 | * of that block so it doesn't yield old data if the file is later grown. |
| 1875 | */ | 2885 | */ |
| 1876 | int ext4_block_truncate_page(handle_t *handle, struct page *page, | 2886 | int ext4_block_truncate_page(handle_t *handle, |
| 1877 | struct address_space *mapping, loff_t from) | 2887 | struct address_space *mapping, loff_t from) |
| 1878 | { | 2888 | { |
| 1879 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2889 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
| @@ -1882,8 +2892,13 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
| 1882 | ext4_lblk_t iblock; | 2892 | ext4_lblk_t iblock; |
| 1883 | struct inode *inode = mapping->host; | 2893 | struct inode *inode = mapping->host; |
| 1884 | struct buffer_head *bh; | 2894 | struct buffer_head *bh; |
| 2895 | struct page *page; | ||
| 1885 | int err = 0; | 2896 | int err = 0; |
| 1886 | 2897 | ||
| 2898 | page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); | ||
| 2899 | if (!page) | ||
| 2900 | return -EINVAL; | ||
| 2901 | |||
| 1887 | blocksize = inode->i_sb->s_blocksize; | 2902 | blocksize = inode->i_sb->s_blocksize; |
| 1888 | length = blocksize - (offset & (blocksize - 1)); | 2903 | length = blocksize - (offset & (blocksize - 1)); |
| 1889 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2904 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
| @@ -1956,7 +2971,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
| 1956 | err = ext4_journal_dirty_metadata(handle, bh); | 2971 | err = ext4_journal_dirty_metadata(handle, bh); |
| 1957 | } else { | 2972 | } else { |
| 1958 | if (ext4_should_order_data(inode)) | 2973 | if (ext4_should_order_data(inode)) |
| 1959 | err = ext4_journal_dirty_data(handle, bh); | 2974 | err = ext4_jbd2_file_inode(handle, inode); |
| 1960 | mark_buffer_dirty(bh); | 2975 | mark_buffer_dirty(bh); |
| 1961 | } | 2976 | } |
| 1962 | 2977 | ||
| @@ -2179,7 +3194,21 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
| 2179 | 3194 | ||
| 2180 | if (this_bh) { | 3195 | if (this_bh) { |
| 2181 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); | 3196 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); |
| 2182 | ext4_journal_dirty_metadata(handle, this_bh); | 3197 | |
| 3198 | /* | ||
| 3199 | * The buffer head should have an attached journal head at this | ||
| 3200 | * point. However, if the data is corrupted and an indirect | ||
| 3201 | * block pointed to itself, it would have been detached when | ||
| 3202 | * the block was cleared. Check for this instead of OOPSing. | ||
| 3203 | */ | ||
| 3204 | if (bh2jh(this_bh)) | ||
| 3205 | ext4_journal_dirty_metadata(handle, this_bh); | ||
| 3206 | else | ||
| 3207 | ext4_error(inode->i_sb, __func__, | ||
| 3208 | "circular indirect block detected, " | ||
| 3209 | "inode=%lu, block=%llu", | ||
| 3210 | inode->i_ino, | ||
| 3211 | (unsigned long long) this_bh->b_blocknr); | ||
| 2183 | } | 3212 | } |
| 2184 | } | 3213 | } |
| 2185 | 3214 | ||
| @@ -2305,6 +3334,19 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 2305 | } | 3334 | } |
| 2306 | } | 3335 | } |
| 2307 | 3336 | ||
| 3337 | int ext4_can_truncate(struct inode *inode) | ||
| 3338 | { | ||
| 3339 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 3340 | return 0; | ||
| 3341 | if (S_ISREG(inode->i_mode)) | ||
| 3342 | return 1; | ||
| 3343 | if (S_ISDIR(inode->i_mode)) | ||
| 3344 | return 1; | ||
| 3345 | if (S_ISLNK(inode->i_mode)) | ||
| 3346 | return !ext4_inode_is_fast_symlink(inode); | ||
| 3347 | return 0; | ||
| 3348 | } | ||
| 3349 | |||
| 2308 | /* | 3350 | /* |
| 2309 | * ext4_truncate() | 3351 | * ext4_truncate() |
| 2310 | * | 3352 | * |
| @@ -2347,51 +3389,25 @@ void ext4_truncate(struct inode *inode) | |||
| 2347 | int n; | 3389 | int n; |
| 2348 | ext4_lblk_t last_block; | 3390 | ext4_lblk_t last_block; |
| 2349 | unsigned blocksize = inode->i_sb->s_blocksize; | 3391 | unsigned blocksize = inode->i_sb->s_blocksize; |
| 2350 | struct page *page; | ||
| 2351 | 3392 | ||
| 2352 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 3393 | if (!ext4_can_truncate(inode)) |
| 2353 | S_ISLNK(inode->i_mode))) | ||
| 2354 | return; | ||
| 2355 | if (ext4_inode_is_fast_symlink(inode)) | ||
| 2356 | return; | ||
| 2357 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 2358 | return; | 3394 | return; |
| 2359 | 3395 | ||
| 2360 | /* | ||
| 2361 | * We have to lock the EOF page here, because lock_page() nests | ||
| 2362 | * outside jbd2_journal_start(). | ||
| 2363 | */ | ||
| 2364 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
| 2365 | /* Block boundary? Nothing to do */ | ||
| 2366 | page = NULL; | ||
| 2367 | } else { | ||
| 2368 | page = grab_cache_page(mapping, | ||
| 2369 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
| 2370 | if (!page) | ||
| 2371 | return; | ||
| 2372 | } | ||
| 2373 | |||
| 2374 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3396 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
| 2375 | ext4_ext_truncate(inode, page); | 3397 | ext4_ext_truncate(inode); |
| 2376 | return; | 3398 | return; |
| 2377 | } | 3399 | } |
| 2378 | 3400 | ||
| 2379 | handle = start_transaction(inode); | 3401 | handle = start_transaction(inode); |
| 2380 | if (IS_ERR(handle)) { | 3402 | if (IS_ERR(handle)) |
| 2381 | if (page) { | ||
| 2382 | clear_highpage(page); | ||
| 2383 | flush_dcache_page(page); | ||
| 2384 | unlock_page(page); | ||
| 2385 | page_cache_release(page); | ||
| 2386 | } | ||
| 2387 | return; /* AKPM: return what? */ | 3403 | return; /* AKPM: return what? */ |
| 2388 | } | ||
| 2389 | 3404 | ||
| 2390 | last_block = (inode->i_size + blocksize-1) | 3405 | last_block = (inode->i_size + blocksize-1) |
| 2391 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 3406 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
| 2392 | 3407 | ||
| 2393 | if (page) | 3408 | if (inode->i_size & (blocksize - 1)) |
| 2394 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 3409 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
| 3410 | goto out_stop; | ||
| 2395 | 3411 | ||
| 2396 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 3412 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
| 2397 | if (n == 0) | 3413 | if (n == 0) |
| @@ -2410,6 +3426,11 @@ void ext4_truncate(struct inode *inode) | |||
| 2410 | goto out_stop; | 3426 | goto out_stop; |
| 2411 | 3427 | ||
| 2412 | /* | 3428 | /* |
| 3429 | * From here we block out all ext4_get_block() callers who want to | ||
| 3430 | * modify the block allocation tree. | ||
| 3431 | */ | ||
| 3432 | down_write(&ei->i_data_sem); | ||
| 3433 | /* | ||
| 2413 | * The orphan list entry will now protect us from any crash which | 3434 | * The orphan list entry will now protect us from any crash which |
| 2414 | * occurs before the truncate completes, so it is now safe to propagate | 3435 | * occurs before the truncate completes, so it is now safe to propagate |
| 2415 | * the new, shorter inode size (held for now in i_size) into the | 3436 | * the new, shorter inode size (held for now in i_size) into the |
| @@ -2418,12 +3439,6 @@ void ext4_truncate(struct inode *inode) | |||
| 2418 | */ | 3439 | */ |
| 2419 | ei->i_disksize = inode->i_size; | 3440 | ei->i_disksize = inode->i_size; |
| 2420 | 3441 | ||
| 2421 | /* | ||
| 2422 | * From here we block out all ext4_get_block() callers who want to | ||
| 2423 | * modify the block allocation tree. | ||
| 2424 | */ | ||
| 2425 | down_write(&ei->i_data_sem); | ||
| 2426 | |||
| 2427 | if (n == 1) { /* direct blocks */ | 3442 | if (n == 1) { /* direct blocks */ |
| 2428 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 3443 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
| 2429 | i_data + EXT4_NDIR_BLOCKS); | 3444 | i_data + EXT4_NDIR_BLOCKS); |
| @@ -3107,7 +4122,14 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
| 3107 | * be freed, so we have a strong guarantee that no future commit will | 4122 | * be freed, so we have a strong guarantee that no future commit will |
| 3108 | * leave these blocks visible to the user.) | 4123 | * leave these blocks visible to the user.) |
| 3109 | * | 4124 | * |
| 3110 | * Called with inode->sem down. | 4125 | * Another thing we have to assure is that if we are in ordered mode |
| 4126 | * and inode is still attached to the committing transaction, we must | ||
| 4127 | * we start writeout of all the dirty pages which are being truncated. | ||
| 4128 | * This way we are sure that all the data written in the previous | ||
| 4129 | * transaction are already on disk (truncate waits for pages under | ||
| 4130 | * writeback). | ||
| 4131 | * | ||
| 4132 | * Called with inode->i_mutex down. | ||
| 3111 | */ | 4133 | */ |
| 3112 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) | 4134 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) |
| 3113 | { | 4135 | { |
| @@ -3173,6 +4195,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 3173 | if (!error) | 4195 | if (!error) |
| 3174 | error = rc; | 4196 | error = rc; |
| 3175 | ext4_journal_stop(handle); | 4197 | ext4_journal_stop(handle); |
| 4198 | |||
| 4199 | if (ext4_should_order_data(inode)) { | ||
| 4200 | error = ext4_begin_ordered_truncate(inode, | ||
| 4201 | attr->ia_size); | ||
| 4202 | if (error) { | ||
| 4203 | /* Do as much error cleanup as possible */ | ||
| 4204 | handle = ext4_journal_start(inode, 3); | ||
| 4205 | if (IS_ERR(handle)) { | ||
| 4206 | ext4_orphan_del(NULL, inode); | ||
| 4207 | goto err_out; | ||
| 4208 | } | ||
| 4209 | ext4_orphan_del(handle, inode); | ||
| 4210 | ext4_journal_stop(handle); | ||
| 4211 | goto err_out; | ||
| 4212 | } | ||
| 4213 | } | ||
| 3176 | } | 4214 | } |
| 3177 | 4215 | ||
| 3178 | rc = inode_setattr(inode, attr); | 4216 | rc = inode_setattr(inode, attr); |
| @@ -3193,6 +4231,32 @@ err_out: | |||
| 3193 | return error; | 4231 | return error; |
| 3194 | } | 4232 | } |
| 3195 | 4233 | ||
| 4234 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 4235 | struct kstat *stat) | ||
| 4236 | { | ||
| 4237 | struct inode *inode; | ||
| 4238 | unsigned long delalloc_blocks; | ||
| 4239 | |||
| 4240 | inode = dentry->d_inode; | ||
| 4241 | generic_fillattr(inode, stat); | ||
| 4242 | |||
| 4243 | /* | ||
| 4244 | * We can't update i_blocks if the block allocation is delayed | ||
| 4245 | * otherwise in the case of system crash before the real block | ||
| 4246 | * allocation is done, we will have i_blocks inconsistent with | ||
| 4247 | * on-disk file blocks. | ||
| 4248 | * We always keep i_blocks updated together with real | ||
| 4249 | * allocation. But to not confuse with user, stat | ||
| 4250 | * will return the blocks that include the delayed allocation | ||
| 4251 | * blocks for this file. | ||
| 4252 | */ | ||
| 4253 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 4254 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
| 4255 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 4256 | |||
| 4257 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | ||
| 4258 | return 0; | ||
| 4259 | } | ||
| 3196 | 4260 | ||
| 3197 | /* | 4261 | /* |
| 3198 | * How many blocks doth make a writepage()? | 4262 | * How many blocks doth make a writepage()? |
| @@ -3506,3 +4570,64 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 3506 | 4570 | ||
| 3507 | return err; | 4571 | return err; |
| 3508 | } | 4572 | } |
| 4573 | |||
| 4574 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | ||
| 4575 | { | ||
| 4576 | return !buffer_mapped(bh); | ||
| 4577 | } | ||
| 4578 | |||
| 4579 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
| 4580 | { | ||
| 4581 | loff_t size; | ||
| 4582 | unsigned long len; | ||
| 4583 | int ret = -EINVAL; | ||
| 4584 | struct file *file = vma->vm_file; | ||
| 4585 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 4586 | struct address_space *mapping = inode->i_mapping; | ||
| 4587 | |||
| 4588 | /* | ||
| 4589 | * Get i_alloc_sem to stop truncates messing with the inode. We cannot | ||
| 4590 | * get i_mutex because we are already holding mmap_sem. | ||
| 4591 | */ | ||
| 4592 | down_read(&inode->i_alloc_sem); | ||
| 4593 | size = i_size_read(inode); | ||
| 4594 | if (page->mapping != mapping || size <= page_offset(page) | ||
| 4595 | || !PageUptodate(page)) { | ||
| 4596 | /* page got truncated from under us? */ | ||
| 4597 | goto out_unlock; | ||
| 4598 | } | ||
| 4599 | ret = 0; | ||
| 4600 | if (PageMappedToDisk(page)) | ||
| 4601 | goto out_unlock; | ||
| 4602 | |||
| 4603 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
| 4604 | len = size & ~PAGE_CACHE_MASK; | ||
| 4605 | else | ||
| 4606 | len = PAGE_CACHE_SIZE; | ||
| 4607 | |||
| 4608 | if (page_has_buffers(page)) { | ||
| 4609 | /* return if we have all the buffers mapped */ | ||
| 4610 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 4611 | ext4_bh_unmapped)) | ||
| 4612 | goto out_unlock; | ||
| 4613 | } | ||
| 4614 | /* | ||
| 4615 | * OK, we need to fill the hole... Do write_begin write_end | ||
| 4616 | * to do block allocation/reservation.We are not holding | ||
| 4617 | * inode.i__mutex here. That allow * parallel write_begin, | ||
| 4618 | * write_end call. lock_page prevent this from happening | ||
| 4619 | * on the same page though | ||
| 4620 | */ | ||
| 4621 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | ||
| 4622 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
| 4623 | if (ret < 0) | ||
| 4624 | goto out_unlock; | ||
| 4625 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | ||
| 4626 | len, len, page, NULL); | ||
| 4627 | if (ret < 0) | ||
| 4628 | goto out_unlock; | ||
| 4629 | ret = 0; | ||
| 4630 | out_unlock: | ||
| 4631 | up_read(&inode->i_alloc_sem); | ||
| 4632 | return ret; | ||
| 4633 | } | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c9900aade150..8d141a25bbee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -381,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) | |||
| 381 | 381 | ||
| 382 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) | 382 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) |
| 383 | { | 383 | { |
| 384 | int fix = 0; | 384 | int fix = 0, ret, tmpmax; |
| 385 | addr = mb_correct_addr_and_bit(&fix, addr); | 385 | addr = mb_correct_addr_and_bit(&fix, addr); |
| 386 | max += fix; | 386 | tmpmax = max + fix; |
| 387 | start += fix; | 387 | start += fix; |
| 388 | 388 | ||
| 389 | return ext4_find_next_zero_bit(addr, max, start) - fix; | 389 | ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix; |
| 390 | if (ret > max) | ||
| 391 | return max; | ||
| 392 | return ret; | ||
| 390 | } | 393 | } |
| 391 | 394 | ||
| 392 | static inline int mb_find_next_bit(void *addr, int max, int start) | 395 | static inline int mb_find_next_bit(void *addr, int max, int start) |
| 393 | { | 396 | { |
| 394 | int fix = 0; | 397 | int fix = 0, ret, tmpmax; |
| 395 | addr = mb_correct_addr_and_bit(&fix, addr); | 398 | addr = mb_correct_addr_and_bit(&fix, addr); |
| 396 | max += fix; | 399 | tmpmax = max + fix; |
| 397 | start += fix; | 400 | start += fix; |
| 398 | 401 | ||
| 399 | return ext4_find_next_bit(addr, max, start) - fix; | 402 | ret = ext4_find_next_bit(addr, tmpmax, start) - fix; |
| 403 | if (ret > max) | ||
| 404 | return max; | ||
| 405 | return ret; | ||
| 400 | } | 406 | } |
| 401 | 407 | ||
| 402 | static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | 408 | static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) |
| @@ -803,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 803 | if (!buffer_uptodate(bh[i])) | 809 | if (!buffer_uptodate(bh[i])) |
| 804 | goto out; | 810 | goto out; |
| 805 | 811 | ||
| 812 | err = 0; | ||
| 806 | first_block = page->index * blocks_per_page; | 813 | first_block = page->index * blocks_per_page; |
| 807 | for (i = 0; i < blocks_per_page; i++) { | 814 | for (i = 0; i < blocks_per_page; i++) { |
| 808 | int group; | 815 | int group; |
| @@ -883,6 +890,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 883 | int pnum; | 890 | int pnum; |
| 884 | int poff; | 891 | int poff; |
| 885 | struct page *page; | 892 | struct page *page; |
| 893 | int ret; | ||
| 886 | 894 | ||
| 887 | mb_debug("load group %lu\n", group); | 895 | mb_debug("load group %lu\n", group); |
| 888 | 896 | ||
| @@ -914,15 +922,21 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 914 | if (page) { | 922 | if (page) { |
| 915 | BUG_ON(page->mapping != inode->i_mapping); | 923 | BUG_ON(page->mapping != inode->i_mapping); |
| 916 | if (!PageUptodate(page)) { | 924 | if (!PageUptodate(page)) { |
| 917 | ext4_mb_init_cache(page, NULL); | 925 | ret = ext4_mb_init_cache(page, NULL); |
| 926 | if (ret) { | ||
| 927 | unlock_page(page); | ||
| 928 | goto err; | ||
| 929 | } | ||
| 918 | mb_cmp_bitmaps(e4b, page_address(page) + | 930 | mb_cmp_bitmaps(e4b, page_address(page) + |
| 919 | (poff * sb->s_blocksize)); | 931 | (poff * sb->s_blocksize)); |
| 920 | } | 932 | } |
| 921 | unlock_page(page); | 933 | unlock_page(page); |
| 922 | } | 934 | } |
| 923 | } | 935 | } |
| 924 | if (page == NULL || !PageUptodate(page)) | 936 | if (page == NULL || !PageUptodate(page)) { |
| 937 | ret = -EIO; | ||
| 925 | goto err; | 938 | goto err; |
| 939 | } | ||
| 926 | e4b->bd_bitmap_page = page; | 940 | e4b->bd_bitmap_page = page; |
| 927 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); | 941 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); |
| 928 | mark_page_accessed(page); | 942 | mark_page_accessed(page); |
| @@ -938,14 +952,20 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
| 938 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | 952 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
| 939 | if (page) { | 953 | if (page) { |
| 940 | BUG_ON(page->mapping != inode->i_mapping); | 954 | BUG_ON(page->mapping != inode->i_mapping); |
| 941 | if (!PageUptodate(page)) | 955 | if (!PageUptodate(page)) { |
| 942 | ext4_mb_init_cache(page, e4b->bd_bitmap); | 956 | ret = ext4_mb_init_cache(page, e4b->bd_bitmap); |
| 943 | 957 | if (ret) { | |
| 958 | unlock_page(page); | ||
| 959 | goto err; | ||
| 960 | } | ||
| 961 | } | ||
| 944 | unlock_page(page); | 962 | unlock_page(page); |
| 945 | } | 963 | } |
| 946 | } | 964 | } |
| 947 | if (page == NULL || !PageUptodate(page)) | 965 | if (page == NULL || !PageUptodate(page)) { |
| 966 | ret = -EIO; | ||
| 948 | goto err; | 967 | goto err; |
| 968 | } | ||
| 949 | e4b->bd_buddy_page = page; | 969 | e4b->bd_buddy_page = page; |
| 950 | e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); | 970 | e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); |
| 951 | mark_page_accessed(page); | 971 | mark_page_accessed(page); |
| @@ -962,7 +982,7 @@ err: | |||
| 962 | page_cache_release(e4b->bd_buddy_page); | 982 | page_cache_release(e4b->bd_buddy_page); |
| 963 | e4b->bd_buddy = NULL; | 983 | e4b->bd_buddy = NULL; |
| 964 | e4b->bd_bitmap = NULL; | 984 | e4b->bd_bitmap = NULL; |
| 965 | return -EIO; | 985 | return ret; |
| 966 | } | 986 | } |
| 967 | 987 | ||
| 968 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) | 988 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) |
| @@ -1031,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
| 1031 | } | 1051 | } |
| 1032 | } | 1052 | } |
| 1033 | 1053 | ||
| 1034 | static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | 1054 | static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, |
| 1035 | int first, int count) | 1055 | int first, int count) |
| 1036 | { | 1056 | { |
| 1037 | int block = 0; | 1057 | int block = 0; |
| @@ -1071,11 +1091,12 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1071 | blocknr += block; | 1091 | blocknr += block; |
| 1072 | blocknr += | 1092 | blocknr += |
| 1073 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 1093 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
| 1074 | 1094 | ext4_unlock_group(sb, e4b->bd_group); | |
| 1075 | ext4_error(sb, __func__, "double-free of inode" | 1095 | ext4_error(sb, __func__, "double-free of inode" |
| 1076 | " %lu's block %llu(bit %u in group %lu)\n", | 1096 | " %lu's block %llu(bit %u in group %lu)\n", |
| 1077 | inode ? inode->i_ino : 0, blocknr, block, | 1097 | inode ? inode->i_ino : 0, blocknr, block, |
| 1078 | e4b->bd_group); | 1098 | e4b->bd_group); |
| 1099 | ext4_lock_group(sb, e4b->bd_group); | ||
| 1079 | } | 1100 | } |
| 1080 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1101 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); |
| 1081 | e4b->bd_info->bb_counters[order]++; | 1102 | e4b->bd_info->bb_counters[order]++; |
| @@ -1113,8 +1134,6 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1113 | } while (1); | 1134 | } while (1); |
| 1114 | } | 1135 | } |
| 1115 | mb_check_buddy(e4b); | 1136 | mb_check_buddy(e4b); |
| 1116 | |||
| 1117 | return 0; | ||
| 1118 | } | 1137 | } |
| 1119 | 1138 | ||
| 1120 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | 1139 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, |
| @@ -1730,10 +1749,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
| 1730 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | 1749 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; |
| 1731 | spin_unlock(&sbi->s_md_lock); | 1750 | spin_unlock(&sbi->s_md_lock); |
| 1732 | } | 1751 | } |
| 1733 | |||
| 1734 | /* searching for the right group start from the goal value specified */ | ||
| 1735 | group = ac->ac_g_ex.fe_group; | ||
| 1736 | |||
| 1737 | /* Let's just scan groups to find more-less suitable blocks */ | 1752 | /* Let's just scan groups to find more-less suitable blocks */ |
| 1738 | cr = ac->ac_2order ? 0 : 1; | 1753 | cr = ac->ac_2order ? 0 : 1; |
| 1739 | /* | 1754 | /* |
| @@ -1743,6 +1758,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
| 1743 | repeat: | 1758 | repeat: |
| 1744 | for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { | 1759 | for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { |
| 1745 | ac->ac_criteria = cr; | 1760 | ac->ac_criteria = cr; |
| 1761 | /* | ||
| 1762 | * searching for the right group start | ||
| 1763 | * from the goal value specified | ||
| 1764 | */ | ||
| 1765 | group = ac->ac_g_ex.fe_group; | ||
| 1766 | |||
| 1746 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { | 1767 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { |
| 1747 | struct ext4_group_info *grp; | 1768 | struct ext4_group_info *grp; |
| 1748 | struct ext4_group_desc *desc; | 1769 | struct ext4_group_desc *desc; |
| @@ -1963,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file) | |||
| 1963 | int rc; | 1984 | int rc; |
| 1964 | int size; | 1985 | int size; |
| 1965 | 1986 | ||
| 1987 | if (unlikely(sbi->s_mb_history == NULL)) | ||
| 1988 | return -ENOMEM; | ||
| 1966 | s = kmalloc(sizeof(*s), GFP_KERNEL); | 1989 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
| 1967 | if (s == NULL) | 1990 | if (s == NULL) |
| 1968 | return -ENOMEM; | 1991 | return -ENOMEM; |
| @@ -2165,9 +2188,7 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
| 2165 | sbi->s_mb_history_cur = 0; | 2188 | sbi->s_mb_history_cur = 0; |
| 2166 | spin_lock_init(&sbi->s_mb_history_lock); | 2189 | spin_lock_init(&sbi->s_mb_history_lock); |
| 2167 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); | 2190 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); |
| 2168 | sbi->s_mb_history = kmalloc(i, GFP_KERNEL); | 2191 | sbi->s_mb_history = kzalloc(i, GFP_KERNEL); |
| 2169 | if (likely(sbi->s_mb_history != NULL)) | ||
| 2170 | memset(sbi->s_mb_history, 0, i); | ||
| 2171 | /* if we can't allocate history, then we simple won't use it */ | 2192 | /* if we can't allocate history, then we simple won't use it */ |
| 2172 | } | 2193 | } |
| 2173 | 2194 | ||
| @@ -2215,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
| 2215 | #define ext4_mb_history_init(sb) | 2236 | #define ext4_mb_history_init(sb) |
| 2216 | #endif | 2237 | #endif |
| 2217 | 2238 | ||
| 2239 | |||
| 2240 | /* Create and initialize ext4_group_info data for the given group. */ | ||
| 2241 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | ||
| 2242 | struct ext4_group_desc *desc) | ||
| 2243 | { | ||
| 2244 | int i, len; | ||
| 2245 | int metalen = 0; | ||
| 2246 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2247 | struct ext4_group_info **meta_group_info; | ||
| 2248 | |||
| 2249 | /* | ||
| 2250 | * First check if this group is the first of a reserved block. | ||
| 2251 | * If it's true, we have to allocate a new table of pointers | ||
| 2252 | * to ext4_group_info structures | ||
| 2253 | */ | ||
| 2254 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { | ||
| 2255 | metalen = sizeof(*meta_group_info) << | ||
| 2256 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
| 2257 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
| 2258 | if (meta_group_info == NULL) { | ||
| 2259 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
| 2260 | "buddy group\n"); | ||
| 2261 | goto exit_meta_group_info; | ||
| 2262 | } | ||
| 2263 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = | ||
| 2264 | meta_group_info; | ||
| 2265 | } | ||
| 2266 | |||
| 2267 | /* | ||
| 2268 | * calculate needed size. if change bb_counters size, | ||
| 2269 | * don't forget about ext4_mb_generate_buddy() | ||
| 2270 | */ | ||
| 2271 | len = offsetof(typeof(**meta_group_info), | ||
| 2272 | bb_counters[sb->s_blocksize_bits + 2]); | ||
| 2273 | |||
| 2274 | meta_group_info = | ||
| 2275 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
| 2276 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
| 2277 | |||
| 2278 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | ||
| 2279 | if (meta_group_info[i] == NULL) { | ||
| 2280 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
| 2281 | goto exit_group_info; | ||
| 2282 | } | ||
| 2283 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | ||
| 2284 | &(meta_group_info[i]->bb_state)); | ||
| 2285 | |||
| 2286 | /* | ||
| 2287 | * initialize bb_free to be able to skip | ||
| 2288 | * empty groups without initialization | ||
| 2289 | */ | ||
| 2290 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
| 2291 | meta_group_info[i]->bb_free = | ||
| 2292 | ext4_free_blocks_after_init(sb, group, desc); | ||
| 2293 | } else { | ||
| 2294 | meta_group_info[i]->bb_free = | ||
| 2295 | le16_to_cpu(desc->bg_free_blocks_count); | ||
| 2296 | } | ||
| 2297 | |||
| 2298 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | ||
| 2299 | |||
| 2300 | #ifdef DOUBLE_CHECK | ||
| 2301 | { | ||
| 2302 | struct buffer_head *bh; | ||
| 2303 | meta_group_info[i]->bb_bitmap = | ||
| 2304 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
| 2305 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); | ||
| 2306 | bh = ext4_read_block_bitmap(sb, group); | ||
| 2307 | BUG_ON(bh == NULL); | ||
| 2308 | memcpy(meta_group_info[i]->bb_bitmap, bh->b_data, | ||
| 2309 | sb->s_blocksize); | ||
| 2310 | put_bh(bh); | ||
| 2311 | } | ||
| 2312 | #endif | ||
| 2313 | |||
| 2314 | return 0; | ||
| 2315 | |||
| 2316 | exit_group_info: | ||
| 2317 | /* If a meta_group_info table has been allocated, release it now */ | ||
| 2318 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) | ||
| 2319 | kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); | ||
| 2320 | exit_meta_group_info: | ||
| 2321 | return -ENOMEM; | ||
| 2322 | } /* ext4_mb_add_groupinfo */ | ||
| 2323 | |||
| 2324 | /* | ||
| 2325 | * Add a group to the existing groups. | ||
| 2326 | * This function is used for online resize | ||
| 2327 | */ | ||
| 2328 | int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, | ||
| 2329 | struct ext4_group_desc *desc) | ||
| 2330 | { | ||
| 2331 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2332 | struct inode *inode = sbi->s_buddy_cache; | ||
| 2333 | int blocks_per_page; | ||
| 2334 | int block; | ||
| 2335 | int pnum; | ||
| 2336 | struct page *page; | ||
| 2337 | int err; | ||
| 2338 | |||
| 2339 | /* Add group based on group descriptor*/ | ||
| 2340 | err = ext4_mb_add_groupinfo(sb, group, desc); | ||
| 2341 | if (err) | ||
| 2342 | return err; | ||
| 2343 | |||
| 2344 | /* | ||
| 2345 | * Cache pages containing dynamic mb_alloc datas (buddy and bitmap | ||
| 2346 | * datas) are set not up to date so that they will be re-initilaized | ||
| 2347 | * during the next call to ext4_mb_load_buddy | ||
| 2348 | */ | ||
| 2349 | |||
| 2350 | /* Set buddy page as not up to date */ | ||
| 2351 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
| 2352 | block = group * 2; | ||
| 2353 | pnum = block / blocks_per_page; | ||
| 2354 | page = find_get_page(inode->i_mapping, pnum); | ||
| 2355 | if (page != NULL) { | ||
| 2356 | ClearPageUptodate(page); | ||
| 2357 | page_cache_release(page); | ||
| 2358 | } | ||
| 2359 | |||
| 2360 | /* Set bitmap page as not up to date */ | ||
| 2361 | block++; | ||
| 2362 | pnum = block / blocks_per_page; | ||
| 2363 | page = find_get_page(inode->i_mapping, pnum); | ||
| 2364 | if (page != NULL) { | ||
| 2365 | ClearPageUptodate(page); | ||
| 2366 | page_cache_release(page); | ||
| 2367 | } | ||
| 2368 | |||
| 2369 | return 0; | ||
| 2370 | } | ||
| 2371 | |||
| 2372 | /* | ||
| 2373 | * Update an existing group. | ||
| 2374 | * This function is used for online resize | ||
| 2375 | */ | ||
| 2376 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
| 2377 | { | ||
| 2378 | grp->bb_free += add; | ||
| 2379 | } | ||
| 2380 | |||
| 2218 | static int ext4_mb_init_backend(struct super_block *sb) | 2381 | static int ext4_mb_init_backend(struct super_block *sb) |
| 2219 | { | 2382 | { |
| 2220 | ext4_group_t i; | 2383 | ext4_group_t i; |
| 2221 | int j, len, metalen; | 2384 | int metalen; |
| 2222 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2385 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2223 | int num_meta_group_infos = | 2386 | struct ext4_super_block *es = sbi->s_es; |
| 2224 | (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2387 | int num_meta_group_infos; |
| 2225 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2388 | int num_meta_group_infos_max; |
| 2389 | int array_size; | ||
| 2226 | struct ext4_group_info **meta_group_info; | 2390 | struct ext4_group_info **meta_group_info; |
| 2391 | struct ext4_group_desc *desc; | ||
| 2392 | |||
| 2393 | /* This is the number of blocks used by GDT */ | ||
| 2394 | num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - | ||
| 2395 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | ||
| 2396 | |||
| 2397 | /* | ||
| 2398 | * This is the total number of blocks used by GDT including | ||
| 2399 | * the number of reserved blocks for GDT. | ||
| 2400 | * The s_group_info array is allocated with this value | ||
| 2401 | * to allow a clean online resize without a complex | ||
| 2402 | * manipulation of pointer. | ||
| 2403 | * The drawback is the unused memory when no resize | ||
| 2404 | * occurs but it's very low in terms of pages | ||
| 2405 | * (see comments below) | ||
| 2406 | * Need to handle this properly when META_BG resizing is allowed | ||
| 2407 | */ | ||
| 2408 | num_meta_group_infos_max = num_meta_group_infos + | ||
| 2409 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
| 2227 | 2410 | ||
| 2411 | /* | ||
| 2412 | * array_size is the size of s_group_info array. We round it | ||
| 2413 | * to the next power of two because this approximation is done | ||
| 2414 | * internally by kmalloc so we can have some more memory | ||
| 2415 | * for free here (e.g. may be used for META_BG resize). | ||
| 2416 | */ | ||
| 2417 | array_size = 1; | ||
| 2418 | while (array_size < sizeof(*sbi->s_group_info) * | ||
| 2419 | num_meta_group_infos_max) | ||
| 2420 | array_size = array_size << 1; | ||
| 2228 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2421 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
| 2229 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2422 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
| 2230 | * So a two level scheme suffices for now. */ | 2423 | * So a two level scheme suffices for now. */ |
| 2231 | sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * | 2424 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); |
| 2232 | num_meta_group_infos, GFP_KERNEL); | ||
| 2233 | if (sbi->s_group_info == NULL) { | 2425 | if (sbi->s_group_info == NULL) { |
| 2234 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2426 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
| 2235 | return -ENOMEM; | 2427 | return -ENOMEM; |
| @@ -2256,63 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
| 2256 | sbi->s_group_info[i] = meta_group_info; | 2448 | sbi->s_group_info[i] = meta_group_info; |
| 2257 | } | 2449 | } |
| 2258 | 2450 | ||
| 2259 | /* | ||
| 2260 | * calculate needed size. if change bb_counters size, | ||
| 2261 | * don't forget about ext4_mb_generate_buddy() | ||
| 2262 | */ | ||
| 2263 | len = sizeof(struct ext4_group_info); | ||
| 2264 | len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); | ||
| 2265 | for (i = 0; i < sbi->s_groups_count; i++) { | 2451 | for (i = 0; i < sbi->s_groups_count; i++) { |
| 2266 | struct ext4_group_desc *desc; | ||
| 2267 | |||
| 2268 | meta_group_info = | ||
| 2269 | sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
| 2270 | j = i & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
| 2271 | |||
| 2272 | meta_group_info[j] = kzalloc(len, GFP_KERNEL); | ||
| 2273 | if (meta_group_info[j] == NULL) { | ||
| 2274 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
| 2275 | goto err_freebuddy; | ||
| 2276 | } | ||
| 2277 | desc = ext4_get_group_desc(sb, i, NULL); | 2452 | desc = ext4_get_group_desc(sb, i, NULL); |
| 2278 | if (desc == NULL) { | 2453 | if (desc == NULL) { |
| 2279 | printk(KERN_ERR | 2454 | printk(KERN_ERR |
| 2280 | "EXT4-fs: can't read descriptor %lu\n", i); | 2455 | "EXT4-fs: can't read descriptor %lu\n", i); |
| 2281 | i++; | ||
| 2282 | goto err_freebuddy; | 2456 | goto err_freebuddy; |
| 2283 | } | 2457 | } |
| 2284 | memset(meta_group_info[j], 0, len); | 2458 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) |
| 2285 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2459 | goto err_freebuddy; |
| 2286 | &(meta_group_info[j]->bb_state)); | ||
| 2287 | |||
| 2288 | /* | ||
| 2289 | * initialize bb_free to be able to skip | ||
| 2290 | * empty groups without initialization | ||
| 2291 | */ | ||
| 2292 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
| 2293 | meta_group_info[j]->bb_free = | ||
| 2294 | ext4_free_blocks_after_init(sb, i, desc); | ||
| 2295 | } else { | ||
| 2296 | meta_group_info[j]->bb_free = | ||
| 2297 | le16_to_cpu(desc->bg_free_blocks_count); | ||
| 2298 | } | ||
| 2299 | |||
| 2300 | INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list); | ||
| 2301 | |||
| 2302 | #ifdef DOUBLE_CHECK | ||
| 2303 | { | ||
| 2304 | struct buffer_head *bh; | ||
| 2305 | meta_group_info[j]->bb_bitmap = | ||
| 2306 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
| 2307 | BUG_ON(meta_group_info[j]->bb_bitmap == NULL); | ||
| 2308 | bh = read_block_bitmap(sb, i); | ||
| 2309 | BUG_ON(bh == NULL); | ||
| 2310 | memcpy(meta_group_info[j]->bb_bitmap, bh->b_data, | ||
| 2311 | sb->s_blocksize); | ||
| 2312 | put_bh(bh); | ||
| 2313 | } | ||
| 2314 | #endif | ||
| 2315 | |||
| 2316 | } | 2460 | } |
| 2317 | 2461 | ||
| 2318 | return 0; | 2462 | return 0; |
| @@ -2336,6 +2480,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2336 | unsigned i; | 2480 | unsigned i; |
| 2337 | unsigned offset; | 2481 | unsigned offset; |
| 2338 | unsigned max; | 2482 | unsigned max; |
| 2483 | int ret; | ||
| 2339 | 2484 | ||
| 2340 | if (!test_opt(sb, MBALLOC)) | 2485 | if (!test_opt(sb, MBALLOC)) |
| 2341 | return 0; | 2486 | return 0; |
| @@ -2370,12 +2515,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2370 | } while (i <= sb->s_blocksize_bits + 1); | 2515 | } while (i <= sb->s_blocksize_bits + 1); |
| 2371 | 2516 | ||
| 2372 | /* init file for buddy data */ | 2517 | /* init file for buddy data */ |
| 2373 | i = ext4_mb_init_backend(sb); | 2518 | ret = ext4_mb_init_backend(sb); |
| 2374 | if (i) { | 2519 | if (ret != 0) { |
| 2375 | clear_opt(sbi->s_mount_opt, MBALLOC); | 2520 | clear_opt(sbi->s_mount_opt, MBALLOC); |
| 2376 | kfree(sbi->s_mb_offsets); | 2521 | kfree(sbi->s_mb_offsets); |
| 2377 | kfree(sbi->s_mb_maxs); | 2522 | kfree(sbi->s_mb_maxs); |
| 2378 | return i; | 2523 | return ret; |
| 2379 | } | 2524 | } |
| 2380 | 2525 | ||
| 2381 | spin_lock_init(&sbi->s_md_lock); | 2526 | spin_lock_init(&sbi->s_md_lock); |
| @@ -2548,8 +2693,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
| 2548 | ext4_lock_group(sb, md->group); | 2693 | ext4_lock_group(sb, md->group); |
| 2549 | for (i = 0; i < md->num; i++) { | 2694 | for (i = 0; i < md->num; i++) { |
| 2550 | mb_debug(" %u", md->blocks[i]); | 2695 | mb_debug(" %u", md->blocks[i]); |
| 2551 | err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2696 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); |
| 2552 | BUG_ON(err != 0); | ||
| 2553 | } | 2697 | } |
| 2554 | mb_debug("\n"); | 2698 | mb_debug("\n"); |
| 2555 | ext4_unlock_group(sb, md->group); | 2699 | ext4_unlock_group(sb, md->group); |
| @@ -2575,25 +2719,24 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
| 2575 | 2719 | ||
| 2576 | 2720 | ||
| 2577 | 2721 | ||
| 2578 | #define MB_PROC_VALUE_READ(name) \ | 2722 | #define MB_PROC_FOPS(name) \ |
| 2579 | static int ext4_mb_read_##name(char *page, char **start, \ | 2723 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ |
| 2580 | off_t off, int count, int *eof, void *data) \ | ||
| 2581 | { \ | 2724 | { \ |
| 2582 | struct ext4_sb_info *sbi = data; \ | 2725 | struct ext4_sb_info *sbi = m->private; \ |
| 2583 | int len; \ | 2726 | \ |
| 2584 | *eof = 1; \ | 2727 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ |
| 2585 | if (off != 0) \ | 2728 | return 0; \ |
| 2586 | return 0; \ | 2729 | } \ |
| 2587 | len = sprintf(page, "%ld\n", sbi->s_mb_##name); \ | 2730 | \ |
| 2588 | *start = page; \ | 2731 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ |
| 2589 | return len; \ | 2732 | { \ |
| 2590 | } | 2733 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ |
| 2591 | 2734 | } \ | |
| 2592 | #define MB_PROC_VALUE_WRITE(name) \ | 2735 | \ |
| 2593 | static int ext4_mb_write_##name(struct file *file, \ | 2736 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ |
| 2594 | const char __user *buf, unsigned long cnt, void *data) \ | 2737 | const char __user *buf, size_t cnt, loff_t *ppos) \ |
| 2595 | { \ | 2738 | { \ |
| 2596 | struct ext4_sb_info *sbi = data; \ | 2739 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ |
| 2597 | char str[32]; \ | 2740 | char str[32]; \ |
| 2598 | long value; \ | 2741 | long value; \ |
| 2599 | if (cnt >= sizeof(str)) \ | 2742 | if (cnt >= sizeof(str)) \ |
| @@ -2605,31 +2748,32 @@ static int ext4_mb_write_##name(struct file *file, \ | |||
| 2605 | return -ERANGE; \ | 2748 | return -ERANGE; \ |
| 2606 | sbi->s_mb_##name = value; \ | 2749 | sbi->s_mb_##name = value; \ |
| 2607 | return cnt; \ | 2750 | return cnt; \ |
| 2608 | } | 2751 | } \ |
| 2752 | \ | ||
| 2753 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
| 2754 | .owner = THIS_MODULE, \ | ||
| 2755 | .open = ext4_mb_##name##_proc_open, \ | ||
| 2756 | .read = seq_read, \ | ||
| 2757 | .llseek = seq_lseek, \ | ||
| 2758 | .release = single_release, \ | ||
| 2759 | .write = ext4_mb_##name##_proc_write, \ | ||
| 2760 | }; | ||
| 2609 | 2761 | ||
| 2610 | MB_PROC_VALUE_READ(stats); | 2762 | MB_PROC_FOPS(stats); |
| 2611 | MB_PROC_VALUE_WRITE(stats); | 2763 | MB_PROC_FOPS(max_to_scan); |
| 2612 | MB_PROC_VALUE_READ(max_to_scan); | 2764 | MB_PROC_FOPS(min_to_scan); |
| 2613 | MB_PROC_VALUE_WRITE(max_to_scan); | 2765 | MB_PROC_FOPS(order2_reqs); |
| 2614 | MB_PROC_VALUE_READ(min_to_scan); | 2766 | MB_PROC_FOPS(stream_request); |
| 2615 | MB_PROC_VALUE_WRITE(min_to_scan); | 2767 | MB_PROC_FOPS(group_prealloc); |
| 2616 | MB_PROC_VALUE_READ(order2_reqs); | ||
| 2617 | MB_PROC_VALUE_WRITE(order2_reqs); | ||
| 2618 | MB_PROC_VALUE_READ(stream_request); | ||
| 2619 | MB_PROC_VALUE_WRITE(stream_request); | ||
| 2620 | MB_PROC_VALUE_READ(group_prealloc); | ||
| 2621 | MB_PROC_VALUE_WRITE(group_prealloc); | ||
| 2622 | 2768 | ||
| 2623 | #define MB_PROC_HANDLER(name, var) \ | 2769 | #define MB_PROC_HANDLER(name, var) \ |
| 2624 | do { \ | 2770 | do { \ |
| 2625 | proc = create_proc_entry(name, mode, sbi->s_mb_proc); \ | 2771 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ |
| 2772 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
| 2626 | if (proc == NULL) { \ | 2773 | if (proc == NULL) { \ |
| 2627 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | 2774 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ |
| 2628 | goto err_out; \ | 2775 | goto err_out; \ |
| 2629 | } \ | 2776 | } \ |
| 2630 | proc->data = sbi; \ | ||
| 2631 | proc->read_proc = ext4_mb_read_##var ; \ | ||
| 2632 | proc->write_proc = ext4_mb_write_##var; \ | ||
| 2633 | } while (0) | 2777 | } while (0) |
| 2634 | 2778 | ||
| 2635 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2779 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
| @@ -2639,6 +2783,10 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb) | |||
| 2639 | struct proc_dir_entry *proc; | 2783 | struct proc_dir_entry *proc; |
| 2640 | char devname[64]; | 2784 | char devname[64]; |
| 2641 | 2785 | ||
| 2786 | if (proc_root_ext4 == NULL) { | ||
| 2787 | sbi->s_mb_proc = NULL; | ||
| 2788 | return -EINVAL; | ||
| 2789 | } | ||
| 2642 | bdevname(sb->s_bdev, devname); | 2790 | bdevname(sb->s_bdev, devname); |
| 2643 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | 2791 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); |
| 2644 | 2792 | ||
| @@ -2747,7 +2895,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2747 | 2895 | ||
| 2748 | 2896 | ||
| 2749 | err = -EIO; | 2897 | err = -EIO; |
| 2750 | bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); | 2898 | bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); |
| 2751 | if (!bitmap_bh) | 2899 | if (!bitmap_bh) |
| 2752 | goto out_err; | 2900 | goto out_err; |
| 2753 | 2901 | ||
| @@ -2816,7 +2964,23 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2816 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2964 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
| 2817 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2965 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
| 2818 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2966 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
| 2819 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 2967 | |
| 2968 | /* | ||
| 2969 | * free blocks account has already be reduced/reserved | ||
| 2970 | * at write_begin() time for delayed allocation | ||
| 2971 | * do not double accounting | ||
| 2972 | */ | ||
| 2973 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | ||
| 2974 | percpu_counter_sub(&sbi->s_freeblocks_counter, | ||
| 2975 | ac->ac_b_ex.fe_len); | ||
| 2976 | |||
| 2977 | if (sbi->s_log_groups_per_flex) { | ||
| 2978 | ext4_group_t flex_group = ext4_flex_group(sbi, | ||
| 2979 | ac->ac_b_ex.fe_group); | ||
| 2980 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 2981 | sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len; | ||
| 2982 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 2983 | } | ||
| 2820 | 2984 | ||
| 2821 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | 2985 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); |
| 2822 | if (err) | 2986 | if (err) |
| @@ -3473,8 +3637,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
| 3473 | if (bit >= end) | 3637 | if (bit >= end) |
| 3474 | break; | 3638 | break; |
| 3475 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3639 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
| 3476 | if (next > end) | ||
| 3477 | next = end; | ||
| 3478 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3640 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + |
| 3479 | le32_to_cpu(sbi->s_es->s_first_data_block); | 3641 | le32_to_cpu(sbi->s_es->s_first_data_block); |
| 3480 | mb_debug(" free preallocated %u/%u in group %u\n", | 3642 | mb_debug(" free preallocated %u/%u in group %u\n", |
| @@ -3569,7 +3731,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
| 3569 | if (list_empty(&grp->bb_prealloc_list)) | 3731 | if (list_empty(&grp->bb_prealloc_list)) |
| 3570 | return 0; | 3732 | return 0; |
| 3571 | 3733 | ||
| 3572 | bitmap_bh = read_block_bitmap(sb, group); | 3734 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
| 3573 | if (bitmap_bh == NULL) { | 3735 | if (bitmap_bh == NULL) { |
| 3574 | /* error handling here */ | 3736 | /* error handling here */ |
| 3575 | ext4_mb_release_desc(&e4b); | 3737 | ext4_mb_release_desc(&e4b); |
| @@ -3743,7 +3905,7 @@ repeat: | |||
| 3743 | err = ext4_mb_load_buddy(sb, group, &e4b); | 3905 | err = ext4_mb_load_buddy(sb, group, &e4b); |
| 3744 | BUG_ON(err != 0); /* error handling here */ | 3906 | BUG_ON(err != 0); /* error handling here */ |
| 3745 | 3907 | ||
| 3746 | bitmap_bh = read_block_bitmap(sb, group); | 3908 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
| 3747 | if (bitmap_bh == NULL) { | 3909 | if (bitmap_bh == NULL) { |
| 3748 | /* error handling here */ | 3910 | /* error handling here */ |
| 3749 | ext4_mb_release_desc(&e4b); | 3911 | ext4_mb_release_desc(&e4b); |
| @@ -4011,10 +4173,21 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4011 | sbi = EXT4_SB(sb); | 4173 | sbi = EXT4_SB(sb); |
| 4012 | 4174 | ||
| 4013 | if (!test_opt(sb, MBALLOC)) { | 4175 | if (!test_opt(sb, MBALLOC)) { |
| 4014 | block = ext4_new_blocks_old(handle, ar->inode, ar->goal, | 4176 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, |
| 4015 | &(ar->len), errp); | 4177 | &(ar->len), errp); |
| 4016 | return block; | 4178 | return block; |
| 4017 | } | 4179 | } |
| 4180 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | ||
| 4181 | /* | ||
| 4182 | * With delalloc we already reserved the blocks | ||
| 4183 | */ | ||
| 4184 | ar->len = ext4_has_free_blocks(sbi, ar->len); | ||
| 4185 | } | ||
| 4186 | |||
| 4187 | if (ar->len == 0) { | ||
| 4188 | *errp = -ENOSPC; | ||
| 4189 | return 0; | ||
| 4190 | } | ||
| 4018 | 4191 | ||
| 4019 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4192 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
| 4020 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4193 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
| @@ -4026,10 +4199,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4026 | } | 4199 | } |
| 4027 | inquota = ar->len; | 4200 | inquota = ar->len; |
| 4028 | 4201 | ||
| 4202 | if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) | ||
| 4203 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | ||
| 4204 | |||
| 4029 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4205 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
| 4030 | if (!ac) { | 4206 | if (!ac) { |
| 4207 | ar->len = 0; | ||
| 4031 | *errp = -ENOMEM; | 4208 | *errp = -ENOMEM; |
| 4032 | return 0; | 4209 | goto out1; |
| 4033 | } | 4210 | } |
| 4034 | 4211 | ||
| 4035 | ext4_mb_poll_new_transaction(sb, handle); | 4212 | ext4_mb_poll_new_transaction(sb, handle); |
| @@ -4037,12 +4214,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4037 | *errp = ext4_mb_initialize_context(ac, ar); | 4214 | *errp = ext4_mb_initialize_context(ac, ar); |
| 4038 | if (*errp) { | 4215 | if (*errp) { |
| 4039 | ar->len = 0; | 4216 | ar->len = 0; |
| 4040 | goto out; | 4217 | goto out2; |
| 4041 | } | 4218 | } |
| 4042 | 4219 | ||
| 4043 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; | 4220 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; |
| 4044 | if (!ext4_mb_use_preallocated(ac)) { | 4221 | if (!ext4_mb_use_preallocated(ac)) { |
| 4045 | |||
| 4046 | ac->ac_op = EXT4_MB_HISTORY_ALLOC; | 4222 | ac->ac_op = EXT4_MB_HISTORY_ALLOC; |
| 4047 | ext4_mb_normalize_request(ac, ar); | 4223 | ext4_mb_normalize_request(ac, ar); |
| 4048 | repeat: | 4224 | repeat: |
| @@ -4085,11 +4261,12 @@ repeat: | |||
| 4085 | 4261 | ||
| 4086 | ext4_mb_release_context(ac); | 4262 | ext4_mb_release_context(ac); |
| 4087 | 4263 | ||
| 4088 | out: | 4264 | out2: |
| 4265 | kmem_cache_free(ext4_ac_cachep, ac); | ||
| 4266 | out1: | ||
| 4089 | if (ar->len < inquota) | 4267 | if (ar->len < inquota) |
| 4090 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); | 4268 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); |
| 4091 | 4269 | ||
| 4092 | kmem_cache_free(ext4_ac_cachep, ac); | ||
| 4093 | return block; | 4270 | return block; |
| 4094 | } | 4271 | } |
| 4095 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | 4272 | static void ext4_mb_poll_new_transaction(struct super_block *sb, |
| @@ -4242,7 +4419,7 @@ do_more: | |||
| 4242 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); | 4419 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); |
| 4243 | count -= overflow; | 4420 | count -= overflow; |
| 4244 | } | 4421 | } |
| 4245 | bitmap_bh = read_block_bitmap(sb, block_group); | 4422 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
| 4246 | if (!bitmap_bh) | 4423 | if (!bitmap_bh) |
| 4247 | goto error_return; | 4424 | goto error_return; |
| 4248 | gdp = ext4_get_group_desc(sb, block_group, &gd_bh); | 4425 | gdp = ext4_get_group_desc(sb, block_group, &gd_bh); |
| @@ -4309,10 +4486,9 @@ do_more: | |||
| 4309 | ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); | 4486 | ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); |
| 4310 | } else { | 4487 | } else { |
| 4311 | ext4_lock_group(sb, block_group); | 4488 | ext4_lock_group(sb, block_group); |
| 4312 | err = mb_free_blocks(inode, &e4b, bit, count); | 4489 | mb_free_blocks(inode, &e4b, bit, count); |
| 4313 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4490 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
| 4314 | ext4_unlock_group(sb, block_group); | 4491 | ext4_unlock_group(sb, block_group); |
| 4315 | BUG_ON(err != 0); | ||
| 4316 | } | 4492 | } |
| 4317 | 4493 | ||
| 4318 | spin_lock(sb_bgl_lock(sbi, block_group)); | 4494 | spin_lock(sb_bgl_lock(sbi, block_group)); |
| @@ -4321,6 +4497,13 @@ do_more: | |||
| 4321 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 4497 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
| 4322 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4498 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
| 4323 | 4499 | ||
| 4500 | if (sbi->s_log_groups_per_flex) { | ||
| 4501 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
| 4502 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 4503 | sbi->s_flex_groups[flex_group].free_blocks += count; | ||
| 4504 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 4505 | } | ||
| 4506 | |||
| 4324 | ext4_mb_release_desc(&e4b); | 4507 | ext4_mb_release_desc(&e4b); |
| 4325 | 4508 | ||
| 4326 | *freed += count; | 4509 | *freed += count; |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ab16beaa830d..387ad98350c3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -183,6 +183,16 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 183 | struct inode *inode); | 183 | struct inode *inode); |
| 184 | 184 | ||
| 185 | /* | 185 | /* |
| 186 | * p is at least 6 bytes before the end of page | ||
| 187 | */ | ||
| 188 | static inline struct ext4_dir_entry_2 * | ||
| 189 | ext4_next_entry(struct ext4_dir_entry_2 *p) | ||
| 190 | { | ||
| 191 | return (struct ext4_dir_entry_2 *)((char *)p + | ||
| 192 | ext4_rec_len_from_disk(p->rec_len)); | ||
| 193 | } | ||
| 194 | |||
| 195 | /* | ||
| 186 | * Future: use high four bits of block for coalesce-on-delete flags | 196 | * Future: use high four bits of block for coalesce-on-delete flags |
| 187 | * Mask them off for now. | 197 | * Mask them off for now. |
| 188 | */ | 198 | */ |
| @@ -231,13 +241,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | |||
| 231 | { | 241 | { |
| 232 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
| 233 | EXT4_DIR_REC_LEN(2) - infosize; | 243 | EXT4_DIR_REC_LEN(2) - infosize; |
| 234 | return 0? 20: entry_space / sizeof(struct dx_entry); | 244 | return entry_space / sizeof(struct dx_entry); |
| 235 | } | 245 | } |
| 236 | 246 | ||
| 237 | static inline unsigned dx_node_limit (struct inode *dir) | 247 | static inline unsigned dx_node_limit (struct inode *dir) |
| 238 | { | 248 | { |
| 239 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
| 240 | return 0? 22: entry_space / sizeof(struct dx_entry); | 250 | return entry_space / sizeof(struct dx_entry); |
| 241 | } | 251 | } |
| 242 | 252 | ||
| 243 | /* | 253 | /* |
| @@ -554,15 +564,6 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
| 554 | 564 | ||
| 555 | 565 | ||
| 556 | /* | 566 | /* |
| 557 | * p is at least 6 bytes before the end of page | ||
| 558 | */ | ||
| 559 | static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) | ||
| 560 | { | ||
| 561 | return (struct ext4_dir_entry_2 *)((char *)p + | ||
| 562 | ext4_rec_len_from_disk(p->rec_len)); | ||
| 563 | } | ||
| 564 | |||
| 565 | /* | ||
| 566 | * This function fills a red-black tree with information from a | 567 | * This function fills a red-black tree with information from a |
| 567 | * directory block. It returns the number directory entries loaded | 568 | * directory block. It returns the number directory entries loaded |
| 568 | * into the tree. If there is an error it is returned in err. | 569 | * into the tree. If there is an error it is returned in err. |
| @@ -993,19 +994,21 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
| 993 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 994 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
| 994 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - | 995 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - |
| 995 | EXT4_DIR_REC_LEN(0)); | 996 | EXT4_DIR_REC_LEN(0)); |
| 996 | for (; de < top; de = ext4_next_entry(de)) | 997 | for (; de < top; de = ext4_next_entry(de)) { |
| 997 | if (ext4_match (namelen, name, de)) { | 998 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) |
| 998 | if (!ext4_check_dir_entry("ext4_find_entry", | 999 | + ((char *) de - bh->b_data); |
| 999 | dir, de, bh, | 1000 | |
| 1000 | (block<<EXT4_BLOCK_SIZE_BITS(sb)) | 1001 | if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) { |
| 1001 | +((char *)de - bh->b_data))) { | 1002 | brelse(bh); |
| 1002 | brelse (bh); | ||
| 1003 | *err = ERR_BAD_DX_DIR; | 1003 | *err = ERR_BAD_DX_DIR; |
| 1004 | goto errout; | 1004 | goto errout; |
| 1005 | } | 1005 | } |
| 1006 | *res_dir = de; | 1006 | |
| 1007 | dx_release (frames); | 1007 | if (ext4_match(namelen, name, de)) { |
| 1008 | return bh; | 1008 | *res_dir = de; |
| 1009 | dx_release(frames); | ||
| 1010 | return bh; | ||
| 1011 | } | ||
| 1009 | } | 1012 | } |
| 1010 | brelse (bh); | 1013 | brelse (bh); |
| 1011 | /* Check to see if we should continue to search */ | 1014 | /* Check to see if we should continue to search */ |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ecb92f68543..f000fbe2cd93 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 855 | */ | 855 | */ |
| 856 | 856 | ||
| 857 | /* Update group descriptor block for new group */ | 857 | /* Update group descriptor block for new group */ |
| 858 | gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; | 858 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + |
| 859 | gdb_off * EXT4_DESC_SIZE(sb)); | ||
| 859 | 860 | ||
| 860 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | 861 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ |
| 861 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | 862 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ |
| @@ -865,6 +866,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 865 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | 866 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); |
| 866 | 867 | ||
| 867 | /* | 868 | /* |
| 869 | * We can allocate memory for mb_alloc based on the new group | ||
| 870 | * descriptor | ||
| 871 | */ | ||
| 872 | if (test_opt(sb, MBALLOC)) { | ||
| 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | ||
| 874 | if (err) | ||
| 875 | goto exit_journal; | ||
| 876 | } | ||
| 877 | /* | ||
| 868 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
| 869 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
| 870 | * all of its blocks and inodes are already valid. | 880 | * all of its blocks and inodes are already valid. |
| @@ -956,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 956 | handle_t *handle; | 966 | handle_t *handle; |
| 957 | int err; | 967 | int err; |
| 958 | unsigned long freed_blocks; | 968 | unsigned long freed_blocks; |
| 969 | ext4_group_t group; | ||
| 970 | struct ext4_group_info *grp; | ||
| 959 | 971 | ||
| 960 | /* We don't need to worry about locking wrt other resizers just | 972 | /* We don't need to worry about locking wrt other resizers just |
| 961 | * yet: we're going to revalidate es->s_blocks_count after | 973 | * yet: we're going to revalidate es->s_blocks_count after |
| @@ -987,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 987 | } | 999 | } |
| 988 | 1000 | ||
| 989 | /* Handle the remaining blocks in the last group only. */ | 1001 | /* Handle the remaining blocks in the last group only. */ |
| 990 | ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); | 1002 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); |
| 991 | 1003 | ||
| 992 | if (last == 0) { | 1004 | if (last == 0) { |
| 993 | ext4_warning(sb, __func__, | 1005 | ext4_warning(sb, __func__, |
| @@ -1059,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 1059 | o_blocks_count + add); | 1071 | o_blocks_count + add); |
| 1060 | if ((err = ext4_journal_stop(handle))) | 1072 | if ((err = ext4_journal_stop(handle))) |
| 1061 | goto exit_put; | 1073 | goto exit_put; |
| 1074 | |||
| 1075 | /* | ||
| 1076 | * Mark mballoc pages as not up to date so that they will be updated | ||
| 1077 | * next time they are loaded by ext4_mb_load_buddy. | ||
| 1078 | */ | ||
| 1079 | if (test_opt(sb, MBALLOC)) { | ||
| 1080 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1081 | struct inode *inode = sbi->s_buddy_cache; | ||
| 1082 | int blocks_per_page; | ||
| 1083 | int block; | ||
| 1084 | int pnum; | ||
| 1085 | struct page *page; | ||
| 1086 | |||
| 1087 | /* Set buddy page as not up to date */ | ||
| 1088 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
| 1089 | block = group * 2; | ||
| 1090 | pnum = block / blocks_per_page; | ||
| 1091 | page = find_get_page(inode->i_mapping, pnum); | ||
| 1092 | if (page != NULL) { | ||
| 1093 | ClearPageUptodate(page); | ||
| 1094 | page_cache_release(page); | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | /* Set bitmap page as not up to date */ | ||
| 1098 | block++; | ||
| 1099 | pnum = block / blocks_per_page; | ||
| 1100 | page = find_get_page(inode->i_mapping, pnum); | ||
| 1101 | if (page != NULL) { | ||
| 1102 | ClearPageUptodate(page); | ||
| 1103 | page_cache_release(page); | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | /* Get the info on the last group */ | ||
| 1107 | grp = ext4_get_group_info(sb, group); | ||
| 1108 | |||
| 1109 | /* Update free blocks in group info */ | ||
| 1110 | ext4_mb_update_group_info(grp, add); | ||
| 1111 | } | ||
| 1112 | |||
| 1062 | if (test_opt(sb, DEBUG)) | 1113 | if (test_opt(sb, DEBUG)) |
| 1063 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", | 1114 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", |
| 1064 | ext4_blocks_count(es)); | 1115 | ext4_blocks_count(es)); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cb96f127c366..1cb371dcd609 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -506,6 +506,7 @@ static void ext4_put_super (struct super_block * sb) | |||
| 506 | ext4_ext_release(sb); | 506 | ext4_ext_release(sb); |
| 507 | ext4_xattr_put_super(sb); | 507 | ext4_xattr_put_super(sb); |
| 508 | jbd2_journal_destroy(sbi->s_journal); | 508 | jbd2_journal_destroy(sbi->s_journal); |
| 509 | sbi->s_journal = NULL; | ||
| 509 | if (!(sb->s_flags & MS_RDONLY)) { | 510 | if (!(sb->s_flags & MS_RDONLY)) { |
| 510 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 511 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| 511 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 512 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
| @@ -517,6 +518,7 @@ static void ext4_put_super (struct super_block * sb) | |||
| 517 | for (i = 0; i < sbi->s_gdb_count; i++) | 518 | for (i = 0; i < sbi->s_gdb_count; i++) |
| 518 | brelse(sbi->s_group_desc[i]); | 519 | brelse(sbi->s_group_desc[i]); |
| 519 | kfree(sbi->s_group_desc); | 520 | kfree(sbi->s_group_desc); |
| 521 | kfree(sbi->s_flex_groups); | ||
| 520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 522 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
| 521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 523 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 524 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| @@ -571,6 +573,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 573 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
| 572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 574 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
| 573 | spin_lock_init(&ei->i_prealloc_lock); | 575 | spin_lock_init(&ei->i_prealloc_lock); |
| 576 | jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); | ||
| 577 | ei->i_reserved_data_blocks = 0; | ||
| 578 | ei->i_reserved_meta_blocks = 0; | ||
| 579 | ei->i_allocated_meta_blocks = 0; | ||
| 580 | ei->i_delalloc_reserved_flag = 0; | ||
| 581 | spin_lock_init(&(ei->i_block_reservation_lock)); | ||
| 574 | return &ei->vfs_inode; | 582 | return &ei->vfs_inode; |
| 575 | } | 583 | } |
| 576 | 584 | ||
| @@ -635,6 +643,8 @@ static void ext4_clear_inode(struct inode *inode) | |||
| 635 | EXT4_I(inode)->i_block_alloc_info = NULL; | 643 | EXT4_I(inode)->i_block_alloc_info = NULL; |
| 636 | if (unlikely(rsv)) | 644 | if (unlikely(rsv)) |
| 637 | kfree(rsv); | 645 | kfree(rsv); |
| 646 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | ||
| 647 | &EXT4_I(inode)->jinode); | ||
| 638 | } | 648 | } |
| 639 | 649 | ||
| 640 | static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) | 650 | static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) |
| @@ -671,7 +681,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 671 | unsigned long def_mount_opts; | 681 | unsigned long def_mount_opts; |
| 672 | struct super_block *sb = vfs->mnt_sb; | 682 | struct super_block *sb = vfs->mnt_sb; |
| 673 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 683 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 674 | journal_t *journal = sbi->s_journal; | ||
| 675 | struct ext4_super_block *es = sbi->s_es; | 684 | struct ext4_super_block *es = sbi->s_es; |
| 676 | 685 | ||
| 677 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 686 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
| @@ -747,6 +756,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 747 | seq_puts(seq, ",nomballoc"); | 756 | seq_puts(seq, ",nomballoc"); |
| 748 | if (test_opt(sb, I_VERSION)) | 757 | if (test_opt(sb, I_VERSION)) |
| 749 | seq_puts(seq, ",i_version"); | 758 | seq_puts(seq, ",i_version"); |
| 759 | if (!test_opt(sb, DELALLOC)) | ||
| 760 | seq_puts(seq, ",nodelalloc"); | ||
| 761 | |||
| 750 | 762 | ||
| 751 | if (sbi->s_stripe) | 763 | if (sbi->s_stripe) |
| 752 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 764 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
| @@ -894,7 +906,7 @@ enum { | |||
| 894 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 906 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
| 895 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 907 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
| 896 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 908 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
| 897 | Opt_mballoc, Opt_nomballoc, Opt_stripe, | 909 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
| 898 | }; | 910 | }; |
| 899 | 911 | ||
| 900 | static match_table_t tokens = { | 912 | static match_table_t tokens = { |
| @@ -953,6 +965,8 @@ static match_table_t tokens = { | |||
| 953 | {Opt_nomballoc, "nomballoc"}, | 965 | {Opt_nomballoc, "nomballoc"}, |
| 954 | {Opt_stripe, "stripe=%u"}, | 966 | {Opt_stripe, "stripe=%u"}, |
| 955 | {Opt_resize, "resize"}, | 967 | {Opt_resize, "resize"}, |
| 968 | {Opt_delalloc, "delalloc"}, | ||
| 969 | {Opt_nodelalloc, "nodelalloc"}, | ||
| 956 | {Opt_err, NULL}, | 970 | {Opt_err, NULL}, |
| 957 | }; | 971 | }; |
| 958 | 972 | ||
| @@ -990,6 +1004,7 @@ static int parse_options (char *options, struct super_block *sb, | |||
| 990 | int qtype, qfmt; | 1004 | int qtype, qfmt; |
| 991 | char *qname; | 1005 | char *qname; |
| 992 | #endif | 1006 | #endif |
| 1007 | ext4_fsblk_t last_block; | ||
| 993 | 1008 | ||
| 994 | if (!options) | 1009 | if (!options) |
| 995 | return 1; | 1010 | return 1; |
| @@ -1309,15 +1324,39 @@ set_qf_format: | |||
| 1309 | clear_opt(sbi->s_mount_opt, NOBH); | 1324 | clear_opt(sbi->s_mount_opt, NOBH); |
| 1310 | break; | 1325 | break; |
| 1311 | case Opt_extents: | 1326 | case Opt_extents: |
| 1327 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
| 1328 | EXT4_FEATURE_INCOMPAT_EXTENTS)) { | ||
| 1329 | ext4_warning(sb, __func__, | ||
| 1330 | "extents feature not enabled " | ||
| 1331 | "on this filesystem, use tune2fs\n"); | ||
| 1332 | return 0; | ||
| 1333 | } | ||
| 1312 | set_opt (sbi->s_mount_opt, EXTENTS); | 1334 | set_opt (sbi->s_mount_opt, EXTENTS); |
| 1313 | break; | 1335 | break; |
| 1314 | case Opt_noextents: | 1336 | case Opt_noextents: |
| 1337 | /* | ||
| 1338 | * When e2fsprogs support resizing an already existing | ||
| 1339 | * ext3 file system to greater than 2**32 we need to | ||
| 1340 | * add support to block allocator to handle growing | ||
| 1341 | * already existing block mapped inode so that blocks | ||
| 1342 | * allocated for them fall within 2**32 | ||
| 1343 | */ | ||
| 1344 | last_block = ext4_blocks_count(sbi->s_es) - 1; | ||
| 1345 | if (last_block > 0xffffffffULL) { | ||
| 1346 | printk(KERN_ERR "EXT4-fs: Filesystem too " | ||
| 1347 | "large to mount with " | ||
| 1348 | "-o noextents options\n"); | ||
| 1349 | return 0; | ||
| 1350 | } | ||
| 1315 | clear_opt (sbi->s_mount_opt, EXTENTS); | 1351 | clear_opt (sbi->s_mount_opt, EXTENTS); |
| 1316 | break; | 1352 | break; |
| 1317 | case Opt_i_version: | 1353 | case Opt_i_version: |
| 1318 | set_opt(sbi->s_mount_opt, I_VERSION); | 1354 | set_opt(sbi->s_mount_opt, I_VERSION); |
| 1319 | sb->s_flags |= MS_I_VERSION; | 1355 | sb->s_flags |= MS_I_VERSION; |
| 1320 | break; | 1356 | break; |
| 1357 | case Opt_nodelalloc: | ||
| 1358 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
| 1359 | break; | ||
| 1321 | case Opt_mballoc: | 1360 | case Opt_mballoc: |
| 1322 | set_opt(sbi->s_mount_opt, MBALLOC); | 1361 | set_opt(sbi->s_mount_opt, MBALLOC); |
| 1323 | break; | 1362 | break; |
| @@ -1331,6 +1370,9 @@ set_qf_format: | |||
| 1331 | return 0; | 1370 | return 0; |
| 1332 | sbi->s_stripe = option; | 1371 | sbi->s_stripe = option; |
| 1333 | break; | 1372 | break; |
| 1373 | case Opt_delalloc: | ||
| 1374 | set_opt(sbi->s_mount_opt, DELALLOC); | ||
| 1375 | break; | ||
| 1334 | default: | 1376 | default: |
| 1335 | printk (KERN_ERR | 1377 | printk (KERN_ERR |
| 1336 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1378 | "EXT4-fs: Unrecognized mount option \"%s\" " |
| @@ -1443,6 +1485,54 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
| 1443 | return res; | 1485 | return res; |
| 1444 | } | 1486 | } |
| 1445 | 1487 | ||
| 1488 | static int ext4_fill_flex_info(struct super_block *sb) | ||
| 1489 | { | ||
| 1490 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1491 | struct ext4_group_desc *gdp = NULL; | ||
| 1492 | struct buffer_head *bh; | ||
| 1493 | ext4_group_t flex_group_count; | ||
| 1494 | ext4_group_t flex_group; | ||
| 1495 | int groups_per_flex = 0; | ||
| 1496 | __u64 block_bitmap = 0; | ||
| 1497 | int i; | ||
| 1498 | |||
| 1499 | if (!sbi->s_es->s_log_groups_per_flex) { | ||
| 1500 | sbi->s_log_groups_per_flex = 0; | ||
| 1501 | return 1; | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | ||
| 1505 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | ||
| 1506 | |||
| 1507 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | ||
| 1508 | groups_per_flex; | ||
| 1509 | sbi->s_flex_groups = kmalloc(flex_group_count * | ||
| 1510 | sizeof(struct flex_groups), GFP_KERNEL); | ||
| 1511 | if (sbi->s_flex_groups == NULL) { | ||
| 1512 | printk(KERN_ERR "EXT4-fs: not enough memory\n"); | ||
| 1513 | goto failed; | ||
| 1514 | } | ||
| 1515 | memset(sbi->s_flex_groups, 0, flex_group_count * | ||
| 1516 | sizeof(struct flex_groups)); | ||
| 1517 | |||
| 1518 | gdp = ext4_get_group_desc(sb, 1, &bh); | ||
| 1519 | block_bitmap = ext4_block_bitmap(sb, gdp) - 1; | ||
| 1520 | |||
| 1521 | for (i = 0; i < sbi->s_groups_count; i++) { | ||
| 1522 | gdp = ext4_get_group_desc(sb, i, &bh); | ||
| 1523 | |||
| 1524 | flex_group = ext4_flex_group(sbi, i); | ||
| 1525 | sbi->s_flex_groups[flex_group].free_inodes += | ||
| 1526 | le16_to_cpu(gdp->bg_free_inodes_count); | ||
| 1527 | sbi->s_flex_groups[flex_group].free_blocks += | ||
| 1528 | le16_to_cpu(gdp->bg_free_blocks_count); | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | return 1; | ||
| 1532 | failed: | ||
| 1533 | return 0; | ||
| 1534 | } | ||
| 1535 | |||
| 1446 | __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | 1536 | __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, |
| 1447 | struct ext4_group_desc *gdp) | 1537 | struct ext4_group_desc *gdp) |
| 1448 | { | 1538 | { |
| @@ -1810,8 +1900,8 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | |||
| 1810 | } | 1900 | } |
| 1811 | 1901 | ||
| 1812 | static int ext4_fill_super (struct super_block *sb, void *data, int silent) | 1902 | static int ext4_fill_super (struct super_block *sb, void *data, int silent) |
| 1813 | __releases(kernel_sem) | 1903 | __releases(kernel_lock) |
| 1814 | __acquires(kernel_sem) | 1904 | __acquires(kernel_lock) |
| 1815 | 1905 | ||
| 1816 | { | 1906 | { |
| 1817 | struct buffer_head * bh; | 1907 | struct buffer_head * bh; |
| @@ -1851,11 +1941,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 1851 | goto out_fail; | 1941 | goto out_fail; |
| 1852 | } | 1942 | } |
| 1853 | 1943 | ||
| 1854 | if (!sb_set_blocksize(sb, blocksize)) { | ||
| 1855 | printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize); | ||
| 1856 | goto out_fail; | ||
| 1857 | } | ||
| 1858 | |||
| 1859 | /* | 1944 | /* |
| 1860 | * The ext4 superblock will not be buffer aligned for other than 1kB | 1945 | * The ext4 superblock will not be buffer aligned for other than 1kB |
| 1861 | * block sizes. We need to calculate the offset from buffer start. | 1946 | * block sizes. We need to calculate the offset from buffer start. |
| @@ -1919,15 +2004,28 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 1919 | 2004 | ||
| 1920 | /* | 2005 | /* |
| 1921 | * turn on extents feature by default in ext4 filesystem | 2006 | * turn on extents feature by default in ext4 filesystem |
| 1922 | * User -o noextents to turn it off | 2007 | * only if feature flag already set by mkfs or tune2fs. |
| 2008 | * Use -o noextents to turn it off | ||
| 1923 | */ | 2009 | */ |
| 1924 | set_opt(sbi->s_mount_opt, EXTENTS); | 2010 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) |
| 2011 | set_opt(sbi->s_mount_opt, EXTENTS); | ||
| 2012 | else | ||
| 2013 | ext4_warning(sb, __func__, | ||
| 2014 | "extents feature not enabled on this filesystem, " | ||
| 2015 | "use tune2fs.\n"); | ||
| 1925 | /* | 2016 | /* |
| 1926 | * turn on mballoc feature by default in ext4 filesystem | 2017 | * turn on mballoc code by default in ext4 filesystem |
| 1927 | * User -o nomballoc to turn it off | 2018 | * Use -o nomballoc to turn it off |
| 1928 | */ | 2019 | */ |
| 1929 | set_opt(sbi->s_mount_opt, MBALLOC); | 2020 | set_opt(sbi->s_mount_opt, MBALLOC); |
| 1930 | 2021 | ||
| 2022 | /* | ||
| 2023 | * enable delayed allocation by default | ||
| 2024 | * Use -o nodelalloc to turn it off | ||
| 2025 | */ | ||
| 2026 | set_opt(sbi->s_mount_opt, DELALLOC); | ||
| 2027 | |||
| 2028 | |||
| 1931 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, | 2029 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, |
| 1932 | NULL, 0)) | 2030 | NULL, 0)) |
| 1933 | goto failed_mount; | 2031 | goto failed_mount; |
| @@ -2138,6 +2236,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 2138 | printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); | 2236 | printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); |
| 2139 | goto failed_mount2; | 2237 | goto failed_mount2; |
| 2140 | } | 2238 | } |
| 2239 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | ||
| 2240 | if (!ext4_fill_flex_info(sb)) { | ||
| 2241 | printk(KERN_ERR | ||
| 2242 | "EXT4-fs: unable to initialize " | ||
| 2243 | "flex_bg meta info!\n"); | ||
| 2244 | goto failed_mount2; | ||
| 2245 | } | ||
| 2246 | |||
| 2141 | sbi->s_gdb_count = db_count; | 2247 | sbi->s_gdb_count = db_count; |
| 2142 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 2248 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
| 2143 | spin_lock_init(&sbi->s_next_gen_lock); | 2249 | spin_lock_init(&sbi->s_next_gen_lock); |
| @@ -2358,6 +2464,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
| 2358 | test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": | 2464 | test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": |
| 2359 | "writeback"); | 2465 | "writeback"); |
| 2360 | 2466 | ||
| 2467 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
| 2468 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
| 2469 | "requested data journaling mode\n"); | ||
| 2470 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
| 2471 | } else if (test_opt(sb, DELALLOC)) | ||
| 2472 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
| 2473 | |||
| 2361 | ext4_ext_init(sb); | 2474 | ext4_ext_init(sb); |
| 2362 | ext4_mb_init(sb, needs_recovery); | 2475 | ext4_mb_init(sb, needs_recovery); |
| 2363 | 2476 | ||
| @@ -2372,6 +2485,7 @@ cantfind_ext4: | |||
| 2372 | 2485 | ||
| 2373 | failed_mount4: | 2486 | failed_mount4: |
| 2374 | jbd2_journal_destroy(sbi->s_journal); | 2487 | jbd2_journal_destroy(sbi->s_journal); |
| 2488 | sbi->s_journal = NULL; | ||
| 2375 | failed_mount3: | 2489 | failed_mount3: |
| 2376 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2490 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
| 2377 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2491 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| @@ -3325,7 +3439,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 3325 | err = ext4_journal_dirty_metadata(handle, bh); | 3439 | err = ext4_journal_dirty_metadata(handle, bh); |
| 3326 | else { | 3440 | else { |
| 3327 | /* Always do at least ordered writes for quotas */ | 3441 | /* Always do at least ordered writes for quotas */ |
| 3328 | err = ext4_journal_dirty_data(handle, bh); | 3442 | err = ext4_jbd2_file_inode(handle, inode); |
| 3329 | mark_buffer_dirty(bh); | 3443 | mark_buffer_dirty(bh); |
| 3330 | } | 3444 | } |
| 3331 | brelse(bh); | 3445 | brelse(bh); |
| @@ -3337,8 +3451,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 3337 | blk++; | 3451 | blk++; |
| 3338 | } | 3452 | } |
| 3339 | out: | 3453 | out: |
| 3340 | if (len == towrite) | 3454 | if (len == towrite) { |
| 3455 | mutex_unlock(&inode->i_mutex); | ||
| 3341 | return err; | 3456 | return err; |
| 3457 | } | ||
| 3342 | if (inode->i_size < off+len-towrite) { | 3458 | if (inode->i_size < off+len-towrite) { |
| 3343 | i_size_write(inode, off+len-towrite); | 3459 | i_size_write(inode, off+len-towrite); |
| 3344 | EXT4_I(inode)->i_disksize = inode->i_size; | 3460 | EXT4_I(inode)->i_disksize = inode->i_size; |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index ff08633f398e..93c5fdcdad2e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -810,7 +810,7 @@ inserted: | |||
| 810 | /* We need to allocate a new block */ | 810 | /* We need to allocate a new block */ |
| 811 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, | 811 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, |
| 812 | EXT4_I(inode)->i_block_group); | 812 | EXT4_I(inode)->i_block_group); |
| 813 | ext4_fsblk_t block = ext4_new_block(handle, inode, | 813 | ext4_fsblk_t block = ext4_new_meta_block(handle, inode, |
| 814 | goal, &error); | 814 | goal, &error); |
| 815 | if (error) | 815 | if (error) |
| 816 | goto cleanup; | 816 | goto cleanup; |
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index fff33382cadc..ac1a52cf2a37 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c | |||
| @@ -13,13 +13,11 @@ | |||
| 13 | #include "ext4.h" | 13 | #include "ext4.h" |
| 14 | #include "xattr.h" | 14 | #include "xattr.h" |
| 15 | 15 | ||
| 16 | #define XATTR_TRUSTED_PREFIX "trusted." | ||
| 17 | |||
| 18 | static size_t | 16 | static size_t |
| 19 | ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, | 17 | ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, |
| 20 | const char *name, size_t name_len) | 18 | const char *name, size_t name_len) |
| 21 | { | 19 | { |
| 22 | const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; | 20 | const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; |
| 23 | const size_t total_len = prefix_len + name_len + 1; | 21 | const size_t total_len = prefix_len + name_len + 1; |
| 24 | 22 | ||
| 25 | if (!capable(CAP_SYS_ADMIN)) | 23 | if (!capable(CAP_SYS_ADMIN)) |
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 67be723fcc4e..d91aa61b42aa 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c | |||
| @@ -12,13 +12,11 @@ | |||
| 12 | #include "ext4.h" | 12 | #include "ext4.h" |
| 13 | #include "xattr.h" | 13 | #include "xattr.h" |
| 14 | 14 | ||
| 15 | #define XATTR_USER_PREFIX "user." | ||
| 16 | |||
| 17 | static size_t | 15 | static size_t |
| 18 | ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, | 16 | ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, |
| 19 | const char *name, size_t name_len) | 17 | const char *name, size_t name_len) |
| 20 | { | 18 | { |
| 21 | const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; | 19 | const size_t prefix_len = XATTR_USER_PREFIX_LEN; |
| 22 | const size_t total_len = prefix_len + name_len + 1; | 20 | const size_t total_len = prefix_len + name_len + 1; |
| 23 | 21 | ||
| 24 | if (!test_opt(inode->i_sb, XATTR_USER)) | 22 | if (!test_opt(inode->i_sb, XATTR_USER)) |
diff --git a/fs/fat/cache.c b/fs/fat/cache.c index fda25479af26..3a9ecac8d61f 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c | |||
| @@ -61,7 +61,7 @@ void fat_cache_destroy(void) | |||
| 61 | 61 | ||
| 62 | static inline struct fat_cache *fat_cache_alloc(struct inode *inode) | 62 | static inline struct fat_cache *fat_cache_alloc(struct inode *inode) |
| 63 | { | 63 | { |
| 64 | return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL); | 64 | return kmem_cache_alloc(fat_cache_cachep, GFP_NOFS); |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | static inline void fat_cache_free(struct fat_cache *cache) | 67 | static inline void fat_cache_free(struct fat_cache *cache) |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 486725ee99ae..34541d06e626 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
| @@ -472,7 +472,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
| 472 | loff_t cpos; | 472 | loff_t cpos; |
| 473 | int ret = 0; | 473 | int ret = 0; |
| 474 | 474 | ||
| 475 | lock_kernel(); | 475 | lock_super(sb); |
| 476 | 476 | ||
| 477 | cpos = filp->f_pos; | 477 | cpos = filp->f_pos; |
| 478 | /* Fake . and .. for the root directory. */ | 478 | /* Fake . and .. for the root directory. */ |
| @@ -654,7 +654,7 @@ FillFailed: | |||
| 654 | if (unicode) | 654 | if (unicode) |
| 655 | __putname(unicode); | 655 | __putname(unicode); |
| 656 | out: | 656 | out: |
| 657 | unlock_kernel(); | 657 | unlock_super(sb); |
| 658 | return ret; | 658 | return ret; |
| 659 | } | 659 | } |
| 660 | 660 | ||
diff --git a/fs/fat/file.c b/fs/fat/file.c index 27cc1164ec36..c672df4036e9 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <linux/mount.h> | 11 | #include <linux/mount.h> |
| 12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
| 13 | #include <linux/msdos_fs.h> | 13 | #include <linux/msdos_fs.h> |
| 14 | #include <linux/smp_lock.h> | ||
| 15 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
| 16 | #include <linux/writeback.h> | 15 | #include <linux/writeback.h> |
| 17 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
| @@ -242,9 +241,7 @@ void fat_truncate(struct inode *inode) | |||
| 242 | 241 | ||
| 243 | nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; | 242 | nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits; |
| 244 | 243 | ||
| 245 | lock_kernel(); | ||
| 246 | fat_free(inode, nr_clusters); | 244 | fat_free(inode, nr_clusters); |
| 247 | unlock_kernel(); | ||
| 248 | fat_flush_inodes(inode->i_sb, inode, NULL); | 245 | fat_flush_inodes(inode->i_sb, inode, NULL); |
| 249 | } | 246 | } |
| 250 | 247 | ||
| @@ -257,26 +254,34 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
| 257 | } | 254 | } |
| 258 | EXPORT_SYMBOL_GPL(fat_getattr); | 255 | EXPORT_SYMBOL_GPL(fat_getattr); |
| 259 | 256 | ||
| 260 | static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode, | 257 | static int fat_sanitize_mode(const struct msdos_sb_info *sbi, |
| 261 | mode_t mode) | 258 | struct inode *inode, umode_t *mode_ptr) |
| 262 | { | 259 | { |
| 263 | mode_t mask, req = mode & ~S_IFMT; | 260 | mode_t mask, perm; |
| 264 | 261 | ||
| 265 | if (S_ISREG(mode)) | 262 | /* |
| 263 | * Note, the basic check is already done by a caller of | ||
| 264 | * (attr->ia_mode & ~MSDOS_VALID_MODE) | ||
| 265 | */ | ||
| 266 | |||
| 267 | if (S_ISREG(inode->i_mode)) | ||
| 266 | mask = sbi->options.fs_fmask; | 268 | mask = sbi->options.fs_fmask; |
| 267 | else | 269 | else |
| 268 | mask = sbi->options.fs_dmask; | 270 | mask = sbi->options.fs_dmask; |
| 269 | 271 | ||
| 272 | perm = *mode_ptr & ~(S_IFMT | mask); | ||
| 273 | |||
| 270 | /* | 274 | /* |
| 271 | * Of the r and x bits, all (subject to umask) must be present. Of the | 275 | * Of the r and x bits, all (subject to umask) must be present. Of the |
| 272 | * w bits, either all (subject to umask) or none must be present. | 276 | * w bits, either all (subject to umask) or none must be present. |
| 273 | */ | 277 | */ |
| 274 | req &= ~mask; | 278 | if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) |
| 275 | if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) | ||
| 276 | return -EPERM; | 279 | return -EPERM; |
| 277 | if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask))) | 280 | if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) |
| 278 | return -EPERM; | 281 | return -EPERM; |
| 279 | 282 | ||
| 283 | *mode_ptr &= S_IFMT | perm; | ||
| 284 | |||
| 280 | return 0; | 285 | return 0; |
| 281 | } | 286 | } |
| 282 | 287 | ||
| @@ -299,11 +304,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 299 | { | 304 | { |
| 300 | struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); | 305 | struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); |
| 301 | struct inode *inode = dentry->d_inode; | 306 | struct inode *inode = dentry->d_inode; |
| 302 | int mask, error = 0; | 307 | int error = 0; |
| 303 | unsigned int ia_valid; | 308 | unsigned int ia_valid; |
| 304 | 309 | ||
| 305 | lock_kernel(); | ||
| 306 | |||
| 307 | /* | 310 | /* |
| 308 | * Expand the file. Since inode_setattr() updates ->i_size | 311 | * Expand the file. Since inode_setattr() updates ->i_size |
| 309 | * before calling the ->truncate(), but FAT needs to fill the | 312 | * before calling the ->truncate(), but FAT needs to fill the |
| @@ -332,12 +335,13 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 332 | error = 0; | 335 | error = 0; |
| 333 | goto out; | 336 | goto out; |
| 334 | } | 337 | } |
| 338 | |||
| 335 | if (((attr->ia_valid & ATTR_UID) && | 339 | if (((attr->ia_valid & ATTR_UID) && |
| 336 | (attr->ia_uid != sbi->options.fs_uid)) || | 340 | (attr->ia_uid != sbi->options.fs_uid)) || |
| 337 | ((attr->ia_valid & ATTR_GID) && | 341 | ((attr->ia_valid & ATTR_GID) && |
| 338 | (attr->ia_gid != sbi->options.fs_gid)) || | 342 | (attr->ia_gid != sbi->options.fs_gid)) || |
| 339 | ((attr->ia_valid & ATTR_MODE) && | 343 | ((attr->ia_valid & ATTR_MODE) && |
| 340 | fat_check_mode(sbi, inode, attr->ia_mode) < 0)) | 344 | (attr->ia_mode & ~MSDOS_VALID_MODE))) |
| 341 | error = -EPERM; | 345 | error = -EPERM; |
| 342 | 346 | ||
| 343 | if (error) { | 347 | if (error) { |
| @@ -346,17 +350,17 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 346 | goto out; | 350 | goto out; |
| 347 | } | 351 | } |
| 348 | 352 | ||
| 349 | error = inode_setattr(inode, attr); | 353 | /* |
| 350 | if (error) | 354 | * We don't return -EPERM here. Yes, strange, but this is too |
| 351 | goto out; | 355 | * old behavior. |
| 356 | */ | ||
| 357 | if (attr->ia_valid & ATTR_MODE) { | ||
| 358 | if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) | ||
| 359 | attr->ia_valid &= ~ATTR_MODE; | ||
| 360 | } | ||
| 352 | 361 | ||
| 353 | if (S_ISDIR(inode->i_mode)) | 362 | error = inode_setattr(inode, attr); |
| 354 | mask = sbi->options.fs_dmask; | ||
| 355 | else | ||
| 356 | mask = sbi->options.fs_fmask; | ||
| 357 | inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask); | ||
| 358 | out: | 363 | out: |
| 359 | unlock_kernel(); | ||
| 360 | return error; | 364 | return error; |
| 361 | } | 365 | } |
| 362 | EXPORT_SYMBOL_GPL(fat_setattr); | 366 | EXPORT_SYMBOL_GPL(fat_setattr); |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4e0a3dd9d677..46a4508ffd2e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -440,14 +440,13 @@ static void fat_delete_inode(struct inode *inode) | |||
| 440 | 440 | ||
| 441 | static void fat_clear_inode(struct inode *inode) | 441 | static void fat_clear_inode(struct inode *inode) |
| 442 | { | 442 | { |
| 443 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 443 | struct super_block *sb = inode->i_sb; |
| 444 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
| 444 | 445 | ||
| 445 | lock_kernel(); | ||
| 446 | spin_lock(&sbi->inode_hash_lock); | 446 | spin_lock(&sbi->inode_hash_lock); |
| 447 | fat_cache_inval_inode(inode); | 447 | fat_cache_inval_inode(inode); |
| 448 | hlist_del_init(&MSDOS_I(inode)->i_fat_hash); | 448 | hlist_del_init(&MSDOS_I(inode)->i_fat_hash); |
| 449 | spin_unlock(&sbi->inode_hash_lock); | 449 | spin_unlock(&sbi->inode_hash_lock); |
| 450 | unlock_kernel(); | ||
| 451 | } | 450 | } |
| 452 | 451 | ||
| 453 | static void fat_write_super(struct super_block *sb) | 452 | static void fat_write_super(struct super_block *sb) |
| @@ -485,7 +484,7 @@ static struct kmem_cache *fat_inode_cachep; | |||
| 485 | static struct inode *fat_alloc_inode(struct super_block *sb) | 484 | static struct inode *fat_alloc_inode(struct super_block *sb) |
| 486 | { | 485 | { |
| 487 | struct msdos_inode_info *ei; | 486 | struct msdos_inode_info *ei; |
| 488 | ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL); | 487 | ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS); |
| 489 | if (!ei) | 488 | if (!ei) |
| 490 | return NULL; | 489 | return NULL; |
| 491 | return &ei->vfs_inode; | 490 | return &ei->vfs_inode; |
| @@ -567,7 +566,7 @@ retry: | |||
| 567 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) | 566 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) |
| 568 | return 0; | 567 | return 0; |
| 569 | 568 | ||
| 570 | lock_kernel(); | 569 | lock_super(sb); |
| 571 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 570 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
| 572 | if (!bh) { | 571 | if (!bh) { |
| 573 | printk(KERN_ERR "FAT: unable to read inode block " | 572 | printk(KERN_ERR "FAT: unable to read inode block " |
| @@ -579,7 +578,7 @@ retry: | |||
| 579 | if (i_pos != MSDOS_I(inode)->i_pos) { | 578 | if (i_pos != MSDOS_I(inode)->i_pos) { |
| 580 | spin_unlock(&sbi->inode_hash_lock); | 579 | spin_unlock(&sbi->inode_hash_lock); |
| 581 | brelse(bh); | 580 | brelse(bh); |
| 582 | unlock_kernel(); | 581 | unlock_super(sb); |
| 583 | goto retry; | 582 | goto retry; |
| 584 | } | 583 | } |
| 585 | 584 | ||
| @@ -606,7 +605,7 @@ retry: | |||
| 606 | err = sync_dirty_buffer(bh); | 605 | err = sync_dirty_buffer(bh); |
| 607 | brelse(bh); | 606 | brelse(bh); |
| 608 | out: | 607 | out: |
| 609 | unlock_kernel(); | 608 | unlock_super(sb); |
| 610 | return err; | 609 | return err; |
| 611 | } | 610 | } |
| 612 | 611 | ||
| @@ -736,6 +735,7 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) | |||
| 736 | 735 | ||
| 737 | static struct dentry *fat_get_parent(struct dentry *child) | 736 | static struct dentry *fat_get_parent(struct dentry *child) |
| 738 | { | 737 | { |
| 738 | struct super_block *sb = child->d_sb; | ||
| 739 | struct buffer_head *bh; | 739 | struct buffer_head *bh; |
| 740 | struct msdos_dir_entry *de; | 740 | struct msdos_dir_entry *de; |
| 741 | loff_t i_pos; | 741 | loff_t i_pos; |
| @@ -743,14 +743,14 @@ static struct dentry *fat_get_parent(struct dentry *child) | |||
| 743 | struct inode *inode; | 743 | struct inode *inode; |
| 744 | int err; | 744 | int err; |
| 745 | 745 | ||
| 746 | lock_kernel(); | 746 | lock_super(sb); |
| 747 | 747 | ||
| 748 | err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); | 748 | err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos); |
| 749 | if (err) { | 749 | if (err) { |
| 750 | parent = ERR_PTR(err); | 750 | parent = ERR_PTR(err); |
| 751 | goto out; | 751 | goto out; |
| 752 | } | 752 | } |
| 753 | inode = fat_build_inode(child->d_sb, de, i_pos); | 753 | inode = fat_build_inode(sb, de, i_pos); |
| 754 | brelse(bh); | 754 | brelse(bh); |
| 755 | if (IS_ERR(inode)) { | 755 | if (IS_ERR(inode)) { |
| 756 | parent = ERR_CAST(inode); | 756 | parent = ERR_CAST(inode); |
| @@ -762,7 +762,7 @@ static struct dentry *fat_get_parent(struct dentry *child) | |||
| 762 | parent = ERR_PTR(-ENOMEM); | 762 | parent = ERR_PTR(-ENOMEM); |
| 763 | } | 763 | } |
| 764 | out: | 764 | out: |
| 765 | unlock_kernel(); | 765 | unlock_super(sb); |
| 766 | 766 | ||
| 767 | return parent; | 767 | return parent; |
| 768 | } | 768 | } |
| @@ -1172,6 +1172,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1172 | long error; | 1172 | long error; |
| 1173 | char buf[50]; | 1173 | char buf[50]; |
| 1174 | 1174 | ||
| 1175 | /* | ||
| 1176 | * GFP_KERNEL is ok here, because while we do hold the | ||
| 1177 | * supeblock lock, memory pressure can't call back into | ||
| 1178 | * the filesystem, since we're only just about to mount | ||
| 1179 | * it and have no inodes etc active! | ||
| 1180 | */ | ||
| 1175 | sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); | 1181 | sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL); |
| 1176 | if (!sbi) | 1182 | if (!sbi) |
| 1177 | return -ENOMEM; | 1183 | return -ENOMEM; |
diff --git a/fs/fcntl.c b/fs/fcntl.c index bfd776509a72..330a7d782591 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/fdtable.h> | 12 | #include <linux/fdtable.h> |
| 13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
| 14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
| 15 | #include <linux/smp_lock.h> | ||
| 16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 17 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 18 | #include <linux/security.h> | 17 | #include <linux/security.h> |
| @@ -227,7 +226,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) | |||
| 227 | if (error) | 226 | if (error) |
| 228 | return error; | 227 | return error; |
| 229 | 228 | ||
| 230 | lock_kernel(); | ||
| 231 | if ((arg ^ filp->f_flags) & FASYNC) { | 229 | if ((arg ^ filp->f_flags) & FASYNC) { |
| 232 | if (filp->f_op && filp->f_op->fasync) { | 230 | if (filp->f_op && filp->f_op->fasync) { |
| 233 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); | 231 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); |
| @@ -238,7 +236,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg) | |||
| 238 | 236 | ||
| 239 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); | 237 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); |
| 240 | out: | 238 | out: |
| 241 | unlock_kernel(); | ||
| 242 | return error; | 239 | return error; |
| 243 | } | 240 | } |
| 244 | 241 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ae45f77765c0..25adfc3c693a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -424,8 +424,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 424 | * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so | 424 | * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so |
| 425 | * that it can be located for waiting on in __writeback_single_inode(). | 425 | * that it can be located for waiting on in __writeback_single_inode(). |
| 426 | * | 426 | * |
| 427 | * Called under inode_lock. | ||
| 428 | * | ||
| 429 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | 427 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. |
| 430 | * This function assumes that the blockdev superblock's inodes are backed by | 428 | * This function assumes that the blockdev superblock's inodes are backed by |
| 431 | * a variety of queues, so all inodes are searched. For other superblocks, | 429 | * a variety of queues, so all inodes are searched. For other superblocks, |
| @@ -441,11 +439,12 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 441 | * on the writer throttling path, and we get decent balancing between many | 439 | * on the writer throttling path, and we get decent balancing between many |
| 442 | * throttled threads: we don't want them all piling up on inode_sync_wait. | 440 | * throttled threads: we don't want them all piling up on inode_sync_wait. |
| 443 | */ | 441 | */ |
| 444 | static void | 442 | void generic_sync_sb_inodes(struct super_block *sb, |
| 445 | sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) | 443 | struct writeback_control *wbc) |
| 446 | { | 444 | { |
| 447 | const unsigned long start = jiffies; /* livelock avoidance */ | 445 | const unsigned long start = jiffies; /* livelock avoidance */ |
| 448 | 446 | ||
| 447 | spin_lock(&inode_lock); | ||
| 449 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) | 448 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) |
| 450 | queue_io(sb, wbc->older_than_this); | 449 | queue_io(sb, wbc->older_than_this); |
| 451 | 450 | ||
| @@ -524,8 +523,16 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) | |||
| 524 | if (!list_empty(&sb->s_more_io)) | 523 | if (!list_empty(&sb->s_more_io)) |
| 525 | wbc->more_io = 1; | 524 | wbc->more_io = 1; |
| 526 | } | 525 | } |
| 526 | spin_unlock(&inode_lock); | ||
| 527 | return; /* Leave any unwritten inodes on s_io */ | 527 | return; /* Leave any unwritten inodes on s_io */ |
| 528 | } | 528 | } |
| 529 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); | ||
| 530 | |||
| 531 | static void sync_sb_inodes(struct super_block *sb, | ||
| 532 | struct writeback_control *wbc) | ||
| 533 | { | ||
| 534 | generic_sync_sb_inodes(sb, wbc); | ||
| 535 | } | ||
| 529 | 536 | ||
| 530 | /* | 537 | /* |
| 531 | * Start writeback of dirty pagecache data against all unlocked inodes. | 538 | * Start writeback of dirty pagecache data against all unlocked inodes. |
| @@ -565,11 +572,8 @@ restart: | |||
| 565 | * be unmounted by the time it is released. | 572 | * be unmounted by the time it is released. |
| 566 | */ | 573 | */ |
| 567 | if (down_read_trylock(&sb->s_umount)) { | 574 | if (down_read_trylock(&sb->s_umount)) { |
| 568 | if (sb->s_root) { | 575 | if (sb->s_root) |
| 569 | spin_lock(&inode_lock); | ||
| 570 | sync_sb_inodes(sb, wbc); | 576 | sync_sb_inodes(sb, wbc); |
| 571 | spin_unlock(&inode_lock); | ||
| 572 | } | ||
| 573 | up_read(&sb->s_umount); | 577 | up_read(&sb->s_umount); |
| 574 | } | 578 | } |
| 575 | spin_lock(&sb_lock); | 579 | spin_lock(&sb_lock); |
| @@ -607,9 +611,7 @@ void sync_inodes_sb(struct super_block *sb, int wait) | |||
| 607 | (inodes_stat.nr_inodes - inodes_stat.nr_unused) + | 611 | (inodes_stat.nr_inodes - inodes_stat.nr_unused) + |
| 608 | nr_dirty + nr_unstable; | 612 | nr_dirty + nr_unstable; |
| 609 | wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ | 613 | wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ |
| 610 | spin_lock(&inode_lock); | ||
| 611 | sync_sb_inodes(sb, &wbc); | 614 | sync_sb_inodes(sb, &wbc); |
| 612 | spin_unlock(&inode_lock); | ||
| 613 | } | 615 | } |
| 614 | 616 | ||
| 615 | /* | 617 | /* |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 43e99513334a..3141690558c8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
| @@ -591,7 +591,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
| 591 | fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); | 591 | fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); |
| 592 | fc->minor = arg->minor; | 592 | fc->minor = arg->minor; |
| 593 | fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; | 593 | fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; |
| 594 | fc->max_write = min_t(unsigned, 4096, fc->max_write); | 594 | fc->max_write = max_t(unsigned, 4096, fc->max_write); |
| 595 | fc->conn_init = 1; | 595 | fc->conn_init = 1; |
| 596 | } | 596 | } |
| 597 | fuse_put_request(fc, req); | 597 | fuse_put_request(fc, req); |
| @@ -667,7 +667,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
| 667 | fc->flags = d.flags; | 667 | fc->flags = d.flags; |
| 668 | fc->user_id = d.user_id; | 668 | fc->user_id = d.user_id; |
| 669 | fc->group_id = d.group_id; | 669 | fc->group_id = d.group_id; |
| 670 | fc->max_read = min_t(unsigned, 4096, d.max_read); | 670 | fc->max_read = max_t(unsigned, 4096, d.max_read); |
| 671 | 671 | ||
| 672 | /* Used by get_root_inode() */ | 672 | /* Used by get_root_inode() */ |
| 673 | sb->s_fs_info = fc; | 673 | sb->s_fs_info = fc; |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 7f7947e3dfbb..ab2f57e3fb87 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
| @@ -14,23 +14,11 @@ config GFS2_FS | |||
| 14 | GFS is perfect consistency -- changes made to the filesystem on one | 14 | GFS is perfect consistency -- changes made to the filesystem on one |
| 15 | machine show up immediately on all other machines in the cluster. | 15 | machine show up immediately on all other machines in the cluster. |
| 16 | 16 | ||
| 17 | To use the GFS2 filesystem, you will need to enable one or more of | 17 | To use the GFS2 filesystem in a cluster, you will need to enable |
| 18 | the below locking modules. Documentation and utilities for GFS2 can | 18 | the locking module below. Documentation and utilities for GFS2 can |
| 19 | be found here: http://sources.redhat.com/cluster | 19 | be found here: http://sources.redhat.com/cluster |
| 20 | 20 | ||
| 21 | config GFS2_FS_LOCKING_NOLOCK | 21 | The "nolock" lock module is now built in to GFS2 by default. |
| 22 | tristate "GFS2 \"nolock\" locking module" | ||
| 23 | depends on GFS2_FS | ||
| 24 | help | ||
| 25 | Single node locking module for GFS2. | ||
| 26 | |||
| 27 | Use this module if you want to use GFS2 on a single node without | ||
| 28 | its clustering features. You can still take advantage of the | ||
| 29 | large file support, and upgrade to running a full cluster later on | ||
| 30 | if required. | ||
| 31 | |||
| 32 | If you will only be using GFS2 in cluster mode, you do not need this | ||
| 33 | module. | ||
| 34 | 22 | ||
| 35 | config GFS2_FS_LOCKING_DLM | 23 | config GFS2_FS_LOCKING_DLM |
| 36 | tristate "GFS2 DLM locking module" | 24 | tristate "GFS2 DLM locking module" |
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index e2350df02a07..ec65851ec80a 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
| @@ -5,6 +5,5 @@ gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ | |||
| 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ | 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ |
| 6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 6 | recovery.o rgrp.o super.o sys.o trans.o util.o |
| 7 | 7 | ||
| 8 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ | ||
| 9 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ | 8 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ |
| 10 | 9 | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c19184f2e70e..bec76b1c2bb0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block, | |||
| 246 | 246 | ||
| 247 | } | 247 | } |
| 248 | 248 | ||
| 249 | static inline unsigned int zero_metapath_length(const struct metapath *mp, | 249 | static inline unsigned int metapath_branch_start(const struct metapath *mp) |
| 250 | unsigned height) | ||
| 251 | { | 250 | { |
| 252 | unsigned int i; | 251 | if (mp->mp_list[0] == 0) |
| 253 | for (i = 0; i < height - 1; i++) { | 252 | return 2; |
| 254 | if (mp->mp_list[i] != 0) | 253 | return 1; |
| 255 | return i; | ||
| 256 | } | ||
| 257 | return height; | ||
| 258 | } | 254 | } |
| 259 | 255 | ||
| 260 | /** | 256 | /** |
| @@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
| 436 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 432 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
| 437 | struct buffer_head *dibh = mp->mp_bh[0]; | 433 | struct buffer_head *dibh = mp->mp_bh[0]; |
| 438 | u64 bn, dblock = 0; | 434 | u64 bn, dblock = 0; |
| 439 | unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; | 435 | unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; |
| 440 | unsigned dblks = 0; | 436 | unsigned dblks = 0; |
| 441 | unsigned ptrs_per_blk; | 437 | unsigned ptrs_per_blk; |
| 442 | const unsigned end_of_metadata = height - 1; | 438 | const unsigned end_of_metadata = height - 1; |
| @@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
| 471 | /* Building up tree height */ | 467 | /* Building up tree height */ |
| 472 | state = ALLOC_GROW_HEIGHT; | 468 | state = ALLOC_GROW_HEIGHT; |
| 473 | iblks = height - ip->i_height; | 469 | iblks = height - ip->i_height; |
| 474 | zmpl = zero_metapath_length(mp, height); | 470 | branch_start = metapath_branch_start(mp); |
| 475 | iblks -= zmpl; | 471 | iblks += (height - branch_start); |
| 476 | iblks += height; | ||
| 477 | } | 472 | } |
| 478 | } | 473 | } |
| 479 | 474 | ||
| @@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
| 509 | sizeof(struct gfs2_meta_header)); | 504 | sizeof(struct gfs2_meta_header)); |
| 510 | *ptr = zero_bn; | 505 | *ptr = zero_bn; |
| 511 | state = ALLOC_GROW_DEPTH; | 506 | state = ALLOC_GROW_DEPTH; |
| 512 | for(i = zmpl; i < height; i++) { | 507 | for(i = branch_start; i < height; i++) { |
| 513 | if (mp->mp_bh[i] == NULL) | 508 | if (mp->mp_bh[i] == NULL) |
| 514 | break; | 509 | break; |
| 515 | brelse(mp->mp_bh[i]); | 510 | brelse(mp->mp_bh[i]); |
| 516 | mp->mp_bh[i] = NULL; | 511 | mp->mp_bh[i] = NULL; |
| 517 | } | 512 | } |
| 518 | i = zmpl; | 513 | i = branch_start; |
| 519 | } | 514 | } |
| 520 | if (n == 0) | 515 | if (n == 0) |
| 521 | break; | 516 | break; |
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h index 3bb11c0f8b56..ef606e3a5cf4 100644 --- a/fs/gfs2/gfs2.h +++ b/fs/gfs2/gfs2.h | |||
| @@ -16,11 +16,6 @@ enum { | |||
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | enum { | 18 | enum { |
| 19 | NO_WAIT = 0, | ||
| 20 | WAIT = 1, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum { | ||
| 24 | NO_FORCE = 0, | 19 | NO_FORCE = 0, |
| 25 | FORCE = 1, | 20 | FORCE = 1, |
| 26 | }; | 21 | }; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d636b3e80f5d..13391e546616 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -45,21 +45,19 @@ struct gfs2_gl_hash_bucket { | |||
| 45 | struct hlist_head hb_list; | 45 | struct hlist_head hb_list; |
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | struct glock_iter { | 48 | struct gfs2_glock_iter { |
| 49 | int hash; /* hash bucket index */ | 49 | int hash; /* hash bucket index */ |
| 50 | struct gfs2_sbd *sdp; /* incore superblock */ | 50 | struct gfs2_sbd *sdp; /* incore superblock */ |
| 51 | struct gfs2_glock *gl; /* current glock struct */ | 51 | struct gfs2_glock *gl; /* current glock struct */ |
| 52 | struct seq_file *seq; /* sequence file for debugfs */ | 52 | char string[512]; /* scratch space */ |
| 53 | char string[512]; /* scratch space */ | ||
| 54 | }; | 53 | }; |
| 55 | 54 | ||
| 56 | typedef void (*glock_examiner) (struct gfs2_glock * gl); | 55 | typedef void (*glock_examiner) (struct gfs2_glock * gl); |
| 57 | 56 | ||
| 58 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | 57 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); |
| 59 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); | 58 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); |
| 60 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); | 59 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) |
| 61 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | 60 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); |
| 62 | static void run_queue(struct gfs2_glock *gl); | ||
| 63 | 61 | ||
| 64 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | 62 | static DECLARE_RWSEM(gfs2_umount_flush_sem); |
| 65 | static struct dentry *gfs2_root; | 63 | static struct dentry *gfs2_root; |
| @@ -123,33 +121,6 @@ static inline rwlock_t *gl_lock_addr(unsigned int x) | |||
| 123 | #endif | 121 | #endif |
| 124 | 122 | ||
| 125 | /** | 123 | /** |
| 126 | * relaxed_state_ok - is a requested lock compatible with the current lock mode? | ||
| 127 | * @actual: the current state of the lock | ||
| 128 | * @requested: the lock state that was requested by the caller | ||
| 129 | * @flags: the modifier flags passed in by the caller | ||
| 130 | * | ||
| 131 | * Returns: 1 if the locks are compatible, 0 otherwise | ||
| 132 | */ | ||
| 133 | |||
| 134 | static inline int relaxed_state_ok(unsigned int actual, unsigned requested, | ||
| 135 | int flags) | ||
| 136 | { | ||
| 137 | if (actual == requested) | ||
| 138 | return 1; | ||
| 139 | |||
| 140 | if (flags & GL_EXACT) | ||
| 141 | return 0; | ||
| 142 | |||
| 143 | if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED) | ||
| 144 | return 1; | ||
| 145 | |||
| 146 | if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY)) | ||
| 147 | return 1; | ||
| 148 | |||
| 149 | return 0; | ||
| 150 | } | ||
| 151 | |||
| 152 | /** | ||
| 153 | * gl_hash() - Turn glock number into hash bucket number | 124 | * gl_hash() - Turn glock number into hash bucket number |
| 154 | * @lock: The glock number | 125 | * @lock: The glock number |
| 155 | * | 126 | * |
| @@ -182,7 +153,7 @@ static void glock_free(struct gfs2_glock *gl) | |||
| 182 | struct gfs2_sbd *sdp = gl->gl_sbd; | 153 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 183 | struct inode *aspace = gl->gl_aspace; | 154 | struct inode *aspace = gl->gl_aspace; |
| 184 | 155 | ||
| 185 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 156 | if (sdp->sd_lockstruct.ls_ops->lm_put_lock) |
| 186 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); | 157 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); |
| 187 | 158 | ||
| 188 | if (aspace) | 159 | if (aspace) |
| @@ -211,17 +182,14 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) | |||
| 211 | int gfs2_glock_put(struct gfs2_glock *gl) | 182 | int gfs2_glock_put(struct gfs2_glock *gl) |
| 212 | { | 183 | { |
| 213 | int rv = 0; | 184 | int rv = 0; |
| 214 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 215 | 185 | ||
| 216 | write_lock(gl_lock_addr(gl->gl_hash)); | 186 | write_lock(gl_lock_addr(gl->gl_hash)); |
| 217 | if (atomic_dec_and_test(&gl->gl_ref)) { | 187 | if (atomic_dec_and_test(&gl->gl_ref)) { |
| 218 | hlist_del(&gl->gl_list); | 188 | hlist_del(&gl->gl_list); |
| 219 | write_unlock(gl_lock_addr(gl->gl_hash)); | 189 | write_unlock(gl_lock_addr(gl->gl_hash)); |
| 220 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); | 190 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED); |
| 221 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | 191 | GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim)); |
| 222 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | 192 | GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); |
| 223 | gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); | ||
| 224 | gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); | ||
| 225 | glock_free(gl); | 193 | glock_free(gl); |
| 226 | rv = 1; | 194 | rv = 1; |
| 227 | goto out; | 195 | goto out; |
| @@ -281,22 +249,401 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, | |||
| 281 | return gl; | 249 | return gl; |
| 282 | } | 250 | } |
| 283 | 251 | ||
| 252 | /** | ||
| 253 | * may_grant - check if its ok to grant a new lock | ||
| 254 | * @gl: The glock | ||
| 255 | * @gh: The lock request which we wish to grant | ||
| 256 | * | ||
| 257 | * Returns: true if its ok to grant the lock | ||
| 258 | */ | ||
| 259 | |||
| 260 | static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) | ||
| 261 | { | ||
| 262 | const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); | ||
| 263 | if ((gh->gh_state == LM_ST_EXCLUSIVE || | ||
| 264 | gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) | ||
| 265 | return 0; | ||
| 266 | if (gl->gl_state == gh->gh_state) | ||
| 267 | return 1; | ||
| 268 | if (gh->gh_flags & GL_EXACT) | ||
| 269 | return 0; | ||
| 270 | if (gl->gl_state == LM_ST_EXCLUSIVE) { | ||
| 271 | if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) | ||
| 272 | return 1; | ||
| 273 | if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) | ||
| 274 | return 1; | ||
| 275 | } | ||
| 276 | if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) | ||
| 277 | return 1; | ||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | static void gfs2_holder_wake(struct gfs2_holder *gh) | ||
| 282 | { | ||
| 283 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
| 284 | smp_mb__after_clear_bit(); | ||
| 285 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
| 286 | } | ||
| 287 | |||
| 288 | /** | ||
| 289 | * do_promote - promote as many requests as possible on the current queue | ||
| 290 | * @gl: The glock | ||
| 291 | * | ||
| 292 | * Returns: true if there is a blocked holder at the head of the list | ||
| 293 | */ | ||
| 294 | |||
| 295 | static int do_promote(struct gfs2_glock *gl) | ||
| 296 | { | ||
| 297 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 298 | struct gfs2_holder *gh, *tmp; | ||
| 299 | int ret; | ||
| 300 | |||
| 301 | restart: | ||
| 302 | list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { | ||
| 303 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 304 | continue; | ||
| 305 | if (may_grant(gl, gh)) { | ||
| 306 | if (gh->gh_list.prev == &gl->gl_holders && | ||
| 307 | glops->go_lock) { | ||
| 308 | spin_unlock(&gl->gl_spin); | ||
| 309 | /* FIXME: eliminate this eventually */ | ||
| 310 | ret = glops->go_lock(gh); | ||
| 311 | spin_lock(&gl->gl_spin); | ||
| 312 | if (ret) { | ||
| 313 | gh->gh_error = ret; | ||
| 314 | list_del_init(&gh->gh_list); | ||
| 315 | gfs2_holder_wake(gh); | ||
| 316 | goto restart; | ||
| 317 | } | ||
| 318 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 319 | gfs2_holder_wake(gh); | ||
| 320 | goto restart; | ||
| 321 | } | ||
| 322 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 323 | gfs2_holder_wake(gh); | ||
| 324 | continue; | ||
| 325 | } | ||
| 326 | if (gh->gh_list.prev == &gl->gl_holders) | ||
| 327 | return 1; | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | /** | ||
| 334 | * do_error - Something unexpected has happened during a lock request | ||
| 335 | * | ||
| 336 | */ | ||
| 337 | |||
| 338 | static inline void do_error(struct gfs2_glock *gl, const int ret) | ||
| 339 | { | ||
| 340 | struct gfs2_holder *gh, *tmp; | ||
| 341 | |||
| 342 | list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { | ||
| 343 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 344 | continue; | ||
| 345 | if (ret & LM_OUT_ERROR) | ||
| 346 | gh->gh_error = -EIO; | ||
| 347 | else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) | ||
| 348 | gh->gh_error = GLR_TRYFAILED; | ||
| 349 | else | ||
| 350 | continue; | ||
| 351 | list_del_init(&gh->gh_list); | ||
| 352 | gfs2_holder_wake(gh); | ||
| 353 | } | ||
| 354 | } | ||
| 355 | |||
| 356 | /** | ||
| 357 | * find_first_waiter - find the first gh that's waiting for the glock | ||
| 358 | * @gl: the glock | ||
| 359 | */ | ||
| 360 | |||
| 361 | static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) | ||
| 362 | { | ||
| 363 | struct gfs2_holder *gh; | ||
| 364 | |||
| 365 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | ||
| 366 | if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 367 | return gh; | ||
| 368 | } | ||
| 369 | return NULL; | ||
| 370 | } | ||
| 371 | |||
| 372 | /** | ||
| 373 | * state_change - record that the glock is now in a different state | ||
| 374 | * @gl: the glock | ||
| 375 | * @new_state the new state | ||
| 376 | * | ||
| 377 | */ | ||
| 378 | |||
| 379 | static void state_change(struct gfs2_glock *gl, unsigned int new_state) | ||
| 380 | { | ||
| 381 | int held1, held2; | ||
| 382 | |||
| 383 | held1 = (gl->gl_state != LM_ST_UNLOCKED); | ||
| 384 | held2 = (new_state != LM_ST_UNLOCKED); | ||
| 385 | |||
| 386 | if (held1 != held2) { | ||
| 387 | if (held2) | ||
| 388 | gfs2_glock_hold(gl); | ||
| 389 | else | ||
| 390 | gfs2_glock_put(gl); | ||
| 391 | } | ||
| 392 | |||
| 393 | gl->gl_state = new_state; | ||
| 394 | gl->gl_tchange = jiffies; | ||
| 395 | } | ||
| 396 | |||
| 397 | static void gfs2_demote_wake(struct gfs2_glock *gl) | ||
| 398 | { | ||
| 399 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | ||
| 400 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 401 | smp_mb__after_clear_bit(); | ||
| 402 | wake_up_bit(&gl->gl_flags, GLF_DEMOTE); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * finish_xmote - The DLM has replied to one of our lock requests | ||
| 407 | * @gl: The glock | ||
| 408 | * @ret: The status from the DLM | ||
| 409 | * | ||
| 410 | */ | ||
| 411 | |||
| 412 | static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) | ||
| 413 | { | ||
| 414 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 415 | struct gfs2_holder *gh; | ||
| 416 | unsigned state = ret & LM_OUT_ST_MASK; | ||
| 417 | |||
| 418 | spin_lock(&gl->gl_spin); | ||
| 419 | state_change(gl, state); | ||
| 420 | gh = find_first_waiter(gl); | ||
| 421 | |||
| 422 | /* Demote to UN request arrived during demote to SH or DF */ | ||
| 423 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && | ||
| 424 | state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) | ||
| 425 | gl->gl_target = LM_ST_UNLOCKED; | ||
| 426 | |||
| 427 | /* Check for state != intended state */ | ||
| 428 | if (unlikely(state != gl->gl_target)) { | ||
| 429 | if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { | ||
| 430 | /* move to back of queue and try next entry */ | ||
| 431 | if (ret & LM_OUT_CANCELED) { | ||
| 432 | if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) | ||
| 433 | list_move_tail(&gh->gh_list, &gl->gl_holders); | ||
| 434 | gh = find_first_waiter(gl); | ||
| 435 | gl->gl_target = gh->gh_state; | ||
| 436 | goto retry; | ||
| 437 | } | ||
| 438 | /* Some error or failed "try lock" - report it */ | ||
| 439 | if ((ret & LM_OUT_ERROR) || | ||
| 440 | (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { | ||
| 441 | gl->gl_target = gl->gl_state; | ||
| 442 | do_error(gl, ret); | ||
| 443 | goto out; | ||
| 444 | } | ||
| 445 | } | ||
| 446 | switch(state) { | ||
| 447 | /* Unlocked due to conversion deadlock, try again */ | ||
| 448 | case LM_ST_UNLOCKED: | ||
| 449 | retry: | ||
| 450 | do_xmote(gl, gh, gl->gl_target); | ||
| 451 | break; | ||
| 452 | /* Conversion fails, unlock and try again */ | ||
| 453 | case LM_ST_SHARED: | ||
| 454 | case LM_ST_DEFERRED: | ||
| 455 | do_xmote(gl, gh, LM_ST_UNLOCKED); | ||
| 456 | break; | ||
| 457 | default: /* Everything else */ | ||
| 458 | printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); | ||
| 459 | GLOCK_BUG_ON(gl, 1); | ||
| 460 | } | ||
| 461 | spin_unlock(&gl->gl_spin); | ||
| 462 | gfs2_glock_put(gl); | ||
| 463 | return; | ||
| 464 | } | ||
| 465 | |||
| 466 | /* Fast path - we got what we asked for */ | ||
| 467 | if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) | ||
| 468 | gfs2_demote_wake(gl); | ||
| 469 | if (state != LM_ST_UNLOCKED) { | ||
| 470 | if (glops->go_xmote_bh) { | ||
| 471 | int rv; | ||
| 472 | spin_unlock(&gl->gl_spin); | ||
| 473 | rv = glops->go_xmote_bh(gl, gh); | ||
| 474 | if (rv == -EAGAIN) | ||
| 475 | return; | ||
| 476 | spin_lock(&gl->gl_spin); | ||
| 477 | if (rv) { | ||
| 478 | do_error(gl, rv); | ||
| 479 | goto out; | ||
| 480 | } | ||
| 481 | } | ||
| 482 | do_promote(gl); | ||
| 483 | } | ||
| 484 | out: | ||
| 485 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 486 | spin_unlock(&gl->gl_spin); | ||
| 487 | gfs2_glock_put(gl); | ||
| 488 | } | ||
| 489 | |||
| 490 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 491 | unsigned int cur_state, unsigned int req_state, | ||
| 492 | unsigned int flags) | ||
| 493 | { | ||
| 494 | int ret = LM_OUT_ERROR; | ||
| 495 | |||
| 496 | if (!sdp->sd_lockstruct.ls_ops->lm_lock) | ||
| 497 | return req_state == LM_ST_UNLOCKED ? 0 : req_state; | ||
| 498 | |||
| 499 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 500 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 501 | req_state, flags); | ||
| 502 | return ret; | ||
| 503 | } | ||
| 504 | |||
| 505 | /** | ||
| 506 | * do_xmote - Calls the DLM to change the state of a lock | ||
| 507 | * @gl: The lock state | ||
| 508 | * @gh: The holder (only for promotes) | ||
| 509 | * @target: The target lock state | ||
| 510 | * | ||
| 511 | */ | ||
| 512 | |||
| 513 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) | ||
| 514 | { | ||
| 515 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 516 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 517 | unsigned int lck_flags = gh ? gh->gh_flags : 0; | ||
| 518 | int ret; | ||
| 519 | |||
| 520 | lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | | ||
| 521 | LM_FLAG_PRIORITY); | ||
| 522 | BUG_ON(gl->gl_state == target); | ||
| 523 | BUG_ON(gl->gl_state == gl->gl_target); | ||
| 524 | if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && | ||
| 525 | glops->go_inval) { | ||
| 526 | set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); | ||
| 527 | do_error(gl, 0); /* Fail queued try locks */ | ||
| 528 | } | ||
| 529 | spin_unlock(&gl->gl_spin); | ||
| 530 | if (glops->go_xmote_th) | ||
| 531 | glops->go_xmote_th(gl); | ||
| 532 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) | ||
| 533 | glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); | ||
| 534 | clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); | ||
| 535 | |||
| 536 | gfs2_glock_hold(gl); | ||
| 537 | if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED || | ||
| 538 | gl->gl_state == LM_ST_DEFERRED) && | ||
| 539 | !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) | ||
| 540 | lck_flags |= LM_FLAG_TRY_1CB; | ||
| 541 | ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags); | ||
| 542 | |||
| 543 | if (!(ret & LM_OUT_ASYNC)) { | ||
| 544 | finish_xmote(gl, ret); | ||
| 545 | gfs2_glock_hold(gl); | ||
| 546 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 547 | gfs2_glock_put(gl); | ||
| 548 | } else { | ||
| 549 | GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); | ||
| 550 | } | ||
| 551 | spin_lock(&gl->gl_spin); | ||
| 552 | } | ||
| 553 | |||
| 554 | /** | ||
| 555 | * find_first_holder - find the first "holder" gh | ||
| 556 | * @gl: the glock | ||
| 557 | */ | ||
| 558 | |||
| 559 | static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) | ||
| 560 | { | ||
| 561 | struct gfs2_holder *gh; | ||
| 562 | |||
| 563 | if (!list_empty(&gl->gl_holders)) { | ||
| 564 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | ||
| 565 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 566 | return gh; | ||
| 567 | } | ||
| 568 | return NULL; | ||
| 569 | } | ||
| 570 | |||
| 571 | /** | ||
| 572 | * run_queue - do all outstanding tasks related to a glock | ||
| 573 | * @gl: The glock in question | ||
| 574 | * @nonblock: True if we must not block in run_queue | ||
| 575 | * | ||
| 576 | */ | ||
| 577 | |||
| 578 | static void run_queue(struct gfs2_glock *gl, const int nonblock) | ||
| 579 | { | ||
| 580 | struct gfs2_holder *gh = NULL; | ||
| 581 | |||
| 582 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 583 | return; | ||
| 584 | |||
| 585 | GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); | ||
| 586 | |||
| 587 | if (test_bit(GLF_DEMOTE, &gl->gl_flags) && | ||
| 588 | gl->gl_demote_state != gl->gl_state) { | ||
| 589 | if (find_first_holder(gl)) | ||
| 590 | goto out; | ||
| 591 | if (nonblock) | ||
| 592 | goto out_sched; | ||
| 593 | set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
| 594 | GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); | ||
| 595 | gl->gl_target = gl->gl_demote_state; | ||
| 596 | } else { | ||
| 597 | if (test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 598 | gfs2_demote_wake(gl); | ||
| 599 | if (do_promote(gl) == 0) | ||
| 600 | goto out; | ||
| 601 | gh = find_first_waiter(gl); | ||
| 602 | gl->gl_target = gh->gh_state; | ||
| 603 | if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) | ||
| 604 | do_error(gl, 0); /* Fail queued try locks */ | ||
| 605 | } | ||
| 606 | do_xmote(gl, gh, gl->gl_target); | ||
| 607 | return; | ||
| 608 | |||
| 609 | out_sched: | ||
| 610 | gfs2_glock_hold(gl); | ||
| 611 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 612 | gfs2_glock_put(gl); | ||
| 613 | out: | ||
| 614 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 615 | } | ||
| 616 | |||
| 284 | static void glock_work_func(struct work_struct *work) | 617 | static void glock_work_func(struct work_struct *work) |
| 285 | { | 618 | { |
| 619 | unsigned long delay = 0; | ||
| 286 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); | 620 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); |
| 287 | 621 | ||
| 622 | if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
| 623 | finish_xmote(gl, gl->gl_reply); | ||
| 288 | spin_lock(&gl->gl_spin); | 624 | spin_lock(&gl->gl_spin); |
| 289 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) | 625 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && |
| 290 | set_bit(GLF_DEMOTE, &gl->gl_flags); | 626 | gl->gl_state != LM_ST_UNLOCKED && |
| 291 | run_queue(gl); | 627 | gl->gl_demote_state != LM_ST_EXCLUSIVE) { |
| 628 | unsigned long holdtime, now = jiffies; | ||
| 629 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | ||
| 630 | if (time_before(now, holdtime)) | ||
| 631 | delay = holdtime - now; | ||
| 632 | set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); | ||
| 633 | } | ||
| 634 | run_queue(gl, 0); | ||
| 292 | spin_unlock(&gl->gl_spin); | 635 | spin_unlock(&gl->gl_spin); |
| 293 | gfs2_glock_put(gl); | 636 | if (!delay || |
| 637 | queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | ||
| 638 | gfs2_glock_put(gl); | ||
| 294 | } | 639 | } |
| 295 | 640 | ||
| 296 | static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | 641 | static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, |
| 297 | void **lockp) | 642 | void **lockp) |
| 298 | { | 643 | { |
| 299 | int error = -EIO; | 644 | int error = -EIO; |
| 645 | if (!sdp->sd_lockstruct.ls_ops->lm_get_lock) | ||
| 646 | return 0; | ||
| 300 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 647 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 301 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | 648 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( |
| 302 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | 649 | sdp->sd_lockstruct.ls_lockspace, name, lockp); |
| @@ -342,12 +689,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
| 342 | gl->gl_name = name; | 689 | gl->gl_name = name; |
| 343 | atomic_set(&gl->gl_ref, 1); | 690 | atomic_set(&gl->gl_ref, 1); |
| 344 | gl->gl_state = LM_ST_UNLOCKED; | 691 | gl->gl_state = LM_ST_UNLOCKED; |
| 692 | gl->gl_target = LM_ST_UNLOCKED; | ||
| 345 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | 693 | gl->gl_demote_state = LM_ST_EXCLUSIVE; |
| 346 | gl->gl_hash = hash; | 694 | gl->gl_hash = hash; |
| 347 | gl->gl_owner_pid = NULL; | ||
| 348 | gl->gl_ip = 0; | ||
| 349 | gl->gl_ops = glops; | 695 | gl->gl_ops = glops; |
| 350 | gl->gl_req_gh = NULL; | ||
| 351 | gl->gl_stamp = jiffies; | 696 | gl->gl_stamp = jiffies; |
| 352 | gl->gl_tchange = jiffies; | 697 | gl->gl_tchange = jiffies; |
| 353 | gl->gl_object = NULL; | 698 | gl->gl_object = NULL; |
| @@ -447,13 +792,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) | |||
| 447 | gh->gh_ip = 0; | 792 | gh->gh_ip = 0; |
| 448 | } | 793 | } |
| 449 | 794 | ||
| 450 | static void gfs2_holder_wake(struct gfs2_holder *gh) | ||
| 451 | { | ||
| 452 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
| 453 | smp_mb__after_clear_bit(); | ||
| 454 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
| 455 | } | ||
| 456 | |||
| 457 | static int just_schedule(void *word) | 795 | static int just_schedule(void *word) |
| 458 | { | 796 | { |
| 459 | schedule(); | 797 | schedule(); |
| @@ -466,14 +804,6 @@ static void wait_on_holder(struct gfs2_holder *gh) | |||
| 466 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); | 804 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); |
| 467 | } | 805 | } |
| 468 | 806 | ||
| 469 | static void gfs2_demote_wake(struct gfs2_glock *gl) | ||
| 470 | { | ||
| 471 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | ||
| 472 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 473 | smp_mb__after_clear_bit(); | ||
| 474 | wake_up_bit(&gl->gl_flags, GLF_DEMOTE); | ||
| 475 | } | ||
| 476 | |||
| 477 | static void wait_on_demote(struct gfs2_glock *gl) | 807 | static void wait_on_demote(struct gfs2_glock *gl) |
| 478 | { | 808 | { |
| 479 | might_sleep(); | 809 | might_sleep(); |
| @@ -481,217 +811,6 @@ static void wait_on_demote(struct gfs2_glock *gl) | |||
| 481 | } | 811 | } |
| 482 | 812 | ||
| 483 | /** | 813 | /** |
| 484 | * rq_mutex - process a mutex request in the queue | ||
| 485 | * @gh: the glock holder | ||
| 486 | * | ||
| 487 | * Returns: 1 if the queue is blocked | ||
| 488 | */ | ||
| 489 | |||
| 490 | static int rq_mutex(struct gfs2_holder *gh) | ||
| 491 | { | ||
| 492 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 493 | |||
| 494 | list_del_init(&gh->gh_list); | ||
| 495 | /* gh->gh_error never examined. */ | ||
| 496 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 497 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
| 498 | smp_mb(); | ||
| 499 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
| 500 | |||
| 501 | return 1; | ||
| 502 | } | ||
| 503 | |||
| 504 | /** | ||
| 505 | * rq_promote - process a promote request in the queue | ||
| 506 | * @gh: the glock holder | ||
| 507 | * | ||
| 508 | * Acquire a new inter-node lock, or change a lock state to more restrictive. | ||
| 509 | * | ||
| 510 | * Returns: 1 if the queue is blocked | ||
| 511 | */ | ||
| 512 | |||
| 513 | static int rq_promote(struct gfs2_holder *gh) | ||
| 514 | { | ||
| 515 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 516 | |||
| 517 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | ||
| 518 | if (list_empty(&gl->gl_holders)) { | ||
| 519 | gl->gl_req_gh = gh; | ||
| 520 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 521 | spin_unlock(&gl->gl_spin); | ||
| 522 | gfs2_glock_xmote_th(gh->gh_gl, gh); | ||
| 523 | spin_lock(&gl->gl_spin); | ||
| 524 | } | ||
| 525 | return 1; | ||
| 526 | } | ||
| 527 | |||
| 528 | if (list_empty(&gl->gl_holders)) { | ||
| 529 | set_bit(HIF_FIRST, &gh->gh_iflags); | ||
| 530 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 531 | } else { | ||
| 532 | struct gfs2_holder *next_gh; | ||
| 533 | if (gh->gh_state == LM_ST_EXCLUSIVE) | ||
| 534 | return 1; | ||
| 535 | next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, | ||
| 536 | gh_list); | ||
| 537 | if (next_gh->gh_state == LM_ST_EXCLUSIVE) | ||
| 538 | return 1; | ||
| 539 | } | ||
| 540 | |||
| 541 | list_move_tail(&gh->gh_list, &gl->gl_holders); | ||
| 542 | gh->gh_error = 0; | ||
| 543 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 544 | |||
| 545 | gfs2_holder_wake(gh); | ||
| 546 | |||
| 547 | return 0; | ||
| 548 | } | ||
| 549 | |||
| 550 | /** | ||
| 551 | * rq_demote - process a demote request in the queue | ||
| 552 | * @gh: the glock holder | ||
| 553 | * | ||
| 554 | * Returns: 1 if the queue is blocked | ||
| 555 | */ | ||
| 556 | |||
| 557 | static int rq_demote(struct gfs2_glock *gl) | ||
| 558 | { | ||
| 559 | if (!list_empty(&gl->gl_holders)) | ||
| 560 | return 1; | ||
| 561 | |||
| 562 | if (gl->gl_state == gl->gl_demote_state || | ||
| 563 | gl->gl_state == LM_ST_UNLOCKED) { | ||
| 564 | gfs2_demote_wake(gl); | ||
| 565 | return 0; | ||
| 566 | } | ||
| 567 | |||
| 568 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 569 | set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
| 570 | |||
| 571 | if (gl->gl_demote_state == LM_ST_UNLOCKED || | ||
| 572 | gl->gl_state != LM_ST_EXCLUSIVE) { | ||
| 573 | spin_unlock(&gl->gl_spin); | ||
| 574 | gfs2_glock_drop_th(gl); | ||
| 575 | } else { | ||
| 576 | spin_unlock(&gl->gl_spin); | ||
| 577 | gfs2_glock_xmote_th(gl, NULL); | ||
| 578 | } | ||
| 579 | |||
| 580 | spin_lock(&gl->gl_spin); | ||
| 581 | clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
| 582 | |||
| 583 | return 0; | ||
| 584 | } | ||
| 585 | |||
| 586 | /** | ||
| 587 | * run_queue - process holder structures on a glock | ||
| 588 | * @gl: the glock | ||
| 589 | * | ||
| 590 | */ | ||
| 591 | static void run_queue(struct gfs2_glock *gl) | ||
| 592 | { | ||
| 593 | struct gfs2_holder *gh; | ||
| 594 | int blocked = 1; | ||
| 595 | |||
| 596 | for (;;) { | ||
| 597 | if (test_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 598 | break; | ||
| 599 | |||
| 600 | if (!list_empty(&gl->gl_waiters1)) { | ||
| 601 | gh = list_entry(gl->gl_waiters1.next, | ||
| 602 | struct gfs2_holder, gh_list); | ||
| 603 | blocked = rq_mutex(gh); | ||
| 604 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | ||
| 605 | blocked = rq_demote(gl); | ||
| 606 | if (test_bit(GLF_WAITERS2, &gl->gl_flags) && | ||
| 607 | !blocked) { | ||
| 608 | set_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 609 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
| 610 | } | ||
| 611 | clear_bit(GLF_WAITERS2, &gl->gl_flags); | ||
| 612 | } else if (!list_empty(&gl->gl_waiters3)) { | ||
| 613 | gh = list_entry(gl->gl_waiters3.next, | ||
| 614 | struct gfs2_holder, gh_list); | ||
| 615 | blocked = rq_promote(gh); | ||
| 616 | } else | ||
| 617 | break; | ||
| 618 | |||
| 619 | if (blocked) | ||
| 620 | break; | ||
| 621 | } | ||
| 622 | } | ||
| 623 | |||
| 624 | /** | ||
| 625 | * gfs2_glmutex_lock - acquire a local lock on a glock | ||
| 626 | * @gl: the glock | ||
| 627 | * | ||
| 628 | * Gives caller exclusive access to manipulate a glock structure. | ||
| 629 | */ | ||
| 630 | |||
| 631 | static void gfs2_glmutex_lock(struct gfs2_glock *gl) | ||
| 632 | { | ||
| 633 | spin_lock(&gl->gl_spin); | ||
| 634 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | ||
| 635 | struct gfs2_holder gh; | ||
| 636 | |||
| 637 | gfs2_holder_init(gl, 0, 0, &gh); | ||
| 638 | set_bit(HIF_WAIT, &gh.gh_iflags); | ||
| 639 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); | ||
| 640 | spin_unlock(&gl->gl_spin); | ||
| 641 | wait_on_holder(&gh); | ||
| 642 | gfs2_holder_uninit(&gh); | ||
| 643 | } else { | ||
| 644 | gl->gl_owner_pid = get_pid(task_pid(current)); | ||
| 645 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | ||
| 646 | spin_unlock(&gl->gl_spin); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 650 | /** | ||
| 651 | * gfs2_glmutex_trylock - try to acquire a local lock on a glock | ||
| 652 | * @gl: the glock | ||
| 653 | * | ||
| 654 | * Returns: 1 if the glock is acquired | ||
| 655 | */ | ||
| 656 | |||
| 657 | static int gfs2_glmutex_trylock(struct gfs2_glock *gl) | ||
| 658 | { | ||
| 659 | int acquired = 1; | ||
| 660 | |||
| 661 | spin_lock(&gl->gl_spin); | ||
| 662 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | ||
| 663 | acquired = 0; | ||
| 664 | } else { | ||
| 665 | gl->gl_owner_pid = get_pid(task_pid(current)); | ||
| 666 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | ||
| 667 | } | ||
| 668 | spin_unlock(&gl->gl_spin); | ||
| 669 | |||
| 670 | return acquired; | ||
| 671 | } | ||
| 672 | |||
| 673 | /** | ||
| 674 | * gfs2_glmutex_unlock - release a local lock on a glock | ||
| 675 | * @gl: the glock | ||
| 676 | * | ||
| 677 | */ | ||
| 678 | |||
| 679 | static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | ||
| 680 | { | ||
| 681 | struct pid *pid; | ||
| 682 | |||
| 683 | spin_lock(&gl->gl_spin); | ||
| 684 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 685 | pid = gl->gl_owner_pid; | ||
| 686 | gl->gl_owner_pid = NULL; | ||
| 687 | gl->gl_ip = 0; | ||
| 688 | run_queue(gl); | ||
| 689 | spin_unlock(&gl->gl_spin); | ||
| 690 | |||
| 691 | put_pid(pid); | ||
| 692 | } | ||
| 693 | |||
| 694 | /** | ||
| 695 | * handle_callback - process a demote request | 814 | * handle_callback - process a demote request |
| 696 | * @gl: the glock | 815 | * @gl: the glock |
| 697 | * @state: the state the caller wants us to change to | 816 | * @state: the state the caller wants us to change to |
| @@ -705,398 +824,45 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
| 705 | { | 824 | { |
| 706 | int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; | 825 | int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; |
| 707 | 826 | ||
| 708 | spin_lock(&gl->gl_spin); | ||
| 709 | set_bit(bit, &gl->gl_flags); | 827 | set_bit(bit, &gl->gl_flags); |
| 710 | if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { | 828 | if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { |
| 711 | gl->gl_demote_state = state; | 829 | gl->gl_demote_state = state; |
| 712 | gl->gl_demote_time = jiffies; | 830 | gl->gl_demote_time = jiffies; |
| 713 | if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && | 831 | if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && |
| 714 | gl->gl_object) { | 832 | gl->gl_object) |
| 715 | gfs2_glock_schedule_for_reclaim(gl); | 833 | gfs2_glock_schedule_for_reclaim(gl); |
| 716 | spin_unlock(&gl->gl_spin); | ||
| 717 | return; | ||
| 718 | } | ||
| 719 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && | 834 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && |
| 720 | gl->gl_demote_state != state) { | 835 | gl->gl_demote_state != state) { |
| 721 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) | 836 | gl->gl_demote_state = LM_ST_UNLOCKED; |
| 722 | set_bit(GLF_WAITERS2, &gl->gl_flags); | ||
| 723 | else | ||
| 724 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
| 725 | } | ||
| 726 | spin_unlock(&gl->gl_spin); | ||
| 727 | } | ||
| 728 | |||
| 729 | /** | ||
| 730 | * state_change - record that the glock is now in a different state | ||
| 731 | * @gl: the glock | ||
| 732 | * @new_state the new state | ||
| 733 | * | ||
| 734 | */ | ||
| 735 | |||
| 736 | static void state_change(struct gfs2_glock *gl, unsigned int new_state) | ||
| 737 | { | ||
| 738 | int held1, held2; | ||
| 739 | |||
| 740 | held1 = (gl->gl_state != LM_ST_UNLOCKED); | ||
| 741 | held2 = (new_state != LM_ST_UNLOCKED); | ||
| 742 | |||
| 743 | if (held1 != held2) { | ||
| 744 | if (held2) | ||
| 745 | gfs2_glock_hold(gl); | ||
| 746 | else | ||
| 747 | gfs2_glock_put(gl); | ||
| 748 | } | 837 | } |
| 749 | |||
| 750 | gl->gl_state = new_state; | ||
| 751 | gl->gl_tchange = jiffies; | ||
| 752 | } | 838 | } |
| 753 | 839 | ||
| 754 | /** | 840 | /** |
| 755 | * drop_bh - Called after a lock module unlock completes | 841 | * gfs2_glock_wait - wait on a glock acquisition |
| 756 | * @gl: the glock | ||
| 757 | * @ret: the return status | ||
| 758 | * | ||
| 759 | * Doesn't wake up the process waiting on the struct gfs2_holder (if any) | ||
| 760 | * Doesn't drop the reference on the glock the top half took out | ||
| 761 | * | ||
| 762 | */ | ||
| 763 | |||
| 764 | static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | ||
| 765 | { | ||
| 766 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 767 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 768 | |||
| 769 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 770 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 771 | gfs2_assert_warn(sdp, !ret); | ||
| 772 | |||
| 773 | state_change(gl, LM_ST_UNLOCKED); | ||
| 774 | |||
| 775 | if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) { | ||
| 776 | spin_lock(&gl->gl_spin); | ||
| 777 | gh->gh_error = 0; | ||
| 778 | spin_unlock(&gl->gl_spin); | ||
| 779 | gfs2_glock_xmote_th(gl, gl->gl_req_gh); | ||
| 780 | gfs2_glock_put(gl); | ||
| 781 | return; | ||
| 782 | } | ||
| 783 | |||
| 784 | spin_lock(&gl->gl_spin); | ||
| 785 | gfs2_demote_wake(gl); | ||
| 786 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 787 | spin_unlock(&gl->gl_spin); | ||
| 788 | gfs2_glock_put(gl); | ||
| 789 | } | ||
| 790 | |||
| 791 | /** | ||
| 792 | * xmote_bh - Called after the lock module is done acquiring a lock | ||
| 793 | * @gl: The glock in question | ||
| 794 | * @ret: the int returned from the lock module | ||
| 795 | * | ||
| 796 | */ | ||
| 797 | |||
| 798 | static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | ||
| 799 | { | ||
| 800 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 801 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 802 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 803 | int op_done = 1; | ||
| 804 | |||
| 805 | if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) { | ||
| 806 | drop_bh(gl, ret); | ||
| 807 | return; | ||
| 808 | } | ||
| 809 | |||
| 810 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 811 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 812 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); | ||
| 813 | |||
| 814 | state_change(gl, ret & LM_OUT_ST_MASK); | ||
| 815 | |||
| 816 | /* Deal with each possible exit condition */ | ||
| 817 | |||
| 818 | if (!gh) { | ||
| 819 | gl->gl_stamp = jiffies; | ||
| 820 | if (ret & LM_OUT_CANCELED) { | ||
| 821 | op_done = 0; | ||
| 822 | } else { | ||
| 823 | spin_lock(&gl->gl_spin); | ||
| 824 | if (gl->gl_state != gl->gl_demote_state) { | ||
| 825 | spin_unlock(&gl->gl_spin); | ||
| 826 | gfs2_glock_drop_th(gl); | ||
| 827 | gfs2_glock_put(gl); | ||
| 828 | return; | ||
| 829 | } | ||
| 830 | gfs2_demote_wake(gl); | ||
| 831 | spin_unlock(&gl->gl_spin); | ||
| 832 | } | ||
| 833 | } else { | ||
| 834 | spin_lock(&gl->gl_spin); | ||
| 835 | if (ret & LM_OUT_CONV_DEADLK) { | ||
| 836 | gh->gh_error = 0; | ||
| 837 | set_bit(GLF_CONV_DEADLK, &gl->gl_flags); | ||
| 838 | spin_unlock(&gl->gl_spin); | ||
| 839 | gfs2_glock_drop_th(gl); | ||
| 840 | gfs2_glock_put(gl); | ||
| 841 | return; | ||
| 842 | } | ||
| 843 | list_del_init(&gh->gh_list); | ||
| 844 | gh->gh_error = -EIO; | ||
| 845 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 846 | goto out; | ||
| 847 | gh->gh_error = GLR_CANCELED; | ||
| 848 | if (ret & LM_OUT_CANCELED) | ||
| 849 | goto out; | ||
| 850 | if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | ||
| 851 | list_add_tail(&gh->gh_list, &gl->gl_holders); | ||
| 852 | gh->gh_error = 0; | ||
| 853 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 854 | set_bit(HIF_FIRST, &gh->gh_iflags); | ||
| 855 | op_done = 0; | ||
| 856 | goto out; | ||
| 857 | } | ||
| 858 | gh->gh_error = GLR_TRYFAILED; | ||
| 859 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) | ||
| 860 | goto out; | ||
| 861 | gh->gh_error = -EINVAL; | ||
| 862 | if (gfs2_assert_withdraw(sdp, 0) == -1) | ||
| 863 | fs_err(sdp, "ret = 0x%.8X\n", ret); | ||
| 864 | out: | ||
| 865 | spin_unlock(&gl->gl_spin); | ||
| 866 | } | ||
| 867 | |||
| 868 | if (glops->go_xmote_bh) | ||
| 869 | glops->go_xmote_bh(gl); | ||
| 870 | |||
| 871 | if (op_done) { | ||
| 872 | spin_lock(&gl->gl_spin); | ||
| 873 | gl->gl_req_gh = NULL; | ||
| 874 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 875 | spin_unlock(&gl->gl_spin); | ||
| 876 | } | ||
| 877 | |||
| 878 | gfs2_glock_put(gl); | ||
| 879 | |||
| 880 | if (gh) | ||
| 881 | gfs2_holder_wake(gh); | ||
| 882 | } | ||
| 883 | |||
| 884 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 885 | unsigned int cur_state, unsigned int req_state, | ||
| 886 | unsigned int flags) | ||
| 887 | { | ||
| 888 | int ret = 0; | ||
| 889 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 890 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 891 | req_state, flags); | ||
| 892 | return ret; | ||
| 893 | } | ||
| 894 | |||
| 895 | /** | ||
| 896 | * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock | ||
| 897 | * @gl: The glock in question | ||
| 898 | * @state: the requested state | ||
| 899 | * @flags: modifier flags to the lock call | ||
| 900 | * | ||
| 901 | */ | ||
| 902 | |||
| 903 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | ||
| 904 | { | ||
| 905 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 906 | int flags = gh ? gh->gh_flags : 0; | ||
| 907 | unsigned state = gh ? gh->gh_state : gl->gl_demote_state; | ||
| 908 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 909 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | | ||
| 910 | LM_FLAG_NOEXP | LM_FLAG_ANY | | ||
| 911 | LM_FLAG_PRIORITY); | ||
| 912 | unsigned int lck_ret; | ||
| 913 | |||
| 914 | if (glops->go_xmote_th) | ||
| 915 | glops->go_xmote_th(gl); | ||
| 916 | if (state == LM_ST_DEFERRED && glops->go_inval) | ||
| 917 | glops->go_inval(gl, DIO_METADATA); | ||
| 918 | |||
| 919 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 920 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 921 | gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); | ||
| 922 | gfs2_assert_warn(sdp, state != gl->gl_state); | ||
| 923 | |||
| 924 | gfs2_glock_hold(gl); | ||
| 925 | |||
| 926 | lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); | ||
| 927 | |||
| 928 | if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR))) | ||
| 929 | return; | ||
| 930 | |||
| 931 | if (lck_ret & LM_OUT_ASYNC) | ||
| 932 | gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC); | ||
| 933 | else | ||
| 934 | xmote_bh(gl, lck_ret); | ||
| 935 | } | ||
| 936 | |||
| 937 | static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
| 938 | unsigned int cur_state) | ||
| 939 | { | ||
| 940 | int ret = 0; | ||
| 941 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 942 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); | ||
| 943 | return ret; | ||
| 944 | } | ||
| 945 | |||
| 946 | /** | ||
| 947 | * gfs2_glock_drop_th - call into the lock module to unlock a lock | ||
| 948 | * @gl: the glock | ||
| 949 | * | ||
| 950 | */ | ||
| 951 | |||
| 952 | static void gfs2_glock_drop_th(struct gfs2_glock *gl) | ||
| 953 | { | ||
| 954 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 955 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 956 | unsigned int ret; | ||
| 957 | |||
| 958 | if (glops->go_xmote_th) | ||
| 959 | glops->go_xmote_th(gl); | ||
| 960 | if (glops->go_inval) | ||
| 961 | glops->go_inval(gl, DIO_METADATA); | ||
| 962 | |||
| 963 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 964 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 965 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); | ||
| 966 | |||
| 967 | gfs2_glock_hold(gl); | ||
| 968 | |||
| 969 | ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); | ||
| 970 | |||
| 971 | if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR))) | ||
| 972 | return; | ||
| 973 | |||
| 974 | if (!ret) | ||
| 975 | drop_bh(gl, ret); | ||
| 976 | else | ||
| 977 | gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC); | ||
| 978 | } | ||
| 979 | |||
| 980 | /** | ||
| 981 | * do_cancels - cancel requests for locks stuck waiting on an expire flag | ||
| 982 | * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock | ||
| 983 | * | ||
| 984 | * Don't cancel GL_NOCANCEL requests. | ||
| 985 | */ | ||
| 986 | |||
| 987 | static void do_cancels(struct gfs2_holder *gh) | ||
| 988 | { | ||
| 989 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 990 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 991 | |||
| 992 | spin_lock(&gl->gl_spin); | ||
| 993 | |||
| 994 | while (gl->gl_req_gh != gh && | ||
| 995 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | ||
| 996 | !list_empty(&gh->gh_list)) { | ||
| 997 | if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { | ||
| 998 | spin_unlock(&gl->gl_spin); | ||
| 999 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 1000 | sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); | ||
| 1001 | msleep(100); | ||
| 1002 | spin_lock(&gl->gl_spin); | ||
| 1003 | } else { | ||
| 1004 | spin_unlock(&gl->gl_spin); | ||
| 1005 | msleep(100); | ||
| 1006 | spin_lock(&gl->gl_spin); | ||
| 1007 | } | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | spin_unlock(&gl->gl_spin); | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | /** | ||
| 1014 | * glock_wait_internal - wait on a glock acquisition | ||
| 1015 | * @gh: the glock holder | 842 | * @gh: the glock holder |
| 1016 | * | 843 | * |
| 1017 | * Returns: 0 on success | 844 | * Returns: 0 on success |
| 1018 | */ | 845 | */ |
| 1019 | 846 | ||
| 1020 | static int glock_wait_internal(struct gfs2_holder *gh) | 847 | int gfs2_glock_wait(struct gfs2_holder *gh) |
| 1021 | { | 848 | { |
| 1022 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 1023 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 1024 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 1025 | |||
| 1026 | if (test_bit(HIF_ABORTED, &gh->gh_iflags)) | ||
| 1027 | return -EIO; | ||
| 1028 | |||
| 1029 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { | ||
| 1030 | spin_lock(&gl->gl_spin); | ||
| 1031 | if (gl->gl_req_gh != gh && | ||
| 1032 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | ||
| 1033 | !list_empty(&gh->gh_list)) { | ||
| 1034 | list_del_init(&gh->gh_list); | ||
| 1035 | gh->gh_error = GLR_TRYFAILED; | ||
| 1036 | run_queue(gl); | ||
| 1037 | spin_unlock(&gl->gl_spin); | ||
| 1038 | return gh->gh_error; | ||
| 1039 | } | ||
| 1040 | spin_unlock(&gl->gl_spin); | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | if (gh->gh_flags & LM_FLAG_PRIORITY) | ||
| 1044 | do_cancels(gh); | ||
| 1045 | |||
| 1046 | wait_on_holder(gh); | 849 | wait_on_holder(gh); |
| 1047 | if (gh->gh_error) | ||
| 1048 | return gh->gh_error; | ||
| 1049 | |||
| 1050 | gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags)); | ||
| 1051 | gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state, | ||
| 1052 | gh->gh_flags)); | ||
| 1053 | |||
| 1054 | if (test_bit(HIF_FIRST, &gh->gh_iflags)) { | ||
| 1055 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 1056 | |||
| 1057 | if (glops->go_lock) { | ||
| 1058 | gh->gh_error = glops->go_lock(gh); | ||
| 1059 | if (gh->gh_error) { | ||
| 1060 | spin_lock(&gl->gl_spin); | ||
| 1061 | list_del_init(&gh->gh_list); | ||
| 1062 | spin_unlock(&gl->gl_spin); | ||
| 1063 | } | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | spin_lock(&gl->gl_spin); | ||
| 1067 | gl->gl_req_gh = NULL; | ||
| 1068 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 1069 | run_queue(gl); | ||
| 1070 | spin_unlock(&gl->gl_spin); | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | return gh->gh_error; | 850 | return gh->gh_error; |
| 1074 | } | 851 | } |
| 1075 | 852 | ||
| 1076 | static inline struct gfs2_holder * | 853 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) |
| 1077 | find_holder_by_owner(struct list_head *head, struct pid *pid) | ||
| 1078 | { | ||
| 1079 | struct gfs2_holder *gh; | ||
| 1080 | |||
| 1081 | list_for_each_entry(gh, head, gh_list) { | ||
| 1082 | if (gh->gh_owner_pid == pid) | ||
| 1083 | return gh; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | return NULL; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | static void print_dbg(struct glock_iter *gi, const char *fmt, ...) | ||
| 1090 | { | 854 | { |
| 1091 | va_list args; | 855 | va_list args; |
| 1092 | 856 | ||
| 1093 | va_start(args, fmt); | 857 | va_start(args, fmt); |
| 1094 | if (gi) { | 858 | if (seq) { |
| 859 | struct gfs2_glock_iter *gi = seq->private; | ||
| 1095 | vsprintf(gi->string, fmt, args); | 860 | vsprintf(gi->string, fmt, args); |
| 1096 | seq_printf(gi->seq, gi->string); | 861 | seq_printf(seq, gi->string); |
| 1097 | } | 862 | } else { |
| 1098 | else | 863 | printk(KERN_ERR " "); |
| 1099 | vprintk(fmt, args); | 864 | vprintk(fmt, args); |
| 865 | } | ||
| 1100 | va_end(args); | 866 | va_end(args); |
| 1101 | } | 867 | } |
| 1102 | 868 | ||
| @@ -1104,50 +870,76 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...) | |||
| 1104 | * add_to_queue - Add a holder to the wait queue (but look for recursion) | 870 | * add_to_queue - Add a holder to the wait queue (but look for recursion) |
| 1105 | * @gh: the holder structure to add | 871 | * @gh: the holder structure to add |
| 1106 | * | 872 | * |
| 873 | * Eventually we should move the recursive locking trap to a | ||
| 874 | * debugging option or something like that. This is the fast | ||
| 875 | * path and needs to have the minimum number of distractions. | ||
| 876 | * | ||
| 1107 | */ | 877 | */ |
| 1108 | 878 | ||
| 1109 | static void add_to_queue(struct gfs2_holder *gh) | 879 | static inline void add_to_queue(struct gfs2_holder *gh) |
| 1110 | { | 880 | { |
| 1111 | struct gfs2_glock *gl = gh->gh_gl; | 881 | struct gfs2_glock *gl = gh->gh_gl; |
| 1112 | struct gfs2_holder *existing; | 882 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 883 | struct list_head *insert_pt = NULL; | ||
| 884 | struct gfs2_holder *gh2; | ||
| 885 | int try_lock = 0; | ||
| 1113 | 886 | ||
| 1114 | BUG_ON(gh->gh_owner_pid == NULL); | 887 | BUG_ON(gh->gh_owner_pid == NULL); |
| 1115 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | 888 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) |
| 1116 | BUG(); | 889 | BUG(); |
| 1117 | 890 | ||
| 1118 | if (!(gh->gh_flags & GL_FLOCK)) { | 891 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { |
| 1119 | existing = find_holder_by_owner(&gl->gl_holders, | 892 | if (test_bit(GLF_LOCK, &gl->gl_flags)) |
| 1120 | gh->gh_owner_pid); | 893 | try_lock = 1; |
| 1121 | if (existing) { | 894 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) |
| 1122 | print_symbol(KERN_WARNING "original: %s\n", | 895 | goto fail; |
| 1123 | existing->gh_ip); | 896 | } |
| 1124 | printk(KERN_INFO "pid : %d\n", | 897 | |
| 1125 | pid_nr(existing->gh_owner_pid)); | 898 | list_for_each_entry(gh2, &gl->gl_holders, gh_list) { |
| 1126 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 899 | if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && |
| 1127 | existing->gh_gl->gl_name.ln_type, | 900 | (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) |
| 1128 | existing->gh_gl->gl_state); | 901 | goto trap_recursive; |
| 1129 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | 902 | if (try_lock && |
| 1130 | printk(KERN_INFO "pid : %d\n", | 903 | !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && |
| 1131 | pid_nr(gh->gh_owner_pid)); | 904 | !may_grant(gl, gh)) { |
| 1132 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 905 | fail: |
| 1133 | gl->gl_name.ln_type, gl->gl_state); | 906 | gh->gh_error = GLR_TRYFAILED; |
| 1134 | BUG(); | 907 | gfs2_holder_wake(gh); |
| 1135 | } | 908 | return; |
| 1136 | |||
| 1137 | existing = find_holder_by_owner(&gl->gl_waiters3, | ||
| 1138 | gh->gh_owner_pid); | ||
| 1139 | if (existing) { | ||
| 1140 | print_symbol(KERN_WARNING "original: %s\n", | ||
| 1141 | existing->gh_ip); | ||
| 1142 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | ||
| 1143 | BUG(); | ||
| 1144 | } | 909 | } |
| 910 | if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) | ||
| 911 | continue; | ||
| 912 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) | ||
| 913 | insert_pt = &gh2->gh_list; | ||
| 914 | } | ||
| 915 | if (likely(insert_pt == NULL)) { | ||
| 916 | list_add_tail(&gh->gh_list, &gl->gl_holders); | ||
| 917 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | ||
| 918 | goto do_cancel; | ||
| 919 | return; | ||
| 920 | } | ||
| 921 | list_add_tail(&gh->gh_list, insert_pt); | ||
| 922 | do_cancel: | ||
| 923 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | ||
| 924 | if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { | ||
| 925 | spin_unlock(&gl->gl_spin); | ||
| 926 | if (sdp->sd_lockstruct.ls_ops->lm_cancel) | ||
| 927 | sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); | ||
| 928 | spin_lock(&gl->gl_spin); | ||
| 1145 | } | 929 | } |
| 930 | return; | ||
| 1146 | 931 | ||
| 1147 | if (gh->gh_flags & LM_FLAG_PRIORITY) | 932 | trap_recursive: |
| 1148 | list_add(&gh->gh_list, &gl->gl_waiters3); | 933 | print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip); |
| 1149 | else | 934 | printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); |
| 1150 | list_add_tail(&gh->gh_list, &gl->gl_waiters3); | 935 | printk(KERN_ERR "lock type: %d req lock state : %d\n", |
| 936 | gh2->gh_gl->gl_name.ln_type, gh2->gh_state); | ||
| 937 | print_symbol(KERN_ERR "new: %s\n", gh->gh_ip); | ||
| 938 | printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); | ||
| 939 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | ||
| 940 | gh->gh_gl->gl_name.ln_type, gh->gh_state); | ||
| 941 | __dump_glock(NULL, gl); | ||
| 942 | BUG(); | ||
| 1151 | } | 943 | } |
| 1152 | 944 | ||
| 1153 | /** | 945 | /** |
| @@ -1165,24 +957,16 @@ int gfs2_glock_nq(struct gfs2_holder *gh) | |||
| 1165 | struct gfs2_sbd *sdp = gl->gl_sbd; | 957 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 1166 | int error = 0; | 958 | int error = 0; |
| 1167 | 959 | ||
| 1168 | restart: | 960 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 1169 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { | ||
| 1170 | set_bit(HIF_ABORTED, &gh->gh_iflags); | ||
| 1171 | return -EIO; | 961 | return -EIO; |
| 1172 | } | ||
| 1173 | 962 | ||
| 1174 | spin_lock(&gl->gl_spin); | 963 | spin_lock(&gl->gl_spin); |
| 1175 | add_to_queue(gh); | 964 | add_to_queue(gh); |
| 1176 | run_queue(gl); | 965 | run_queue(gl, 1); |
| 1177 | spin_unlock(&gl->gl_spin); | 966 | spin_unlock(&gl->gl_spin); |
| 1178 | 967 | ||
| 1179 | if (!(gh->gh_flags & GL_ASYNC)) { | 968 | if (!(gh->gh_flags & GL_ASYNC)) |
| 1180 | error = glock_wait_internal(gh); | 969 | error = gfs2_glock_wait(gh); |
| 1181 | if (error == GLR_CANCELED) { | ||
| 1182 | msleep(100); | ||
| 1183 | goto restart; | ||
| 1184 | } | ||
| 1185 | } | ||
| 1186 | 970 | ||
| 1187 | return error; | 971 | return error; |
| 1188 | } | 972 | } |
| @@ -1196,48 +980,7 @@ restart: | |||
| 1196 | 980 | ||
| 1197 | int gfs2_glock_poll(struct gfs2_holder *gh) | 981 | int gfs2_glock_poll(struct gfs2_holder *gh) |
| 1198 | { | 982 | { |
| 1199 | struct gfs2_glock *gl = gh->gh_gl; | 983 | return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; |
| 1200 | int ready = 0; | ||
| 1201 | |||
| 1202 | spin_lock(&gl->gl_spin); | ||
| 1203 | |||
| 1204 | if (test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 1205 | ready = 1; | ||
| 1206 | else if (list_empty(&gh->gh_list)) { | ||
| 1207 | if (gh->gh_error == GLR_CANCELED) { | ||
| 1208 | spin_unlock(&gl->gl_spin); | ||
| 1209 | msleep(100); | ||
| 1210 | if (gfs2_glock_nq(gh)) | ||
| 1211 | return 1; | ||
| 1212 | return 0; | ||
| 1213 | } else | ||
| 1214 | ready = 1; | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | spin_unlock(&gl->gl_spin); | ||
| 1218 | |||
| 1219 | return ready; | ||
| 1220 | } | ||
| 1221 | |||
| 1222 | /** | ||
| 1223 | * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC | ||
| 1224 | * @gh: the holder structure | ||
| 1225 | * | ||
| 1226 | * Returns: 0, GLR_TRYFAILED, or errno on failure | ||
| 1227 | */ | ||
| 1228 | |||
| 1229 | int gfs2_glock_wait(struct gfs2_holder *gh) | ||
| 1230 | { | ||
| 1231 | int error; | ||
| 1232 | |||
| 1233 | error = glock_wait_internal(gh); | ||
| 1234 | if (error == GLR_CANCELED) { | ||
| 1235 | msleep(100); | ||
| 1236 | gh->gh_flags &= ~GL_ASYNC; | ||
| 1237 | error = gfs2_glock_nq(gh); | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | return error; | ||
| 1241 | } | 984 | } |
| 1242 | 985 | ||
| 1243 | /** | 986 | /** |
| @@ -1251,26 +994,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
| 1251 | struct gfs2_glock *gl = gh->gh_gl; | 994 | struct gfs2_glock *gl = gh->gh_gl; |
| 1252 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 995 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
| 1253 | unsigned delay = 0; | 996 | unsigned delay = 0; |
| 997 | int fast_path = 0; | ||
| 1254 | 998 | ||
| 999 | spin_lock(&gl->gl_spin); | ||
| 1255 | if (gh->gh_flags & GL_NOCACHE) | 1000 | if (gh->gh_flags & GL_NOCACHE) |
| 1256 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); | 1001 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
| 1257 | 1002 | ||
| 1258 | gfs2_glmutex_lock(gl); | ||
| 1259 | |||
| 1260 | spin_lock(&gl->gl_spin); | ||
| 1261 | list_del_init(&gh->gh_list); | 1003 | list_del_init(&gh->gh_list); |
| 1262 | 1004 | if (find_first_holder(gl) == NULL) { | |
| 1263 | if (list_empty(&gl->gl_holders)) { | ||
| 1264 | if (glops->go_unlock) { | 1005 | if (glops->go_unlock) { |
| 1006 | GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 1265 | spin_unlock(&gl->gl_spin); | 1007 | spin_unlock(&gl->gl_spin); |
| 1266 | glops->go_unlock(gh); | 1008 | glops->go_unlock(gh); |
| 1267 | spin_lock(&gl->gl_spin); | 1009 | spin_lock(&gl->gl_spin); |
| 1010 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 1268 | } | 1011 | } |
| 1269 | gl->gl_stamp = jiffies; | 1012 | gl->gl_stamp = jiffies; |
| 1013 | if (list_empty(&gl->gl_holders) && | ||
| 1014 | !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | ||
| 1015 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 1016 | fast_path = 1; | ||
| 1270 | } | 1017 | } |
| 1271 | |||
| 1272 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 1273 | spin_unlock(&gl->gl_spin); | 1018 | spin_unlock(&gl->gl_spin); |
| 1019 | if (likely(fast_path)) | ||
| 1020 | return; | ||
| 1274 | 1021 | ||
| 1275 | gfs2_glock_hold(gl); | 1022 | gfs2_glock_hold(gl); |
| 1276 | if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | 1023 | if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && |
| @@ -1454,6 +1201,8 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
| 1454 | static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | 1201 | static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) |
| 1455 | { | 1202 | { |
| 1456 | int error = -EIO; | 1203 | int error = -EIO; |
| 1204 | if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb) | ||
| 1205 | return 0; | ||
| 1457 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 1206 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 1458 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | 1207 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); |
| 1459 | return error; | 1208 | return error; |
| @@ -1469,20 +1218,14 @@ int gfs2_lvb_hold(struct gfs2_glock *gl) | |||
| 1469 | { | 1218 | { |
| 1470 | int error; | 1219 | int error; |
| 1471 | 1220 | ||
| 1472 | gfs2_glmutex_lock(gl); | ||
| 1473 | |||
| 1474 | if (!atomic_read(&gl->gl_lvb_count)) { | 1221 | if (!atomic_read(&gl->gl_lvb_count)) { |
| 1475 | error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); | 1222 | error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); |
| 1476 | if (error) { | 1223 | if (error) |
| 1477 | gfs2_glmutex_unlock(gl); | ||
| 1478 | return error; | 1224 | return error; |
| 1479 | } | ||
| 1480 | gfs2_glock_hold(gl); | 1225 | gfs2_glock_hold(gl); |
| 1481 | } | 1226 | } |
| 1482 | atomic_inc(&gl->gl_lvb_count); | 1227 | atomic_inc(&gl->gl_lvb_count); |
| 1483 | 1228 | ||
| 1484 | gfs2_glmutex_unlock(gl); | ||
| 1485 | |||
| 1486 | return 0; | 1229 | return 0; |
| 1487 | } | 1230 | } |
| 1488 | 1231 | ||
| @@ -1497,17 +1240,13 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl) | |||
| 1497 | struct gfs2_sbd *sdp = gl->gl_sbd; | 1240 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 1498 | 1241 | ||
| 1499 | gfs2_glock_hold(gl); | 1242 | gfs2_glock_hold(gl); |
| 1500 | gfs2_glmutex_lock(gl); | ||
| 1501 | |||
| 1502 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); | 1243 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); |
| 1503 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { | 1244 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { |
| 1504 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 1245 | if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb) |
| 1505 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); | 1246 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); |
| 1506 | gl->gl_lvb = NULL; | 1247 | gl->gl_lvb = NULL; |
| 1507 | gfs2_glock_put(gl); | 1248 | gfs2_glock_put(gl); |
| 1508 | } | 1249 | } |
| 1509 | |||
| 1510 | gfs2_glmutex_unlock(gl); | ||
| 1511 | gfs2_glock_put(gl); | 1250 | gfs2_glock_put(gl); |
| 1512 | } | 1251 | } |
| 1513 | 1252 | ||
| @@ -1527,7 +1266,9 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
| 1527 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
| 1528 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
| 1529 | 1268 | ||
| 1269 | spin_lock(&gl->gl_spin); | ||
| 1530 | handle_callback(gl, state, 1, delay); | 1270 | handle_callback(gl, state, 1, delay); |
| 1271 | spin_unlock(&gl->gl_spin); | ||
| 1531 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | 1272 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) |
| 1532 | gfs2_glock_put(gl); | 1273 | gfs2_glock_put(gl); |
| 1533 | } | 1274 | } |
| @@ -1568,7 +1309,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
| 1568 | gl = gfs2_glock_find(sdp, &async->lc_name); | 1309 | gl = gfs2_glock_find(sdp, &async->lc_name); |
| 1569 | if (gfs2_assert_warn(sdp, gl)) | 1310 | if (gfs2_assert_warn(sdp, gl)) |
| 1570 | return; | 1311 | return; |
| 1571 | xmote_bh(gl, async->lc_ret); | 1312 | gl->gl_reply = async->lc_ret; |
| 1313 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); | ||
| 1572 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 1314 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
| 1573 | gfs2_glock_put(gl); | 1315 | gfs2_glock_put(gl); |
| 1574 | up_read(&gfs2_umount_flush_sem); | 1316 | up_read(&gfs2_umount_flush_sem); |
| @@ -1581,11 +1323,6 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
| 1581 | wake_up_process(sdp->sd_recoverd_process); | 1323 | wake_up_process(sdp->sd_recoverd_process); |
| 1582 | return; | 1324 | return; |
| 1583 | 1325 | ||
| 1584 | case LM_CB_DROPLOCKS: | ||
| 1585 | gfs2_gl_hash_clear(sdp, NO_WAIT); | ||
| 1586 | gfs2_quota_scan(sdp); | ||
| 1587 | return; | ||
| 1588 | |||
| 1589 | default: | 1326 | default: |
| 1590 | gfs2_assert_warn(sdp, 0); | 1327 | gfs2_assert_warn(sdp, 0); |
| 1591 | return; | 1328 | return; |
| @@ -1646,6 +1383,7 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | |||
| 1646 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | 1383 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp) |
| 1647 | { | 1384 | { |
| 1648 | struct gfs2_glock *gl; | 1385 | struct gfs2_glock *gl; |
| 1386 | int done_callback = 0; | ||
| 1649 | 1387 | ||
| 1650 | spin_lock(&sdp->sd_reclaim_lock); | 1388 | spin_lock(&sdp->sd_reclaim_lock); |
| 1651 | if (list_empty(&sdp->sd_reclaim_list)) { | 1389 | if (list_empty(&sdp->sd_reclaim_list)) { |
| @@ -1660,14 +1398,16 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | |||
| 1660 | atomic_dec(&sdp->sd_reclaim_count); | 1398 | atomic_dec(&sdp->sd_reclaim_count); |
| 1661 | atomic_inc(&sdp->sd_reclaimed); | 1399 | atomic_inc(&sdp->sd_reclaimed); |
| 1662 | 1400 | ||
| 1663 | if (gfs2_glmutex_trylock(gl)) { | 1401 | spin_lock(&gl->gl_spin); |
| 1664 | if (list_empty(&gl->gl_holders) && | 1402 | if (find_first_holder(gl) == NULL && |
| 1665 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1403 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) { |
| 1666 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); | 1404 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
| 1667 | gfs2_glmutex_unlock(gl); | 1405 | done_callback = 1; |
| 1668 | } | 1406 | } |
| 1669 | 1407 | spin_unlock(&gl->gl_spin); | |
| 1670 | gfs2_glock_put(gl); | 1408 | if (!done_callback || |
| 1409 | queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
| 1410 | gfs2_glock_put(gl); | ||
| 1671 | } | 1411 | } |
| 1672 | 1412 | ||
| 1673 | /** | 1413 | /** |
| @@ -1724,18 +1464,14 @@ static void scan_glock(struct gfs2_glock *gl) | |||
| 1724 | { | 1464 | { |
| 1725 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) | 1465 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) |
| 1726 | return; | 1466 | return; |
| 1467 | if (test_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 1468 | return; | ||
| 1727 | 1469 | ||
| 1728 | if (gfs2_glmutex_trylock(gl)) { | 1470 | spin_lock(&gl->gl_spin); |
| 1729 | if (list_empty(&gl->gl_holders) && | 1471 | if (find_first_holder(gl) == NULL && |
| 1730 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1472 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
| 1731 | goto out_schedule; | 1473 | gfs2_glock_schedule_for_reclaim(gl); |
| 1732 | gfs2_glmutex_unlock(gl); | 1474 | spin_unlock(&gl->gl_spin); |
| 1733 | } | ||
| 1734 | return; | ||
| 1735 | |||
| 1736 | out_schedule: | ||
| 1737 | gfs2_glmutex_unlock(gl); | ||
| 1738 | gfs2_glock_schedule_for_reclaim(gl); | ||
| 1739 | } | 1475 | } |
| 1740 | 1476 | ||
| 1741 | /** | 1477 | /** |
| @@ -1760,12 +1496,13 @@ static void clear_glock(struct gfs2_glock *gl) | |||
| 1760 | spin_unlock(&sdp->sd_reclaim_lock); | 1496 | spin_unlock(&sdp->sd_reclaim_lock); |
| 1761 | } | 1497 | } |
| 1762 | 1498 | ||
| 1763 | if (gfs2_glmutex_trylock(gl)) { | 1499 | spin_lock(&gl->gl_spin); |
| 1764 | if (list_empty(&gl->gl_holders) && | 1500 | if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) |
| 1765 | gl->gl_state != LM_ST_UNLOCKED) | 1501 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); |
| 1766 | handle_callback(gl, LM_ST_UNLOCKED, 0, 0); | 1502 | spin_unlock(&gl->gl_spin); |
| 1767 | gfs2_glmutex_unlock(gl); | 1503 | gfs2_glock_hold(gl); |
| 1768 | } | 1504 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
| 1505 | gfs2_glock_put(gl); | ||
| 1769 | } | 1506 | } |
| 1770 | 1507 | ||
| 1771 | /** | 1508 | /** |
| @@ -1773,11 +1510,10 @@ static void clear_glock(struct gfs2_glock *gl) | |||
| 1773 | * @sdp: the filesystem | 1510 | * @sdp: the filesystem |
| 1774 | * @wait: wait until it's all gone | 1511 | * @wait: wait until it's all gone |
| 1775 | * | 1512 | * |
| 1776 | * Called when unmounting the filesystem, or when inter-node lock manager | 1513 | * Called when unmounting the filesystem. |
| 1777 | * requests DROPLOCKS because it is running out of capacity. | ||
| 1778 | */ | 1514 | */ |
| 1779 | 1515 | ||
| 1780 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | 1516 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) |
| 1781 | { | 1517 | { |
| 1782 | unsigned long t; | 1518 | unsigned long t; |
| 1783 | unsigned int x; | 1519 | unsigned int x; |
| @@ -1792,7 +1528,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
| 1792 | cont = 1; | 1528 | cont = 1; |
| 1793 | } | 1529 | } |
| 1794 | 1530 | ||
| 1795 | if (!wait || !cont) | 1531 | if (!cont) |
| 1796 | break; | 1532 | break; |
| 1797 | 1533 | ||
| 1798 | if (time_after_eq(jiffies, | 1534 | if (time_after_eq(jiffies, |
| @@ -1810,180 +1546,164 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
| 1810 | } | 1546 | } |
| 1811 | } | 1547 | } |
| 1812 | 1548 | ||
| 1813 | /* | 1549 | static const char *state2str(unsigned state) |
| 1814 | * Diagnostic routines to help debug distributed deadlock | ||
| 1815 | */ | ||
| 1816 | |||
| 1817 | static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, | ||
| 1818 | unsigned long address) | ||
| 1819 | { | 1550 | { |
| 1820 | char buffer[KSYM_SYMBOL_LEN]; | 1551 | switch(state) { |
| 1821 | 1552 | case LM_ST_UNLOCKED: | |
| 1822 | sprint_symbol(buffer, address); | 1553 | return "UN"; |
| 1823 | print_dbg(gi, fmt, buffer); | 1554 | case LM_ST_SHARED: |
| 1555 | return "SH"; | ||
| 1556 | case LM_ST_DEFERRED: | ||
| 1557 | return "DF"; | ||
| 1558 | case LM_ST_EXCLUSIVE: | ||
| 1559 | return "EX"; | ||
| 1560 | } | ||
| 1561 | return "??"; | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | ||
| 1565 | { | ||
| 1566 | char *p = buf; | ||
| 1567 | if (flags & LM_FLAG_TRY) | ||
| 1568 | *p++ = 't'; | ||
| 1569 | if (flags & LM_FLAG_TRY_1CB) | ||
| 1570 | *p++ = 'T'; | ||
| 1571 | if (flags & LM_FLAG_NOEXP) | ||
| 1572 | *p++ = 'e'; | ||
| 1573 | if (flags & LM_FLAG_ANY) | ||
| 1574 | *p++ = 'a'; | ||
| 1575 | if (flags & LM_FLAG_PRIORITY) | ||
| 1576 | *p++ = 'p'; | ||
| 1577 | if (flags & GL_ASYNC) | ||
| 1578 | *p++ = 'a'; | ||
| 1579 | if (flags & GL_EXACT) | ||
| 1580 | *p++ = 'E'; | ||
| 1581 | if (flags & GL_ATIME) | ||
| 1582 | *p++ = 'a'; | ||
| 1583 | if (flags & GL_NOCACHE) | ||
| 1584 | *p++ = 'c'; | ||
| 1585 | if (test_bit(HIF_HOLDER, &iflags)) | ||
| 1586 | *p++ = 'H'; | ||
| 1587 | if (test_bit(HIF_WAIT, &iflags)) | ||
| 1588 | *p++ = 'W'; | ||
| 1589 | if (test_bit(HIF_FIRST, &iflags)) | ||
| 1590 | *p++ = 'F'; | ||
| 1591 | *p = 0; | ||
| 1592 | return buf; | ||
| 1824 | } | 1593 | } |
| 1825 | 1594 | ||
| 1826 | /** | 1595 | /** |
| 1827 | * dump_holder - print information about a glock holder | 1596 | * dump_holder - print information about a glock holder |
| 1828 | * @str: a string naming the type of holder | 1597 | * @seq: the seq_file struct |
| 1829 | * @gh: the glock holder | 1598 | * @gh: the glock holder |
| 1830 | * | 1599 | * |
| 1831 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1600 | * Returns: 0 on success, -ENOBUFS when we run out of space |
| 1832 | */ | 1601 | */ |
| 1833 | 1602 | ||
| 1834 | static int dump_holder(struct glock_iter *gi, char *str, | 1603 | static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) |
| 1835 | struct gfs2_holder *gh) | ||
| 1836 | { | 1604 | { |
| 1837 | unsigned int x; | 1605 | struct task_struct *gh_owner = NULL; |
| 1838 | struct task_struct *gh_owner; | 1606 | char buffer[KSYM_SYMBOL_LEN]; |
| 1607 | char flags_buf[32]; | ||
| 1839 | 1608 | ||
| 1840 | print_dbg(gi, " %s\n", str); | 1609 | sprint_symbol(buffer, gh->gh_ip); |
| 1841 | if (gh->gh_owner_pid) { | 1610 | if (gh->gh_owner_pid) |
| 1842 | print_dbg(gi, " owner = %ld ", | ||
| 1843 | (long)pid_nr(gh->gh_owner_pid)); | ||
| 1844 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); | 1611 | gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); |
| 1845 | if (gh_owner) | 1612 | gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", |
| 1846 | print_dbg(gi, "(%s)\n", gh_owner->comm); | 1613 | state2str(gh->gh_state), |
| 1847 | else | 1614 | hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), |
| 1848 | print_dbg(gi, "(ended)\n"); | 1615 | gh->gh_error, |
| 1849 | } else | 1616 | gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, |
| 1850 | print_dbg(gi, " owner = -1\n"); | 1617 | gh_owner ? gh_owner->comm : "(ended)", buffer); |
| 1851 | print_dbg(gi, " gh_state = %u\n", gh->gh_state); | ||
| 1852 | print_dbg(gi, " gh_flags ="); | ||
| 1853 | for (x = 0; x < 32; x++) | ||
| 1854 | if (gh->gh_flags & (1 << x)) | ||
| 1855 | print_dbg(gi, " %u", x); | ||
| 1856 | print_dbg(gi, " \n"); | ||
| 1857 | print_dbg(gi, " error = %d\n", gh->gh_error); | ||
| 1858 | print_dbg(gi, " gh_iflags ="); | ||
| 1859 | for (x = 0; x < 32; x++) | ||
| 1860 | if (test_bit(x, &gh->gh_iflags)) | ||
| 1861 | print_dbg(gi, " %u", x); | ||
| 1862 | print_dbg(gi, " \n"); | ||
| 1863 | gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); | ||
| 1864 | |||
| 1865 | return 0; | 1618 | return 0; |
| 1866 | } | 1619 | } |
| 1867 | 1620 | ||
| 1868 | /** | 1621 | static const char *gflags2str(char *buf, const unsigned long *gflags) |
| 1869 | * dump_inode - print information about an inode | 1622 | { |
| 1870 | * @ip: the inode | 1623 | char *p = buf; |
| 1871 | * | 1624 | if (test_bit(GLF_LOCK, gflags)) |
| 1872 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1625 | *p++ = 'l'; |
| 1873 | */ | 1626 | if (test_bit(GLF_STICKY, gflags)) |
| 1874 | 1627 | *p++ = 's'; | |
| 1875 | static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) | 1628 | if (test_bit(GLF_DEMOTE, gflags)) |
| 1876 | { | 1629 | *p++ = 'D'; |
| 1877 | unsigned int x; | 1630 | if (test_bit(GLF_PENDING_DEMOTE, gflags)) |
| 1878 | 1631 | *p++ = 'd'; | |
| 1879 | print_dbg(gi, " Inode:\n"); | 1632 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) |
| 1880 | print_dbg(gi, " num = %llu/%llu\n", | 1633 | *p++ = 'p'; |
| 1881 | (unsigned long long)ip->i_no_formal_ino, | 1634 | if (test_bit(GLF_DIRTY, gflags)) |
| 1882 | (unsigned long long)ip->i_no_addr); | 1635 | *p++ = 'y'; |
| 1883 | print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); | 1636 | if (test_bit(GLF_LFLUSH, gflags)) |
| 1884 | print_dbg(gi, " i_flags ="); | 1637 | *p++ = 'f'; |
| 1885 | for (x = 0; x < 32; x++) | 1638 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) |
| 1886 | if (test_bit(x, &ip->i_flags)) | 1639 | *p++ = 'i'; |
| 1887 | print_dbg(gi, " %u", x); | 1640 | if (test_bit(GLF_REPLY_PENDING, gflags)) |
| 1888 | print_dbg(gi, " \n"); | 1641 | *p++ = 'r'; |
| 1889 | return 0; | 1642 | *p = 0; |
| 1643 | return buf; | ||
| 1890 | } | 1644 | } |
| 1891 | 1645 | ||
| 1892 | /** | 1646 | /** |
| 1893 | * dump_glock - print information about a glock | 1647 | * __dump_glock - print information about a glock |
| 1648 | * @seq: The seq_file struct | ||
| 1894 | * @gl: the glock | 1649 | * @gl: the glock |
| 1895 | * @count: where we are in the buffer | 1650 | * |
| 1651 | * The file format is as follows: | ||
| 1652 | * One line per object, capital letters are used to indicate objects | ||
| 1653 | * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented, | ||
| 1654 | * other objects are indented by a single space and follow the glock to | ||
| 1655 | * which they are related. Fields are indicated by lower case letters | ||
| 1656 | * followed by a colon and the field value, except for strings which are in | ||
| 1657 | * [] so that its possible to see if they are composed of spaces for | ||
| 1658 | * example. The field's are n = number (id of the object), f = flags, | ||
| 1659 | * t = type, s = state, r = refcount, e = error, p = pid. | ||
| 1896 | * | 1660 | * |
| 1897 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1661 | * Returns: 0 on success, -ENOBUFS when we run out of space |
| 1898 | */ | 1662 | */ |
| 1899 | 1663 | ||
| 1900 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | 1664 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) |
| 1901 | { | 1665 | { |
| 1902 | struct gfs2_holder *gh; | 1666 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
| 1903 | unsigned int x; | 1667 | unsigned long long dtime; |
| 1904 | int error = -ENOBUFS; | 1668 | const struct gfs2_holder *gh; |
| 1905 | struct task_struct *gl_owner; | 1669 | char gflags_buf[32]; |
| 1670 | int error = 0; | ||
| 1906 | 1671 | ||
| 1907 | spin_lock(&gl->gl_spin); | 1672 | dtime = jiffies - gl->gl_demote_time; |
| 1673 | dtime *= 1000000/HZ; /* demote time in uSec */ | ||
| 1674 | if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 1675 | dtime = 0; | ||
| 1676 | gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n", | ||
| 1677 | state2str(gl->gl_state), | ||
| 1678 | gl->gl_name.ln_type, | ||
| 1679 | (unsigned long long)gl->gl_name.ln_number, | ||
| 1680 | gflags2str(gflags_buf, &gl->gl_flags), | ||
| 1681 | state2str(gl->gl_target), | ||
| 1682 | state2str(gl->gl_demote_state), dtime, | ||
| 1683 | atomic_read(&gl->gl_lvb_count), | ||
| 1684 | atomic_read(&gl->gl_ail_count), | ||
| 1685 | atomic_read(&gl->gl_ref)); | ||
| 1908 | 1686 | ||
| 1909 | print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, | ||
| 1910 | (unsigned long long)gl->gl_name.ln_number); | ||
| 1911 | print_dbg(gi, " gl_flags ="); | ||
| 1912 | for (x = 0; x < 32; x++) { | ||
| 1913 | if (test_bit(x, &gl->gl_flags)) | ||
| 1914 | print_dbg(gi, " %u", x); | ||
| 1915 | } | ||
| 1916 | if (!test_bit(GLF_LOCK, &gl->gl_flags)) | ||
| 1917 | print_dbg(gi, " (unlocked)"); | ||
| 1918 | print_dbg(gi, " \n"); | ||
| 1919 | print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); | ||
| 1920 | print_dbg(gi, " gl_state = %u\n", gl->gl_state); | ||
| 1921 | if (gl->gl_owner_pid) { | ||
| 1922 | gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); | ||
| 1923 | if (gl_owner) | ||
| 1924 | print_dbg(gi, " gl_owner = pid %d (%s)\n", | ||
| 1925 | pid_nr(gl->gl_owner_pid), gl_owner->comm); | ||
| 1926 | else | ||
| 1927 | print_dbg(gi, " gl_owner = %d (ended)\n", | ||
| 1928 | pid_nr(gl->gl_owner_pid)); | ||
| 1929 | } else | ||
| 1930 | print_dbg(gi, " gl_owner = -1\n"); | ||
| 1931 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); | ||
| 1932 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | ||
| 1933 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | ||
| 1934 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | ||
| 1935 | print_dbg(gi, " reclaim = %s\n", | ||
| 1936 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | ||
| 1937 | if (gl->gl_aspace) | ||
| 1938 | print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, | ||
| 1939 | gl->gl_aspace->i_mapping->nrpages); | ||
| 1940 | else | ||
| 1941 | print_dbg(gi, " aspace = no\n"); | ||
| 1942 | print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); | ||
| 1943 | if (gl->gl_req_gh) { | ||
| 1944 | error = dump_holder(gi, "Request", gl->gl_req_gh); | ||
| 1945 | if (error) | ||
| 1946 | goto out; | ||
| 1947 | } | ||
| 1948 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 1687 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { |
| 1949 | error = dump_holder(gi, "Holder", gh); | 1688 | error = dump_holder(seq, gh); |
| 1950 | if (error) | 1689 | if (error) |
| 1951 | goto out; | 1690 | goto out; |
| 1952 | } | 1691 | } |
| 1953 | list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { | 1692 | if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) |
| 1954 | error = dump_holder(gi, "Waiter1", gh); | 1693 | error = glops->go_dump(seq, gl); |
| 1955 | if (error) | ||
| 1956 | goto out; | ||
| 1957 | } | ||
| 1958 | list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { | ||
| 1959 | error = dump_holder(gi, "Waiter3", gh); | ||
| 1960 | if (error) | ||
| 1961 | goto out; | ||
| 1962 | } | ||
| 1963 | if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | ||
| 1964 | print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", | ||
| 1965 | gl->gl_demote_state, (unsigned long long) | ||
| 1966 | (jiffies - gl->gl_demote_time)*(1000000/HZ)); | ||
| 1967 | } | ||
| 1968 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { | ||
| 1969 | if (!test_bit(GLF_LOCK, &gl->gl_flags) && | ||
| 1970 | list_empty(&gl->gl_holders)) { | ||
| 1971 | error = dump_inode(gi, gl->gl_object); | ||
| 1972 | if (error) | ||
| 1973 | goto out; | ||
| 1974 | } else { | ||
| 1975 | error = -ENOBUFS; | ||
| 1976 | print_dbg(gi, " Inode: busy\n"); | ||
| 1977 | } | ||
| 1978 | } | ||
| 1979 | |||
| 1980 | error = 0; | ||
| 1981 | |||
| 1982 | out: | 1694 | out: |
| 1983 | spin_unlock(&gl->gl_spin); | ||
| 1984 | return error; | 1695 | return error; |
| 1985 | } | 1696 | } |
| 1986 | 1697 | ||
| 1698 | static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) | ||
| 1699 | { | ||
| 1700 | int ret; | ||
| 1701 | spin_lock(&gl->gl_spin); | ||
| 1702 | ret = __dump_glock(seq, gl); | ||
| 1703 | spin_unlock(&gl->gl_spin); | ||
| 1704 | return ret; | ||
| 1705 | } | ||
| 1706 | |||
| 1987 | /** | 1707 | /** |
| 1988 | * gfs2_dump_lockstate - print out the current lockstate | 1708 | * gfs2_dump_lockstate - print out the current lockstate |
| 1989 | * @sdp: the filesystem | 1709 | * @sdp: the filesystem |
| @@ -2086,7 +1806,7 @@ void gfs2_glock_exit(void) | |||
| 2086 | module_param(scand_secs, uint, S_IRUGO|S_IWUSR); | 1806 | module_param(scand_secs, uint, S_IRUGO|S_IWUSR); |
| 2087 | MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); | 1807 | MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); |
| 2088 | 1808 | ||
| 2089 | static int gfs2_glock_iter_next(struct glock_iter *gi) | 1809 | static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) |
| 2090 | { | 1810 | { |
| 2091 | struct gfs2_glock *gl; | 1811 | struct gfs2_glock *gl; |
| 2092 | 1812 | ||
| @@ -2104,7 +1824,7 @@ restart: | |||
| 2104 | gfs2_glock_put(gl); | 1824 | gfs2_glock_put(gl); |
| 2105 | if (gl && gi->gl == NULL) | 1825 | if (gl && gi->gl == NULL) |
| 2106 | gi->hash++; | 1826 | gi->hash++; |
| 2107 | while(gi->gl == NULL) { | 1827 | while (gi->gl == NULL) { |
| 2108 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
| 2109 | return 1; | 1829 | return 1; |
| 2110 | read_lock(gl_lock_addr(gi->hash)); | 1830 | read_lock(gl_lock_addr(gi->hash)); |
| @@ -2122,58 +1842,34 @@ restart: | |||
| 2122 | return 0; | 1842 | return 0; |
| 2123 | } | 1843 | } |
| 2124 | 1844 | ||
| 2125 | static void gfs2_glock_iter_free(struct glock_iter *gi) | 1845 | static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) |
| 2126 | { | 1846 | { |
| 2127 | if (gi->gl) | 1847 | if (gi->gl) |
| 2128 | gfs2_glock_put(gi->gl); | 1848 | gfs2_glock_put(gi->gl); |
| 2129 | kfree(gi); | ||
| 2130 | } | ||
| 2131 | |||
| 2132 | static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) | ||
| 2133 | { | ||
| 2134 | struct glock_iter *gi; | ||
| 2135 | |||
| 2136 | gi = kmalloc(sizeof (*gi), GFP_KERNEL); | ||
| 2137 | if (!gi) | ||
| 2138 | return NULL; | ||
| 2139 | |||
| 2140 | gi->sdp = sdp; | ||
| 2141 | gi->hash = 0; | ||
| 2142 | gi->seq = NULL; | ||
| 2143 | gi->gl = NULL; | 1849 | gi->gl = NULL; |
| 2144 | memset(gi->string, 0, sizeof(gi->string)); | ||
| 2145 | |||
| 2146 | if (gfs2_glock_iter_next(gi)) { | ||
| 2147 | gfs2_glock_iter_free(gi); | ||
| 2148 | return NULL; | ||
| 2149 | } | ||
| 2150 | |||
| 2151 | return gi; | ||
| 2152 | } | 1850 | } |
| 2153 | 1851 | ||
| 2154 | static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) | 1852 | static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) |
| 2155 | { | 1853 | { |
| 2156 | struct glock_iter *gi; | 1854 | struct gfs2_glock_iter *gi = seq->private; |
| 2157 | loff_t n = *pos; | 1855 | loff_t n = *pos; |
| 2158 | 1856 | ||
| 2159 | gi = gfs2_glock_iter_init(file->private); | 1857 | gi->hash = 0; |
| 2160 | if (!gi) | ||
| 2161 | return NULL; | ||
| 2162 | 1858 | ||
| 2163 | while(n--) { | 1859 | do { |
| 2164 | if (gfs2_glock_iter_next(gi)) { | 1860 | if (gfs2_glock_iter_next(gi)) { |
| 2165 | gfs2_glock_iter_free(gi); | 1861 | gfs2_glock_iter_free(gi); |
| 2166 | return NULL; | 1862 | return NULL; |
| 2167 | } | 1863 | } |
| 2168 | } | 1864 | } while (n--); |
| 2169 | 1865 | ||
| 2170 | return gi; | 1866 | return gi->gl; |
| 2171 | } | 1867 | } |
| 2172 | 1868 | ||
| 2173 | static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, | 1869 | static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, |
| 2174 | loff_t *pos) | 1870 | loff_t *pos) |
| 2175 | { | 1871 | { |
| 2176 | struct glock_iter *gi = iter_ptr; | 1872 | struct gfs2_glock_iter *gi = seq->private; |
| 2177 | 1873 | ||
| 2178 | (*pos)++; | 1874 | (*pos)++; |
| 2179 | 1875 | ||
| @@ -2182,24 +1878,18 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, | |||
| 2182 | return NULL; | 1878 | return NULL; |
| 2183 | } | 1879 | } |
| 2184 | 1880 | ||
| 2185 | return gi; | 1881 | return gi->gl; |
| 2186 | } | 1882 | } |
| 2187 | 1883 | ||
| 2188 | static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) | 1884 | static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) |
| 2189 | { | 1885 | { |
| 2190 | struct glock_iter *gi = iter_ptr; | 1886 | struct gfs2_glock_iter *gi = seq->private; |
| 2191 | if (gi) | 1887 | gfs2_glock_iter_free(gi); |
| 2192 | gfs2_glock_iter_free(gi); | ||
| 2193 | } | 1888 | } |
| 2194 | 1889 | ||
| 2195 | static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) | 1890 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) |
| 2196 | { | 1891 | { |
| 2197 | struct glock_iter *gi = iter_ptr; | 1892 | return dump_glock(seq, iter_ptr); |
| 2198 | |||
| 2199 | gi->seq = file; | ||
| 2200 | dump_glock(gi, gi->gl); | ||
| 2201 | |||
| 2202 | return 0; | ||
| 2203 | } | 1893 | } |
| 2204 | 1894 | ||
| 2205 | static const struct seq_operations gfs2_glock_seq_ops = { | 1895 | static const struct seq_operations gfs2_glock_seq_ops = { |
| @@ -2211,17 +1901,14 @@ static const struct seq_operations gfs2_glock_seq_ops = { | |||
| 2211 | 1901 | ||
| 2212 | static int gfs2_debugfs_open(struct inode *inode, struct file *file) | 1902 | static int gfs2_debugfs_open(struct inode *inode, struct file *file) |
| 2213 | { | 1903 | { |
| 2214 | struct seq_file *seq; | 1904 | int ret = seq_open_private(file, &gfs2_glock_seq_ops, |
| 2215 | int ret; | 1905 | sizeof(struct gfs2_glock_iter)); |
| 2216 | 1906 | if (ret == 0) { | |
| 2217 | ret = seq_open(file, &gfs2_glock_seq_ops); | 1907 | struct seq_file *seq = file->private_data; |
| 2218 | if (ret) | 1908 | struct gfs2_glock_iter *gi = seq->private; |
| 2219 | return ret; | 1909 | gi->sdp = inode->i_private; |
| 2220 | 1910 | } | |
| 2221 | seq = file->private_data; | 1911 | return ret; |
| 2222 | seq->private = inode->i_private; | ||
| 2223 | |||
| 2224 | return 0; | ||
| 2225 | } | 1912 | } |
| 2226 | 1913 | ||
| 2227 | static const struct file_operations gfs2_debug_fops = { | 1914 | static const struct file_operations gfs2_debug_fops = { |
| @@ -2229,7 +1916,7 @@ static const struct file_operations gfs2_debug_fops = { | |||
| 2229 | .open = gfs2_debugfs_open, | 1916 | .open = gfs2_debugfs_open, |
| 2230 | .read = seq_read, | 1917 | .read = seq_read, |
| 2231 | .llseek = seq_lseek, | 1918 | .llseek = seq_lseek, |
| 2232 | .release = seq_release | 1919 | .release = seq_release_private, |
| 2233 | }; | 1920 | }; |
| 2234 | 1921 | ||
| 2235 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) | 1922 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index cdad3e6f8150..971d92af70fc 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -26,11 +26,8 @@ | |||
| 26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
| 27 | #define GL_ATIME 0x00000200 | 27 | #define GL_ATIME 0x00000200 |
| 28 | #define GL_NOCACHE 0x00000400 | 28 | #define GL_NOCACHE 0x00000400 |
| 29 | #define GL_FLOCK 0x00000800 | ||
| 30 | #define GL_NOCANCEL 0x00001000 | ||
| 31 | 29 | ||
| 32 | #define GLR_TRYFAILED 13 | 30 | #define GLR_TRYFAILED 13 |
| 33 | #define GLR_CANCELED 14 | ||
| 34 | 31 | ||
| 35 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | 32 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) |
| 36 | { | 33 | { |
| @@ -41,6 +38,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * | |||
| 41 | spin_lock(&gl->gl_spin); | 38 | spin_lock(&gl->gl_spin); |
| 42 | pid = task_pid(current); | 39 | pid = task_pid(current); |
| 43 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 40 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { |
| 41 | if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) | ||
| 42 | break; | ||
| 44 | if (gh->gh_owner_pid == pid) | 43 | if (gh->gh_owner_pid == pid) |
| 45 | goto out; | 44 | goto out; |
| 46 | } | 45 | } |
| @@ -70,7 +69,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) | |||
| 70 | { | 69 | { |
| 71 | int ret; | 70 | int ret; |
| 72 | spin_lock(&gl->gl_spin); | 71 | spin_lock(&gl->gl_spin); |
| 73 | ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); | 72 | ret = test_bit(GLF_DEMOTE, &gl->gl_flags); |
| 74 | spin_unlock(&gl->gl_spin); | 73 | spin_unlock(&gl->gl_spin); |
| 75 | return ret; | 74 | return ret; |
| 76 | } | 75 | } |
| @@ -98,6 +97,7 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | |||
| 98 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 97 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
| 99 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 98 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
| 100 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | 99 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); |
| 100 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | ||
| 101 | 101 | ||
| 102 | /** | 102 | /** |
| 103 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 103 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock |
| @@ -130,10 +130,9 @@ int gfs2_lvb_hold(struct gfs2_glock *gl); | |||
| 130 | void gfs2_lvb_unhold(struct gfs2_glock *gl); | 130 | void gfs2_lvb_unhold(struct gfs2_glock *gl); |
| 131 | 131 | ||
| 132 | void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); | 132 | void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); |
| 133 | |||
| 134 | void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); | 133 | void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); |
| 135 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp); | 134 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp); |
| 136 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); | 135 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); |
| 137 | 136 | ||
| 138 | int __init gfs2_glock_init(void); | 137 | int __init gfs2_glock_init(void); |
| 139 | void gfs2_glock_exit(void); | 138 | void gfs2_glock_exit(void); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 07d84d16cda4..c6c318c2a0f6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
| 14 | #include <linux/gfs2_ondisk.h> | 14 | #include <linux/gfs2_ondisk.h> |
| 15 | #include <linux/lm_interface.h> | 15 | #include <linux/lm_interface.h> |
| 16 | #include <linux/bio.h> | ||
| 16 | 17 | ||
| 17 | #include "gfs2.h" | 18 | #include "gfs2.h" |
| 18 | #include "incore.h" | 19 | #include "incore.h" |
| @@ -172,26 +173,6 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
| 172 | } | 173 | } |
| 173 | 174 | ||
| 174 | /** | 175 | /** |
| 175 | * inode_go_xmote_bh - After promoting/demoting a glock | ||
| 176 | * @gl: the glock | ||
| 177 | * | ||
| 178 | */ | ||
| 179 | |||
| 180 | static void inode_go_xmote_bh(struct gfs2_glock *gl) | ||
| 181 | { | ||
| 182 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 183 | struct buffer_head *bh; | ||
| 184 | int error; | ||
| 185 | |||
| 186 | if (gl->gl_state != LM_ST_UNLOCKED && | ||
| 187 | (!gh || !(gh->gh_flags & GL_SKIP))) { | ||
| 188 | error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh); | ||
| 189 | if (!error) | ||
| 190 | brelse(bh); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | /** | ||
| 195 | * inode_go_inval - prepare a inode glock to be released | 176 | * inode_go_inval - prepare a inode glock to be released |
| 196 | * @gl: the glock | 177 | * @gl: the glock |
| 197 | * @flags: | 178 | * @flags: |
| @@ -267,6 +248,26 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
| 267 | } | 248 | } |
| 268 | 249 | ||
| 269 | /** | 250 | /** |
| 251 | * inode_go_dump - print information about an inode | ||
| 252 | * @seq: The iterator | ||
| 253 | * @ip: the inode | ||
| 254 | * | ||
| 255 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
| 256 | */ | ||
| 257 | |||
| 258 | static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | ||
| 259 | { | ||
| 260 | const struct gfs2_inode *ip = gl->gl_object; | ||
| 261 | if (ip == NULL) | ||
| 262 | return 0; | ||
| 263 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n", | ||
| 264 | (unsigned long long)ip->i_no_formal_ino, | ||
| 265 | (unsigned long long)ip->i_no_addr, | ||
| 266 | IF2DT(ip->i_inode.i_mode), ip->i_flags); | ||
| 267 | return 0; | ||
| 268 | } | ||
| 269 | |||
| 270 | /** | ||
| 270 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | 271 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock |
| 271 | * @gl: the glock | 272 | * @gl: the glock |
| 272 | * | 273 | * |
| @@ -306,6 +307,22 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
| 306 | } | 307 | } |
| 307 | 308 | ||
| 308 | /** | 309 | /** |
| 310 | * rgrp_go_dump - print out an rgrp | ||
| 311 | * @seq: The iterator | ||
| 312 | * @gl: The glock in question | ||
| 313 | * | ||
| 314 | */ | ||
| 315 | |||
| 316 | static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | ||
| 317 | { | ||
| 318 | const struct gfs2_rgrpd *rgd = gl->gl_object; | ||
| 319 | if (rgd == NULL) | ||
| 320 | return 0; | ||
| 321 | gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr); | ||
| 322 | return 0; | ||
| 323 | } | ||
| 324 | |||
| 325 | /** | ||
| 309 | * trans_go_sync - promote/demote the transaction glock | 326 | * trans_go_sync - promote/demote the transaction glock |
| 310 | * @gl: the glock | 327 | * @gl: the glock |
| 311 | * @state: the requested state | 328 | * @state: the requested state |
| @@ -330,7 +347,7 @@ static void trans_go_sync(struct gfs2_glock *gl) | |||
| 330 | * | 347 | * |
| 331 | */ | 348 | */ |
| 332 | 349 | ||
| 333 | static void trans_go_xmote_bh(struct gfs2_glock *gl) | 350 | static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) |
| 334 | { | 351 | { |
| 335 | struct gfs2_sbd *sdp = gl->gl_sbd; | 352 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 336 | struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); | 353 | struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); |
| @@ -338,8 +355,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
| 338 | struct gfs2_log_header_host head; | 355 | struct gfs2_log_header_host head; |
| 339 | int error; | 356 | int error; |
| 340 | 357 | ||
| 341 | if (gl->gl_state != LM_ST_UNLOCKED && | 358 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
| 342 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
| 343 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 359 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
| 344 | 360 | ||
| 345 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 361 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
| @@ -354,6 +370,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
| 354 | gfs2_log_pointers_init(sdp, head.lh_blkno); | 370 | gfs2_log_pointers_init(sdp, head.lh_blkno); |
| 355 | } | 371 | } |
| 356 | } | 372 | } |
| 373 | return 0; | ||
| 357 | } | 374 | } |
| 358 | 375 | ||
| 359 | /** | 376 | /** |
| @@ -375,12 +392,12 @@ const struct gfs2_glock_operations gfs2_meta_glops = { | |||
| 375 | 392 | ||
| 376 | const struct gfs2_glock_operations gfs2_inode_glops = { | 393 | const struct gfs2_glock_operations gfs2_inode_glops = { |
| 377 | .go_xmote_th = inode_go_sync, | 394 | .go_xmote_th = inode_go_sync, |
| 378 | .go_xmote_bh = inode_go_xmote_bh, | ||
| 379 | .go_inval = inode_go_inval, | 395 | .go_inval = inode_go_inval, |
| 380 | .go_demote_ok = inode_go_demote_ok, | 396 | .go_demote_ok = inode_go_demote_ok, |
| 381 | .go_lock = inode_go_lock, | 397 | .go_lock = inode_go_lock, |
| 398 | .go_dump = inode_go_dump, | ||
| 382 | .go_type = LM_TYPE_INODE, | 399 | .go_type = LM_TYPE_INODE, |
| 383 | .go_min_hold_time = HZ / 10, | 400 | .go_min_hold_time = HZ / 5, |
| 384 | }; | 401 | }; |
| 385 | 402 | ||
| 386 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 403 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
| @@ -389,8 +406,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
| 389 | .go_demote_ok = rgrp_go_demote_ok, | 406 | .go_demote_ok = rgrp_go_demote_ok, |
| 390 | .go_lock = rgrp_go_lock, | 407 | .go_lock = rgrp_go_lock, |
| 391 | .go_unlock = rgrp_go_unlock, | 408 | .go_unlock = rgrp_go_unlock, |
| 409 | .go_dump = rgrp_go_dump, | ||
| 392 | .go_type = LM_TYPE_RGRP, | 410 | .go_type = LM_TYPE_RGRP, |
| 393 | .go_min_hold_time = HZ / 10, | 411 | .go_min_hold_time = HZ / 5, |
| 394 | }; | 412 | }; |
| 395 | 413 | ||
| 396 | const struct gfs2_glock_operations gfs2_trans_glops = { | 414 | const struct gfs2_glock_operations gfs2_trans_glops = { |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index eabe5eac41da..448697a5c462 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -77,7 +77,6 @@ struct gfs2_rgrp_host { | |||
| 77 | struct gfs2_rgrpd { | 77 | struct gfs2_rgrpd { |
| 78 | struct list_head rd_list; /* Link with superblock */ | 78 | struct list_head rd_list; /* Link with superblock */ |
| 79 | struct list_head rd_list_mru; | 79 | struct list_head rd_list_mru; |
| 80 | struct list_head rd_recent; /* Recently used rgrps */ | ||
| 81 | struct gfs2_glock *rd_gl; /* Glock for this rgrp */ | 80 | struct gfs2_glock *rd_gl; /* Glock for this rgrp */ |
| 82 | u64 rd_addr; /* grp block disk address */ | 81 | u64 rd_addr; /* grp block disk address */ |
| 83 | u64 rd_data0; /* first data location */ | 82 | u64 rd_data0; /* first data location */ |
| @@ -128,20 +127,20 @@ struct gfs2_bufdata { | |||
| 128 | 127 | ||
| 129 | struct gfs2_glock_operations { | 128 | struct gfs2_glock_operations { |
| 130 | void (*go_xmote_th) (struct gfs2_glock *gl); | 129 | void (*go_xmote_th) (struct gfs2_glock *gl); |
| 131 | void (*go_xmote_bh) (struct gfs2_glock *gl); | 130 | int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); |
| 132 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 131 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
| 133 | int (*go_demote_ok) (struct gfs2_glock *gl); | 132 | int (*go_demote_ok) (struct gfs2_glock *gl); |
| 134 | int (*go_lock) (struct gfs2_holder *gh); | 133 | int (*go_lock) (struct gfs2_holder *gh); |
| 135 | void (*go_unlock) (struct gfs2_holder *gh); | 134 | void (*go_unlock) (struct gfs2_holder *gh); |
| 135 | int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); | ||
| 136 | const int go_type; | 136 | const int go_type; |
| 137 | const unsigned long go_min_hold_time; | 137 | const unsigned long go_min_hold_time; |
| 138 | }; | 138 | }; |
| 139 | 139 | ||
| 140 | enum { | 140 | enum { |
| 141 | /* States */ | 141 | /* States */ |
| 142 | HIF_HOLDER = 6, | 142 | HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ |
| 143 | HIF_FIRST = 7, | 143 | HIF_FIRST = 7, |
| 144 | HIF_ABORTED = 9, | ||
| 145 | HIF_WAIT = 10, | 144 | HIF_WAIT = 10, |
| 146 | }; | 145 | }; |
| 147 | 146 | ||
| @@ -154,20 +153,20 @@ struct gfs2_holder { | |||
| 154 | unsigned gh_flags; | 153 | unsigned gh_flags; |
| 155 | 154 | ||
| 156 | int gh_error; | 155 | int gh_error; |
| 157 | unsigned long gh_iflags; | 156 | unsigned long gh_iflags; /* HIF_... */ |
| 158 | unsigned long gh_ip; | 157 | unsigned long gh_ip; |
| 159 | }; | 158 | }; |
| 160 | 159 | ||
| 161 | enum { | 160 | enum { |
| 162 | GLF_LOCK = 1, | 161 | GLF_LOCK = 1, |
| 163 | GLF_STICKY = 2, | 162 | GLF_STICKY = 2, |
| 164 | GLF_DEMOTE = 3, | 163 | GLF_DEMOTE = 3, |
| 165 | GLF_PENDING_DEMOTE = 4, | 164 | GLF_PENDING_DEMOTE = 4, |
| 166 | GLF_DIRTY = 5, | 165 | GLF_DEMOTE_IN_PROGRESS = 5, |
| 167 | GLF_DEMOTE_IN_PROGRESS = 6, | 166 | GLF_DIRTY = 6, |
| 168 | GLF_LFLUSH = 7, | 167 | GLF_LFLUSH = 7, |
| 169 | GLF_WAITERS2 = 8, | 168 | GLF_INVALIDATE_IN_PROGRESS = 8, |
| 170 | GLF_CONV_DEADLK = 9, | 169 | GLF_REPLY_PENDING = 9, |
| 171 | }; | 170 | }; |
| 172 | 171 | ||
| 173 | struct gfs2_glock { | 172 | struct gfs2_glock { |
| @@ -179,19 +178,14 @@ struct gfs2_glock { | |||
| 179 | spinlock_t gl_spin; | 178 | spinlock_t gl_spin; |
| 180 | 179 | ||
| 181 | unsigned int gl_state; | 180 | unsigned int gl_state; |
| 181 | unsigned int gl_target; | ||
| 182 | unsigned int gl_reply; | ||
| 182 | unsigned int gl_hash; | 183 | unsigned int gl_hash; |
| 183 | unsigned int gl_demote_state; /* state requested by remote node */ | 184 | unsigned int gl_demote_state; /* state requested by remote node */ |
| 184 | unsigned long gl_demote_time; /* time of first demote request */ | 185 | unsigned long gl_demote_time; /* time of first demote request */ |
| 185 | struct pid *gl_owner_pid; | ||
| 186 | unsigned long gl_ip; | ||
| 187 | struct list_head gl_holders; | 186 | struct list_head gl_holders; |
| 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ | ||
| 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | ||
| 190 | 187 | ||
| 191 | const struct gfs2_glock_operations *gl_ops; | 188 | const struct gfs2_glock_operations *gl_ops; |
| 192 | |||
| 193 | struct gfs2_holder *gl_req_gh; | ||
| 194 | |||
| 195 | void *gl_lock; | 189 | void *gl_lock; |
| 196 | char *gl_lvb; | 190 | char *gl_lvb; |
| 197 | atomic_t gl_lvb_count; | 191 | atomic_t gl_lvb_count; |
| @@ -427,7 +421,6 @@ struct gfs2_tune { | |||
| 427 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
| 428 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | 422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ |
| 429 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
| 430 | unsigned int gt_new_files_directio; | ||
| 431 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
| 432 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
| 433 | unsigned int gt_complain_secs; | 426 | unsigned int gt_complain_secs; |
| @@ -534,7 +527,6 @@ struct gfs2_sbd { | |||
| 534 | struct mutex sd_rindex_mutex; | 527 | struct mutex sd_rindex_mutex; |
| 535 | struct list_head sd_rindex_list; | 528 | struct list_head sd_rindex_list; |
| 536 | struct list_head sd_rindex_mru_list; | 529 | struct list_head sd_rindex_mru_list; |
| 537 | struct list_head sd_rindex_recent_list; | ||
| 538 | struct gfs2_rgrpd *sd_rindex_forward; | 530 | struct gfs2_rgrpd *sd_rindex_forward; |
| 539 | unsigned int sd_rgrps; | 531 | unsigned int sd_rgrps; |
| 540 | 532 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 09453d057e41..6da0ab355b8a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -504,7 +504,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
| 504 | } | 504 | } |
| 505 | 505 | ||
| 506 | if (!is_root) { | 506 | if (!is_root) { |
| 507 | error = permission(dir, MAY_EXEC, NULL); | 507 | error = gfs2_permission(dir, MAY_EXEC); |
| 508 | if (error) | 508 | if (error) |
| 509 | goto out; | 509 | goto out; |
| 510 | } | 510 | } |
| @@ -667,7 +667,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
| 667 | { | 667 | { |
| 668 | int error; | 668 | int error; |
| 669 | 669 | ||
| 670 | error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); | 670 | error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); |
| 671 | if (error) | 671 | if (error) |
| 672 | return error; | 672 | return error; |
| 673 | 673 | ||
| @@ -789,13 +789,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
| 789 | if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || | 789 | if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || |
| 790 | gfs2_tune_get(sdp, gt_new_files_jdata)) | 790 | gfs2_tune_get(sdp, gt_new_files_jdata)) |
| 791 | di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); | 791 | di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); |
| 792 | if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) || | ||
| 793 | gfs2_tune_get(sdp, gt_new_files_directio)) | ||
| 794 | di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO); | ||
| 795 | } else if (S_ISDIR(mode)) { | 792 | } else if (S_ISDIR(mode)) { |
| 796 | di->di_flags |= cpu_to_be32(dip->i_di.di_flags & | 793 | di->di_flags |= cpu_to_be32(dip->i_di.di_flags & |
| 797 | GFS2_DIF_INHERIT_DIRECTIO); | ||
| 798 | di->di_flags |= cpu_to_be32(dip->i_di.di_flags & | ||
| 799 | GFS2_DIF_INHERIT_JDATA); | 794 | GFS2_DIF_INHERIT_JDATA); |
| 800 | } | 795 | } |
| 801 | 796 | ||
| @@ -1134,7 +1129,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
| 1134 | if (IS_APPEND(&dip->i_inode)) | 1129 | if (IS_APPEND(&dip->i_inode)) |
| 1135 | return -EPERM; | 1130 | return -EPERM; |
| 1136 | 1131 | ||
| 1137 | error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); | 1132 | error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); |
| 1138 | if (error) | 1133 | if (error) |
| 1139 | return error; | 1134 | return error; |
| 1140 | 1135 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 580da454b38f..6074c2506f75 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -72,7 +72,6 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | 74 | ||
| 75 | void gfs2_inode_attr_in(struct gfs2_inode *ip); | ||
| 76 | void gfs2_set_iop(struct inode *inode); | 75 | void gfs2_set_iop(struct inode *inode); |
| 77 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 76 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
| 78 | u64 no_addr, u64 no_formal_ino, | 77 | u64 no_addr, u64 no_formal_ino, |
| @@ -91,6 +90,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
| 91 | struct gfs2_inode *ip); | 90 | struct gfs2_inode *ip); |
| 92 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
| 93 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
| 93 | int gfs2_permission(struct inode *inode, int mask); | ||
| 94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | 94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); |
| 95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
| 96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | 96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); |
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c index 663fee728783..523243a13a21 100644 --- a/fs/gfs2/locking.c +++ b/fs/gfs2/locking.c | |||
| @@ -23,12 +23,54 @@ struct lmh_wrapper { | |||
| 23 | const struct lm_lockops *lw_ops; | 23 | const struct lm_lockops *lw_ops; |
| 24 | }; | 24 | }; |
| 25 | 25 | ||
| 26 | static int nolock_mount(char *table_name, char *host_data, | ||
| 27 | lm_callback_t cb, void *cb_data, | ||
| 28 | unsigned int min_lvb_size, int flags, | ||
| 29 | struct lm_lockstruct *lockstruct, | ||
| 30 | struct kobject *fskobj); | ||
| 31 | |||
| 26 | /* List of registered low-level locking protocols. A file system selects one | 32 | /* List of registered low-level locking protocols. A file system selects one |
| 27 | of them by name at mount time, e.g. lock_nolock, lock_dlm. */ | 33 | of them by name at mount time, e.g. lock_nolock, lock_dlm. */ |
| 28 | 34 | ||
| 35 | static const struct lm_lockops nolock_ops = { | ||
| 36 | .lm_proto_name = "lock_nolock", | ||
| 37 | .lm_mount = nolock_mount, | ||
| 38 | }; | ||
| 39 | |||
| 40 | static struct lmh_wrapper nolock_proto = { | ||
| 41 | .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list), | ||
| 42 | .lw_ops = &nolock_ops, | ||
| 43 | }; | ||
| 44 | |||
| 29 | static LIST_HEAD(lmh_list); | 45 | static LIST_HEAD(lmh_list); |
| 30 | static DEFINE_MUTEX(lmh_lock); | 46 | static DEFINE_MUTEX(lmh_lock); |
| 31 | 47 | ||
| 48 | static int nolock_mount(char *table_name, char *host_data, | ||
| 49 | lm_callback_t cb, void *cb_data, | ||
| 50 | unsigned int min_lvb_size, int flags, | ||
| 51 | struct lm_lockstruct *lockstruct, | ||
| 52 | struct kobject *fskobj) | ||
| 53 | { | ||
| 54 | char *c; | ||
| 55 | unsigned int jid; | ||
| 56 | |||
| 57 | c = strstr(host_data, "jid="); | ||
| 58 | if (!c) | ||
| 59 | jid = 0; | ||
| 60 | else { | ||
| 61 | c += 4; | ||
| 62 | sscanf(c, "%u", &jid); | ||
| 63 | } | ||
| 64 | |||
| 65 | lockstruct->ls_jid = jid; | ||
| 66 | lockstruct->ls_first = 1; | ||
| 67 | lockstruct->ls_lvb_size = min_lvb_size; | ||
| 68 | lockstruct->ls_ops = &nolock_ops; | ||
| 69 | lockstruct->ls_flags = LM_LSFLAG_LOCAL; | ||
| 70 | |||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | |||
| 32 | /** | 74 | /** |
| 33 | * gfs2_register_lockproto - Register a low-level locking protocol | 75 | * gfs2_register_lockproto - Register a low-level locking protocol |
| 34 | * @proto: the protocol definition | 76 | * @proto: the protocol definition |
| @@ -116,9 +158,13 @@ int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data, | |||
| 116 | int try = 0; | 158 | int try = 0; |
| 117 | int error, found; | 159 | int error, found; |
| 118 | 160 | ||
| 161 | |||
| 119 | retry: | 162 | retry: |
| 120 | mutex_lock(&lmh_lock); | 163 | mutex_lock(&lmh_lock); |
| 121 | 164 | ||
| 165 | if (list_empty(&nolock_proto.lw_list)) | ||
| 166 | list_add(&nolock_proto.lw_list, &lmh_list); | ||
| 167 | |||
| 122 | found = 0; | 168 | found = 0; |
| 123 | list_for_each_entry(lw, &lmh_list, lw_list) { | 169 | list_for_each_entry(lw, &lmh_list, lw_list) { |
| 124 | if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) { | 170 | if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) { |
| @@ -139,7 +185,8 @@ retry: | |||
| 139 | goto out; | 185 | goto out; |
| 140 | } | 186 | } |
| 141 | 187 | ||
| 142 | if (!try_module_get(lw->lw_ops->lm_owner)) { | 188 | if (lw->lw_ops->lm_owner && |
| 189 | !try_module_get(lw->lw_ops->lm_owner)) { | ||
| 143 | try = 0; | 190 | try = 0; |
| 144 | mutex_unlock(&lmh_lock); | 191 | mutex_unlock(&lmh_lock); |
| 145 | msleep(1000); | 192 | msleep(1000); |
| @@ -158,7 +205,8 @@ out: | |||
| 158 | void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) | 205 | void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) |
| 159 | { | 206 | { |
| 160 | mutex_lock(&lmh_lock); | 207 | mutex_lock(&lmh_lock); |
| 161 | lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); | 208 | if (lockstruct->ls_ops->lm_unmount) |
| 209 | lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); | ||
| 162 | if (lockstruct->ls_ops->lm_owner) | 210 | if (lockstruct->ls_ops->lm_owner) |
| 163 | module_put(lockstruct->ls_ops->lm_owner); | 211 | module_put(lockstruct->ls_ops->lm_owner); |
| 164 | mutex_unlock(&lmh_lock); | 212 | mutex_unlock(&lmh_lock); |
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index cf7ea8abec87..2482c9047505 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c | |||
| @@ -11,46 +11,60 @@ | |||
| 11 | 11 | ||
| 12 | static char junk_lvb[GDLM_LVB_SIZE]; | 12 | static char junk_lvb[GDLM_LVB_SIZE]; |
| 13 | 13 | ||
| 14 | static void queue_complete(struct gdlm_lock *lp) | 14 | |
| 15 | /* convert dlm lock-mode to gfs lock-state */ | ||
| 16 | |||
| 17 | static s16 gdlm_make_lmstate(s16 dlmmode) | ||
| 15 | { | 18 | { |
| 16 | struct gdlm_ls *ls = lp->ls; | 19 | switch (dlmmode) { |
| 20 | case DLM_LOCK_IV: | ||
| 21 | case DLM_LOCK_NL: | ||
| 22 | return LM_ST_UNLOCKED; | ||
| 23 | case DLM_LOCK_EX: | ||
| 24 | return LM_ST_EXCLUSIVE; | ||
| 25 | case DLM_LOCK_CW: | ||
| 26 | return LM_ST_DEFERRED; | ||
| 27 | case DLM_LOCK_PR: | ||
| 28 | return LM_ST_SHARED; | ||
| 29 | } | ||
| 30 | gdlm_assert(0, "unknown DLM mode %d", dlmmode); | ||
| 31 | return -1; | ||
| 32 | } | ||
| 17 | 33 | ||
| 18 | clear_bit(LFL_ACTIVE, &lp->flags); | 34 | /* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm |
| 35 | thread gets to it. */ | ||
| 36 | |||
| 37 | static void queue_submit(struct gdlm_lock *lp) | ||
| 38 | { | ||
| 39 | struct gdlm_ls *ls = lp->ls; | ||
| 19 | 40 | ||
| 20 | spin_lock(&ls->async_lock); | 41 | spin_lock(&ls->async_lock); |
| 21 | list_add_tail(&lp->clist, &ls->complete); | 42 | list_add_tail(&lp->delay_list, &ls->submit); |
| 22 | spin_unlock(&ls->async_lock); | 43 | spin_unlock(&ls->async_lock); |
| 23 | wake_up(&ls->thread_wait); | 44 | wake_up(&ls->thread_wait); |
| 24 | } | 45 | } |
| 25 | 46 | ||
| 26 | static inline void gdlm_ast(void *astarg) | 47 | static void wake_up_ast(struct gdlm_lock *lp) |
| 27 | { | 48 | { |
| 28 | queue_complete(astarg); | 49 | clear_bit(LFL_AST_WAIT, &lp->flags); |
| 50 | smp_mb__after_clear_bit(); | ||
| 51 | wake_up_bit(&lp->flags, LFL_AST_WAIT); | ||
| 29 | } | 52 | } |
| 30 | 53 | ||
| 31 | static inline void gdlm_bast(void *astarg, int mode) | 54 | static void gdlm_delete_lp(struct gdlm_lock *lp) |
| 32 | { | 55 | { |
| 33 | struct gdlm_lock *lp = astarg; | ||
| 34 | struct gdlm_ls *ls = lp->ls; | 56 | struct gdlm_ls *ls = lp->ls; |
| 35 | 57 | ||
| 36 | if (!mode) { | ||
| 37 | printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n", | ||
| 38 | lp->lockname.ln_type, | ||
| 39 | (unsigned long long)lp->lockname.ln_number); | ||
| 40 | return; | ||
| 41 | } | ||
| 42 | |||
| 43 | spin_lock(&ls->async_lock); | 58 | spin_lock(&ls->async_lock); |
| 44 | if (!lp->bast_mode) { | 59 | if (!list_empty(&lp->delay_list)) |
| 45 | list_add_tail(&lp->blist, &ls->blocking); | 60 | list_del_init(&lp->delay_list); |
| 46 | lp->bast_mode = mode; | 61 | ls->all_locks_count--; |
| 47 | } else if (lp->bast_mode < mode) | ||
| 48 | lp->bast_mode = mode; | ||
| 49 | spin_unlock(&ls->async_lock); | 62 | spin_unlock(&ls->async_lock); |
| 50 | wake_up(&ls->thread_wait); | 63 | |
| 64 | kfree(lp); | ||
| 51 | } | 65 | } |
| 52 | 66 | ||
| 53 | void gdlm_queue_delayed(struct gdlm_lock *lp) | 67 | static void gdlm_queue_delayed(struct gdlm_lock *lp) |
| 54 | { | 68 | { |
| 55 | struct gdlm_ls *ls = lp->ls; | 69 | struct gdlm_ls *ls = lp->ls; |
| 56 | 70 | ||
| @@ -59,6 +73,236 @@ void gdlm_queue_delayed(struct gdlm_lock *lp) | |||
| 59 | spin_unlock(&ls->async_lock); | 73 | spin_unlock(&ls->async_lock); |
| 60 | } | 74 | } |
| 61 | 75 | ||
| 76 | static void process_complete(struct gdlm_lock *lp) | ||
| 77 | { | ||
| 78 | struct gdlm_ls *ls = lp->ls; | ||
| 79 | struct lm_async_cb acb; | ||
| 80 | |||
| 81 | memset(&acb, 0, sizeof(acb)); | ||
| 82 | |||
| 83 | if (lp->lksb.sb_status == -DLM_ECANCEL) { | ||
| 84 | log_info("complete dlm cancel %x,%llx flags %lx", | ||
| 85 | lp->lockname.ln_type, | ||
| 86 | (unsigned long long)lp->lockname.ln_number, | ||
| 87 | lp->flags); | ||
| 88 | |||
| 89 | lp->req = lp->cur; | ||
| 90 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 91 | if (lp->cur == DLM_LOCK_IV) | ||
| 92 | lp->lksb.sb_lkid = 0; | ||
| 93 | goto out; | ||
| 94 | } | ||
| 95 | |||
| 96 | if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) { | ||
| 97 | if (lp->lksb.sb_status != -DLM_EUNLOCK) { | ||
| 98 | log_info("unlock sb_status %d %x,%llx flags %lx", | ||
| 99 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 100 | (unsigned long long)lp->lockname.ln_number, | ||
| 101 | lp->flags); | ||
| 102 | return; | ||
| 103 | } | ||
| 104 | |||
| 105 | lp->cur = DLM_LOCK_IV; | ||
| 106 | lp->req = DLM_LOCK_IV; | ||
| 107 | lp->lksb.sb_lkid = 0; | ||
| 108 | |||
| 109 | if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) { | ||
| 110 | gdlm_delete_lp(lp); | ||
| 111 | return; | ||
| 112 | } | ||
| 113 | goto out; | ||
| 114 | } | ||
| 115 | |||
| 116 | if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID) | ||
| 117 | memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); | ||
| 118 | |||
| 119 | if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) { | ||
| 120 | if (lp->req == DLM_LOCK_PR) | ||
| 121 | lp->req = DLM_LOCK_CW; | ||
| 122 | else if (lp->req == DLM_LOCK_CW) | ||
| 123 | lp->req = DLM_LOCK_PR; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * A canceled lock request. The lock was just taken off the delayed | ||
| 128 | * list and was never even submitted to dlm. | ||
| 129 | */ | ||
| 130 | |||
| 131 | if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { | ||
| 132 | log_info("complete internal cancel %x,%llx", | ||
| 133 | lp->lockname.ln_type, | ||
| 134 | (unsigned long long)lp->lockname.ln_number); | ||
| 135 | lp->req = lp->cur; | ||
| 136 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 137 | goto out; | ||
| 138 | } | ||
| 139 | |||
| 140 | /* | ||
| 141 | * An error occurred. | ||
| 142 | */ | ||
| 143 | |||
| 144 | if (lp->lksb.sb_status) { | ||
| 145 | /* a "normal" error */ | ||
| 146 | if ((lp->lksb.sb_status == -EAGAIN) && | ||
| 147 | (lp->lkf & DLM_LKF_NOQUEUE)) { | ||
| 148 | lp->req = lp->cur; | ||
| 149 | if (lp->cur == DLM_LOCK_IV) | ||
| 150 | lp->lksb.sb_lkid = 0; | ||
| 151 | goto out; | ||
| 152 | } | ||
| 153 | |||
| 154 | /* this could only happen with cancels I think */ | ||
| 155 | log_info("ast sb_status %d %x,%llx flags %lx", | ||
| 156 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 157 | (unsigned long long)lp->lockname.ln_number, | ||
| 158 | lp->flags); | ||
| 159 | return; | ||
| 160 | } | ||
| 161 | |||
| 162 | /* | ||
| 163 | * This is an AST for an EX->EX conversion for sync_lvb from GFS. | ||
| 164 | */ | ||
| 165 | |||
| 166 | if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { | ||
| 167 | wake_up_ast(lp); | ||
| 168 | return; | ||
| 169 | } | ||
| 170 | |||
| 171 | /* | ||
| 172 | * A lock has been demoted to NL because it initially completed during | ||
| 173 | * BLOCK_LOCKS. Now it must be requested in the originally requested | ||
| 174 | * mode. | ||
| 175 | */ | ||
| 176 | |||
| 177 | if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) { | ||
| 178 | gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx", | ||
| 179 | lp->lockname.ln_type, | ||
| 180 | (unsigned long long)lp->lockname.ln_number); | ||
| 181 | gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx", | ||
| 182 | lp->lockname.ln_type, | ||
| 183 | (unsigned long long)lp->lockname.ln_number); | ||
| 184 | |||
| 185 | lp->cur = DLM_LOCK_NL; | ||
| 186 | lp->req = lp->prev_req; | ||
| 187 | lp->prev_req = DLM_LOCK_IV; | ||
| 188 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 189 | |||
| 190 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 191 | |||
| 192 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 193 | !test_bit(LFL_NOBLOCK, &lp->flags)) | ||
| 194 | gdlm_queue_delayed(lp); | ||
| 195 | else | ||
| 196 | queue_submit(lp); | ||
| 197 | return; | ||
| 198 | } | ||
| 199 | |||
| 200 | /* | ||
| 201 | * A request is granted during dlm recovery. It may be granted | ||
| 202 | * because the locks of a failed node were cleared. In that case, | ||
| 203 | * there may be inconsistent data beneath this lock and we must wait | ||
| 204 | * for recovery to complete to use it. When gfs recovery is done this | ||
| 205 | * granted lock will be converted to NL and then reacquired in this | ||
| 206 | * granted state. | ||
| 207 | */ | ||
| 208 | |||
| 209 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 210 | !test_bit(LFL_NOBLOCK, &lp->flags) && | ||
| 211 | lp->req != DLM_LOCK_NL) { | ||
| 212 | |||
| 213 | lp->cur = lp->req; | ||
| 214 | lp->prev_req = lp->req; | ||
| 215 | lp->req = DLM_LOCK_NL; | ||
| 216 | lp->lkf |= DLM_LKF_CONVERT; | ||
| 217 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 218 | |||
| 219 | log_debug("rereq %x,%llx id %x %d,%d", | ||
| 220 | lp->lockname.ln_type, | ||
| 221 | (unsigned long long)lp->lockname.ln_number, | ||
| 222 | lp->lksb.sb_lkid, lp->cur, lp->req); | ||
| 223 | |||
| 224 | set_bit(LFL_REREQUEST, &lp->flags); | ||
| 225 | queue_submit(lp); | ||
| 226 | return; | ||
| 227 | } | ||
| 228 | |||
| 229 | /* | ||
| 230 | * DLM demoted the lock to NL before it was granted so GFS must be | ||
| 231 | * told it cannot cache data for this lock. | ||
| 232 | */ | ||
| 233 | |||
| 234 | if (lp->lksb.sb_flags & DLM_SBF_DEMOTED) | ||
| 235 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 236 | |||
| 237 | out: | ||
| 238 | /* | ||
| 239 | * This is an internal lock_dlm lock | ||
| 240 | */ | ||
| 241 | |||
| 242 | if (test_bit(LFL_INLOCK, &lp->flags)) { | ||
| 243 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 244 | lp->cur = lp->req; | ||
| 245 | wake_up_ast(lp); | ||
| 246 | return; | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * Normal completion of a lock request. Tell GFS it now has the lock. | ||
| 251 | */ | ||
| 252 | |||
| 253 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 254 | lp->cur = lp->req; | ||
| 255 | |||
| 256 | acb.lc_name = lp->lockname; | ||
| 257 | acb.lc_ret |= gdlm_make_lmstate(lp->cur); | ||
| 258 | |||
| 259 | ls->fscb(ls->sdp, LM_CB_ASYNC, &acb); | ||
| 260 | } | ||
| 261 | |||
| 262 | static void gdlm_ast(void *astarg) | ||
| 263 | { | ||
| 264 | struct gdlm_lock *lp = astarg; | ||
| 265 | clear_bit(LFL_ACTIVE, &lp->flags); | ||
| 266 | process_complete(lp); | ||
| 267 | } | ||
| 268 | |||
| 269 | static void process_blocking(struct gdlm_lock *lp, int bast_mode) | ||
| 270 | { | ||
| 271 | struct gdlm_ls *ls = lp->ls; | ||
| 272 | unsigned int cb = 0; | ||
| 273 | |||
| 274 | switch (gdlm_make_lmstate(bast_mode)) { | ||
| 275 | case LM_ST_EXCLUSIVE: | ||
| 276 | cb = LM_CB_NEED_E; | ||
| 277 | break; | ||
| 278 | case LM_ST_DEFERRED: | ||
| 279 | cb = LM_CB_NEED_D; | ||
| 280 | break; | ||
| 281 | case LM_ST_SHARED: | ||
| 282 | cb = LM_CB_NEED_S; | ||
| 283 | break; | ||
| 284 | default: | ||
| 285 | gdlm_assert(0, "unknown bast mode %u", bast_mode); | ||
| 286 | } | ||
| 287 | |||
| 288 | ls->fscb(ls->sdp, cb, &lp->lockname); | ||
| 289 | } | ||
| 290 | |||
| 291 | |||
| 292 | static void gdlm_bast(void *astarg, int mode) | ||
| 293 | { | ||
| 294 | struct gdlm_lock *lp = astarg; | ||
| 295 | |||
| 296 | if (!mode) { | ||
| 297 | printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n", | ||
| 298 | lp->lockname.ln_type, | ||
| 299 | (unsigned long long)lp->lockname.ln_number); | ||
| 300 | return; | ||
| 301 | } | ||
| 302 | |||
| 303 | process_blocking(lp, mode); | ||
| 304 | } | ||
| 305 | |||
| 62 | /* convert gfs lock-state to dlm lock-mode */ | 306 | /* convert gfs lock-state to dlm lock-mode */ |
| 63 | 307 | ||
| 64 | static s16 make_mode(s16 lmstate) | 308 | static s16 make_mode(s16 lmstate) |
| @@ -77,24 +321,6 @@ static s16 make_mode(s16 lmstate) | |||
| 77 | return -1; | 321 | return -1; |
| 78 | } | 322 | } |
| 79 | 323 | ||
| 80 | /* convert dlm lock-mode to gfs lock-state */ | ||
| 81 | |||
| 82 | s16 gdlm_make_lmstate(s16 dlmmode) | ||
| 83 | { | ||
| 84 | switch (dlmmode) { | ||
| 85 | case DLM_LOCK_IV: | ||
| 86 | case DLM_LOCK_NL: | ||
| 87 | return LM_ST_UNLOCKED; | ||
| 88 | case DLM_LOCK_EX: | ||
| 89 | return LM_ST_EXCLUSIVE; | ||
| 90 | case DLM_LOCK_CW: | ||
| 91 | return LM_ST_DEFERRED; | ||
| 92 | case DLM_LOCK_PR: | ||
| 93 | return LM_ST_SHARED; | ||
| 94 | } | ||
| 95 | gdlm_assert(0, "unknown DLM mode %d", dlmmode); | ||
| 96 | return -1; | ||
| 97 | } | ||
| 98 | 324 | ||
| 99 | /* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and | 325 | /* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and |
| 100 | DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */ | 326 | DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */ |
| @@ -134,14 +360,6 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, | |||
| 134 | 360 | ||
| 135 | if (lp->lksb.sb_lkid != 0) { | 361 | if (lp->lksb.sb_lkid != 0) { |
| 136 | lkf |= DLM_LKF_CONVERT; | 362 | lkf |= DLM_LKF_CONVERT; |
| 137 | |||
| 138 | /* Conversion deadlock avoidance by DLM */ | ||
| 139 | |||
| 140 | if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) && | ||
| 141 | !test_bit(LFL_FORCE_PROMOTE, &lp->flags) && | ||
| 142 | !(lkf & DLM_LKF_NOQUEUE) && | ||
| 143 | cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) | ||
| 144 | lkf |= DLM_LKF_CONVDEADLK; | ||
| 145 | } | 363 | } |
| 146 | 364 | ||
| 147 | if (lp->lvb) | 365 | if (lp->lvb) |
| @@ -173,14 +391,9 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
| 173 | make_strname(name, &lp->strname); | 391 | make_strname(name, &lp->strname); |
| 174 | lp->ls = ls; | 392 | lp->ls = ls; |
| 175 | lp->cur = DLM_LOCK_IV; | 393 | lp->cur = DLM_LOCK_IV; |
| 176 | lp->lvb = NULL; | ||
| 177 | lp->hold_null = NULL; | ||
| 178 | INIT_LIST_HEAD(&lp->clist); | ||
| 179 | INIT_LIST_HEAD(&lp->blist); | ||
| 180 | INIT_LIST_HEAD(&lp->delay_list); | 394 | INIT_LIST_HEAD(&lp->delay_list); |
| 181 | 395 | ||
| 182 | spin_lock(&ls->async_lock); | 396 | spin_lock(&ls->async_lock); |
| 183 | list_add(&lp->all_list, &ls->all_locks); | ||
| 184 | ls->all_locks_count++; | 397 | ls->all_locks_count++; |
| 185 | spin_unlock(&ls->async_lock); | 398 | spin_unlock(&ls->async_lock); |
| 186 | 399 | ||
| @@ -188,26 +401,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
| 188 | return 0; | 401 | return 0; |
| 189 | } | 402 | } |
| 190 | 403 | ||
| 191 | void gdlm_delete_lp(struct gdlm_lock *lp) | ||
| 192 | { | ||
| 193 | struct gdlm_ls *ls = lp->ls; | ||
| 194 | |||
| 195 | spin_lock(&ls->async_lock); | ||
| 196 | if (!list_empty(&lp->clist)) | ||
| 197 | list_del_init(&lp->clist); | ||
| 198 | if (!list_empty(&lp->blist)) | ||
| 199 | list_del_init(&lp->blist); | ||
| 200 | if (!list_empty(&lp->delay_list)) | ||
| 201 | list_del_init(&lp->delay_list); | ||
| 202 | gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type, | ||
| 203 | (unsigned long long)lp->lockname.ln_number); | ||
| 204 | list_del_init(&lp->all_list); | ||
| 205 | ls->all_locks_count--; | ||
| 206 | spin_unlock(&ls->async_lock); | ||
| 207 | |||
| 208 | kfree(lp); | ||
| 209 | } | ||
| 210 | |||
| 211 | int gdlm_get_lock(void *lockspace, struct lm_lockname *name, | 404 | int gdlm_get_lock(void *lockspace, struct lm_lockname *name, |
| 212 | void **lockp) | 405 | void **lockp) |
| 213 | { | 406 | { |
| @@ -261,7 +454,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) | |||
| 261 | 454 | ||
| 262 | if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { | 455 | if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { |
| 263 | lp->lksb.sb_status = -EAGAIN; | 456 | lp->lksb.sb_status = -EAGAIN; |
| 264 | queue_complete(lp); | 457 | gdlm_ast(lp); |
| 265 | error = 0; | 458 | error = 0; |
| 266 | } | 459 | } |
| 267 | 460 | ||
| @@ -308,6 +501,12 @@ unsigned int gdlm_lock(void *lock, unsigned int cur_state, | |||
| 308 | { | 501 | { |
| 309 | struct gdlm_lock *lp = lock; | 502 | struct gdlm_lock *lp = lock; |
| 310 | 503 | ||
| 504 | if (req_state == LM_ST_UNLOCKED) | ||
| 505 | return gdlm_unlock(lock, cur_state); | ||
| 506 | |||
| 311 | clear_bit(LFL_DLM_CANCEL, &lp->flags); | 510 | clear_bit(LFL_DLM_CANCEL, &lp->flags); |
| 312 | if (flags & LM_FLAG_NOEXP) | 511 | if (flags & LM_FLAG_NOEXP) |
| 313 | set_bit(LFL_NOBLOCK, &lp->flags); | 512 | set_bit(LFL_NOBLOCK, &lp->flags); |
| @@ -351,7 +550,7 @@ void gdlm_cancel(void *lock) | |||
| 351 | if (delay_list) { | 550 | if (delay_list) { |
| 352 | set_bit(LFL_CANCEL, &lp->flags); | 551 | set_bit(LFL_CANCEL, &lp->flags); |
| 353 | set_bit(LFL_ACTIVE, &lp->flags); | 552 | set_bit(LFL_ACTIVE, &lp->flags); |
| 354 | queue_complete(lp); | 553 | gdlm_ast(lp); |
| 355 | return; | 554 | return; |
| 356 | } | 555 | } |
| 357 | 556 | ||
| @@ -507,22 +706,3 @@ void gdlm_submit_delayed(struct gdlm_ls *ls) | |||
| 507 | wake_up(&ls->thread_wait); | 706 | wake_up(&ls->thread_wait); |
| 508 | } | 707 | } |
| 509 | 708 | ||
| 510 | int gdlm_release_all_locks(struct gdlm_ls *ls) | ||
| 511 | { | ||
| 512 | struct gdlm_lock *lp, *safe; | ||
| 513 | int count = 0; | ||
| 514 | |||
| 515 | spin_lock(&ls->async_lock); | ||
| 516 | list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) { | ||
| 517 | list_del_init(&lp->all_list); | ||
| 518 | |||
| 519 | if (lp->lvb && lp->lvb != junk_lvb) | ||
| 520 | kfree(lp->lvb); | ||
| 521 | kfree(lp); | ||
| 522 | count++; | ||
| 523 | } | ||
| 524 | spin_unlock(&ls->async_lock); | ||
| 525 | |||
| 526 | return count; | ||
| 527 | } | ||
| 528 | |||
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index a243cf69c54e..3c98e7c6f93b 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
| @@ -72,19 +72,12 @@ struct gdlm_ls { | |||
| 72 | int recover_jid_done; | 72 | int recover_jid_done; |
| 73 | int recover_jid_status; | 73 | int recover_jid_status; |
| 74 | spinlock_t async_lock; | 74 | spinlock_t async_lock; |
| 75 | struct list_head complete; | ||
| 76 | struct list_head blocking; | ||
| 77 | struct list_head delayed; | 75 | struct list_head delayed; |
| 78 | struct list_head submit; | 76 | struct list_head submit; |
| 79 | struct list_head all_locks; | ||
| 80 | u32 all_locks_count; | 77 | u32 all_locks_count; |
| 81 | wait_queue_head_t wait_control; | 78 | wait_queue_head_t wait_control; |
| 82 | struct task_struct *thread1; | 79 | struct task_struct *thread; |
| 83 | struct task_struct *thread2; | ||
| 84 | wait_queue_head_t thread_wait; | 80 | wait_queue_head_t thread_wait; |
| 85 | unsigned long drop_time; | ||
| 86 | int drop_locks_count; | ||
| 87 | int drop_locks_period; | ||
| 88 | }; | 81 | }; |
| 89 | 82 | ||
| 90 | enum { | 83 | enum { |
| @@ -117,12 +110,7 @@ struct gdlm_lock { | |||
| 117 | u32 lkf; /* dlm flags DLM_LKF_ */ | 110 | u32 lkf; /* dlm flags DLM_LKF_ */ |
| 118 | unsigned long flags; /* lock_dlm flags LFL_ */ | 111 | unsigned long flags; /* lock_dlm flags LFL_ */ |
| 119 | 112 | ||
| 120 | int bast_mode; /* protected by async_lock */ | ||
| 121 | |||
| 122 | struct list_head clist; /* complete */ | ||
| 123 | struct list_head blist; /* blocking */ | ||
| 124 | struct list_head delay_list; /* delayed */ | 113 | struct list_head delay_list; /* delayed */ |
| 125 | struct list_head all_list; /* all locks for the fs */ | ||
| 126 | struct gdlm_lock *hold_null; /* NL lock for hold_lvb */ | 114 | struct gdlm_lock *hold_null; /* NL lock for hold_lvb */ |
| 127 | }; | 115 | }; |
| 128 | 116 | ||
| @@ -159,11 +147,7 @@ void gdlm_release_threads(struct gdlm_ls *); | |||
| 159 | 147 | ||
| 160 | /* lock.c */ | 148 | /* lock.c */ |
| 161 | 149 | ||
| 162 | s16 gdlm_make_lmstate(s16); | ||
| 163 | void gdlm_queue_delayed(struct gdlm_lock *); | ||
| 164 | void gdlm_submit_delayed(struct gdlm_ls *); | 150 | void gdlm_submit_delayed(struct gdlm_ls *); |
| 165 | int gdlm_release_all_locks(struct gdlm_ls *); | ||
| 166 | void gdlm_delete_lp(struct gdlm_lock *); | ||
| 167 | unsigned int gdlm_do_lock(struct gdlm_lock *); | 151 | unsigned int gdlm_do_lock(struct gdlm_lock *); |
| 168 | 152 | ||
| 169 | int gdlm_get_lock(void *, struct lm_lockname *, void **); | 153 | int gdlm_get_lock(void *, struct lm_lockname *, void **); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 470bdf650b50..09d78c216f48 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
| @@ -22,22 +22,14 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, | |||
| 22 | if (!ls) | 22 | if (!ls) |
| 23 | return NULL; | 23 | return NULL; |
| 24 | 24 | ||
| 25 | ls->drop_locks_count = GDLM_DROP_COUNT; | ||
| 26 | ls->drop_locks_period = GDLM_DROP_PERIOD; | ||
| 27 | ls->fscb = cb; | 25 | ls->fscb = cb; |
| 28 | ls->sdp = sdp; | 26 | ls->sdp = sdp; |
| 29 | ls->fsflags = flags; | 27 | ls->fsflags = flags; |
| 30 | spin_lock_init(&ls->async_lock); | 28 | spin_lock_init(&ls->async_lock); |
| 31 | INIT_LIST_HEAD(&ls->complete); | ||
| 32 | INIT_LIST_HEAD(&ls->blocking); | ||
| 33 | INIT_LIST_HEAD(&ls->delayed); | 29 | INIT_LIST_HEAD(&ls->delayed); |
| 34 | INIT_LIST_HEAD(&ls->submit); | 30 | INIT_LIST_HEAD(&ls->submit); |
| 35 | INIT_LIST_HEAD(&ls->all_locks); | ||
| 36 | init_waitqueue_head(&ls->thread_wait); | 31 | init_waitqueue_head(&ls->thread_wait); |
| 37 | init_waitqueue_head(&ls->wait_control); | 32 | init_waitqueue_head(&ls->wait_control); |
| 38 | ls->thread1 = NULL; | ||
| 39 | ls->thread2 = NULL; | ||
| 40 | ls->drop_time = jiffies; | ||
| 41 | ls->jid = -1; | 33 | ls->jid = -1; |
| 42 | 34 | ||
| 43 | strncpy(buf, table_name, 256); | 35 | strncpy(buf, table_name, 256); |
| @@ -180,7 +172,6 @@ out: | |||
| 180 | static void gdlm_unmount(void *lockspace) | 172 | static void gdlm_unmount(void *lockspace) |
| 181 | { | 173 | { |
| 182 | struct gdlm_ls *ls = lockspace; | 174 | struct gdlm_ls *ls = lockspace; |
| 183 | int rv; | ||
| 184 | 175 | ||
| 185 | log_debug("unmount flags %lx", ls->flags); | 176 | log_debug("unmount flags %lx", ls->flags); |
| 186 | 177 | ||
| @@ -194,9 +185,7 @@ static void gdlm_unmount(void *lockspace) | |||
| 194 | gdlm_kobject_release(ls); | 185 | gdlm_kobject_release(ls); |
| 195 | dlm_release_lockspace(ls->dlm_lockspace, 2); | 186 | dlm_release_lockspace(ls->dlm_lockspace, 2); |
| 196 | gdlm_release_threads(ls); | 187 | gdlm_release_threads(ls); |
| 197 | rv = gdlm_release_all_locks(ls); | 188 | BUG_ON(ls->all_locks_count); |
| 198 | if (rv) | ||
| 199 | log_info("gdlm_unmount: %d stray locks freed", rv); | ||
| 200 | out: | 189 | out: |
| 201 | kfree(ls); | 190 | kfree(ls); |
| 202 | } | 191 | } |
| @@ -232,7 +221,6 @@ static void gdlm_withdraw(void *lockspace) | |||
| 232 | 221 | ||
| 233 | dlm_release_lockspace(ls->dlm_lockspace, 2); | 222 | dlm_release_lockspace(ls->dlm_lockspace, 2); |
| 234 | gdlm_release_threads(ls); | 223 | gdlm_release_threads(ls); |
| 235 | gdlm_release_all_locks(ls); | ||
| 236 | gdlm_kobject_release(ls); | 224 | gdlm_kobject_release(ls); |
| 237 | } | 225 | } |
| 238 | 226 | ||
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index a4ff271df9ee..4ec571c3d8a9 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
| @@ -114,17 +114,6 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) | |||
| 114 | return sprintf(buf, "%d\n", ls->recover_jid_status); | 114 | return sprintf(buf, "%d\n", ls->recover_jid_status); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) | ||
| 118 | { | ||
| 119 | return sprintf(buf, "%d\n", ls->drop_locks_count); | ||
| 120 | } | ||
| 121 | |||
| 122 | static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) | ||
| 123 | { | ||
| 124 | ls->drop_locks_count = simple_strtol(buf, NULL, 0); | ||
| 125 | return len; | ||
| 126 | } | ||
| 127 | |||
| 128 | struct gdlm_attr { | 117 | struct gdlm_attr { |
| 129 | struct attribute attr; | 118 | struct attribute attr; |
| 130 | ssize_t (*show)(struct gdlm_ls *, char *); | 119 | ssize_t (*show)(struct gdlm_ls *, char *); |
| @@ -144,7 +133,6 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); | |||
| 144 | GDLM_ATTR(recover, 0644, recover_show, recover_store); | 133 | GDLM_ATTR(recover, 0644, recover_show, recover_store); |
| 145 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 134 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
| 146 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 135 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
| 147 | GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); | ||
| 148 | 136 | ||
| 149 | static struct attribute *gdlm_attrs[] = { | 137 | static struct attribute *gdlm_attrs[] = { |
| 150 | &gdlm_attr_proto_name.attr, | 138 | &gdlm_attr_proto_name.attr, |
| @@ -157,7 +145,6 @@ static struct attribute *gdlm_attrs[] = { | |||
| 157 | &gdlm_attr_recover.attr, | 145 | &gdlm_attr_recover.attr, |
| 158 | &gdlm_attr_recover_done.attr, | 146 | &gdlm_attr_recover_done.attr, |
| 159 | &gdlm_attr_recover_status.attr, | 147 | &gdlm_attr_recover_status.attr, |
| 160 | &gdlm_attr_drop_count.attr, | ||
| 161 | NULL, | 148 | NULL, |
| 162 | }; | 149 | }; |
| 163 | 150 | ||
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index e53db6fd28ab..38823efd698c 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
| @@ -9,367 +9,60 @@ | |||
| 9 | 9 | ||
| 10 | #include "lock_dlm.h" | 10 | #include "lock_dlm.h" |
| 11 | 11 | ||
| 12 | /* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm | 12 | static inline int no_work(struct gdlm_ls *ls) |
| 13 | thread gets to it. */ | ||
| 14 | |||
| 15 | static void queue_submit(struct gdlm_lock *lp) | ||
| 16 | { | ||
| 17 | struct gdlm_ls *ls = lp->ls; | ||
| 18 | |||
| 19 | spin_lock(&ls->async_lock); | ||
| 20 | list_add_tail(&lp->delay_list, &ls->submit); | ||
| 21 | spin_unlock(&ls->async_lock); | ||
| 22 | wake_up(&ls->thread_wait); | ||
| 23 | } | ||
| 24 | |||
| 25 | static void process_blocking(struct gdlm_lock *lp, int bast_mode) | ||
| 26 | { | ||
| 27 | struct gdlm_ls *ls = lp->ls; | ||
| 28 | unsigned int cb = 0; | ||
| 29 | |||
| 30 | switch (gdlm_make_lmstate(bast_mode)) { | ||
| 31 | case LM_ST_EXCLUSIVE: | ||
| 32 | cb = LM_CB_NEED_E; | ||
| 33 | break; | ||
| 34 | case LM_ST_DEFERRED: | ||
| 35 | cb = LM_CB_NEED_D; | ||
| 36 | break; | ||
| 37 | case LM_ST_SHARED: | ||
| 38 | cb = LM_CB_NEED_S; | ||
| 39 | break; | ||
| 40 | default: | ||
| 41 | gdlm_assert(0, "unknown bast mode %u", lp->bast_mode); | ||
| 42 | } | ||
| 43 | |||
| 44 | ls->fscb(ls->sdp, cb, &lp->lockname); | ||
| 45 | } | ||
| 46 | |||
| 47 | static void wake_up_ast(struct gdlm_lock *lp) | ||
| 48 | { | ||
| 49 | clear_bit(LFL_AST_WAIT, &lp->flags); | ||
| 50 | smp_mb__after_clear_bit(); | ||
| 51 | wake_up_bit(&lp->flags, LFL_AST_WAIT); | ||
| 52 | } | ||
| 53 | |||
| 54 | static void process_complete(struct gdlm_lock *lp) | ||
| 55 | { | ||
| 56 | struct gdlm_ls *ls = lp->ls; | ||
| 57 | struct lm_async_cb acb; | ||
| 58 | s16 prev_mode = lp->cur; | ||
| 59 | |||
| 60 | memset(&acb, 0, sizeof(acb)); | ||
| 61 | |||
| 62 | if (lp->lksb.sb_status == -DLM_ECANCEL) { | ||
| 63 | log_info("complete dlm cancel %x,%llx flags %lx", | ||
| 64 | lp->lockname.ln_type, | ||
| 65 | (unsigned long long)lp->lockname.ln_number, | ||
| 66 | lp->flags); | ||
| 67 | |||
| 68 | lp->req = lp->cur; | ||
| 69 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 70 | if (lp->cur == DLM_LOCK_IV) | ||
| 71 | lp->lksb.sb_lkid = 0; | ||
| 72 | goto out; | ||
| 73 | } | ||
| 74 | |||
| 75 | if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) { | ||
| 76 | if (lp->lksb.sb_status != -DLM_EUNLOCK) { | ||
| 77 | log_info("unlock sb_status %d %x,%llx flags %lx", | ||
| 78 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 79 | (unsigned long long)lp->lockname.ln_number, | ||
| 80 | lp->flags); | ||
| 81 | return; | ||
| 82 | } | ||
| 83 | |||
| 84 | lp->cur = DLM_LOCK_IV; | ||
| 85 | lp->req = DLM_LOCK_IV; | ||
| 86 | lp->lksb.sb_lkid = 0; | ||
| 87 | |||
| 88 | if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) { | ||
| 89 | gdlm_delete_lp(lp); | ||
| 90 | return; | ||
| 91 | } | ||
| 92 | goto out; | ||
| 93 | } | ||
| 94 | |||
| 95 | if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID) | ||
| 96 | memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); | ||
| 97 | |||
| 98 | if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) { | ||
| 99 | if (lp->req == DLM_LOCK_PR) | ||
| 100 | lp->req = DLM_LOCK_CW; | ||
| 101 | else if (lp->req == DLM_LOCK_CW) | ||
| 102 | lp->req = DLM_LOCK_PR; | ||
| 103 | } | ||
| 104 | |||
| 105 | /* | ||
| 106 | * A canceled lock request. The lock was just taken off the delayed | ||
| 107 | * list and was never even submitted to dlm. | ||
| 108 | */ | ||
| 109 | |||
| 110 | if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { | ||
| 111 | log_info("complete internal cancel %x,%llx", | ||
| 112 | lp->lockname.ln_type, | ||
| 113 | (unsigned long long)lp->lockname.ln_number); | ||
| 114 | lp->req = lp->cur; | ||
| 115 | acb.lc_ret |= LM_OUT_CANCELED; | ||
| 116 | goto out; | ||
| 117 | } | ||
| 118 | |||
| 119 | /* | ||
| 120 | * An error occured. | ||
| 121 | */ | ||
| 122 | |||
| 123 | if (lp->lksb.sb_status) { | ||
| 124 | /* a "normal" error */ | ||
| 125 | if ((lp->lksb.sb_status == -EAGAIN) && | ||
| 126 | (lp->lkf & DLM_LKF_NOQUEUE)) { | ||
| 127 | lp->req = lp->cur; | ||
| 128 | if (lp->cur == DLM_LOCK_IV) | ||
| 129 | lp->lksb.sb_lkid = 0; | ||
| 130 | goto out; | ||
| 131 | } | ||
| 132 | |||
| 133 | /* this could only happen with cancels I think */ | ||
| 134 | log_info("ast sb_status %d %x,%llx flags %lx", | ||
| 135 | lp->lksb.sb_status, lp->lockname.ln_type, | ||
| 136 | (unsigned long long)lp->lockname.ln_number, | ||
| 137 | lp->flags); | ||
| 138 | if (lp->lksb.sb_status == -EDEADLOCK && | ||
| 139 | lp->ls->fsflags & LM_MFLAG_CONV_NODROP) { | ||
| 140 | lp->req = lp->cur; | ||
| 141 | acb.lc_ret |= LM_OUT_CONV_DEADLK; | ||
| 142 | if (lp->cur == DLM_LOCK_IV) | ||
| 143 | lp->lksb.sb_lkid = 0; | ||
| 144 | goto out; | ||
| 145 | } else | ||
| 146 | return; | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * This is an AST for an EX->EX conversion for sync_lvb from GFS. | ||
| 151 | */ | ||
| 152 | |||
| 153 | if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { | ||
| 154 | wake_up_ast(lp); | ||
| 155 | return; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* | ||
| 159 | * A lock has been demoted to NL because it initially completed during | ||
| 160 | * BLOCK_LOCKS. Now it must be requested in the originally requested | ||
| 161 | * mode. | ||
| 162 | */ | ||
| 163 | |||
| 164 | if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) { | ||
| 165 | gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx", | ||
| 166 | lp->lockname.ln_type, | ||
| 167 | (unsigned long long)lp->lockname.ln_number); | ||
| 168 | gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx", | ||
| 169 | lp->lockname.ln_type, | ||
| 170 | (unsigned long long)lp->lockname.ln_number); | ||
| 171 | |||
| 172 | lp->cur = DLM_LOCK_NL; | ||
| 173 | lp->req = lp->prev_req; | ||
| 174 | lp->prev_req = DLM_LOCK_IV; | ||
| 175 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 176 | |||
| 177 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 178 | |||
| 179 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 180 | !test_bit(LFL_NOBLOCK, &lp->flags)) | ||
| 181 | gdlm_queue_delayed(lp); | ||
| 182 | else | ||
| 183 | queue_submit(lp); | ||
| 184 | return; | ||
| 185 | } | ||
| 186 | |||
| 187 | /* | ||
| 188 | * A request is granted during dlm recovery. It may be granted | ||
| 189 | * because the locks of a failed node were cleared. In that case, | ||
| 190 | * there may be inconsistent data beneath this lock and we must wait | ||
| 191 | * for recovery to complete to use it. When gfs recovery is done this | ||
| 192 | * granted lock will be converted to NL and then reacquired in this | ||
| 193 | * granted state. | ||
| 194 | */ | ||
| 195 | |||
| 196 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) && | ||
| 197 | !test_bit(LFL_NOBLOCK, &lp->flags) && | ||
| 198 | lp->req != DLM_LOCK_NL) { | ||
| 199 | |||
| 200 | lp->cur = lp->req; | ||
| 201 | lp->prev_req = lp->req; | ||
| 202 | lp->req = DLM_LOCK_NL; | ||
| 203 | lp->lkf |= DLM_LKF_CONVERT; | ||
| 204 | lp->lkf &= ~DLM_LKF_CONVDEADLK; | ||
| 205 | |||
| 206 | log_debug("rereq %x,%llx id %x %d,%d", | ||
| 207 | lp->lockname.ln_type, | ||
| 208 | (unsigned long long)lp->lockname.ln_number, | ||
| 209 | lp->lksb.sb_lkid, lp->cur, lp->req); | ||
| 210 | |||
| 211 | set_bit(LFL_REREQUEST, &lp->flags); | ||
| 212 | queue_submit(lp); | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | |||
| 216 | /* | ||
| 217 | * DLM demoted the lock to NL before it was granted so GFS must be | ||
| 218 | * told it cannot cache data for this lock. | ||
| 219 | */ | ||
| 220 | |||
| 221 | if (lp->lksb.sb_flags & DLM_SBF_DEMOTED) | ||
| 222 | set_bit(LFL_NOCACHE, &lp->flags); | ||
| 223 | |||
| 224 | out: | ||
| 225 | /* | ||
| 226 | * This is an internal lock_dlm lock | ||
| 227 | */ | ||
| 228 | |||
| 229 | if (test_bit(LFL_INLOCK, &lp->flags)) { | ||
| 230 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 231 | lp->cur = lp->req; | ||
| 232 | wake_up_ast(lp); | ||
| 233 | return; | ||
| 234 | } | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Normal completion of a lock request. Tell GFS it now has the lock. | ||
| 238 | */ | ||
| 239 | |||
| 240 | clear_bit(LFL_NOBLOCK, &lp->flags); | ||
| 241 | lp->cur = lp->req; | ||
| 242 | |||
| 243 | acb.lc_name = lp->lockname; | ||
| 244 | acb.lc_ret |= gdlm_make_lmstate(lp->cur); | ||
| 245 | |||
| 246 | if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) && | ||
| 247 | (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL)) | ||
| 248 | acb.lc_ret |= LM_OUT_CACHEABLE; | ||
| 249 | |||
| 250 | ls->fscb(ls->sdp, LM_CB_ASYNC, &acb); | ||
| 251 | } | ||
| 252 | |||
| 253 | static inline int no_work(struct gdlm_ls *ls, int blocking) | ||
| 254 | { | 13 | { |
| 255 | int ret; | 14 | int ret; |
| 256 | 15 | ||
| 257 | spin_lock(&ls->async_lock); | 16 | spin_lock(&ls->async_lock); |
| 258 | ret = list_empty(&ls->complete) && list_empty(&ls->submit); | 17 | ret = list_empty(&ls->submit); |
| 259 | if (ret && blocking) | ||
| 260 | ret = list_empty(&ls->blocking); | ||
| 261 | spin_unlock(&ls->async_lock); | 18 | spin_unlock(&ls->async_lock); |
| 262 | 19 | ||
| 263 | return ret; | 20 | return ret; |
| 264 | } | 21 | } |
| 265 | 22 | ||
| 266 | static inline int check_drop(struct gdlm_ls *ls) | 23 | static int gdlm_thread(void *data) |
| 267 | { | ||
| 268 | if (!ls->drop_locks_count) | ||
| 269 | return 0; | ||
| 270 | |||
| 271 | if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) { | ||
| 272 | ls->drop_time = jiffies; | ||
| 273 | if (ls->all_locks_count >= ls->drop_locks_count) | ||
| 274 | return 1; | ||
| 275 | } | ||
| 276 | return 0; | ||
| 277 | } | ||
| 278 | |||
| 279 | static int gdlm_thread(void *data, int blist) | ||
| 280 | { | 24 | { |
| 281 | struct gdlm_ls *ls = (struct gdlm_ls *) data; | 25 | struct gdlm_ls *ls = (struct gdlm_ls *) data; |
| 282 | struct gdlm_lock *lp = NULL; | 26 | struct gdlm_lock *lp = NULL; |
| 283 | uint8_t complete, blocking, submit, drop; | ||
| 284 | |||
| 285 | /* Only thread1 is allowed to do blocking callbacks since gfs | ||
| 286 | may wait for a completion callback within a blocking cb. */ | ||
| 287 | 27 | ||
| 288 | while (!kthread_should_stop()) { | 28 | while (!kthread_should_stop()) { |
| 289 | wait_event_interruptible(ls->thread_wait, | 29 | wait_event_interruptible(ls->thread_wait, |
| 290 | !no_work(ls, blist) || kthread_should_stop()); | 30 | !no_work(ls) || kthread_should_stop()); |
| 291 | |||
| 292 | complete = blocking = submit = drop = 0; | ||
| 293 | 31 | ||
| 294 | spin_lock(&ls->async_lock); | 32 | spin_lock(&ls->async_lock); |
| 295 | 33 | ||
| 296 | if (blist && !list_empty(&ls->blocking)) { | 34 | if (!list_empty(&ls->submit)) { |
| 297 | lp = list_entry(ls->blocking.next, struct gdlm_lock, | ||
| 298 | blist); | ||
| 299 | list_del_init(&lp->blist); | ||
| 300 | blocking = lp->bast_mode; | ||
| 301 | lp->bast_mode = 0; | ||
| 302 | } else if (!list_empty(&ls->complete)) { | ||
| 303 | lp = list_entry(ls->complete.next, struct gdlm_lock, | ||
| 304 | clist); | ||
| 305 | list_del_init(&lp->clist); | ||
| 306 | complete = 1; | ||
| 307 | } else if (!list_empty(&ls->submit)) { | ||
| 308 | lp = list_entry(ls->submit.next, struct gdlm_lock, | 35 | lp = list_entry(ls->submit.next, struct gdlm_lock, |
| 309 | delay_list); | 36 | delay_list); |
| 310 | list_del_init(&lp->delay_list); | 37 | list_del_init(&lp->delay_list); |
| 311 | submit = 1; | 38 | spin_unlock(&ls->async_lock); |
| 39 | gdlm_do_lock(lp); | ||
| 40 | spin_lock(&ls->async_lock); | ||
| 312 | } | 41 | } |
| 313 | |||
| 314 | drop = check_drop(ls); | ||
| 315 | spin_unlock(&ls->async_lock); | 42 | spin_unlock(&ls->async_lock); |
| 316 | |||
| 317 | if (complete) | ||
| 318 | process_complete(lp); | ||
| 319 | |||
| 320 | else if (blocking) | ||
| 321 | process_blocking(lp, blocking); | ||
| 322 | |||
| 323 | else if (submit) | ||
| 324 | gdlm_do_lock(lp); | ||
| 325 | |||
| 326 | if (drop) | ||
| 327 | ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL); | ||
| 328 | |||
| 329 | schedule(); | ||
| 330 | } | 43 | } |
| 331 | 44 | ||
| 332 | return 0; | 45 | return 0; |
| 333 | } | 46 | } |
| 334 | 47 | ||
| 335 | static int gdlm_thread1(void *data) | ||
| 336 | { | ||
| 337 | return gdlm_thread(data, 1); | ||
| 338 | } | ||
| 339 | |||
| 340 | static int gdlm_thread2(void *data) | ||
| 341 | { | ||
| 342 | return gdlm_thread(data, 0); | ||
| 343 | } | ||
| 344 | |||
| 345 | int gdlm_init_threads(struct gdlm_ls *ls) | 48 | int gdlm_init_threads(struct gdlm_ls *ls) |
| 346 | { | 49 | { |
| 347 | struct task_struct *p; | 50 | struct task_struct *p; |
| 348 | int error; | 51 | int error; |
| 349 | 52 | ||
| 350 | p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); | 53 | p = kthread_run(gdlm_thread, ls, "lock_dlm"); |
| 351 | error = IS_ERR(p); | ||
| 352 | if (error) { | ||
| 353 | log_error("can't start lock_dlm1 thread %d", error); | ||
| 354 | return error; | ||
| 355 | } | ||
| 356 | ls->thread1 = p; | ||
| 357 | |||
| 358 | p = kthread_run(gdlm_thread2, ls, "lock_dlm2"); | ||
| 359 | error = IS_ERR(p); | 54 | error = IS_ERR(p); |
| 360 | if (error) { | 55 | if (error) { |
| 361 | log_error("can't start lock_dlm2 thread %d", error); | 56 | log_error("can't start lock_dlm thread %d", error); |
| 362 | kthread_stop(ls->thread1); | ||
| 363 | return error; | 57 | return error; |
| 364 | } | 58 | } |
| 365 | ls->thread2 = p; | 59 | ls->thread = p; |
| 366 | 60 | ||
| 367 | return 0; | 61 | return 0; |
| 368 | } | 62 | } |
| 369 | 63 | ||
| 370 | void gdlm_release_threads(struct gdlm_ls *ls) | 64 | void gdlm_release_threads(struct gdlm_ls *ls) |
| 371 | { | 65 | { |
| 372 | kthread_stop(ls->thread1); | 66 | kthread_stop(ls->thread); |
| 373 | kthread_stop(ls->thread2); | ||
| 374 | } | 67 | } |
| 375 | 68 | ||
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile deleted file mode 100644 index 35e9730bc3a8..000000000000 --- a/fs/gfs2/locking/nolock/Makefile +++ /dev/null | |||
| @@ -1,3 +0,0 @@ | |||
| 1 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o | ||
| 2 | lock_nolock-y := main.o | ||
| 3 | |||
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c deleted file mode 100644 index 284a5ece8d94..000000000000 --- a/fs/gfs2/locking/nolock/main.c +++ /dev/null | |||
| @@ -1,238 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/init.h> | ||
| 13 | #include <linux/types.h> | ||
| 14 | #include <linux/fs.h> | ||
| 15 | #include <linux/lm_interface.h> | ||
| 16 | |||
| 17 | struct nolock_lockspace { | ||
| 18 | unsigned int nl_lvb_size; | ||
| 19 | }; | ||
| 20 | |||
| 21 | static const struct lm_lockops nolock_ops; | ||
| 22 | |||
| 23 | static int nolock_mount(char *table_name, char *host_data, | ||
| 24 | lm_callback_t cb, void *cb_data, | ||
| 25 | unsigned int min_lvb_size, int flags, | ||
| 26 | struct lm_lockstruct *lockstruct, | ||
| 27 | struct kobject *fskobj) | ||
| 28 | { | ||
| 29 | char *c; | ||
| 30 | unsigned int jid; | ||
| 31 | struct nolock_lockspace *nl; | ||
| 32 | |||
| 33 | c = strstr(host_data, "jid="); | ||
| 34 | if (!c) | ||
| 35 | jid = 0; | ||
| 36 | else { | ||
| 37 | c += 4; | ||
| 38 | sscanf(c, "%u", &jid); | ||
| 39 | } | ||
| 40 | |||
| 41 | nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL); | ||
| 42 | if (!nl) | ||
| 43 | return -ENOMEM; | ||
| 44 | |||
| 45 | nl->nl_lvb_size = min_lvb_size; | ||
| 46 | |||
| 47 | lockstruct->ls_jid = jid; | ||
| 48 | lockstruct->ls_first = 1; | ||
| 49 | lockstruct->ls_lvb_size = min_lvb_size; | ||
| 50 | lockstruct->ls_lockspace = nl; | ||
| 51 | lockstruct->ls_ops = &nolock_ops; | ||
| 52 | lockstruct->ls_flags = LM_LSFLAG_LOCAL; | ||
| 53 | |||
| 54 | return 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | static void nolock_others_may_mount(void *lockspace) | ||
| 58 | { | ||
| 59 | } | ||
| 60 | |||
| 61 | static void nolock_unmount(void *lockspace) | ||
| 62 | { | ||
| 63 | struct nolock_lockspace *nl = lockspace; | ||
| 64 | kfree(nl); | ||
| 65 | } | ||
| 66 | |||
| 67 | static void nolock_withdraw(void *lockspace) | ||
| 68 | { | ||
| 69 | } | ||
| 70 | |||
| 71 | /** | ||
| 72 | * nolock_get_lock - get a lm_lock_t given a descripton of the lock | ||
| 73 | * @lockspace: the lockspace the lock lives in | ||
| 74 | * @name: the name of the lock | ||
| 75 | * @lockp: return the lm_lock_t here | ||
| 76 | * | ||
| 77 | * Returns: 0 on success, -EXXX on failure | ||
| 78 | */ | ||
| 79 | |||
| 80 | static int nolock_get_lock(void *lockspace, struct lm_lockname *name, | ||
| 81 | void **lockp) | ||
| 82 | { | ||
| 83 | *lockp = lockspace; | ||
| 84 | return 0; | ||
| 85 | } | ||
| 86 | |||
| 87 | /** | ||
| 88 | * nolock_put_lock - get rid of a lock structure | ||
| 89 | * @lock: the lock to throw away | ||
| 90 | * | ||
| 91 | */ | ||
| 92 | |||
| 93 | static void nolock_put_lock(void *lock) | ||
| 94 | { | ||
| 95 | } | ||
| 96 | |||
| 97 | /** | ||
| 98 | * nolock_lock - acquire a lock | ||
| 99 | * @lock: the lock to manipulate | ||
| 100 | * @cur_state: the current state | ||
| 101 | * @req_state: the requested state | ||
| 102 | * @flags: modifier flags | ||
| 103 | * | ||
| 104 | * Returns: A bitmap of LM_OUT_* | ||
| 105 | */ | ||
| 106 | |||
| 107 | static unsigned int nolock_lock(void *lock, unsigned int cur_state, | ||
| 108 | unsigned int req_state, unsigned int flags) | ||
| 109 | { | ||
| 110 | return req_state | LM_OUT_CACHEABLE; | ||
| 111 | } | ||
| 112 | |||
| 113 | /** | ||
| 114 | * nolock_unlock - unlock a lock | ||
| 115 | * @lock: the lock to manipulate | ||
| 116 | * @cur_state: the current state | ||
| 117 | * | ||
| 118 | * Returns: 0 | ||
| 119 | */ | ||
| 120 | |||
| 121 | static unsigned int nolock_unlock(void *lock, unsigned int cur_state) | ||
| 122 | { | ||
| 123 | return 0; | ||
| 124 | } | ||
| 125 | |||
| 126 | static void nolock_cancel(void *lock) | ||
| 127 | { | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 131 | * nolock_hold_lvb - hold on to a lock value block | ||
| 132 | * @lock: the lock the LVB is associated with | ||
| 133 | * @lvbp: return the lm_lvb_t here | ||
| 134 | * | ||
| 135 | * Returns: 0 on success, -EXXX on failure | ||
| 136 | */ | ||
| 137 | |||
| 138 | static int nolock_hold_lvb(void *lock, char **lvbp) | ||
| 139 | { | ||
| 140 | struct nolock_lockspace *nl = lock; | ||
| 141 | int error = 0; | ||
| 142 | |||
| 143 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS); | ||
| 144 | if (!*lvbp) | ||
| 145 | error = -ENOMEM; | ||
| 146 | |||
| 147 | return error; | ||
| 148 | } | ||
| 149 | |||
| 150 | /** | ||
| 151 | * nolock_unhold_lvb - release a LVB | ||
| 152 | * @lock: the lock the LVB is associated with | ||
| 153 | * @lvb: the lock value block | ||
| 154 | * | ||
| 155 | */ | ||
| 156 | |||
| 157 | static void nolock_unhold_lvb(void *lock, char *lvb) | ||
| 158 | { | ||
| 159 | kfree(lvb); | ||
| 160 | } | ||
| 161 | |||
| 162 | static int nolock_plock_get(void *lockspace, struct lm_lockname *name, | ||
| 163 | struct file *file, struct file_lock *fl) | ||
| 164 | { | ||
| 165 | posix_test_lock(file, fl); | ||
| 166 | |||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 170 | static int nolock_plock(void *lockspace, struct lm_lockname *name, | ||
| 171 | struct file *file, int cmd, struct file_lock *fl) | ||
| 172 | { | ||
| 173 | int error; | ||
| 174 | error = posix_lock_file_wait(file, fl); | ||
| 175 | return error; | ||
| 176 | } | ||
| 177 | |||
| 178 | static int nolock_punlock(void *lockspace, struct lm_lockname *name, | ||
| 179 | struct file *file, struct file_lock *fl) | ||
| 180 | { | ||
| 181 | int error; | ||
| 182 | error = posix_lock_file_wait(file, fl); | ||
| 183 | return error; | ||
| 184 | } | ||
| 185 | |||
| 186 | static void nolock_recovery_done(void *lockspace, unsigned int jid, | ||
| 187 | unsigned int message) | ||
| 188 | { | ||
| 189 | } | ||
| 190 | |||
| 191 | static const struct lm_lockops nolock_ops = { | ||
| 192 | .lm_proto_name = "lock_nolock", | ||
| 193 | .lm_mount = nolock_mount, | ||
| 194 | .lm_others_may_mount = nolock_others_may_mount, | ||
| 195 | .lm_unmount = nolock_unmount, | ||
| 196 | .lm_withdraw = nolock_withdraw, | ||
| 197 | .lm_get_lock = nolock_get_lock, | ||
| 198 | .lm_put_lock = nolock_put_lock, | ||
| 199 | .lm_lock = nolock_lock, | ||
| 200 | .lm_unlock = nolock_unlock, | ||
| 201 | .lm_cancel = nolock_cancel, | ||
| 202 | .lm_hold_lvb = nolock_hold_lvb, | ||
| 203 | .lm_unhold_lvb = nolock_unhold_lvb, | ||
| 204 | .lm_plock_get = nolock_plock_get, | ||
| 205 | .lm_plock = nolock_plock, | ||
| 206 | .lm_punlock = nolock_punlock, | ||
| 207 | .lm_recovery_done = nolock_recovery_done, | ||
| 208 | .lm_owner = THIS_MODULE, | ||
| 209 | }; | ||
| 210 | |||
| 211 | static int __init init_nolock(void) | ||
| 212 | { | ||
| 213 | int error; | ||
| 214 | |||
| 215 | error = gfs2_register_lockproto(&nolock_ops); | ||
| 216 | if (error) { | ||
| 217 | printk(KERN_WARNING | ||
| 218 | "lock_nolock: can't register protocol: %d\n", error); | ||
| 219 | return error; | ||
| 220 | } | ||
| 221 | |||
| 222 | printk(KERN_INFO | ||
| 223 | "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__); | ||
| 224 | return 0; | ||
| 225 | } | ||
| 226 | |||
| 227 | static void __exit exit_nolock(void) | ||
| 228 | { | ||
| 229 | gfs2_unregister_lockproto(&nolock_ops); | ||
| 230 | } | ||
| 231 | |||
| 232 | module_init(init_nolock); | ||
| 233 | module_exit(exit_nolock); | ||
| 234 | |||
| 235 | MODULE_DESCRIPTION("GFS Nolock Locking Module"); | ||
| 236 | MODULE_AUTHOR("Red Hat, Inc."); | ||
| 237 | MODULE_LICENSE("GPL"); | ||
| 238 | |||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 548264b1836d..6c6af9f5e3ab 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
| @@ -87,6 +87,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) | |||
| 87 | */ | 87 | */ |
| 88 | 88 | ||
| 89 | static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | 89 | static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) |
| 90 | __releases(&sdp->sd_log_lock) | ||
| 91 | __acquires(&sdp->sd_log_lock) | ||
| 90 | { | 92 | { |
| 91 | struct gfs2_bufdata *bd, *s; | 93 | struct gfs2_bufdata *bd, *s; |
| 92 | struct buffer_head *bh; | 94 | struct buffer_head *bh; |
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 771152816508..7c64510ccfd2 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | static inline void gfs2_log_lock(struct gfs2_sbd *sdp) | 23 | static inline void gfs2_log_lock(struct gfs2_sbd *sdp) |
| 24 | __acquires(&sdp->sd_log_lock) | ||
| 24 | { | 25 | { |
| 25 | spin_lock(&sdp->sd_log_lock); | 26 | spin_lock(&sdp->sd_log_lock); |
| 26 | } | 27 | } |
| @@ -32,6 +33,7 @@ static inline void gfs2_log_lock(struct gfs2_sbd *sdp) | |||
| 32 | */ | 33 | */ |
| 33 | 34 | ||
| 34 | static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) | 35 | static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) |
| 36 | __releases(&sdp->sd_log_lock) | ||
| 35 | { | 37 | { |
| 36 | spin_unlock(&sdp->sd_log_lock); | 38 | spin_unlock(&sdp->sd_log_lock); |
| 37 | } | 39 | } |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 053e2ebbbd50..bcc668d0fadd 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -40,8 +40,6 @@ static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) | |||
| 40 | INIT_HLIST_NODE(&gl->gl_list); | 40 | INIT_HLIST_NODE(&gl->gl_list); |
| 41 | spin_lock_init(&gl->gl_spin); | 41 | spin_lock_init(&gl->gl_spin); |
| 42 | INIT_LIST_HEAD(&gl->gl_holders); | 42 | INIT_LIST_HEAD(&gl->gl_holders); |
| 43 | INIT_LIST_HEAD(&gl->gl_waiters1); | ||
| 44 | INIT_LIST_HEAD(&gl->gl_waiters3); | ||
| 45 | gl->gl_lvb = NULL; | 43 | gl->gl_lvb = NULL; |
| 46 | atomic_set(&gl->gl_lvb_count, 0); | 44 | atomic_set(&gl->gl_lvb_count, 0); |
| 47 | INIT_LIST_HEAD(&gl->gl_reclaim); | 45 | INIT_LIST_HEAD(&gl->gl_reclaim); |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 78d75f892f82..09853620c951 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
| @@ -129,7 +129,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) | |||
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | /** | 131 | /** |
| 132 | * getbuf - Get a buffer with a given address space | 132 | * gfs2_getbuf - Get a buffer with a given address space |
| 133 | * @gl: the glock | 133 | * @gl: the glock |
| 134 | * @blkno: the block number (filesystem scope) | 134 | * @blkno: the block number (filesystem scope) |
| 135 | * @create: 1 if the buffer should be created | 135 | * @create: 1 if the buffer should be created |
| @@ -137,7 +137,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) | |||
| 137 | * Returns: the buffer | 137 | * Returns: the buffer |
| 138 | */ | 138 | */ |
| 139 | 139 | ||
| 140 | static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create) | 140 | struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) |
| 141 | { | 141 | { |
| 142 | struct address_space *mapping = gl->gl_aspace->i_mapping; | 142 | struct address_space *mapping = gl->gl_aspace->i_mapping; |
| 143 | struct gfs2_sbd *sdp = gl->gl_sbd; | 143 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| @@ -205,7 +205,7 @@ static void meta_prep_new(struct buffer_head *bh) | |||
| 205 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | 205 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) |
| 206 | { | 206 | { |
| 207 | struct buffer_head *bh; | 207 | struct buffer_head *bh; |
| 208 | bh = getbuf(gl, blkno, CREATE); | 208 | bh = gfs2_getbuf(gl, blkno, CREATE); |
| 209 | meta_prep_new(bh); | 209 | meta_prep_new(bh); |
| 210 | return bh; | 210 | return bh; |
| 211 | } | 211 | } |
| @@ -223,7 +223,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | |||
| 223 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | 223 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, |
| 224 | struct buffer_head **bhp) | 224 | struct buffer_head **bhp) |
| 225 | { | 225 | { |
| 226 | *bhp = getbuf(gl, blkno, CREATE); | 226 | *bhp = gfs2_getbuf(gl, blkno, CREATE); |
| 227 | if (!buffer_uptodate(*bhp)) { | 227 | if (!buffer_uptodate(*bhp)) { |
| 228 | ll_rw_block(READ_META, 1, bhp); | 228 | ll_rw_block(READ_META, 1, bhp); |
| 229 | if (flags & DIO_WAIT) { | 229 | if (flags & DIO_WAIT) { |
| @@ -346,7 +346,7 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
| 346 | struct buffer_head *bh; | 346 | struct buffer_head *bh; |
| 347 | 347 | ||
| 348 | while (blen) { | 348 | while (blen) { |
| 349 | bh = getbuf(ip->i_gl, bstart, NO_CREATE); | 349 | bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE); |
| 350 | if (bh) { | 350 | if (bh) { |
| 351 | lock_buffer(bh); | 351 | lock_buffer(bh); |
| 352 | gfs2_log_lock(sdp); | 352 | gfs2_log_lock(sdp); |
| @@ -421,7 +421,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
| 421 | if (extlen > max_ra) | 421 | if (extlen > max_ra) |
| 422 | extlen = max_ra; | 422 | extlen = max_ra; |
| 423 | 423 | ||
| 424 | first_bh = getbuf(gl, dblock, CREATE); | 424 | first_bh = gfs2_getbuf(gl, dblock, CREATE); |
| 425 | 425 | ||
| 426 | if (buffer_uptodate(first_bh)) | 426 | if (buffer_uptodate(first_bh)) |
| 427 | goto out; | 427 | goto out; |
| @@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
| 432 | extlen--; | 432 | extlen--; |
| 433 | 433 | ||
| 434 | while (extlen) { | 434 | while (extlen) { |
| 435 | bh = getbuf(gl, dblock, CREATE); | 435 | bh = gfs2_getbuf(gl, dblock, CREATE); |
| 436 | 436 | ||
| 437 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) | 437 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) |
| 438 | ll_rw_block(READA, 1, &bh); | 438 | ll_rw_block(READA, 1, &bh); |
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 73e3b1c76fe1..b1a5f3674d43 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h | |||
| @@ -47,6 +47,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); | |||
| 47 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, | 47 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, |
| 48 | int flags, struct buffer_head **bhp); | 48 | int flags, struct buffer_head **bhp); |
| 49 | int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); | 49 | int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); |
| 50 | struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); | ||
| 50 | 51 | ||
| 51 | void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, | 52 | void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, |
| 52 | int meta); | 53 | int meta); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index f55394e57cb2..e64a1b04117a 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
| @@ -499,34 +499,34 @@ static int __gfs2_readpage(void *file, struct page *page) | |||
| 499 | * @file: The file to read | 499 | * @file: The file to read |
| 500 | * @page: The page of the file | 500 | * @page: The page of the file |
| 501 | * | 501 | * |
| 502 | * This deals with the locking required. We use a trylock in order to | 502 | * This deals with the locking required. We have to unlock and |
| 503 | * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE | 503 | * relock the page in order to get the locking in the right |
| 504 | * in the event that we are unable to get the lock. | 504 | * order. |
| 505 | */ | 505 | */ |
| 506 | 506 | ||
| 507 | static int gfs2_readpage(struct file *file, struct page *page) | 507 | static int gfs2_readpage(struct file *file, struct page *page) |
| 508 | { | 508 | { |
| 509 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 509 | struct address_space *mapping = page->mapping; |
| 510 | struct gfs2_holder *gh; | 510 | struct gfs2_inode *ip = GFS2_I(mapping->host); |
| 511 | struct gfs2_holder gh; | ||
| 511 | int error; | 512 | int error; |
| 512 | 513 | ||
| 513 | gh = gfs2_glock_is_locked_by_me(ip->i_gl); | 514 | unlock_page(page); |
| 514 | if (!gh) { | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); |
| 515 | gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS); | 516 | error = gfs2_glock_nq_atime(&gh); |
| 516 | if (!gh) | 517 | if (unlikely(error)) |
| 517 | return -ENOBUFS; | 518 | goto out; |
| 518 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh); | 519 | error = AOP_TRUNCATED_PAGE; |
| 520 | lock_page(page); | ||
| 521 | if (page->mapping == mapping && !PageUptodate(page)) | ||
| 522 | error = __gfs2_readpage(file, page); | ||
| 523 | else | ||
| 519 | unlock_page(page); | 524 | unlock_page(page); |
| 520 | error = gfs2_glock_nq_atime(gh); | 525 | gfs2_glock_dq(&gh); |
| 521 | if (likely(error != 0)) | ||
| 522 | goto out; | ||
| 523 | return AOP_TRUNCATED_PAGE; | ||
| 524 | } | ||
| 525 | error = __gfs2_readpage(file, page); | ||
| 526 | gfs2_glock_dq(gh); | ||
| 527 | out: | 526 | out: |
| 528 | gfs2_holder_uninit(gh); | 527 | gfs2_holder_uninit(&gh); |
| 529 | kfree(gh); | 528 | if (error && error != AOP_TRUNCATED_PAGE) |
| 529 | lock_page(page); | ||
| 530 | return error; | 530 | return error; |
| 531 | } | 531 | } |
| 532 | 532 | ||
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e1b7d525a066..e9a366d4411c 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/uio.h> | 15 | #include <linux/uio.h> |
| 16 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
| 17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
| 18 | #include <linux/mount.h> | ||
| 18 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
| 19 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
| 20 | #include <linux/ext2_fs.h> | 21 | #include <linux/ext2_fs.h> |
| @@ -62,11 +63,11 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) | |||
| 62 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | 63 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
| 63 | &i_gh); | 64 | &i_gh); |
| 64 | if (!error) { | 65 | if (!error) { |
| 65 | error = remote_llseek(file, offset, origin); | 66 | error = generic_file_llseek_unlocked(file, offset, origin); |
| 66 | gfs2_glock_dq_uninit(&i_gh); | 67 | gfs2_glock_dq_uninit(&i_gh); |
| 67 | } | 68 | } |
| 68 | } else | 69 | } else |
| 69 | error = remote_llseek(file, offset, origin); | 70 | error = generic_file_llseek_unlocked(file, offset, origin); |
| 70 | 71 | ||
| 71 | return error; | 72 | return error; |
| 72 | } | 73 | } |
| @@ -133,7 +134,6 @@ static const u32 fsflags_to_gfs2[32] = { | |||
| 133 | [7] = GFS2_DIF_NOATIME, | 134 | [7] = GFS2_DIF_NOATIME, |
| 134 | [12] = GFS2_DIF_EXHASH, | 135 | [12] = GFS2_DIF_EXHASH, |
| 135 | [14] = GFS2_DIF_INHERIT_JDATA, | 136 | [14] = GFS2_DIF_INHERIT_JDATA, |
| 136 | [20] = GFS2_DIF_INHERIT_DIRECTIO, | ||
| 137 | }; | 137 | }; |
| 138 | 138 | ||
| 139 | static const u32 gfs2_to_fsflags[32] = { | 139 | static const u32 gfs2_to_fsflags[32] = { |
| @@ -142,7 +142,6 @@ static const u32 gfs2_to_fsflags[32] = { | |||
| 142 | [gfs2fl_AppendOnly] = FS_APPEND_FL, | 142 | [gfs2fl_AppendOnly] = FS_APPEND_FL, |
| 143 | [gfs2fl_NoAtime] = FS_NOATIME_FL, | 143 | [gfs2fl_NoAtime] = FS_NOATIME_FL, |
| 144 | [gfs2fl_ExHash] = FS_INDEX_FL, | 144 | [gfs2fl_ExHash] = FS_INDEX_FL, |
| 145 | [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL, | ||
| 146 | [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, | 145 | [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, |
| 147 | }; | 146 | }; |
| 148 | 147 | ||
| @@ -160,12 +159,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
| 160 | return error; | 159 | return error; |
| 161 | 160 | ||
| 162 | fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); | 161 | fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); |
| 163 | if (!S_ISDIR(inode->i_mode)) { | 162 | if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA) |
| 164 | if (ip->i_di.di_flags & GFS2_DIF_JDATA) | 163 | fsflags |= FS_JOURNAL_DATA_FL; |
| 165 | fsflags |= FS_JOURNAL_DATA_FL; | ||
| 166 | if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) | ||
| 167 | fsflags |= FS_DIRECTIO_FL; | ||
| 168 | } | ||
| 169 | if (put_user(fsflags, ptr)) | 164 | if (put_user(fsflags, ptr)) |
| 170 | error = -EFAULT; | 165 | error = -EFAULT; |
| 171 | 166 | ||
| @@ -194,13 +189,11 @@ void gfs2_set_inode_flags(struct inode *inode) | |||
| 194 | 189 | ||
| 195 | /* Flags that can be set by user space */ | 190 | /* Flags that can be set by user space */ |
| 196 | #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ | 191 | #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ |
| 197 | GFS2_DIF_DIRECTIO| \ | ||
| 198 | GFS2_DIF_IMMUTABLE| \ | 192 | GFS2_DIF_IMMUTABLE| \ |
| 199 | GFS2_DIF_APPENDONLY| \ | 193 | GFS2_DIF_APPENDONLY| \ |
| 200 | GFS2_DIF_NOATIME| \ | 194 | GFS2_DIF_NOATIME| \ |
| 201 | GFS2_DIF_SYNC| \ | 195 | GFS2_DIF_SYNC| \ |
| 202 | GFS2_DIF_SYSTEM| \ | 196 | GFS2_DIF_SYSTEM| \ |
| 203 | GFS2_DIF_INHERIT_DIRECTIO| \ | ||
| 204 | GFS2_DIF_INHERIT_JDATA) | 197 | GFS2_DIF_INHERIT_JDATA) |
| 205 | 198 | ||
| 206 | /** | 199 | /** |
| @@ -220,10 +213,14 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
| 220 | int error; | 213 | int error; |
| 221 | u32 new_flags, flags; | 214 | u32 new_flags, flags; |
| 222 | 215 | ||
| 223 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 216 | error = mnt_want_write(filp->f_path.mnt); |
| 224 | if (error) | 217 | if (error) |
| 225 | return error; | 218 | return error; |
| 226 | 219 | ||
| 220 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
| 221 | if (error) | ||
| 222 | goto out_drop_write; | ||
| 223 | |||
| 227 | flags = ip->i_di.di_flags; | 224 | flags = ip->i_di.di_flags; |
| 228 | new_flags = (flags & ~mask) | (reqflags & mask); | 225 | new_flags = (flags & ~mask) | (reqflags & mask); |
| 229 | if ((new_flags ^ flags) == 0) | 226 | if ((new_flags ^ flags) == 0) |
| @@ -242,7 +239,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
| 242 | !capable(CAP_LINUX_IMMUTABLE)) | 239 | !capable(CAP_LINUX_IMMUTABLE)) |
| 243 | goto out; | 240 | goto out; |
| 244 | if (!IS_IMMUTABLE(inode)) { | 241 | if (!IS_IMMUTABLE(inode)) { |
| 245 | error = permission(inode, MAY_WRITE, NULL); | 242 | error = gfs2_permission(inode, MAY_WRITE); |
| 246 | if (error) | 243 | if (error) |
| 247 | goto out; | 244 | goto out; |
| 248 | } | 245 | } |
| @@ -272,6 +269,8 @@ out_trans_end: | |||
| 272 | gfs2_trans_end(sdp); | 269 | gfs2_trans_end(sdp); |
| 273 | out: | 270 | out: |
| 274 | gfs2_glock_dq_uninit(&gh); | 271 | gfs2_glock_dq_uninit(&gh); |
| 272 | out_drop_write: | ||
| 273 | mnt_drop_write(filp->f_path.mnt); | ||
| 275 | return error; | 274 | return error; |
| 276 | } | 275 | } |
| 277 | 276 | ||
| @@ -285,8 +284,6 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) | |||
| 285 | if (!S_ISDIR(inode->i_mode)) { | 284 | if (!S_ISDIR(inode->i_mode)) { |
| 286 | if (gfsflags & GFS2_DIF_INHERIT_JDATA) | 285 | if (gfsflags & GFS2_DIF_INHERIT_JDATA) |
| 287 | gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); | 286 | gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); |
| 288 | if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO) | ||
| 289 | gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO); | ||
| 290 | return do_gfs2_set_flags(filp, gfsflags, ~0); | 287 | return do_gfs2_set_flags(filp, gfsflags, ~0); |
| 291 | } | 288 | } |
| 292 | return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); | 289 | return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); |
| @@ -487,11 +484,6 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
| 487 | goto fail_gunlock; | 484 | goto fail_gunlock; |
| 488 | } | 485 | } |
| 489 | 486 | ||
| 490 | /* Listen to the Direct I/O flag */ | ||
| 491 | |||
| 492 | if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) | ||
| 493 | file->f_flags |= O_DIRECT; | ||
| 494 | |||
| 495 | gfs2_glock_dq_uninit(&i_gh); | 487 | gfs2_glock_dq_uninit(&i_gh); |
| 496 | } | 488 | } |
| 497 | 489 | ||
| @@ -669,8 +661,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 669 | int error = 0; | 661 | int error = 0; |
| 670 | 662 | ||
| 671 | state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; | 663 | state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; |
| 672 | flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE | 664 | flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; |
| 673 | | GL_FLOCK; | ||
| 674 | 665 | ||
| 675 | mutex_lock(&fp->f_fl_mutex); | 666 | mutex_lock(&fp->f_fl_mutex); |
| 676 | 667 | ||
| @@ -683,9 +674,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 683 | gfs2_glock_dq_wait(fl_gh); | 674 | gfs2_glock_dq_wait(fl_gh); |
| 684 | gfs2_holder_reinit(state, flags, fl_gh); | 675 | gfs2_holder_reinit(state, flags, fl_gh); |
| 685 | } else { | 676 | } else { |
| 686 | error = gfs2_glock_get(GFS2_SB(&ip->i_inode), | 677 | error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, |
| 687 | ip->i_no_addr, &gfs2_flock_glops, | 678 | &gfs2_flock_glops, CREATE, &gl); |
| 688 | CREATE, &gl); | ||
| 689 | if (error) | 679 | if (error) |
| 690 | goto out; | 680 | goto out; |
| 691 | gfs2_holder_init(gl, state, flags, fl_gh); | 681 | gfs2_holder_init(gl, state, flags, fl_gh); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b2028c82e8d1..b4d1d6490633 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -64,7 +64,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 64 | mutex_init(&sdp->sd_rindex_mutex); | 64 | mutex_init(&sdp->sd_rindex_mutex); |
| 65 | INIT_LIST_HEAD(&sdp->sd_rindex_list); | 65 | INIT_LIST_HEAD(&sdp->sd_rindex_list); |
| 66 | INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); | 66 | INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); |
| 67 | INIT_LIST_HEAD(&sdp->sd_rindex_recent_list); | ||
| 68 | 67 | ||
| 69 | INIT_LIST_HEAD(&sdp->sd_jindex_list); | 68 | INIT_LIST_HEAD(&sdp->sd_jindex_list); |
| 70 | spin_lock_init(&sdp->sd_jindex_spin); | 69 | spin_lock_init(&sdp->sd_jindex_spin); |
| @@ -364,6 +363,8 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
| 364 | 363 | ||
| 365 | static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | 364 | static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) |
| 366 | { | 365 | { |
| 366 | if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount) | ||
| 367 | return; | ||
| 367 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 368 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 368 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | 369 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( |
| 369 | sdp->sd_lockstruct.ls_lockspace); | 370 | sdp->sd_lockstruct.ls_lockspace); |
| @@ -741,8 +742,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
| 741 | goto out; | 742 | goto out; |
| 742 | } | 743 | } |
| 743 | 744 | ||
| 744 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | 745 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || |
| 745 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
| 746 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | 746 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= |
| 747 | GFS2_MIN_LVB_SIZE)) { | 747 | GFS2_MIN_LVB_SIZE)) { |
| 748 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | 748 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); |
| @@ -873,7 +873,7 @@ fail_sb: | |||
| 873 | fail_locking: | 873 | fail_locking: |
| 874 | init_locking(sdp, &mount_gh, UNDO); | 874 | init_locking(sdp, &mount_gh, UNDO); |
| 875 | fail_lm: | 875 | fail_lm: |
| 876 | gfs2_gl_hash_clear(sdp, WAIT); | 876 | gfs2_gl_hash_clear(sdp); |
| 877 | gfs2_lm_unmount(sdp); | 877 | gfs2_lm_unmount(sdp); |
| 878 | while (invalidate_inodes(sb)) | 878 | while (invalidate_inodes(sb)) |
| 879 | yield(); | 879 | yield(); |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 2686ad4c0029..1e252dfc5294 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
| @@ -163,7 +163,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 163 | if (error) | 163 | if (error) |
| 164 | goto out; | 164 | goto out; |
| 165 | 165 | ||
| 166 | error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); | 166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
| 167 | if (error) | 167 | if (error) |
| 168 | goto out_gunlock; | 168 | goto out_gunlock; |
| 169 | 169 | ||
| @@ -669,7 +669,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 669 | } | 669 | } |
| 670 | } | 670 | } |
| 671 | } else { | 671 | } else { |
| 672 | error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL); | 672 | error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); |
| 673 | if (error) | 673 | if (error) |
| 674 | goto out_gunlock; | 674 | goto out_gunlock; |
| 675 | 675 | ||
| @@ -704,7 +704,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 704 | /* Check out the dir to be renamed */ | 704 | /* Check out the dir to be renamed */ |
| 705 | 705 | ||
| 706 | if (dir_rename) { | 706 | if (dir_rename) { |
| 707 | error = permission(odentry->d_inode, MAY_WRITE, NULL); | 707 | error = gfs2_permission(odentry->d_inode, MAY_WRITE); |
| 708 | if (error) | 708 | if (error) |
| 709 | goto out_gunlock; | 709 | goto out_gunlock; |
| 710 | } | 710 | } |
| @@ -891,7 +891,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 891 | * Returns: errno | 891 | * Returns: errno |
| 892 | */ | 892 | */ |
| 893 | 893 | ||
| 894 | static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | 894 | int gfs2_permission(struct inode *inode, int mask) |
| 895 | { | 895 | { |
| 896 | struct gfs2_inode *ip = GFS2_I(inode); | 896 | struct gfs2_inode *ip = GFS2_I(inode); |
| 897 | struct gfs2_holder i_gh; | 897 | struct gfs2_holder i_gh; |
| @@ -905,13 +905,22 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 905 | unlock = 1; | 905 | unlock = 1; |
| 906 | } | 906 | } |
| 907 | 907 | ||
| 908 | error = generic_permission(inode, mask, gfs2_check_acl); | 908 | if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) |
| 909 | error = -EACCES; | ||
| 910 | else | ||
| 911 | error = generic_permission(inode, mask, gfs2_check_acl); | ||
| 909 | if (unlock) | 912 | if (unlock) |
| 910 | gfs2_glock_dq_uninit(&i_gh); | 913 | gfs2_glock_dq_uninit(&i_gh); |
| 911 | 914 | ||
| 912 | return error; | 915 | return error; |
| 913 | } | 916 | } |
| 914 | 917 | ||
| 918 | static int gfs2_iop_permission(struct inode *inode, int mask, | ||
| 919 | struct nameidata *nd) | ||
| 920 | { | ||
| 921 | return gfs2_permission(inode, mask); | ||
| 922 | } | ||
| 923 | |||
| 915 | static int setattr_size(struct inode *inode, struct iattr *attr) | 924 | static int setattr_size(struct inode *inode, struct iattr *attr) |
| 916 | { | 925 | { |
| 917 | struct gfs2_inode *ip = GFS2_I(inode); | 926 | struct gfs2_inode *ip = GFS2_I(inode); |
| @@ -1141,7 +1150,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
| 1141 | } | 1150 | } |
| 1142 | 1151 | ||
| 1143 | const struct inode_operations gfs2_file_iops = { | 1152 | const struct inode_operations gfs2_file_iops = { |
| 1144 | .permission = gfs2_permission, | 1153 | .permission = gfs2_iop_permission, |
| 1145 | .setattr = gfs2_setattr, | 1154 | .setattr = gfs2_setattr, |
| 1146 | .getattr = gfs2_getattr, | 1155 | .getattr = gfs2_getattr, |
| 1147 | .setxattr = gfs2_setxattr, | 1156 | .setxattr = gfs2_setxattr, |
| @@ -1160,7 +1169,7 @@ const struct inode_operations gfs2_dir_iops = { | |||
| 1160 | .rmdir = gfs2_rmdir, | 1169 | .rmdir = gfs2_rmdir, |
| 1161 | .mknod = gfs2_mknod, | 1170 | .mknod = gfs2_mknod, |
| 1162 | .rename = gfs2_rename, | 1171 | .rename = gfs2_rename, |
| 1163 | .permission = gfs2_permission, | 1172 | .permission = gfs2_iop_permission, |
| 1164 | .setattr = gfs2_setattr, | 1173 | .setattr = gfs2_setattr, |
| 1165 | .getattr = gfs2_getattr, | 1174 | .getattr = gfs2_getattr, |
| 1166 | .setxattr = gfs2_setxattr, | 1175 | .setxattr = gfs2_setxattr, |
| @@ -1172,7 +1181,7 @@ const struct inode_operations gfs2_dir_iops = { | |||
| 1172 | const struct inode_operations gfs2_symlink_iops = { | 1181 | const struct inode_operations gfs2_symlink_iops = { |
| 1173 | .readlink = gfs2_readlink, | 1182 | .readlink = gfs2_readlink, |
| 1174 | .follow_link = gfs2_follow_link, | 1183 | .follow_link = gfs2_follow_link, |
| 1175 | .permission = gfs2_permission, | 1184 | .permission = gfs2_iop_permission, |
| 1176 | .setattr = gfs2_setattr, | 1185 | .setattr = gfs2_setattr, |
| 1177 | .getattr = gfs2_getattr, | 1186 | .getattr = gfs2_getattr, |
| 1178 | .setxattr = gfs2_setxattr, | 1187 | .setxattr = gfs2_setxattr, |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 0b7cc920eb89..f66ea0f7a356 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
| @@ -126,7 +126,7 @@ static void gfs2_put_super(struct super_block *sb) | |||
| 126 | gfs2_clear_rgrpd(sdp); | 126 | gfs2_clear_rgrpd(sdp); |
| 127 | gfs2_jindex_free(sdp); | 127 | gfs2_jindex_free(sdp); |
| 128 | /* Take apart glock structures and buffer lists */ | 128 | /* Take apart glock structures and buffer lists */ |
| 129 | gfs2_gl_hash_clear(sdp, WAIT); | 129 | gfs2_gl_hash_clear(sdp); |
| 130 | /* Unmount the locking protocol */ | 130 | /* Unmount the locking protocol */ |
| 131 | gfs2_lm_unmount(sdp); | 131 | gfs2_lm_unmount(sdp); |
| 132 | 132 | ||
| @@ -155,7 +155,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
| 155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 155 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
| 156 | { | 156 | { |
| 157 | sb->s_dirt = 0; | 157 | sb->s_dirt = 0; |
| 158 | if (wait) | 158 | if (wait && sb->s_fs_info) |
| 159 | gfs2_log_flush(sb->s_fs_info, NULL); | 159 | gfs2_log_flush(sb->s_fs_info, NULL); |
| 160 | return 0; | 160 | return 0; |
| 161 | } | 161 | } |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 56aaf915c59a..3e073f5144fa 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -904,7 +904,7 @@ static int need_sync(struct gfs2_quota_data *qd) | |||
| 904 | do_sync = 0; | 904 | do_sync = 0; |
| 905 | else { | 905 | else { |
| 906 | value *= gfs2_jindex_size(sdp) * num; | 906 | value *= gfs2_jindex_size(sdp) * num; |
| 907 | do_div(value, den); | 907 | value = div_s64(value, den); |
| 908 | value += (s64)be64_to_cpu(qd->qd_qb.qb_value); | 908 | value += (s64)be64_to_cpu(qd->qd_qb.qb_value); |
| 909 | if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) | 909 | if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) |
| 910 | do_sync = 0; | 910 | do_sync = 0; |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 2888e4b4b1c5..d5e91f4f6a0b 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
| @@ -428,6 +428,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea | |||
| 428 | static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | 428 | static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, |
| 429 | unsigned int message) | 429 | unsigned int message) |
| 430 | { | 430 | { |
| 431 | if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done) | ||
| 432 | return; | ||
| 433 | |||
| 431 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 434 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 432 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | 435 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( |
| 433 | sdp->sd_lockstruct.ls_lockspace, jid, message); | 436 | sdp->sd_lockstruct.ls_lockspace, jid, message); |
| @@ -505,7 +508,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) | |||
| 505 | 508 | ||
| 506 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, | 509 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, |
| 507 | LM_FLAG_NOEXP | LM_FLAG_PRIORITY | | 510 | LM_FLAG_NOEXP | LM_FLAG_PRIORITY | |
| 508 | GL_NOCANCEL | GL_NOCACHE, &t_gh); | 511 | GL_NOCACHE, &t_gh); |
| 509 | if (error) | 512 | if (error) |
| 510 | goto fail_gunlock_ji; | 513 | goto fail_gunlock_ji; |
| 511 | 514 | ||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6387523a3153..2d90fb253505 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -195,7 +195,7 @@ ulong_aligned: | |||
| 195 | depending on architecture. I've experimented with several ways | 195 | depending on architecture. I've experimented with several ways |
| 196 | of writing this section such as using an else before the goto | 196 | of writing this section such as using an else before the goto |
| 197 | but this one seems to be the fastest. */ | 197 | but this one seems to be the fastest. */ |
| 198 | while ((unsigned char *)plong < end - 1) { | 198 | while ((unsigned char *)plong < end - sizeof(unsigned long)) { |
| 199 | prefetch(plong + 1); | 199 | prefetch(plong + 1); |
| 200 | if (((*plong) & LBITMASK) != lskipval) | 200 | if (((*plong) & LBITMASK) != lskipval) |
| 201 | break; | 201 | break; |
| @@ -371,11 +371,6 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) | |||
| 371 | 371 | ||
| 372 | spin_lock(&sdp->sd_rindex_spin); | 372 | spin_lock(&sdp->sd_rindex_spin); |
| 373 | sdp->sd_rindex_forward = NULL; | 373 | sdp->sd_rindex_forward = NULL; |
| 374 | head = &sdp->sd_rindex_recent_list; | ||
| 375 | while (!list_empty(head)) { | ||
| 376 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); | ||
| 377 | list_del(&rgd->rd_recent); | ||
| 378 | } | ||
| 379 | spin_unlock(&sdp->sd_rindex_spin); | 374 | spin_unlock(&sdp->sd_rindex_spin); |
| 380 | 375 | ||
| 381 | head = &sdp->sd_rindex_list; | 376 | head = &sdp->sd_rindex_list; |
| @@ -945,107 +940,30 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
| 945 | } | 940 | } |
| 946 | 941 | ||
| 947 | /** | 942 | /** |
| 948 | * recent_rgrp_first - get first RG from "recent" list | ||
| 949 | * @sdp: The GFS2 superblock | ||
| 950 | * @rglast: address of the rgrp used last | ||
| 951 | * | ||
| 952 | * Returns: The first rgrp in the recent list | ||
| 953 | */ | ||
| 954 | |||
| 955 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, | ||
| 956 | u64 rglast) | ||
| 957 | { | ||
| 958 | struct gfs2_rgrpd *rgd; | ||
| 959 | |||
| 960 | spin_lock(&sdp->sd_rindex_spin); | ||
| 961 | |||
| 962 | if (rglast) { | ||
| 963 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
| 964 | if (rgrp_contains_block(rgd, rglast)) | ||
| 965 | goto out; | ||
| 966 | } | ||
| 967 | } | ||
| 968 | rgd = NULL; | ||
| 969 | if (!list_empty(&sdp->sd_rindex_recent_list)) | ||
| 970 | rgd = list_entry(sdp->sd_rindex_recent_list.next, | ||
| 971 | struct gfs2_rgrpd, rd_recent); | ||
| 972 | out: | ||
| 973 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 974 | return rgd; | ||
| 975 | } | ||
| 976 | |||
| 977 | /** | ||
| 978 | * recent_rgrp_next - get next RG from "recent" list | 943 | * recent_rgrp_next - get next RG from "recent" list |
| 979 | * @cur_rgd: current rgrp | 944 | * @cur_rgd: current rgrp |
| 980 | * @remove: | ||
| 981 | * | 945 | * |
| 982 | * Returns: The next rgrp in the recent list | 946 | * Returns: The next rgrp in the recent list |
| 983 | */ | 947 | */ |
| 984 | 948 | ||
| 985 | static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd, | 949 | static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd) |
| 986 | int remove) | ||
| 987 | { | 950 | { |
| 988 | struct gfs2_sbd *sdp = cur_rgd->rd_sbd; | 951 | struct gfs2_sbd *sdp = cur_rgd->rd_sbd; |
| 989 | struct list_head *head; | 952 | struct list_head *head; |
| 990 | struct gfs2_rgrpd *rgd; | 953 | struct gfs2_rgrpd *rgd; |
| 991 | 954 | ||
| 992 | spin_lock(&sdp->sd_rindex_spin); | 955 | spin_lock(&sdp->sd_rindex_spin); |
| 993 | 956 | head = &sdp->sd_rindex_mru_list; | |
| 994 | head = &sdp->sd_rindex_recent_list; | 957 | if (unlikely(cur_rgd->rd_list_mru.next == head)) { |
| 995 | 958 | spin_unlock(&sdp->sd_rindex_spin); | |
| 996 | list_for_each_entry(rgd, head, rd_recent) { | 959 | return NULL; |
| 997 | if (rgd == cur_rgd) { | ||
| 998 | if (cur_rgd->rd_recent.next != head) | ||
| 999 | rgd = list_entry(cur_rgd->rd_recent.next, | ||
| 1000 | struct gfs2_rgrpd, rd_recent); | ||
| 1001 | else | ||
| 1002 | rgd = NULL; | ||
| 1003 | |||
| 1004 | if (remove) | ||
| 1005 | list_del(&cur_rgd->rd_recent); | ||
| 1006 | |||
| 1007 | goto out; | ||
| 1008 | } | ||
| 1009 | } | 960 | } |
| 1010 | 961 | rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru); | |
| 1011 | rgd = NULL; | ||
| 1012 | if (!list_empty(head)) | ||
| 1013 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); | ||
| 1014 | |||
| 1015 | out: | ||
| 1016 | spin_unlock(&sdp->sd_rindex_spin); | 962 | spin_unlock(&sdp->sd_rindex_spin); |
| 1017 | return rgd; | 963 | return rgd; |
| 1018 | } | 964 | } |
| 1019 | 965 | ||
| 1020 | /** | 966 | /** |
| 1021 | * recent_rgrp_add - add an RG to tail of "recent" list | ||
| 1022 | * @new_rgd: The rgrp to add | ||
| 1023 | * | ||
| 1024 | */ | ||
| 1025 | |||
| 1026 | static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd) | ||
| 1027 | { | ||
| 1028 | struct gfs2_sbd *sdp = new_rgd->rd_sbd; | ||
| 1029 | struct gfs2_rgrpd *rgd; | ||
| 1030 | unsigned int count = 0; | ||
| 1031 | unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp); | ||
| 1032 | |||
| 1033 | spin_lock(&sdp->sd_rindex_spin); | ||
| 1034 | |||
| 1035 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
| 1036 | if (rgd == new_rgd) | ||
| 1037 | goto out; | ||
| 1038 | |||
| 1039 | if (++count >= max) | ||
| 1040 | goto out; | ||
| 1041 | } | ||
| 1042 | list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list); | ||
| 1043 | |||
| 1044 | out: | ||
| 1045 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | /** | ||
| 1049 | * forward_rgrp_get - get an rgrp to try next from full list | 967 | * forward_rgrp_get - get an rgrp to try next from full list |
| 1050 | * @sdp: The GFS2 superblock | 968 | * @sdp: The GFS2 superblock |
| 1051 | * | 969 | * |
| @@ -1112,9 +1030,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1112 | int loops = 0; | 1030 | int loops = 0; |
| 1113 | int error, rg_locked; | 1031 | int error, rg_locked; |
| 1114 | 1032 | ||
| 1115 | /* Try recently successful rgrps */ | 1033 | rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); |
| 1116 | |||
| 1117 | rgd = recent_rgrp_first(sdp, ip->i_goal); | ||
| 1118 | 1034 | ||
| 1119 | while (rgd) { | 1035 | while (rgd) { |
| 1120 | rg_locked = 0; | 1036 | rg_locked = 0; |
| @@ -1136,11 +1052,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1136 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1052 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
| 1137 | if (inode) | 1053 | if (inode) |
| 1138 | return inode; | 1054 | return inode; |
| 1139 | rgd = recent_rgrp_next(rgd, 1); | 1055 | /* fall through */ |
| 1140 | break; | ||
| 1141 | |||
| 1142 | case GLR_TRYFAILED: | 1056 | case GLR_TRYFAILED: |
| 1143 | rgd = recent_rgrp_next(rgd, 0); | 1057 | rgd = recent_rgrp_next(rgd); |
| 1144 | break; | 1058 | break; |
| 1145 | 1059 | ||
| 1146 | default: | 1060 | default: |
| @@ -1199,7 +1113,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1199 | 1113 | ||
| 1200 | out: | 1114 | out: |
| 1201 | if (begin) { | 1115 | if (begin) { |
| 1202 | recent_rgrp_add(rgd); | 1116 | spin_lock(&sdp->sd_rindex_spin); |
| 1117 | list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); | ||
| 1118 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 1203 | rgd = gfs2_rgrpd_get_next(rgd); | 1119 | rgd = gfs2_rgrpd_get_next(rgd); |
| 1204 | if (!rgd) | 1120 | if (!rgd) |
| 1205 | rgd = gfs2_rgrpd_get_first(sdp); | 1121 | rgd = gfs2_rgrpd_get_first(sdp); |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 7aeacbc65f35..63a8a902d9db 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -65,7 +65,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
| 65 | gt->gt_quota_quantum = 60; | 65 | gt->gt_quota_quantum = 60; |
| 66 | gt->gt_atime_quantum = 3600; | 66 | gt->gt_atime_quantum = 3600; |
| 67 | gt->gt_new_files_jdata = 0; | 67 | gt->gt_new_files_jdata = 0; |
| 68 | gt->gt_new_files_directio = 0; | ||
| 69 | gt->gt_max_readahead = 1 << 18; | 68 | gt->gt_max_readahead = 1 << 18; |
| 70 | gt->gt_stall_secs = 600; | 69 | gt->gt_stall_secs = 600; |
| 71 | gt->gt_complain_secs = 10; | 70 | gt->gt_complain_secs = 10; |
| @@ -941,8 +940,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, | |||
| 941 | } | 940 | } |
| 942 | 941 | ||
| 943 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, | 942 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, |
| 944 | LM_FLAG_PRIORITY | GL_NOCACHE, | 943 | GL_NOCACHE, t_gh); |
| 945 | t_gh); | ||
| 946 | 944 | ||
| 947 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | 945 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
| 948 | error = gfs2_jdesc_check(jd); | 946 | error = gfs2_jdesc_check(jd); |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9ab9fc85ecd0..74846559fc3f 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -110,18 +110,6 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, | |||
| 110 | return len; | 110 | return len; |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | ||
| 114 | { | ||
| 115 | if (!capable(CAP_SYS_ADMIN)) | ||
| 116 | return -EACCES; | ||
| 117 | |||
| 118 | if (simple_strtol(buf, NULL, 0) != 1) | ||
| 119 | return -EINVAL; | ||
| 120 | |||
| 121 | gfs2_gl_hash_clear(sdp, NO_WAIT); | ||
| 122 | return len; | ||
| 123 | } | ||
| 124 | |||
| 125 | static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, | 113 | static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, |
| 126 | size_t len) | 114 | size_t len) |
| 127 | { | 115 | { |
| @@ -175,7 +163,6 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) | |||
| 175 | GFS2_ATTR(id, 0444, id_show, NULL); | 163 | GFS2_ATTR(id, 0444, id_show, NULL); |
| 176 | GFS2_ATTR(fsname, 0444, fsname_show, NULL); | 164 | GFS2_ATTR(fsname, 0444, fsname_show, NULL); |
| 177 | GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); | 165 | GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); |
| 178 | GFS2_ATTR(shrink, 0200, NULL, shrink_store); | ||
| 179 | GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); | 166 | GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); |
| 180 | GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); | 167 | GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); |
| 181 | GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); | 168 | GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); |
| @@ -186,7 +173,6 @@ static struct attribute *gfs2_attrs[] = { | |||
| 186 | &gfs2_attr_id.attr, | 173 | &gfs2_attr_id.attr, |
| 187 | &gfs2_attr_fsname.attr, | 174 | &gfs2_attr_fsname.attr, |
| 188 | &gfs2_attr_freeze.attr, | 175 | &gfs2_attr_freeze.attr, |
| 189 | &gfs2_attr_shrink.attr, | ||
| 190 | &gfs2_attr_withdraw.attr, | 176 | &gfs2_attr_withdraw.attr, |
| 191 | &gfs2_attr_statfs_sync.attr, | 177 | &gfs2_attr_statfs_sync.attr, |
| 192 | &gfs2_attr_quota_sync.attr, | 178 | &gfs2_attr_quota_sync.attr, |
| @@ -426,7 +412,6 @@ TUNE_ATTR(max_readahead, 0); | |||
| 426 | TUNE_ATTR(complain_secs, 0); | 412 | TUNE_ATTR(complain_secs, 0); |
| 427 | TUNE_ATTR(statfs_slow, 0); | 413 | TUNE_ATTR(statfs_slow, 0); |
| 428 | TUNE_ATTR(new_files_jdata, 0); | 414 | TUNE_ATTR(new_files_jdata, 0); |
| 429 | TUNE_ATTR(new_files_directio, 0); | ||
| 430 | TUNE_ATTR(quota_simul_sync, 1); | 415 | TUNE_ATTR(quota_simul_sync, 1); |
| 431 | TUNE_ATTR(quota_cache_secs, 1); | 416 | TUNE_ATTR(quota_cache_secs, 1); |
| 432 | TUNE_ATTR(stall_secs, 1); | 417 | TUNE_ATTR(stall_secs, 1); |
| @@ -455,7 +440,6 @@ static struct attribute *tune_attrs[] = { | |||
| 455 | &tune_attr_quotad_secs.attr, | 440 | &tune_attr_quotad_secs.attr, |
| 456 | &tune_attr_quota_scale.attr, | 441 | &tune_attr_quota_scale.attr, |
| 457 | &tune_attr_new_files_jdata.attr, | 442 | &tune_attr_new_files_jdata.attr, |
| 458 | &tune_attr_new_files_directio.attr, | ||
| 459 | NULL, | 443 | NULL, |
| 460 | }; | 444 | }; |
| 461 | 445 | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6914598022ce..91389c8aee8a 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
| 688 | 688 | ||
| 689 | J_ASSERT(transaction->t_state == T_FINISHED); | 689 | J_ASSERT(transaction->t_state == T_FINISHED); |
| 690 | J_ASSERT(transaction->t_buffers == NULL); | 690 | J_ASSERT(transaction->t_buffers == NULL); |
| 691 | J_ASSERT(transaction->t_sync_datalist == NULL); | ||
| 692 | J_ASSERT(transaction->t_forget == NULL); | 691 | J_ASSERT(transaction->t_forget == NULL); |
| 693 | J_ASSERT(transaction->t_iobuf_list == NULL); | 692 | J_ASSERT(transaction->t_iobuf_list == NULL); |
| 694 | J_ASSERT(transaction->t_shadow_list == NULL); | 693 | J_ASSERT(transaction->t_shadow_list == NULL); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index a2ed72f7ceee..f8b3be873226 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
| 23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
| 24 | #include <linux/crc32.h> | 24 | #include <linux/crc32.h> |
| 25 | #include <linux/writeback.h> | ||
| 26 | #include <linux/backing-dev.h> | ||
| 25 | 27 | ||
| 26 | /* | 28 | /* |
| 27 | * Default IO end handler for temporary BJ_IO buffer_heads. | 29 | * Default IO end handler for temporary BJ_IO buffer_heads. |
| @@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | |||
| 37 | } | 39 | } |
| 38 | 40 | ||
| 39 | /* | 41 | /* |
| 40 | * When an ext3-ordered file is truncated, it is possible that many pages are | 42 | * When an ext4 file is truncated, it is possible that some pages are not |
| 41 | * not sucessfully freed, because they are attached to a committing transaction. | 43 | * successfully freed, because they are attached to a committing transaction. |
| 42 | * After the transaction commits, these pages are left on the LRU, with no | 44 | * After the transaction commits, these pages are left on the LRU, with no |
| 43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable | 45 | * ->mapping, and with attached buffers. These pages are trivially reclaimable |
| 44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes | 46 | * by the VM, but their apparent absence upsets the VM accounting, and it makes |
| @@ -80,21 +82,6 @@ nope: | |||
| 80 | } | 82 | } |
| 81 | 83 | ||
| 82 | /* | 84 | /* |
| 83 | * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is | ||
| 84 | * held. For ranking reasons we must trylock. If we lose, schedule away and | ||
| 85 | * return 0. j_list_lock is dropped in this case. | ||
| 86 | */ | ||
| 87 | static int inverted_lock(journal_t *journal, struct buffer_head *bh) | ||
| 88 | { | ||
| 89 | if (!jbd_trylock_bh_state(bh)) { | ||
| 90 | spin_unlock(&journal->j_list_lock); | ||
| 91 | schedule(); | ||
| 92 | return 0; | ||
| 93 | } | ||
| 94 | return 1; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Done it all: now submit the commit record. We should have | 85 | * Done it all: now submit the commit record. We should have |
| 99 | * cleaned up our previous buffers by now, so if we are in abort | 86 | * cleaned up our previous buffers by now, so if we are in abort |
| 100 | * mode we can now just skip the rest of the journal write | 87 | * mode we can now just skip the rest of the journal write |
| @@ -112,6 +99,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 112 | struct buffer_head *bh; | 99 | struct buffer_head *bh; |
| 113 | int ret; | 100 | int ret; |
| 114 | int barrier_done = 0; | 101 | int barrier_done = 0; |
| 102 | struct timespec now = current_kernel_time(); | ||
| 115 | 103 | ||
| 116 | if (is_journal_aborted(journal)) | 104 | if (is_journal_aborted(journal)) |
| 117 | return 0; | 105 | return 0; |
| @@ -126,6 +114,8 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 126 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 114 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| 127 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 115 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
| 128 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 116 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
| 117 | tmp->h_commit_sec = cpu_to_be64(now.tv_sec); | ||
| 118 | tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); | ||
| 129 | 119 | ||
| 130 | if (JBD2_HAS_COMPAT_FEATURE(journal, | 120 | if (JBD2_HAS_COMPAT_FEATURE(journal, |
| 131 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | 121 | JBD2_FEATURE_COMPAT_CHECKSUM)) { |
| @@ -197,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
| 197 | } | 187 | } |
| 198 | 188 | ||
| 199 | /* | 189 | /* |
| 200 | * Wait for all submitted IO to complete. | 190 | * write the filemap data using writepage() address_space_operations. |
| 191 | * We don't do block allocation here even for delalloc. We don't | ||
| 192 | * use writepages() because with dealyed allocation we may be doing | ||
| 193 | * block allocation in writepages(). | ||
| 201 | */ | 194 | */ |
| 202 | static int journal_wait_on_locked_list(journal_t *journal, | 195 | static int journal_submit_inode_data_buffers(struct address_space *mapping) |
| 203 | transaction_t *commit_transaction) | ||
| 204 | { | 196 | { |
| 205 | int ret = 0; | 197 | int ret; |
| 206 | struct journal_head *jh; | 198 | struct writeback_control wbc = { |
| 207 | 199 | .sync_mode = WB_SYNC_ALL, | |
| 208 | while (commit_transaction->t_locked_list) { | 200 | .nr_to_write = mapping->nrpages * 2, |
| 209 | struct buffer_head *bh; | 201 | .range_start = 0, |
| 210 | 202 | .range_end = i_size_read(mapping->host), | |
| 211 | jh = commit_transaction->t_locked_list->b_tprev; | 203 | .for_writepages = 1, |
| 212 | bh = jh2bh(jh); | 204 | }; |
| 213 | get_bh(bh); | 205 | |
| 214 | if (buffer_locked(bh)) { | 206 | ret = generic_writepages(mapping, &wbc); |
| 215 | spin_unlock(&journal->j_list_lock); | ||
| 216 | wait_on_buffer(bh); | ||
| 217 | if (unlikely(!buffer_uptodate(bh))) | ||
| 218 | ret = -EIO; | ||
| 219 | spin_lock(&journal->j_list_lock); | ||
| 220 | } | ||
| 221 | if (!inverted_lock(journal, bh)) { | ||
| 222 | put_bh(bh); | ||
| 223 | spin_lock(&journal->j_list_lock); | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
| 227 | __jbd2_journal_unfile_buffer(jh); | ||
| 228 | jbd_unlock_bh_state(bh); | ||
| 229 | jbd2_journal_remove_journal_head(bh); | ||
| 230 | put_bh(bh); | ||
| 231 | } else { | ||
| 232 | jbd_unlock_bh_state(bh); | ||
| 233 | } | ||
| 234 | put_bh(bh); | ||
| 235 | cond_resched_lock(&journal->j_list_lock); | ||
| 236 | } | ||
| 237 | return ret; | 207 | return ret; |
| 238 | } | 208 | } |
| 239 | 209 | ||
| 240 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | 210 | /* |
| 211 | * Submit all the data buffers of inode associated with the transaction to | ||
| 212 | * disk. | ||
| 213 | * | ||
| 214 | * We are in a committing transaction. Therefore no new inode can be added to | ||
| 215 | * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently | ||
| 216 | * operate on from being released while we write out pages. | ||
| 217 | */ | ||
| 218 | static int journal_submit_data_buffers(journal_t *journal, | ||
| 219 | transaction_t *commit_transaction) | ||
| 241 | { | 220 | { |
| 242 | int i; | 221 | struct jbd2_inode *jinode; |
| 222 | int err, ret = 0; | ||
| 223 | struct address_space *mapping; | ||
| 243 | 224 | ||
| 244 | for (i = 0; i < bufs; i++) { | 225 | spin_lock(&journal->j_list_lock); |
| 245 | wbuf[i]->b_end_io = end_buffer_write_sync; | 226 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
| 246 | /* We use-up our safety reference in submit_bh() */ | 227 | mapping = jinode->i_vfs_inode->i_mapping; |
| 247 | submit_bh(WRITE, wbuf[i]); | 228 | jinode->i_flags |= JI_COMMIT_RUNNING; |
| 229 | spin_unlock(&journal->j_list_lock); | ||
| 230 | /* | ||
| 231 | * submit the inode data buffers. We use writepage | ||
| 232 | * instead of writepages. Because writepages can do | ||
| 233 | * block allocation with delalloc. We need to write | ||
| 234 | * only allocated blocks here. | ||
| 235 | */ | ||
| 236 | err = journal_submit_inode_data_buffers(mapping); | ||
| 237 | if (!ret) | ||
| 238 | ret = err; | ||
| 239 | spin_lock(&journal->j_list_lock); | ||
| 240 | J_ASSERT(jinode->i_transaction == commit_transaction); | ||
| 241 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
| 242 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 248 | } | 243 | } |
| 244 | spin_unlock(&journal->j_list_lock); | ||
| 245 | return ret; | ||
| 249 | } | 246 | } |
| 250 | 247 | ||
| 251 | /* | 248 | /* |
| 252 | * Submit all the data buffers to disk | 249 | * Wait for data submitted for writeout, refile inodes to proper |
| 250 | * transaction if needed. | ||
| 251 | * | ||
| 253 | */ | 252 | */ |
| 254 | static void journal_submit_data_buffers(journal_t *journal, | 253 | static int journal_finish_inode_data_buffers(journal_t *journal, |
| 255 | transaction_t *commit_transaction) | 254 | transaction_t *commit_transaction) |
| 256 | { | 255 | { |
| 257 | struct journal_head *jh; | 256 | struct jbd2_inode *jinode, *next_i; |
| 258 | struct buffer_head *bh; | 257 | int err, ret = 0; |
| 259 | int locked; | ||
| 260 | int bufs = 0; | ||
| 261 | struct buffer_head **wbuf = journal->j_wbuf; | ||
| 262 | 258 | ||
| 263 | /* | 259 | /* For locking, see the comment in journal_submit_data_buffers() */ |
| 264 | * Whenever we unlock the journal and sleep, things can get added | ||
| 265 | * onto ->t_sync_datalist, so we have to keep looping back to | ||
| 266 | * write_out_data until we *know* that the list is empty. | ||
| 267 | * | ||
| 268 | * Cleanup any flushed data buffers from the data list. Even in | ||
| 269 | * abort mode, we want to flush this out as soon as possible. | ||
| 270 | */ | ||
| 271 | write_out_data: | ||
| 272 | cond_resched(); | ||
| 273 | spin_lock(&journal->j_list_lock); | 260 | spin_lock(&journal->j_list_lock); |
| 261 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | ||
| 262 | jinode->i_flags |= JI_COMMIT_RUNNING; | ||
| 263 | spin_unlock(&journal->j_list_lock); | ||
| 264 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); | ||
| 265 | if (!ret) | ||
| 266 | ret = err; | ||
| 267 | spin_lock(&journal->j_list_lock); | ||
| 268 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
| 269 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 270 | } | ||
| 274 | 271 | ||
| 275 | while (commit_transaction->t_sync_datalist) { | 272 | /* Now refile inode to proper lists */ |
| 276 | jh = commit_transaction->t_sync_datalist; | 273 | list_for_each_entry_safe(jinode, next_i, |
| 277 | bh = jh2bh(jh); | 274 | &commit_transaction->t_inode_list, i_list) { |
| 278 | locked = 0; | 275 | list_del(&jinode->i_list); |
| 279 | 276 | if (jinode->i_next_transaction) { | |
| 280 | /* Get reference just to make sure buffer does not disappear | 277 | jinode->i_transaction = jinode->i_next_transaction; |
| 281 | * when we are forced to drop various locks */ | 278 | jinode->i_next_transaction = NULL; |
| 282 | get_bh(bh); | 279 | list_add(&jinode->i_list, |
| 283 | /* If the buffer is dirty, we need to submit IO and hence | 280 | &jinode->i_transaction->t_inode_list); |
| 284 | * we need the buffer lock. We try to lock the buffer without | ||
| 285 | * blocking. If we fail, we need to drop j_list_lock and do | ||
| 286 | * blocking lock_buffer(). | ||
| 287 | */ | ||
| 288 | if (buffer_dirty(bh)) { | ||
| 289 | if (test_set_buffer_locked(bh)) { | ||
| 290 | BUFFER_TRACE(bh, "needs blocking lock"); | ||
| 291 | spin_unlock(&journal->j_list_lock); | ||
| 292 | /* Write out all data to prevent deadlocks */ | ||
| 293 | journal_do_submit_data(wbuf, bufs); | ||
| 294 | bufs = 0; | ||
| 295 | lock_buffer(bh); | ||
| 296 | spin_lock(&journal->j_list_lock); | ||
| 297 | } | ||
| 298 | locked = 1; | ||
| 299 | } | ||
| 300 | /* We have to get bh_state lock. Again out of order, sigh. */ | ||
| 301 | if (!inverted_lock(journal, bh)) { | ||
| 302 | jbd_lock_bh_state(bh); | ||
| 303 | spin_lock(&journal->j_list_lock); | ||
| 304 | } | ||
| 305 | /* Someone already cleaned up the buffer? */ | ||
| 306 | if (!buffer_jbd(bh) | ||
| 307 | || jh->b_transaction != commit_transaction | ||
| 308 | || jh->b_jlist != BJ_SyncData) { | ||
| 309 | jbd_unlock_bh_state(bh); | ||
| 310 | if (locked) | ||
| 311 | unlock_buffer(bh); | ||
| 312 | BUFFER_TRACE(bh, "already cleaned up"); | ||
| 313 | put_bh(bh); | ||
| 314 | continue; | ||
| 315 | } | ||
| 316 | if (locked && test_clear_buffer_dirty(bh)) { | ||
| 317 | BUFFER_TRACE(bh, "needs writeout, adding to array"); | ||
| 318 | wbuf[bufs++] = bh; | ||
| 319 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 320 | BJ_Locked); | ||
| 321 | jbd_unlock_bh_state(bh); | ||
| 322 | if (bufs == journal->j_wbufsize) { | ||
| 323 | spin_unlock(&journal->j_list_lock); | ||
| 324 | journal_do_submit_data(wbuf, bufs); | ||
| 325 | bufs = 0; | ||
| 326 | goto write_out_data; | ||
| 327 | } | ||
| 328 | } else if (!locked && buffer_locked(bh)) { | ||
| 329 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 330 | BJ_Locked); | ||
| 331 | jbd_unlock_bh_state(bh); | ||
| 332 | put_bh(bh); | ||
| 333 | } else { | 281 | } else { |
| 334 | BUFFER_TRACE(bh, "writeout complete: unfile"); | 282 | jinode->i_transaction = NULL; |
| 335 | __jbd2_journal_unfile_buffer(jh); | ||
| 336 | jbd_unlock_bh_state(bh); | ||
| 337 | if (locked) | ||
| 338 | unlock_buffer(bh); | ||
| 339 | jbd2_journal_remove_journal_head(bh); | ||
| 340 | /* Once for our safety reference, once for | ||
| 341 | * jbd2_journal_remove_journal_head() */ | ||
| 342 | put_bh(bh); | ||
| 343 | put_bh(bh); | ||
| 344 | } | ||
| 345 | |||
| 346 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { | ||
| 347 | spin_unlock(&journal->j_list_lock); | ||
| 348 | goto write_out_data; | ||
| 349 | } | 283 | } |
| 350 | } | 284 | } |
| 351 | spin_unlock(&journal->j_list_lock); | 285 | spin_unlock(&journal->j_list_lock); |
| 352 | journal_do_submit_data(wbuf, bufs); | 286 | |
| 287 | return ret; | ||
| 353 | } | 288 | } |
| 354 | 289 | ||
| 355 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) | 290 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) |
| @@ -524,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 524 | * Now start flushing things to disk, in the order they appear | 459 | * Now start flushing things to disk, in the order they appear |
| 525 | * on the transaction lists. Data blocks go first. | 460 | * on the transaction lists. Data blocks go first. |
| 526 | */ | 461 | */ |
| 527 | err = 0; | 462 | err = journal_submit_data_buffers(journal, commit_transaction); |
| 528 | journal_submit_data_buffers(journal, commit_transaction); | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Wait for all previously submitted IO to complete if commit | ||
| 532 | * record is to be written synchronously. | ||
| 533 | */ | ||
| 534 | spin_lock(&journal->j_list_lock); | ||
| 535 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
| 536 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) | ||
| 537 | err = journal_wait_on_locked_list(journal, | ||
| 538 | commit_transaction); | ||
| 539 | |||
| 540 | spin_unlock(&journal->j_list_lock); | ||
| 541 | |||
| 542 | if (err) | 463 | if (err) |
| 543 | jbd2_journal_abort(journal, err); | 464 | jbd2_journal_abort(journal, err); |
| 544 | 465 | ||
| @@ -547,16 +468,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 547 | jbd_debug(3, "JBD: commit phase 2\n"); | 468 | jbd_debug(3, "JBD: commit phase 2\n"); |
| 548 | 469 | ||
| 549 | /* | 470 | /* |
| 550 | * If we found any dirty or locked buffers, then we should have | ||
| 551 | * looped back up to the write_out_data label. If there weren't | ||
| 552 | * any then journal_clean_data_list should have wiped the list | ||
| 553 | * clean by now, so check that it is in fact empty. | ||
| 554 | */ | ||
| 555 | J_ASSERT (commit_transaction->t_sync_datalist == NULL); | ||
| 556 | |||
| 557 | jbd_debug (3, "JBD: commit phase 3\n"); | ||
| 558 | |||
| 559 | /* | ||
| 560 | * Way to go: we have now written out all of the data for a | 471 | * Way to go: we have now written out all of the data for a |
| 561 | * transaction! Now comes the tricky part: we need to write out | 472 | * transaction! Now comes the tricky part: we need to write out |
| 562 | * metadata. Loop over the transaction's entire buffer list: | 473 | * metadata. Loop over the transaction's entire buffer list: |
| @@ -574,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 574 | J_ASSERT(commit_transaction->t_nr_buffers <= | 485 | J_ASSERT(commit_transaction->t_nr_buffers <= |
| 575 | commit_transaction->t_outstanding_credits); | 486 | commit_transaction->t_outstanding_credits); |
| 576 | 487 | ||
| 488 | err = 0; | ||
| 577 | descriptor = NULL; | 489 | descriptor = NULL; |
| 578 | bufs = 0; | 490 | bufs = 0; |
| 579 | while (commit_transaction->t_buffers) { | 491 | while (commit_transaction->t_buffers) { |
| @@ -748,15 +660,19 @@ start_journal_io: | |||
| 748 | &cbh, crc32_sum); | 660 | &cbh, crc32_sum); |
| 749 | if (err) | 661 | if (err) |
| 750 | __jbd2_journal_abort_hard(journal); | 662 | __jbd2_journal_abort_hard(journal); |
| 751 | |||
| 752 | spin_lock(&journal->j_list_lock); | ||
| 753 | err = journal_wait_on_locked_list(journal, | ||
| 754 | commit_transaction); | ||
| 755 | spin_unlock(&journal->j_list_lock); | ||
| 756 | if (err) | ||
| 757 | __jbd2_journal_abort_hard(journal); | ||
| 758 | } | 663 | } |
| 759 | 664 | ||
| 665 | /* | ||
| 666 | * This is the right place to wait for data buffers both for ASYNC | ||
| 667 | * and !ASYNC commit. If commit is ASYNC, we need to wait only after | ||
| 668 | * the commit block went to disk (which happens above). If commit is | ||
| 669 | * SYNC, we need to wait for data buffers before we start writing | ||
| 670 | * commit block, which happens below in such setting. | ||
| 671 | */ | ||
| 672 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
| 673 | if (err) | ||
| 674 | jbd2_journal_abort(journal, err); | ||
| 675 | |||
| 760 | /* Lo and behold: we have just managed to send a transaction to | 676 | /* Lo and behold: we have just managed to send a transaction to |
| 761 | the log. Before we can commit it, wait for the IO so far to | 677 | the log. Before we can commit it, wait for the IO so far to |
| 762 | complete. Control buffers being written are on the | 678 | complete. Control buffers being written are on the |
| @@ -768,7 +684,7 @@ start_journal_io: | |||
| 768 | so we incur less scheduling load. | 684 | so we incur less scheduling load. |
| 769 | */ | 685 | */ |
| 770 | 686 | ||
| 771 | jbd_debug(3, "JBD: commit phase 4\n"); | 687 | jbd_debug(3, "JBD: commit phase 3\n"); |
| 772 | 688 | ||
| 773 | /* | 689 | /* |
| 774 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 690 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
| @@ -827,7 +743,7 @@ wait_for_iobuf: | |||
| 827 | 743 | ||
| 828 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 744 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
| 829 | 745 | ||
| 830 | jbd_debug(3, "JBD: commit phase 5\n"); | 746 | jbd_debug(3, "JBD: commit phase 4\n"); |
| 831 | 747 | ||
| 832 | /* Here we wait for the revoke record and descriptor record buffers */ | 748 | /* Here we wait for the revoke record and descriptor record buffers */ |
| 833 | wait_for_ctlbuf: | 749 | wait_for_ctlbuf: |
| @@ -854,7 +770,7 @@ wait_for_iobuf: | |||
| 854 | /* AKPM: bforget here */ | 770 | /* AKPM: bforget here */ |
| 855 | } | 771 | } |
| 856 | 772 | ||
| 857 | jbd_debug(3, "JBD: commit phase 6\n"); | 773 | jbd_debug(3, "JBD: commit phase 5\n"); |
| 858 | 774 | ||
| 859 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 775 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 860 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 776 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| @@ -874,9 +790,9 @@ wait_for_iobuf: | |||
| 874 | transaction can be removed from any checkpoint list it was on | 790 | transaction can be removed from any checkpoint list it was on |
| 875 | before. */ | 791 | before. */ |
| 876 | 792 | ||
| 877 | jbd_debug(3, "JBD: commit phase 7\n"); | 793 | jbd_debug(3, "JBD: commit phase 6\n"); |
| 878 | 794 | ||
| 879 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); | 795 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
| 880 | J_ASSERT(commit_transaction->t_buffers == NULL); | 796 | J_ASSERT(commit_transaction->t_buffers == NULL); |
| 881 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 797 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
| 882 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | 798 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); |
| @@ -997,7 +913,7 @@ restart_loop: | |||
| 997 | 913 | ||
| 998 | /* Done with this transaction! */ | 914 | /* Done with this transaction! */ |
| 999 | 915 | ||
| 1000 | jbd_debug(3, "JBD: commit phase 8\n"); | 916 | jbd_debug(3, "JBD: commit phase 7\n"); |
| 1001 | 917 | ||
| 1002 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 918 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
| 1003 | 919 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 2e24567c4a79..b26c6d9fe6ae 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates); | |||
| 50 | EXPORT_SYMBOL(jbd2_journal_get_write_access); | 50 | EXPORT_SYMBOL(jbd2_journal_get_write_access); |
| 51 | EXPORT_SYMBOL(jbd2_journal_get_create_access); | 51 | EXPORT_SYMBOL(jbd2_journal_get_create_access); |
| 52 | EXPORT_SYMBOL(jbd2_journal_get_undo_access); | 52 | EXPORT_SYMBOL(jbd2_journal_get_undo_access); |
| 53 | EXPORT_SYMBOL(jbd2_journal_dirty_data); | ||
| 54 | EXPORT_SYMBOL(jbd2_journal_dirty_metadata); | 53 | EXPORT_SYMBOL(jbd2_journal_dirty_metadata); |
| 55 | EXPORT_SYMBOL(jbd2_journal_release_buffer); | 54 | EXPORT_SYMBOL(jbd2_journal_release_buffer); |
| 56 | EXPORT_SYMBOL(jbd2_journal_forget); | 55 | EXPORT_SYMBOL(jbd2_journal_forget); |
| @@ -82,6 +81,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page); | |||
| 82 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); | 81 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); |
| 83 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); | 82 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); |
| 84 | EXPORT_SYMBOL(jbd2_journal_force_commit); | 83 | EXPORT_SYMBOL(jbd2_journal_force_commit); |
| 84 | EXPORT_SYMBOL(jbd2_journal_file_inode); | ||
| 85 | EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); | ||
| 86 | EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | ||
| 87 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | ||
| 85 | 88 | ||
| 86 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 89 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
| 87 | static void __journal_abort_soft (journal_t *journal, int errno); | 90 | static void __journal_abort_soft (journal_t *journal, int errno); |
| @@ -2195,6 +2198,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) | |||
| 2195 | } | 2198 | } |
| 2196 | 2199 | ||
| 2197 | /* | 2200 | /* |
| 2201 | * Initialize jbd inode head | ||
| 2202 | */ | ||
| 2203 | void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) | ||
| 2204 | { | ||
| 2205 | jinode->i_transaction = NULL; | ||
| 2206 | jinode->i_next_transaction = NULL; | ||
| 2207 | jinode->i_vfs_inode = inode; | ||
| 2208 | jinode->i_flags = 0; | ||
| 2209 | INIT_LIST_HEAD(&jinode->i_list); | ||
| 2210 | } | ||
| 2211 | |||
| 2212 | /* | ||
| 2213 | * Function to be called before we start removing inode from memory (i.e., | ||
| 2214 | * clear_inode() is a fine place to be called from). It removes inode from | ||
| 2215 | * transaction's lists. | ||
| 2216 | */ | ||
| 2217 | void jbd2_journal_release_jbd_inode(journal_t *journal, | ||
| 2218 | struct jbd2_inode *jinode) | ||
| 2219 | { | ||
| 2220 | int writeout = 0; | ||
| 2221 | |||
| 2222 | if (!journal) | ||
| 2223 | return; | ||
| 2224 | restart: | ||
| 2225 | spin_lock(&journal->j_list_lock); | ||
| 2226 | /* Is commit writing out inode - we have to wait */ | ||
| 2227 | if (jinode->i_flags & JI_COMMIT_RUNNING) { | ||
| 2228 | wait_queue_head_t *wq; | ||
| 2229 | DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 2230 | wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 2231 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
| 2232 | spin_unlock(&journal->j_list_lock); | ||
| 2233 | schedule(); | ||
| 2234 | finish_wait(wq, &wait.wait); | ||
| 2235 | goto restart; | ||
| 2236 | } | ||
| 2237 | |||
| 2238 | /* Do we need to wait for data writeback? */ | ||
| 2239 | if (journal->j_committing_transaction == jinode->i_transaction) | ||
| 2240 | writeout = 1; | ||
| 2241 | if (jinode->i_transaction) { | ||
| 2242 | list_del(&jinode->i_list); | ||
| 2243 | jinode->i_transaction = NULL; | ||
| 2244 | } | ||
| 2245 | spin_unlock(&journal->j_list_lock); | ||
| 2246 | } | ||
| 2247 | |||
| 2248 | /* | ||
| 2198 | * debugfs tunables | 2249 | * debugfs tunables |
| 2199 | */ | 2250 | */ |
| 2200 | #ifdef CONFIG_JBD2_DEBUG | 2251 | #ifdef CONFIG_JBD2_DEBUG |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d6e006e67804..4f7cadbb19fa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | |||
| 41 | * new transaction and we can't block without protecting against other | 41 | * new transaction and we can't block without protecting against other |
| 42 | * processes trying to touch the journal while it is in transition. | 42 | * processes trying to touch the journal while it is in transition. |
| 43 | * | 43 | * |
| 44 | * Called under j_state_lock | ||
| 45 | */ | 44 | */ |
| 46 | 45 | ||
| 47 | static transaction_t * | 46 | static transaction_t * |
| @@ -52,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
| 52 | transaction->t_tid = journal->j_transaction_sequence++; | 51 | transaction->t_tid = journal->j_transaction_sequence++; |
| 53 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
| 54 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
| 54 | INIT_LIST_HEAD(&transaction->t_inode_list); | ||
| 55 | 55 | ||
| 56 | /* Set up the commit timer for the new transaction. */ | 56 | /* Set up the commit timer for the new transaction. */ |
| 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
| @@ -943,183 +943,6 @@ out: | |||
| 943 | } | 943 | } |
| 944 | 944 | ||
| 945 | /** | 945 | /** |
| 946 | * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which | ||
| 947 | * needs to be flushed before we can commit the | ||
| 948 | * current transaction. | ||
| 949 | * @handle: transaction | ||
| 950 | * @bh: bufferhead to mark | ||
| 951 | * | ||
| 952 | * The buffer is placed on the transaction's data list and is marked as | ||
| 953 | * belonging to the transaction. | ||
| 954 | * | ||
| 955 | * Returns error number or 0 on success. | ||
| 956 | * | ||
| 957 | * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage | ||
| 958 | * by kswapd. | ||
| 959 | */ | ||
| 960 | int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | ||
| 961 | { | ||
| 962 | journal_t *journal = handle->h_transaction->t_journal; | ||
| 963 | int need_brelse = 0; | ||
| 964 | struct journal_head *jh; | ||
| 965 | |||
| 966 | if (is_handle_aborted(handle)) | ||
| 967 | return 0; | ||
| 968 | |||
| 969 | jh = jbd2_journal_add_journal_head(bh); | ||
| 970 | JBUFFER_TRACE(jh, "entry"); | ||
| 971 | |||
| 972 | /* | ||
| 973 | * The buffer could *already* be dirty. Writeout can start | ||
| 974 | * at any time. | ||
| 975 | */ | ||
| 976 | jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); | ||
| 977 | |||
| 978 | /* | ||
| 979 | * What if the buffer is already part of a running transaction? | ||
| 980 | * | ||
| 981 | * There are two cases: | ||
| 982 | * 1) It is part of the current running transaction. Refile it, | ||
| 983 | * just in case we have allocated it as metadata, deallocated | ||
| 984 | * it, then reallocated it as data. | ||
| 985 | * 2) It is part of the previous, still-committing transaction. | ||
| 986 | * If all we want to do is to guarantee that the buffer will be | ||
| 987 | * written to disk before this new transaction commits, then | ||
| 988 | * being sure that the *previous* transaction has this same | ||
| 989 | * property is sufficient for us! Just leave it on its old | ||
| 990 | * transaction. | ||
| 991 | * | ||
| 992 | * In case (2), the buffer must not already exist as metadata | ||
| 993 | * --- that would violate write ordering (a transaction is free | ||
| 994 | * to write its data at any point, even before the previous | ||
| 995 | * committing transaction has committed). The caller must | ||
| 996 | * never, ever allow this to happen: there's nothing we can do | ||
| 997 | * about it in this layer. | ||
| 998 | */ | ||
| 999 | jbd_lock_bh_state(bh); | ||
| 1000 | spin_lock(&journal->j_list_lock); | ||
| 1001 | |||
| 1002 | /* Now that we have bh_state locked, are we really still mapped? */ | ||
| 1003 | if (!buffer_mapped(bh)) { | ||
| 1004 | JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); | ||
| 1005 | goto no_journal; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | if (jh->b_transaction) { | ||
| 1009 | JBUFFER_TRACE(jh, "has transaction"); | ||
| 1010 | if (jh->b_transaction != handle->h_transaction) { | ||
| 1011 | JBUFFER_TRACE(jh, "belongs to older transaction"); | ||
| 1012 | J_ASSERT_JH(jh, jh->b_transaction == | ||
| 1013 | journal->j_committing_transaction); | ||
| 1014 | |||
| 1015 | /* @@@ IS THIS TRUE ? */ | ||
| 1016 | /* | ||
| 1017 | * Not any more. Scenario: someone does a write() | ||
| 1018 | * in data=journal mode. The buffer's transaction has | ||
| 1019 | * moved into commit. Then someone does another | ||
| 1020 | * write() to the file. We do the frozen data copyout | ||
| 1021 | * and set b_next_transaction to point to j_running_t. | ||
| 1022 | * And while we're in that state, someone does a | ||
| 1023 | * writepage() in an attempt to pageout the same area | ||
| 1024 | * of the file via a shared mapping. At present that | ||
| 1025 | * calls jbd2_journal_dirty_data(), and we get right here. | ||
| 1026 | * It may be too late to journal the data. Simply | ||
| 1027 | * falling through to the next test will suffice: the | ||
| 1028 | * data will be dirty and wil be checkpointed. The | ||
| 1029 | * ordering comments in the next comment block still | ||
| 1030 | * apply. | ||
| 1031 | */ | ||
| 1032 | //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); | ||
| 1033 | |||
| 1034 | /* | ||
| 1035 | * If we're journalling data, and this buffer was | ||
| 1036 | * subject to a write(), it could be metadata, forget | ||
| 1037 | * or shadow against the committing transaction. Now, | ||
| 1038 | * someone has dirtied the same darn page via a mapping | ||
| 1039 | * and it is being writepage()'d. | ||
| 1040 | * We *could* just steal the page from commit, with some | ||
| 1041 | * fancy locking there. Instead, we just skip it - | ||
| 1042 | * don't tie the page's buffers to the new transaction | ||
| 1043 | * at all. | ||
| 1044 | * Implication: if we crash before the writepage() data | ||
| 1045 | * is written into the filesystem, recovery will replay | ||
| 1046 | * the write() data. | ||
| 1047 | */ | ||
| 1048 | if (jh->b_jlist != BJ_None && | ||
| 1049 | jh->b_jlist != BJ_SyncData && | ||
| 1050 | jh->b_jlist != BJ_Locked) { | ||
| 1051 | JBUFFER_TRACE(jh, "Not stealing"); | ||
| 1052 | goto no_journal; | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * This buffer may be undergoing writeout in commit. We | ||
| 1057 | * can't return from here and let the caller dirty it | ||
| 1058 | * again because that can cause the write-out loop in | ||
| 1059 | * commit to never terminate. | ||
| 1060 | */ | ||
| 1061 | if (buffer_dirty(bh)) { | ||
| 1062 | get_bh(bh); | ||
| 1063 | spin_unlock(&journal->j_list_lock); | ||
| 1064 | jbd_unlock_bh_state(bh); | ||
| 1065 | need_brelse = 1; | ||
| 1066 | sync_dirty_buffer(bh); | ||
| 1067 | jbd_lock_bh_state(bh); | ||
| 1068 | spin_lock(&journal->j_list_lock); | ||
| 1069 | /* Since we dropped the lock... */ | ||
| 1070 | if (!buffer_mapped(bh)) { | ||
| 1071 | JBUFFER_TRACE(jh, "buffer got unmapped"); | ||
| 1072 | goto no_journal; | ||
| 1073 | } | ||
| 1074 | /* The buffer may become locked again at any | ||
| 1075 | time if it is redirtied */ | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | /* journal_clean_data_list() may have got there first */ | ||
| 1079 | if (jh->b_transaction != NULL) { | ||
| 1080 | JBUFFER_TRACE(jh, "unfile from commit"); | ||
| 1081 | __jbd2_journal_temp_unlink_buffer(jh); | ||
| 1082 | /* It still points to the committing | ||
| 1083 | * transaction; move it to this one so | ||
| 1084 | * that the refile assert checks are | ||
| 1085 | * happy. */ | ||
| 1086 | jh->b_transaction = handle->h_transaction; | ||
| 1087 | } | ||
| 1088 | /* The buffer will be refiled below */ | ||
| 1089 | |||
| 1090 | } | ||
| 1091 | /* | ||
| 1092 | * Special case --- the buffer might actually have been | ||
| 1093 | * allocated and then immediately deallocated in the previous, | ||
| 1094 | * committing transaction, so might still be left on that | ||
| 1095 | * transaction's metadata lists. | ||
| 1096 | */ | ||
| 1097 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | ||
| 1098 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | ||
| 1099 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | ||
| 1100 | __jbd2_journal_temp_unlink_buffer(jh); | ||
| 1101 | jh->b_transaction = handle->h_transaction; | ||
| 1102 | JBUFFER_TRACE(jh, "file as data"); | ||
| 1103 | __jbd2_journal_file_buffer(jh, handle->h_transaction, | ||
| 1104 | BJ_SyncData); | ||
| 1105 | } | ||
| 1106 | } else { | ||
| 1107 | JBUFFER_TRACE(jh, "not on a transaction"); | ||
| 1108 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); | ||
| 1109 | } | ||
| 1110 | no_journal: | ||
| 1111 | spin_unlock(&journal->j_list_lock); | ||
| 1112 | jbd_unlock_bh_state(bh); | ||
| 1113 | if (need_brelse) { | ||
| 1114 | BUFFER_TRACE(bh, "brelse"); | ||
| 1115 | __brelse(bh); | ||
| 1116 | } | ||
| 1117 | JBUFFER_TRACE(jh, "exit"); | ||
| 1118 | jbd2_journal_put_journal_head(jh); | ||
| 1119 | return 0; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | /** | ||
| 1123 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata | 946 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata |
| 1124 | * @handle: transaction to add buffer to. | 947 | * @handle: transaction to add buffer to. |
| 1125 | * @bh: buffer to mark | 948 | * @bh: buffer to mark |
| @@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
| 1541 | * Remove a buffer from the appropriate transaction list. | 1364 | * Remove a buffer from the appropriate transaction list. |
| 1542 | * | 1365 | * |
| 1543 | * Note that this function can *change* the value of | 1366 | * Note that this function can *change* the value of |
| 1544 | * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, | 1367 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, |
| 1545 | * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller | 1368 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one |
| 1546 | * is holding onto a copy of one of thee pointers, it could go bad. | 1369 | * of these pointers, it could go bad. Generally the caller needs to re-read |
| 1547 | * Generally the caller needs to re-read the pointer from the transaction_t. | 1370 | * the pointer from the transaction_t. |
| 1548 | * | 1371 | * |
| 1549 | * Called under j_list_lock. The journal may not be locked. | 1372 | * Called under j_list_lock. The journal may not be locked. |
| 1550 | */ | 1373 | */ |
| @@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1566 | switch (jh->b_jlist) { | 1389 | switch (jh->b_jlist) { |
| 1567 | case BJ_None: | 1390 | case BJ_None: |
| 1568 | return; | 1391 | return; |
| 1569 | case BJ_SyncData: | ||
| 1570 | list = &transaction->t_sync_datalist; | ||
| 1571 | break; | ||
| 1572 | case BJ_Metadata: | 1392 | case BJ_Metadata: |
| 1573 | transaction->t_nr_buffers--; | 1393 | transaction->t_nr_buffers--; |
| 1574 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); | 1394 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); |
| @@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1589 | case BJ_Reserved: | 1409 | case BJ_Reserved: |
| 1590 | list = &transaction->t_reserved_list; | 1410 | list = &transaction->t_reserved_list; |
| 1591 | break; | 1411 | break; |
| 1592 | case BJ_Locked: | ||
| 1593 | list = &transaction->t_locked_list; | ||
| 1594 | break; | ||
| 1595 | } | 1412 | } |
| 1596 | 1413 | ||
| 1597 | __blist_del_buffer(list, jh); | 1414 | __blist_del_buffer(list, jh); |
| @@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1634 | goto out; | 1451 | goto out; |
| 1635 | 1452 | ||
| 1636 | spin_lock(&journal->j_list_lock); | 1453 | spin_lock(&journal->j_list_lock); |
| 1637 | if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { | 1454 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
| 1638 | if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { | ||
| 1639 | /* A written-back ordered data buffer */ | ||
| 1640 | JBUFFER_TRACE(jh, "release data"); | ||
| 1641 | __jbd2_journal_unfile_buffer(jh); | ||
| 1642 | jbd2_journal_remove_journal_head(bh); | ||
| 1643 | __brelse(bh); | ||
| 1644 | } | ||
| 1645 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | ||
| 1646 | /* written-back checkpointed metadata buffer */ | 1455 | /* written-back checkpointed metadata buffer */ |
| 1647 | if (jh->b_jlist == BJ_None) { | 1456 | if (jh->b_jlist == BJ_None) { |
| 1648 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1457 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| @@ -1656,12 +1465,43 @@ out: | |||
| 1656 | return; | 1465 | return; |
| 1657 | } | 1466 | } |
| 1658 | 1467 | ||
| 1468 | /* | ||
| 1469 | * jbd2_journal_try_to_free_buffers() could race with | ||
| 1470 | * jbd2_journal_commit_transaction(). The later might still hold the | ||
| 1471 | * reference count to the buffers when inspecting them on | ||
| 1472 | * t_syncdata_list or t_locked_list. | ||
| 1473 | * | ||
| 1474 | * jbd2_journal_try_to_free_buffers() will call this function to | ||
| 1475 | * wait for the current transaction to finish syncing data buffers, before | ||
| 1476 | * try to free that buffer. | ||
| 1477 | * | ||
| 1478 | * Called with journal->j_state_lock hold. | ||
| 1479 | */ | ||
| 1480 | static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal) | ||
| 1481 | { | ||
| 1482 | transaction_t *transaction; | ||
| 1483 | tid_t tid; | ||
| 1484 | |||
| 1485 | spin_lock(&journal->j_state_lock); | ||
| 1486 | transaction = journal->j_committing_transaction; | ||
| 1487 | |||
| 1488 | if (!transaction) { | ||
| 1489 | spin_unlock(&journal->j_state_lock); | ||
| 1490 | return; | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | tid = transaction->t_tid; | ||
| 1494 | spin_unlock(&journal->j_state_lock); | ||
| 1495 | jbd2_log_wait_commit(journal, tid); | ||
| 1496 | } | ||
| 1659 | 1497 | ||
| 1660 | /** | 1498 | /** |
| 1661 | * int jbd2_journal_try_to_free_buffers() - try to free page buffers. | 1499 | * int jbd2_journal_try_to_free_buffers() - try to free page buffers. |
| 1662 | * @journal: journal for operation | 1500 | * @journal: journal for operation |
| 1663 | * @page: to try and free | 1501 | * @page: to try and free |
| 1664 | * @unused_gfp_mask: unused | 1502 | * @gfp_mask: we use the mask to detect how hard should we try to release |
| 1503 | * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to | ||
| 1504 | * release the buffers. | ||
| 1665 | * | 1505 | * |
| 1666 | * | 1506 | * |
| 1667 | * For all the buffers on this page, | 1507 | * For all the buffers on this page, |
| @@ -1690,9 +1530,11 @@ out: | |||
| 1690 | * journal_try_to_free_buffer() is changing its state. But that | 1530 | * journal_try_to_free_buffer() is changing its state. But that |
| 1691 | * cannot happen because we never reallocate freed data as metadata | 1531 | * cannot happen because we never reallocate freed data as metadata |
| 1692 | * while the data is part of a transaction. Yes? | 1532 | * while the data is part of a transaction. Yes? |
| 1533 | * | ||
| 1534 | * Return 0 on failure, 1 on success | ||
| 1693 | */ | 1535 | */ |
| 1694 | int jbd2_journal_try_to_free_buffers(journal_t *journal, | 1536 | int jbd2_journal_try_to_free_buffers(journal_t *journal, |
| 1695 | struct page *page, gfp_t unused_gfp_mask) | 1537 | struct page *page, gfp_t gfp_mask) |
| 1696 | { | 1538 | { |
| 1697 | struct buffer_head *head; | 1539 | struct buffer_head *head; |
| 1698 | struct buffer_head *bh; | 1540 | struct buffer_head *bh; |
| @@ -1708,7 +1550,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
| 1708 | /* | 1550 | /* |
| 1709 | * We take our own ref against the journal_head here to avoid | 1551 | * We take our own ref against the journal_head here to avoid |
| 1710 | * having to add tons of locking around each instance of | 1552 | * having to add tons of locking around each instance of |
| 1711 | * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). | 1553 | * jbd2_journal_remove_journal_head() and |
| 1554 | * jbd2_journal_put_journal_head(). | ||
| 1712 | */ | 1555 | */ |
| 1713 | jh = jbd2_journal_grab_journal_head(bh); | 1556 | jh = jbd2_journal_grab_journal_head(bh); |
| 1714 | if (!jh) | 1557 | if (!jh) |
| @@ -1721,7 +1564,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
| 1721 | if (buffer_jbd(bh)) | 1564 | if (buffer_jbd(bh)) |
| 1722 | goto busy; | 1565 | goto busy; |
| 1723 | } while ((bh = bh->b_this_page) != head); | 1566 | } while ((bh = bh->b_this_page) != head); |
| 1567 | |||
| 1724 | ret = try_to_free_buffers(page); | 1568 | ret = try_to_free_buffers(page); |
| 1569 | |||
| 1570 | /* | ||
| 1571 | * There are a number of places where jbd2_journal_try_to_free_buffers() | ||
| 1572 | * could race with jbd2_journal_commit_transaction(), the later still | ||
| 1573 | * holds the reference to the buffers to free while processing them. | ||
| 1574 | * try_to_free_buffers() failed to free those buffers. Some of the | ||
| 1575 | * caller of releasepage() request page buffers to be dropped, otherwise | ||
| 1576 | * treat the fail-to-free as errors (such as generic_file_direct_IO()) | ||
| 1577 | * | ||
| 1578 | * So, if the caller of try_to_release_page() wants the synchronous | ||
| 1579 | * behaviour(i.e make sure buffers are dropped upon return), | ||
| 1580 | * let's wait for the current transaction to finish flush of | ||
| 1581 | * dirty data buffers, then try to free those buffers again, | ||
| 1582 | * with the journal locked. | ||
| 1583 | */ | ||
| 1584 | if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { | ||
| 1585 | jbd2_journal_wait_for_transaction_sync_data(journal); | ||
| 1586 | ret = try_to_free_buffers(page); | ||
| 1587 | } | ||
| 1588 | |||
| 1725 | busy: | 1589 | busy: |
| 1726 | return ret; | 1590 | return ret; |
| 1727 | } | 1591 | } |
| @@ -1823,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1823 | if (!buffer_jbd(bh)) | 1687 | if (!buffer_jbd(bh)) |
| 1824 | goto zap_buffer_unlocked; | 1688 | goto zap_buffer_unlocked; |
| 1825 | 1689 | ||
| 1690 | /* OK, we have data buffer in journaled mode */ | ||
| 1826 | spin_lock(&journal->j_state_lock); | 1691 | spin_lock(&journal->j_state_lock); |
| 1827 | jbd_lock_bh_state(bh); | 1692 | jbd_lock_bh_state(bh); |
| 1828 | spin_lock(&journal->j_list_lock); | 1693 | spin_lock(&journal->j_list_lock); |
| @@ -1886,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1886 | } | 1751 | } |
| 1887 | } else if (transaction == journal->j_committing_transaction) { | 1752 | } else if (transaction == journal->j_committing_transaction) { |
| 1888 | JBUFFER_TRACE(jh, "on committing transaction"); | 1753 | JBUFFER_TRACE(jh, "on committing transaction"); |
| 1889 | if (jh->b_jlist == BJ_Locked) { | ||
| 1890 | /* | ||
| 1891 | * The buffer is on the committing transaction's locked | ||
| 1892 | * list. We have the buffer locked, so I/O has | ||
| 1893 | * completed. So we can nail the buffer now. | ||
| 1894 | */ | ||
| 1895 | may_free = __dispose_buffer(jh, transaction); | ||
| 1896 | goto zap_buffer; | ||
| 1897 | } | ||
| 1898 | /* | 1754 | /* |
| 1899 | * If it is committing, we simply cannot touch it. We | 1755 | * If it is committing, we simply cannot touch it. We |
| 1900 | * can remove it's next_transaction pointer from the | 1756 | * can remove it's next_transaction pointer from the |
| @@ -2027,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2027 | J_ASSERT_JH(jh, !jh->b_committed_data); | 1883 | J_ASSERT_JH(jh, !jh->b_committed_data); |
| 2028 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1884 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
| 2029 | return; | 1885 | return; |
| 2030 | case BJ_SyncData: | ||
| 2031 | list = &transaction->t_sync_datalist; | ||
| 2032 | break; | ||
| 2033 | case BJ_Metadata: | 1886 | case BJ_Metadata: |
| 2034 | transaction->t_nr_buffers++; | 1887 | transaction->t_nr_buffers++; |
| 2035 | list = &transaction->t_buffers; | 1888 | list = &transaction->t_buffers; |
| @@ -2049,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2049 | case BJ_Reserved: | 1902 | case BJ_Reserved: |
| 2050 | list = &transaction->t_reserved_list; | 1903 | list = &transaction->t_reserved_list; |
| 2051 | break; | 1904 | break; |
| 2052 | case BJ_Locked: | ||
| 2053 | list = &transaction->t_locked_list; | ||
| 2054 | break; | ||
| 2055 | } | 1905 | } |
| 2056 | 1906 | ||
| 2057 | __blist_add_buffer(list, jh); | 1907 | __blist_add_buffer(list, jh); |
| @@ -2141,3 +1991,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
| 2141 | spin_unlock(&journal->j_list_lock); | 1991 | spin_unlock(&journal->j_list_lock); |
| 2142 | __brelse(bh); | 1992 | __brelse(bh); |
| 2143 | } | 1993 | } |
| 1994 | |||
| 1995 | /* | ||
| 1996 | * File inode in the inode list of the handle's transaction | ||
| 1997 | */ | ||
| 1998 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | ||
| 1999 | { | ||
| 2000 | transaction_t *transaction = handle->h_transaction; | ||
| 2001 | journal_t *journal = transaction->t_journal; | ||
| 2002 | |||
| 2003 | if (is_handle_aborted(handle)) | ||
| 2004 | return -EIO; | ||
| 2005 | |||
| 2006 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, | ||
| 2007 | transaction->t_tid); | ||
| 2008 | |||
| 2009 | /* | ||
| 2010 | * First check whether inode isn't already on the transaction's | ||
| 2011 | * lists without taking the lock. Note that this check is safe | ||
| 2012 | * without the lock as we cannot race with somebody removing inode | ||
| 2013 | * from the transaction. The reason is that we remove inode from the | ||
| 2014 | * transaction only in journal_release_jbd_inode() and when we commit | ||
| 2015 | * the transaction. We are guarded from the first case by holding | ||
| 2016 | * a reference to the inode. We are safe against the second case | ||
| 2017 | * because if jinode->i_transaction == transaction, commit code | ||
| 2018 | * cannot touch the transaction because we hold reference to it, | ||
| 2019 | * and if jinode->i_next_transaction == transaction, commit code | ||
| 2020 | * will only file the inode where we want it. | ||
| 2021 | */ | ||
| 2022 | if (jinode->i_transaction == transaction || | ||
| 2023 | jinode->i_next_transaction == transaction) | ||
| 2024 | return 0; | ||
| 2025 | |||
| 2026 | spin_lock(&journal->j_list_lock); | ||
| 2027 | |||
| 2028 | if (jinode->i_transaction == transaction || | ||
| 2029 | jinode->i_next_transaction == transaction) | ||
| 2030 | goto done; | ||
| 2031 | |||
| 2032 | /* On some different transaction's list - should be | ||
| 2033 | * the committing one */ | ||
| 2034 | if (jinode->i_transaction) { | ||
| 2035 | J_ASSERT(jinode->i_next_transaction == NULL); | ||
| 2036 | J_ASSERT(jinode->i_transaction == | ||
| 2037 | journal->j_committing_transaction); | ||
| 2038 | jinode->i_next_transaction = transaction; | ||
| 2039 | goto done; | ||
| 2040 | } | ||
| 2041 | /* Not on any transaction list... */ | ||
| 2042 | J_ASSERT(!jinode->i_next_transaction); | ||
| 2043 | jinode->i_transaction = transaction; | ||
| 2044 | list_add(&jinode->i_list, &transaction->t_inode_list); | ||
| 2045 | done: | ||
| 2046 | spin_unlock(&journal->j_list_lock); | ||
| 2047 | |||
| 2048 | return 0; | ||
| 2049 | } | ||
| 2050 | |||
| 2051 | /* | ||
| 2052 | * This function must be called when inode is journaled in ordered mode | ||
| 2053 | * before truncation happens. It starts writeout of truncated part in | ||
| 2054 | * case it is in the committing transaction so that we stand to ordered | ||
| 2055 | * mode consistency guarantees. | ||
| 2056 | */ | ||
| 2057 | int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | ||
| 2058 | loff_t new_size) | ||
| 2059 | { | ||
| 2060 | journal_t *journal; | ||
| 2061 | transaction_t *commit_trans; | ||
| 2062 | int ret = 0; | ||
| 2063 | |||
| 2064 | if (!inode->i_transaction && !inode->i_next_transaction) | ||
| 2065 | goto out; | ||
| 2066 | journal = inode->i_transaction->t_journal; | ||
| 2067 | spin_lock(&journal->j_state_lock); | ||
| 2068 | commit_trans = journal->j_committing_transaction; | ||
| 2069 | spin_unlock(&journal->j_state_lock); | ||
| 2070 | if (inode->i_transaction == commit_trans) { | ||
| 2071 | ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, | ||
| 2072 | new_size, LLONG_MAX); | ||
| 2073 | if (ret) | ||
| 2074 | jbd2_journal_abort(journal, ret); | ||
| 2075 | } | ||
| 2076 | out: | ||
| 2077 | return ret; | ||
| 2078 | } | ||
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index bf6ab19b86ee..6a73de84bcef 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
| 22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
| 23 | #include <linux/proc_fs.h> | 23 | #include <linux/proc_fs.h> |
| 24 | #include <linux/seq_file.h> | ||
| 24 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
| 25 | #include "jfs_incore.h" | 26 | #include "jfs_incore.h" |
| 26 | #include "jfs_filsys.h" | 27 | #include "jfs_filsys.h" |
| @@ -30,29 +31,19 @@ | |||
| 30 | 31 | ||
| 31 | static struct proc_dir_entry *base; | 32 | static struct proc_dir_entry *base; |
| 32 | #ifdef CONFIG_JFS_DEBUG | 33 | #ifdef CONFIG_JFS_DEBUG |
| 33 | static int loglevel_read(char *page, char **start, off_t off, | 34 | static int jfs_loglevel_proc_show(struct seq_file *m, void *v) |
| 34 | int count, int *eof, void *data) | ||
| 35 | { | 35 | { |
| 36 | int len; | 36 | seq_printf(m, "%d\n", jfsloglevel); |
| 37 | 37 | return 0; | |
| 38 | len = sprintf(page, "%d\n", jfsloglevel); | 38 | } |
| 39 | |||
| 40 | len -= off; | ||
| 41 | *start = page + off; | ||
| 42 | |||
| 43 | if (len > count) | ||
| 44 | len = count; | ||
| 45 | else | ||
| 46 | *eof = 1; | ||
| 47 | |||
| 48 | if (len < 0) | ||
| 49 | len = 0; | ||
| 50 | 39 | ||
| 51 | return len; | 40 | static int jfs_loglevel_proc_open(struct inode *inode, struct file *file) |
| 41 | { | ||
| 42 | return single_open(file, jfs_loglevel_proc_show, NULL); | ||
| 52 | } | 43 | } |
| 53 | 44 | ||
| 54 | static int loglevel_write(struct file *file, const char __user *buffer, | 45 | static ssize_t jfs_loglevel_proc_write(struct file *file, |
| 55 | unsigned long count, void *data) | 46 | const char __user *buffer, size_t count, loff_t *ppos) |
| 56 | { | 47 | { |
| 57 | char c; | 48 | char c; |
| 58 | 49 | ||
| @@ -65,22 +56,30 @@ static int loglevel_write(struct file *file, const char __user *buffer, | |||
| 65 | jfsloglevel = c - '0'; | 56 | jfsloglevel = c - '0'; |
| 66 | return count; | 57 | return count; |
| 67 | } | 58 | } |
| 59 | |||
| 60 | static const struct file_operations jfs_loglevel_proc_fops = { | ||
| 61 | .owner = THIS_MODULE, | ||
| 62 | .open = jfs_loglevel_proc_open, | ||
| 63 | .read = seq_read, | ||
| 64 | .llseek = seq_lseek, | ||
| 65 | .release = single_release, | ||
| 66 | .write = jfs_loglevel_proc_write, | ||
| 67 | }; | ||
| 68 | #endif | 68 | #endif |
| 69 | 69 | ||
| 70 | static struct { | 70 | static struct { |
| 71 | const char *name; | 71 | const char *name; |
| 72 | read_proc_t *read_fn; | 72 | const struct file_operations *proc_fops; |
| 73 | write_proc_t *write_fn; | ||
| 74 | } Entries[] = { | 73 | } Entries[] = { |
| 75 | #ifdef CONFIG_JFS_STATISTICS | 74 | #ifdef CONFIG_JFS_STATISTICS |
| 76 | { "lmstats", jfs_lmstats_read, }, | 75 | { "lmstats", &jfs_lmstats_proc_fops, }, |
| 77 | { "txstats", jfs_txstats_read, }, | 76 | { "txstats", &jfs_txstats_proc_fops, }, |
| 78 | { "xtstat", jfs_xtstat_read, }, | 77 | { "xtstat", &jfs_xtstat_proc_fops, }, |
| 79 | { "mpstat", jfs_mpstat_read, }, | 78 | { "mpstat", &jfs_mpstat_proc_fops, }, |
| 80 | #endif | 79 | #endif |
| 81 | #ifdef CONFIG_JFS_DEBUG | 80 | #ifdef CONFIG_JFS_DEBUG |
| 82 | { "TxAnchor", jfs_txanchor_read, }, | 81 | { "TxAnchor", &jfs_txanchor_proc_fops, }, |
| 83 | { "loglevel", loglevel_read, loglevel_write } | 82 | { "loglevel", &jfs_loglevel_proc_fops } |
| 84 | #endif | 83 | #endif |
| 85 | }; | 84 | }; |
| 86 | #define NPROCENT ARRAY_SIZE(Entries) | 85 | #define NPROCENT ARRAY_SIZE(Entries) |
| @@ -93,13 +92,8 @@ void jfs_proc_init(void) | |||
| 93 | return; | 92 | return; |
| 94 | base->owner = THIS_MODULE; | 93 | base->owner = THIS_MODULE; |
| 95 | 94 | ||
| 96 | for (i = 0; i < NPROCENT; i++) { | 95 | for (i = 0; i < NPROCENT; i++) |
| 97 | struct proc_dir_entry *p; | 96 | proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); |
| 98 | if ((p = create_proc_entry(Entries[i].name, 0, base))) { | ||
| 99 | p->read_proc = Entries[i].read_fn; | ||
| 100 | p->write_proc = Entries[i].write_fn; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } | 97 | } |
| 104 | 98 | ||
| 105 | void jfs_proc_clean(void) | 99 | void jfs_proc_clean(void) |
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index 044c1e654cc0..eafd1300a00b 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h | |||
| @@ -62,7 +62,7 @@ extern void jfs_proc_clean(void); | |||
| 62 | 62 | ||
| 63 | extern int jfsloglevel; | 63 | extern int jfsloglevel; |
| 64 | 64 | ||
| 65 | extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); | 65 | extern const struct file_operations jfs_txanchor_proc_fops; |
| 66 | 66 | ||
| 67 | /* information message: e.g., configuration, major event */ | 67 | /* information message: e.g., configuration, major event */ |
| 68 | #define jfs_info(fmt, arg...) do { \ | 68 | #define jfs_info(fmt, arg...) do { \ |
| @@ -105,10 +105,10 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); | |||
| 105 | * ---------- | 105 | * ---------- |
| 106 | */ | 106 | */ |
| 107 | #ifdef CONFIG_JFS_STATISTICS | 107 | #ifdef CONFIG_JFS_STATISTICS |
| 108 | extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); | 108 | extern const struct file_operations jfs_lmstats_proc_fops; |
| 109 | extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); | 109 | extern const struct file_operations jfs_txstats_proc_fops; |
| 110 | extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); | 110 | extern const struct file_operations jfs_mpstat_proc_fops; |
| 111 | extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); | 111 | extern const struct file_operations jfs_xtstat_proc_fops; |
| 112 | 112 | ||
| 113 | #define INCREMENT(x) ((x)++) | 113 | #define INCREMENT(x) ((x)++) |
| 114 | #define DECREMENT(x) ((x)--) | 114 | #define DECREMENT(x) ((x)--) |
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index cdac2d5bafeb..2545bb317235 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
| @@ -243,9 +243,6 @@ typedef union { | |||
| 243 | #define JFS_REMOVE 3 | 243 | #define JFS_REMOVE 3 |
| 244 | #define JFS_RENAME 4 | 244 | #define JFS_RENAME 4 |
| 245 | 245 | ||
| 246 | #define DIRENTSIZ(namlen) \ | ||
| 247 | ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 ) | ||
| 248 | |||
| 249 | /* | 246 | /* |
| 250 | * Maximum file offset for directories. | 247 | * Maximum file offset for directories. |
| 251 | */ | 248 | */ |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 734ec916beaf..d6363d8309d0 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
| @@ -1520,7 +1520,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
| 1520 | jfs_error(ip->i_sb, | 1520 | jfs_error(ip->i_sb, |
| 1521 | "diAlloc: can't find free bit " | 1521 | "diAlloc: can't find free bit " |
| 1522 | "in wmap"); | 1522 | "in wmap"); |
| 1523 | return EIO; | 1523 | return -EIO; |
| 1524 | } | 1524 | } |
| 1525 | 1525 | ||
| 1526 | /* determine the inode number within the | 1526 | /* determine the inode number within the |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 325a9679b95a..cd2ec2988b59 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
| @@ -69,6 +69,7 @@ | |||
| 69 | #include <linux/freezer.h> | 69 | #include <linux/freezer.h> |
| 70 | #include <linux/delay.h> | 70 | #include <linux/delay.h> |
| 71 | #include <linux/mutex.h> | 71 | #include <linux/mutex.h> |
| 72 | #include <linux/seq_file.h> | ||
| 72 | #include "jfs_incore.h" | 73 | #include "jfs_incore.h" |
| 73 | #include "jfs_filsys.h" | 74 | #include "jfs_filsys.h" |
| 74 | #include "jfs_metapage.h" | 75 | #include "jfs_metapage.h" |
| @@ -2503,13 +2504,9 @@ exit: | |||
| 2503 | } | 2504 | } |
| 2504 | 2505 | ||
| 2505 | #ifdef CONFIG_JFS_STATISTICS | 2506 | #ifdef CONFIG_JFS_STATISTICS |
| 2506 | int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, | 2507 | static int jfs_lmstats_proc_show(struct seq_file *m, void *v) |
| 2507 | int *eof, void *data) | ||
| 2508 | { | 2508 | { |
| 2509 | int len = 0; | 2509 | seq_printf(m, |
| 2510 | off_t begin; | ||
| 2511 | |||
| 2512 | len += sprintf(buffer, | ||
| 2513 | "JFS Logmgr stats\n" | 2510 | "JFS Logmgr stats\n" |
| 2514 | "================\n" | 2511 | "================\n" |
| 2515 | "commits = %d\n" | 2512 | "commits = %d\n" |
| @@ -2522,19 +2519,19 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, | |||
| 2522 | lmStat.pagedone, | 2519 | lmStat.pagedone, |
| 2523 | lmStat.full_page, | 2520 | lmStat.full_page, |
| 2524 | lmStat.partial_page); | 2521 | lmStat.partial_page); |
| 2522 | return 0; | ||
| 2523 | } | ||
| 2525 | 2524 | ||
| 2526 | begin = offset; | 2525 | static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) |
| 2527 | *start = buffer + begin; | 2526 | { |
| 2528 | len -= begin; | 2527 | return single_open(file, jfs_lmstats_proc_show, NULL); |
| 2529 | |||
| 2530 | if (len > length) | ||
| 2531 | len = length; | ||
| 2532 | else | ||
| 2533 | *eof = 1; | ||
| 2534 | |||
| 2535 | if (len < 0) | ||
| 2536 | len = 0; | ||
| 2537 | |||
| 2538 | return len; | ||
| 2539 | } | 2528 | } |
| 2529 | |||
| 2530 | const struct file_operations jfs_lmstats_proc_fops = { | ||
| 2531 | .owner = THIS_MODULE, | ||
| 2532 | .open = jfs_lmstats_proc_open, | ||
| 2533 | .read = seq_read, | ||
| 2534 | .llseek = seq_lseek, | ||
| 2535 | .release = single_release, | ||
| 2536 | }; | ||
| 2540 | #endif /* CONFIG_JFS_STATISTICS */ | 2537 | #endif /* CONFIG_JFS_STATISTICS */ |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index d1e64f2f2fcd..854ff0ec574f 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
| @@ -19,10 +19,12 @@ | |||
| 19 | 19 | ||
| 20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
| 21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
| 22 | #include <linux/module.h> | ||
| 22 | #include <linux/bio.h> | 23 | #include <linux/bio.h> |
| 23 | #include <linux/init.h> | 24 | #include <linux/init.h> |
| 24 | #include <linux/buffer_head.h> | 25 | #include <linux/buffer_head.h> |
| 25 | #include <linux/mempool.h> | 26 | #include <linux/mempool.h> |
| 27 | #include <linux/seq_file.h> | ||
| 26 | #include "jfs_incore.h" | 28 | #include "jfs_incore.h" |
| 27 | #include "jfs_superblock.h" | 29 | #include "jfs_superblock.h" |
| 28 | #include "jfs_filsys.h" | 30 | #include "jfs_filsys.h" |
| @@ -804,13 +806,9 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) | |||
| 804 | } | 806 | } |
| 805 | 807 | ||
| 806 | #ifdef CONFIG_JFS_STATISTICS | 808 | #ifdef CONFIG_JFS_STATISTICS |
| 807 | int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, | 809 | static int jfs_mpstat_proc_show(struct seq_file *m, void *v) |
| 808 | int *eof, void *data) | ||
| 809 | { | 810 | { |
| 810 | int len = 0; | 811 | seq_printf(m, |
| 811 | off_t begin; | ||
| 812 | |||
| 813 | len += sprintf(buffer, | ||
| 814 | "JFS Metapage statistics\n" | 812 | "JFS Metapage statistics\n" |
| 815 | "=======================\n" | 813 | "=======================\n" |
| 816 | "page allocations = %d\n" | 814 | "page allocations = %d\n" |
| @@ -819,19 +817,19 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, | |||
| 819 | mpStat.pagealloc, | 817 | mpStat.pagealloc, |
| 820 | mpStat.pagefree, | 818 | mpStat.pagefree, |
| 821 | mpStat.lockwait); | 819 | mpStat.lockwait); |
| 820 | return 0; | ||
| 821 | } | ||
| 822 | 822 | ||
| 823 | begin = offset; | 823 | static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) |
| 824 | *start = buffer + begin; | 824 | { |
| 825 | len -= begin; | 825 | return single_open(file, jfs_mpstat_proc_show, NULL); |
| 826 | |||
| 827 | if (len > length) | ||
| 828 | len = length; | ||
| 829 | else | ||
| 830 | *eof = 1; | ||
| 831 | |||
| 832 | if (len < 0) | ||
| 833 | len = 0; | ||
| 834 | |||
| 835 | return len; | ||
| 836 | } | 826 | } |
| 827 | |||
| 828 | const struct file_operations jfs_mpstat_proc_fops = { | ||
| 829 | .owner = THIS_MODULE, | ||
| 830 | .open = jfs_mpstat_proc_open, | ||
| 831 | .read = seq_read, | ||
| 832 | .llseek = seq_lseek, | ||
| 833 | .release = single_release, | ||
| 834 | }; | ||
| 837 | #endif | 835 | #endif |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index e7c60ae6b5b2..f26e4d03ada5 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
| 50 | #include <linux/moduleparam.h> | 50 | #include <linux/moduleparam.h> |
| 51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
| 52 | #include <linux/seq_file.h> | ||
| 52 | #include "jfs_incore.h" | 53 | #include "jfs_incore.h" |
| 53 | #include "jfs_inode.h" | 54 | #include "jfs_inode.h" |
| 54 | #include "jfs_filsys.h" | 55 | #include "jfs_filsys.h" |
| @@ -3009,11 +3010,8 @@ int jfs_sync(void *arg) | |||
| 3009 | } | 3010 | } |
| 3010 | 3011 | ||
| 3011 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) | 3012 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) |
| 3012 | int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | 3013 | static int jfs_txanchor_proc_show(struct seq_file *m, void *v) |
| 3013 | int *eof, void *data) | ||
| 3014 | { | 3014 | { |
| 3015 | int len = 0; | ||
| 3016 | off_t begin; | ||
| 3017 | char *freewait; | 3015 | char *freewait; |
| 3018 | char *freelockwait; | 3016 | char *freelockwait; |
| 3019 | char *lowlockwait; | 3017 | char *lowlockwait; |
| @@ -3025,7 +3023,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | |||
| 3025 | lowlockwait = | 3023 | lowlockwait = |
| 3026 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; | 3024 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; |
| 3027 | 3025 | ||
| 3028 | len += sprintf(buffer, | 3026 | seq_printf(m, |
| 3029 | "JFS TxAnchor\n" | 3027 | "JFS TxAnchor\n" |
| 3030 | "============\n" | 3028 | "============\n" |
| 3031 | "freetid = %d\n" | 3029 | "freetid = %d\n" |
| @@ -3044,31 +3042,27 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | |||
| 3044 | TxAnchor.tlocksInUse, | 3042 | TxAnchor.tlocksInUse, |
| 3045 | jfs_tlocks_low, | 3043 | jfs_tlocks_low, |
| 3046 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); | 3044 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); |
| 3045 | return 0; | ||
| 3046 | } | ||
| 3047 | 3047 | ||
| 3048 | begin = offset; | 3048 | static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) |
| 3049 | *start = buffer + begin; | 3049 | { |
| 3050 | len -= begin; | 3050 | return single_open(file, jfs_txanchor_proc_show, NULL); |
| 3051 | |||
| 3052 | if (len > length) | ||
| 3053 | len = length; | ||
| 3054 | else | ||
| 3055 | *eof = 1; | ||
| 3056 | |||
| 3057 | if (len < 0) | ||
| 3058 | len = 0; | ||
| 3059 | |||
| 3060 | return len; | ||
| 3061 | } | 3051 | } |
| 3052 | |||
| 3053 | const struct file_operations jfs_txanchor_proc_fops = { | ||
| 3054 | .owner = THIS_MODULE, | ||
| 3055 | .open = jfs_txanchor_proc_open, | ||
| 3056 | .read = seq_read, | ||
| 3057 | .llseek = seq_lseek, | ||
| 3058 | .release = single_release, | ||
| 3059 | }; | ||
| 3062 | #endif | 3060 | #endif |
| 3063 | 3061 | ||
| 3064 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) | 3062 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) |
| 3065 | int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, | 3063 | static int jfs_txstats_proc_show(struct seq_file *m, void *v) |
| 3066 | int *eof, void *data) | ||
| 3067 | { | 3064 | { |
| 3068 | int len = 0; | 3065 | seq_printf(m, |
| 3069 | off_t begin; | ||
| 3070 | |||
| 3071 | len += sprintf(buffer, | ||
| 3072 | "JFS TxStats\n" | 3066 | "JFS TxStats\n" |
| 3073 | "===========\n" | 3067 | "===========\n" |
| 3074 | "calls to txBegin = %d\n" | 3068 | "calls to txBegin = %d\n" |
| @@ -3089,19 +3083,19 @@ int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, | |||
| 3089 | TxStat.txBeginAnon_lockslow, | 3083 | TxStat.txBeginAnon_lockslow, |
| 3090 | TxStat.txLockAlloc, | 3084 | TxStat.txLockAlloc, |
| 3091 | TxStat.txLockAlloc_freelock); | 3085 | TxStat.txLockAlloc_freelock); |
| 3086 | return 0; | ||
| 3087 | } | ||
| 3092 | 3088 | ||
| 3093 | begin = offset; | 3089 | static int jfs_txstats_proc_open(struct inode *inode, struct file *file) |
| 3094 | *start = buffer + begin; | 3090 | { |
| 3095 | len -= begin; | 3091 | return single_open(file, jfs_txstats_proc_show, NULL); |
| 3096 | |||
| 3097 | if (len > length) | ||
| 3098 | len = length; | ||
| 3099 | else | ||
| 3100 | *eof = 1; | ||
| 3101 | |||
| 3102 | if (len < 0) | ||
| 3103 | len = 0; | ||
| 3104 | |||
| 3105 | return len; | ||
| 3106 | } | 3092 | } |
| 3093 | |||
| 3094 | const struct file_operations jfs_txstats_proc_fops = { | ||
| 3095 | .owner = THIS_MODULE, | ||
| 3096 | .open = jfs_txstats_proc_open, | ||
| 3097 | .read = seq_read, | ||
| 3098 | .llseek = seq_lseek, | ||
| 3099 | .release = single_release, | ||
| 3100 | }; | ||
| 3107 | #endif | 3101 | #endif |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 5a61ebf2cbcc..ae3acafb447b 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
| @@ -20,7 +20,9 @@ | |||
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| 23 | #include <linux/module.h> | ||
| 23 | #include <linux/quotaops.h> | 24 | #include <linux/quotaops.h> |
| 25 | #include <linux/seq_file.h> | ||
| 24 | #include "jfs_incore.h" | 26 | #include "jfs_incore.h" |
| 25 | #include "jfs_filsys.h" | 27 | #include "jfs_filsys.h" |
| 26 | #include "jfs_metapage.h" | 28 | #include "jfs_metapage.h" |
| @@ -4134,13 +4136,9 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
| 4134 | } | 4136 | } |
| 4135 | 4137 | ||
| 4136 | #ifdef CONFIG_JFS_STATISTICS | 4138 | #ifdef CONFIG_JFS_STATISTICS |
| 4137 | int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, | 4139 | static int jfs_xtstat_proc_show(struct seq_file *m, void *v) |
| 4138 | int *eof, void *data) | ||
| 4139 | { | 4140 | { |
| 4140 | int len = 0; | 4141 | seq_printf(m, |
| 4141 | off_t begin; | ||
| 4142 | |||
| 4143 | len += sprintf(buffer, | ||
| 4144 | "JFS Xtree statistics\n" | 4142 | "JFS Xtree statistics\n" |
| 4145 | "====================\n" | 4143 | "====================\n" |
| 4146 | "searches = %d\n" | 4144 | "searches = %d\n" |
| @@ -4149,19 +4147,19 @@ int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, | |||
| 4149 | xtStat.search, | 4147 | xtStat.search, |
| 4150 | xtStat.fastSearch, | 4148 | xtStat.fastSearch, |
| 4151 | xtStat.split); | 4149 | xtStat.split); |
| 4150 | return 0; | ||
| 4151 | } | ||
| 4152 | 4152 | ||
| 4153 | begin = offset; | 4153 | static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) |
| 4154 | *start = buffer + begin; | 4154 | { |
| 4155 | len -= begin; | 4155 | return single_open(file, jfs_xtstat_proc_show, NULL); |
| 4156 | |||
| 4157 | if (len > length) | ||
| 4158 | len = length; | ||
| 4159 | else | ||
| 4160 | *eof = 1; | ||
| 4161 | |||
| 4162 | if (len < 0) | ||
| 4163 | len = 0; | ||
| 4164 | |||
| 4165 | return len; | ||
| 4166 | } | 4156 | } |
| 4157 | |||
| 4158 | const struct file_operations jfs_xtstat_proc_fops = { | ||
| 4159 | .owner = THIS_MODULE, | ||
| 4160 | .open = jfs_xtstat_proc_open, | ||
| 4161 | .read = seq_read, | ||
| 4162 | .llseek = seq_lseek, | ||
| 4163 | .release = single_release, | ||
| 4164 | }; | ||
| 4167 | #endif | 4165 | #endif |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 0ba6778edaa2..2aba82386810 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -1455,7 +1455,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc | |||
| 1455 | free_UCSname(&key); | 1455 | free_UCSname(&key); |
| 1456 | if (rc == -ENOENT) { | 1456 | if (rc == -ENOENT) { |
| 1457 | d_add(dentry, NULL); | 1457 | d_add(dentry, NULL); |
| 1458 | return ERR_PTR(0); | 1458 | return NULL; |
| 1459 | } else if (rc) { | 1459 | } else if (rc) { |
| 1460 | jfs_err("jfs_lookup: dtSearch returned %d", rc); | 1460 | jfs_err("jfs_lookup: dtSearch returned %d", rc); |
| 1461 | return ERR_PTR(rc); | 1461 | return ERR_PTR(rc); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 50ea65451732..0288e6d7936a 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -499,7 +499,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 499 | inode = jfs_iget(sb, ROOT_I); | 499 | inode = jfs_iget(sb, ROOT_I); |
| 500 | if (IS_ERR(inode)) { | 500 | if (IS_ERR(inode)) { |
| 501 | ret = PTR_ERR(inode); | 501 | ret = PTR_ERR(inode); |
| 502 | goto out_no_root; | 502 | goto out_no_rw; |
| 503 | } | 503 | } |
| 504 | sb->s_root = d_alloc_root(inode); | 504 | sb->s_root = d_alloc_root(inode); |
| 505 | if (!sb->s_root) | 505 | if (!sb->s_root) |
| @@ -521,9 +521,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 521 | return 0; | 521 | return 0; |
| 522 | 522 | ||
| 523 | out_no_root: | 523 | out_no_root: |
| 524 | jfs_err("jfs_read_super: get root inode failed"); | 524 | jfs_err("jfs_read_super: get root dentry failed"); |
| 525 | if (inode) | 525 | iput(inode); |
| 526 | iput(inode); | ||
| 527 | 526 | ||
| 528 | out_no_rw: | 527 | out_no_rw: |
| 529 | rc = jfs_umount(sb); | 528 | rc = jfs_umount(sb); |
diff --git a/fs/libfs.c b/fs/libfs.c index 892d41cb3382..baeb71ee1cde 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -512,6 +512,20 @@ void simple_release_fs(struct vfsmount **mount, int *count) | |||
| 512 | mntput(mnt); | 512 | mntput(mnt); |
| 513 | } | 513 | } |
| 514 | 514 | ||
| 515 | /** | ||
| 516 | * simple_read_from_buffer - copy data from the buffer to user space | ||
| 517 | * @to: the user space buffer to read to | ||
| 518 | * @count: the maximum number of bytes to read | ||
| 519 | * @ppos: the current position in the buffer | ||
| 520 | * @from: the buffer to read from | ||
| 521 | * @available: the size of the buffer | ||
| 522 | * | ||
| 523 | * The simple_read_from_buffer() function reads up to @count bytes from the | ||
| 524 | * buffer @from at offset @ppos into the user space address starting at @to. | ||
| 525 | * | ||
| 526 | * On success, the number of bytes read is returned and the offset @ppos is | ||
| 527 | * advanced by this number, or negative value is returned on error. | ||
| 528 | **/ | ||
| 515 | ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, | 529 | ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, |
| 516 | const void *from, size_t available) | 530 | const void *from, size_t available) |
| 517 | { | 531 | { |
| @@ -528,6 +542,20 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, | |||
| 528 | return count; | 542 | return count; |
| 529 | } | 543 | } |
| 530 | 544 | ||
| 545 | /** | ||
| 546 | * memory_read_from_buffer - copy data from the buffer | ||
| 547 | * @to: the kernel space buffer to read to | ||
| 548 | * @count: the maximum number of bytes to read | ||
| 549 | * @ppos: the current position in the buffer | ||
| 550 | * @from: the buffer to read from | ||
| 551 | * @available: the size of the buffer | ||
| 552 | * | ||
| 553 | * The memory_read_from_buffer() function reads up to @count bytes from the | ||
| 554 | * buffer @from at offset @ppos into the kernel space address starting at @to. | ||
| 555 | * | ||
| 556 | * On success, the number of bytes read is returned and the offset @ppos is | ||
| 557 | * advanced by this number, or negative value is returned on error. | ||
| 558 | **/ | ||
| 531 | ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, | 559 | ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, |
| 532 | const void *from, size_t available) | 560 | const void *from, size_t available) |
| 533 | { | 561 | { |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5df517b81f3f..1f6dc518505c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
| @@ -224,7 +224,9 @@ void nlm_release_call(struct nlm_rqst *call) | |||
| 224 | 224 | ||
| 225 | static void nlmclnt_rpc_release(void *data) | 225 | static void nlmclnt_rpc_release(void *data) |
| 226 | { | 226 | { |
| 227 | lock_kernel(); | ||
| 227 | nlm_release_call(data); | 228 | nlm_release_call(data); |
| 229 | unlock_kernel(); | ||
| 228 | } | 230 | } |
| 229 | 231 | ||
| 230 | static int nlm_wait_on_grace(wait_queue_head_t *queue) | 232 | static int nlm_wait_on_grace(wait_queue_head_t *queue) |
| @@ -430,7 +432,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) | |||
| 430 | * Report the conflicting lock back to the application. | 432 | * Report the conflicting lock back to the application. |
| 431 | */ | 433 | */ |
| 432 | fl->fl_start = req->a_res.lock.fl.fl_start; | 434 | fl->fl_start = req->a_res.lock.fl.fl_start; |
| 433 | fl->fl_end = req->a_res.lock.fl.fl_start; | 435 | fl->fl_end = req->a_res.lock.fl.fl_end; |
| 434 | fl->fl_type = req->a_res.lock.fl.fl_type; | 436 | fl->fl_type = req->a_res.lock.fl.fl_type; |
| 435 | fl->fl_pid = 0; | 437 | fl->fl_pid = 0; |
| 436 | break; | 438 | break; |
| @@ -710,7 +712,9 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) | |||
| 710 | die: | 712 | die: |
| 711 | return; | 713 | return; |
| 712 | retry_rebind: | 714 | retry_rebind: |
| 715 | lock_kernel(); | ||
| 713 | nlm_rebind_host(req->a_host); | 716 | nlm_rebind_host(req->a_host); |
| 717 | unlock_kernel(); | ||
| 714 | retry_unlock: | 718 | retry_unlock: |
| 715 | rpc_restart_call(task); | 719 | rpc_restart_call(task); |
| 716 | } | 720 | } |
| @@ -788,7 +792,9 @@ retry_cancel: | |||
| 788 | /* Don't ever retry more than 3 times */ | 792 | /* Don't ever retry more than 3 times */ |
| 789 | if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) | 793 | if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) |
| 790 | goto die; | 794 | goto die; |
| 795 | lock_kernel(); | ||
| 791 | nlm_rebind_host(req->a_host); | 796 | nlm_rebind_host(req->a_host); |
| 797 | unlock_kernel(); | ||
| 792 | rpc_restart_call(task); | 798 | rpc_restart_call(task); |
| 793 | rpc_delay(task, 30 * HZ); | 799 | rpc_delay(task, 30 * HZ); |
| 794 | } | 800 | } |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 385437e3387d..2e27176ff42f 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
| @@ -248,7 +248,9 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) | |||
| 248 | 248 | ||
| 249 | static void nlm4svc_callback_release(void *data) | 249 | static void nlm4svc_callback_release(void *data) |
| 250 | { | 250 | { |
| 251 | lock_kernel(); | ||
| 251 | nlm_release_call(data); | 252 | nlm_release_call(data); |
| 253 | unlock_kernel(); | ||
| 252 | } | 254 | } |
| 253 | 255 | ||
| 254 | static const struct rpc_call_ops nlm4svc_callback_ops = { | 256 | static const struct rpc_call_ops nlm4svc_callback_ops = { |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 81aca859bfde..56a08ab9a4cb 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
| @@ -795,6 +795,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) | |||
| 795 | 795 | ||
| 796 | dprintk("lockd: GRANT_MSG RPC callback\n"); | 796 | dprintk("lockd: GRANT_MSG RPC callback\n"); |
| 797 | 797 | ||
| 798 | lock_kernel(); | ||
| 798 | /* if the block is not on a list at this point then it has | 799 | /* if the block is not on a list at this point then it has |
| 799 | * been invalidated. Don't try to requeue it. | 800 | * been invalidated. Don't try to requeue it. |
| 800 | * | 801 | * |
| @@ -804,7 +805,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) | |||
| 804 | * for nlm_blocked? | 805 | * for nlm_blocked? |
| 805 | */ | 806 | */ |
| 806 | if (list_empty(&block->b_list)) | 807 | if (list_empty(&block->b_list)) |
| 807 | return; | 808 | goto out; |
| 808 | 809 | ||
| 809 | /* Technically, we should down the file semaphore here. Since we | 810 | /* Technically, we should down the file semaphore here. Since we |
| 810 | * move the block towards the head of the queue only, no harm | 811 | * move the block towards the head of the queue only, no harm |
| @@ -818,13 +819,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) | |||
| 818 | } | 819 | } |
| 819 | nlmsvc_insert_block(block, timeout); | 820 | nlmsvc_insert_block(block, timeout); |
| 820 | svc_wake_up(block->b_daemon); | 821 | svc_wake_up(block->b_daemon); |
| 822 | out: | ||
| 823 | unlock_kernel(); | ||
| 821 | } | 824 | } |
| 822 | 825 | ||
| 823 | static void nlmsvc_grant_release(void *data) | 826 | static void nlmsvc_grant_release(void *data) |
| 824 | { | 827 | { |
| 825 | struct nlm_rqst *call = data; | 828 | struct nlm_rqst *call = data; |
| 826 | 829 | ||
| 830 | lock_kernel(); | ||
| 827 | nlmsvc_release_block(call->a_block); | 831 | nlmsvc_release_block(call->a_block); |
| 832 | unlock_kernel(); | ||
| 828 | } | 833 | } |
| 829 | 834 | ||
| 830 | static const struct rpc_call_ops nlmsvc_grant_ops = { | 835 | static const struct rpc_call_ops nlmsvc_grant_ops = { |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 88379cc6e0b1..ce6952b50a75 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
| @@ -278,7 +278,9 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) | |||
| 278 | 278 | ||
| 279 | static void nlmsvc_callback_release(void *data) | 279 | static void nlmsvc_callback_release(void *data) |
| 280 | { | 280 | { |
| 281 | lock_kernel(); | ||
| 281 | nlm_release_call(data); | 282 | nlm_release_call(data); |
| 283 | unlock_kernel(); | ||
| 282 | } | 284 | } |
| 283 | 285 | ||
| 284 | static const struct rpc_call_ops nlmsvc_callback_ops = { | 286 | static const struct rpc_call_ops nlmsvc_callback_ops = { |
diff --git a/fs/locks.c b/fs/locks.c index 11dbf08651b7..dce8c747371c 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
| 561 | /* insert into file's list */ | 561 | /* insert into file's list */ |
| 562 | fl->fl_next = *pos; | 562 | fl->fl_next = *pos; |
| 563 | *pos = fl; | 563 | *pos = fl; |
| 564 | |||
| 565 | if (fl->fl_ops && fl->fl_ops->fl_insert) | ||
| 566 | fl->fl_ops->fl_insert(fl); | ||
| 567 | } | 564 | } |
| 568 | 565 | ||
| 569 | /* | 566 | /* |
| @@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p) | |||
| 586 | fl->fl_fasync = NULL; | 583 | fl->fl_fasync = NULL; |
| 587 | } | 584 | } |
| 588 | 585 | ||
| 589 | if (fl->fl_ops && fl->fl_ops->fl_remove) | ||
| 590 | fl->fl_ops->fl_remove(fl); | ||
| 591 | |||
| 592 | if (fl->fl_nspid) { | 586 | if (fl->fl_nspid) { |
| 593 | put_pid(fl->fl_nspid); | 587 | put_pid(fl->fl_nspid); |
| 594 | fl->fl_nspid = NULL; | 588 | fl->fl_nspid = NULL; |
diff --git a/fs/mpage.c b/fs/mpage.c index 235e4d3873a8..dbcc7af76a15 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
| @@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err) | |||
| 82 | bio_put(bio); | 82 | bio_put(bio); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) | 85 | struct bio *mpage_bio_submit(int rw, struct bio *bio) |
| 86 | { | 86 | { |
| 87 | bio->bi_end_io = mpage_end_io_read; | 87 | bio->bi_end_io = mpage_end_io_read; |
| 88 | if (rw == WRITE) | 88 | if (rw == WRITE) |
| @@ -90,6 +90,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio) | |||
| 90 | submit_bio(rw, bio); | 90 | submit_bio(rw, bio); |
| 91 | return NULL; | 91 | return NULL; |
| 92 | } | 92 | } |
| 93 | EXPORT_SYMBOL(mpage_bio_submit); | ||
| 93 | 94 | ||
| 94 | static struct bio * | 95 | static struct bio * |
| 95 | mpage_alloc(struct block_device *bdev, | 96 | mpage_alloc(struct block_device *bdev, |
| @@ -435,15 +436,9 @@ EXPORT_SYMBOL(mpage_readpage); | |||
| 435 | * written, so it can intelligently allocate a suitably-sized BIO. For now, | 436 | * written, so it can intelligently allocate a suitably-sized BIO. For now, |
| 436 | * just allocate full-size (16-page) BIOs. | 437 | * just allocate full-size (16-page) BIOs. |
| 437 | */ | 438 | */ |
| 438 | struct mpage_data { | ||
| 439 | struct bio *bio; | ||
| 440 | sector_t last_block_in_bio; | ||
| 441 | get_block_t *get_block; | ||
| 442 | unsigned use_writepage; | ||
| 443 | }; | ||
| 444 | 439 | ||
| 445 | static int __mpage_writepage(struct page *page, struct writeback_control *wbc, | 440 | int __mpage_writepage(struct page *page, struct writeback_control *wbc, |
| 446 | void *data) | 441 | void *data) |
| 447 | { | 442 | { |
| 448 | struct mpage_data *mpd = data; | 443 | struct mpage_data *mpd = data; |
| 449 | struct bio *bio = mpd->bio; | 444 | struct bio *bio = mpd->bio; |
| @@ -651,6 +646,7 @@ out: | |||
| 651 | mpd->bio = bio; | 646 | mpd->bio = bio; |
| 652 | return ret; | 647 | return ret; |
| 653 | } | 648 | } |
| 649 | EXPORT_SYMBOL(__mpage_writepage); | ||
| 654 | 650 | ||
| 655 | /** | 651 | /** |
| 656 | * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them | 652 | * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them |
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 05ff4f1d7026..1f7f2956412a 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c | |||
| @@ -214,7 +214,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, | |||
| 214 | 214 | ||
| 215 | dentry->d_op = &msdos_dentry_operations; | 215 | dentry->d_op = &msdos_dentry_operations; |
| 216 | 216 | ||
| 217 | lock_kernel(); | 217 | lock_super(sb); |
| 218 | res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); | 218 | res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); |
| 219 | if (res == -ENOENT) | 219 | if (res == -ENOENT) |
| 220 | goto add; | 220 | goto add; |
| @@ -232,7 +232,7 @@ add: | |||
| 232 | if (dentry) | 232 | if (dentry) |
| 233 | dentry->d_op = &msdos_dentry_operations; | 233 | dentry->d_op = &msdos_dentry_operations; |
| 234 | out: | 234 | out: |
| 235 | unlock_kernel(); | 235 | unlock_super(sb); |
| 236 | if (!res) | 236 | if (!res) |
| 237 | return dentry; | 237 | return dentry; |
| 238 | return ERR_PTR(res); | 238 | return ERR_PTR(res); |
| @@ -286,7 +286,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 286 | unsigned char msdos_name[MSDOS_NAME]; | 286 | unsigned char msdos_name[MSDOS_NAME]; |
| 287 | int err, is_hid; | 287 | int err, is_hid; |
| 288 | 288 | ||
| 289 | lock_kernel(); | 289 | lock_super(sb); |
| 290 | 290 | ||
| 291 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, | 291 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, |
| 292 | msdos_name, &MSDOS_SB(sb)->options); | 292 | msdos_name, &MSDOS_SB(sb)->options); |
| @@ -315,7 +315,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 315 | 315 | ||
| 316 | d_instantiate(dentry, inode); | 316 | d_instantiate(dentry, inode); |
| 317 | out: | 317 | out: |
| 318 | unlock_kernel(); | 318 | unlock_super(sb); |
| 319 | if (!err) | 319 | if (!err) |
| 320 | err = fat_flush_inodes(sb, dir, inode); | 320 | err = fat_flush_inodes(sb, dir, inode); |
| 321 | return err; | 321 | return err; |
| @@ -324,11 +324,12 @@ out: | |||
| 324 | /***** Remove a directory */ | 324 | /***** Remove a directory */ |
| 325 | static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | 325 | static int msdos_rmdir(struct inode *dir, struct dentry *dentry) |
| 326 | { | 326 | { |
| 327 | struct super_block *sb = dir->i_sb; | ||
| 327 | struct inode *inode = dentry->d_inode; | 328 | struct inode *inode = dentry->d_inode; |
| 328 | struct fat_slot_info sinfo; | 329 | struct fat_slot_info sinfo; |
| 329 | int err; | 330 | int err; |
| 330 | 331 | ||
| 331 | lock_kernel(); | 332 | lock_super(sb); |
| 332 | /* | 333 | /* |
| 333 | * Check whether the directory is not in use, then check | 334 | * Check whether the directory is not in use, then check |
| 334 | * whether it is empty. | 335 | * whether it is empty. |
| @@ -349,9 +350,9 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 349 | inode->i_ctime = CURRENT_TIME_SEC; | 350 | inode->i_ctime = CURRENT_TIME_SEC; |
| 350 | fat_detach(inode); | 351 | fat_detach(inode); |
| 351 | out: | 352 | out: |
| 352 | unlock_kernel(); | 353 | unlock_super(sb); |
| 353 | if (!err) | 354 | if (!err) |
| 354 | err = fat_flush_inodes(inode->i_sb, dir, inode); | 355 | err = fat_flush_inodes(sb, dir, inode); |
| 355 | 356 | ||
| 356 | return err; | 357 | return err; |
| 357 | } | 358 | } |
| @@ -366,7 +367,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 366 | struct timespec ts; | 367 | struct timespec ts; |
| 367 | int err, is_hid, cluster; | 368 | int err, is_hid, cluster; |
| 368 | 369 | ||
| 369 | lock_kernel(); | 370 | lock_super(sb); |
| 370 | 371 | ||
| 371 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, | 372 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, |
| 372 | msdos_name, &MSDOS_SB(sb)->options); | 373 | msdos_name, &MSDOS_SB(sb)->options); |
| @@ -404,14 +405,14 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 404 | 405 | ||
| 405 | d_instantiate(dentry, inode); | 406 | d_instantiate(dentry, inode); |
| 406 | 407 | ||
| 407 | unlock_kernel(); | 408 | unlock_super(sb); |
| 408 | fat_flush_inodes(sb, dir, inode); | 409 | fat_flush_inodes(sb, dir, inode); |
| 409 | return 0; | 410 | return 0; |
| 410 | 411 | ||
| 411 | out_free: | 412 | out_free: |
| 412 | fat_free_clusters(dir, cluster); | 413 | fat_free_clusters(dir, cluster); |
| 413 | out: | 414 | out: |
| 414 | unlock_kernel(); | 415 | unlock_super(sb); |
| 415 | return err; | 416 | return err; |
| 416 | } | 417 | } |
| 417 | 418 | ||
| @@ -419,10 +420,11 @@ out: | |||
| 419 | static int msdos_unlink(struct inode *dir, struct dentry *dentry) | 420 | static int msdos_unlink(struct inode *dir, struct dentry *dentry) |
| 420 | { | 421 | { |
| 421 | struct inode *inode = dentry->d_inode; | 422 | struct inode *inode = dentry->d_inode; |
| 423 | struct super_block *sb= inode->i_sb; | ||
| 422 | struct fat_slot_info sinfo; | 424 | struct fat_slot_info sinfo; |
| 423 | int err; | 425 | int err; |
| 424 | 426 | ||
| 425 | lock_kernel(); | 427 | lock_super(sb); |
| 426 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); | 428 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); |
| 427 | if (err) | 429 | if (err) |
| 428 | goto out; | 430 | goto out; |
| @@ -434,9 +436,9 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) | |||
| 434 | inode->i_ctime = CURRENT_TIME_SEC; | 436 | inode->i_ctime = CURRENT_TIME_SEC; |
| 435 | fat_detach(inode); | 437 | fat_detach(inode); |
| 436 | out: | 438 | out: |
| 437 | unlock_kernel(); | 439 | unlock_super(sb); |
| 438 | if (!err) | 440 | if (!err) |
| 439 | err = fat_flush_inodes(inode->i_sb, dir, inode); | 441 | err = fat_flush_inodes(sb, dir, inode); |
| 440 | 442 | ||
| 441 | return err; | 443 | return err; |
| 442 | } | 444 | } |
| @@ -618,10 +620,11 @@ error_inode: | |||
| 618 | static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, | 620 | static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 619 | struct inode *new_dir, struct dentry *new_dentry) | 621 | struct inode *new_dir, struct dentry *new_dentry) |
| 620 | { | 622 | { |
| 623 | struct super_block *sb = old_dir->i_sb; | ||
| 621 | unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; | 624 | unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; |
| 622 | int err, is_hid; | 625 | int err, is_hid; |
| 623 | 626 | ||
| 624 | lock_kernel(); | 627 | lock_super(sb); |
| 625 | 628 | ||
| 626 | err = msdos_format_name(old_dentry->d_name.name, | 629 | err = msdos_format_name(old_dentry->d_name.name, |
| 627 | old_dentry->d_name.len, old_msdos_name, | 630 | old_dentry->d_name.len, old_msdos_name, |
| @@ -640,9 +643,9 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 640 | err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, | 643 | err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, |
| 641 | new_dir, new_msdos_name, new_dentry, is_hid); | 644 | new_dir, new_msdos_name, new_dentry, is_hid); |
| 642 | out: | 645 | out: |
| 643 | unlock_kernel(); | 646 | unlock_super(sb); |
| 644 | if (!err) | 647 | if (!err) |
| 645 | err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); | 648 | err = fat_flush_inodes(sb, old_dir, new_dir); |
| 646 | return err; | 649 | return err; |
| 647 | } | 650 | } |
| 648 | 651 | ||
diff --git a/fs/namei.c b/fs/namei.c index c7e43536c49a..01e67dddcc3d 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd | |||
| 581 | int result; | 581 | int result; |
| 582 | 582 | ||
| 583 | /* make sure the stuff we saved doesn't go away */ | 583 | /* make sure the stuff we saved doesn't go away */ |
| 584 | dget(save.dentry); | 584 | path_get(&save); |
| 585 | mntget(save.mnt); | ||
| 586 | 585 | ||
| 587 | result = __link_path_walk(name, nd); | 586 | result = __link_path_walk(name, nd); |
| 588 | if (result == -ESTALE) { | 587 | if (result == -ESTALE) { |
| 589 | /* nd->path had been dropped */ | 588 | /* nd->path had been dropped */ |
| 590 | nd->path = save; | 589 | nd->path = save; |
| 591 | dget(nd->path.dentry); | 590 | path_get(&nd->path); |
| 592 | mntget(nd->path.mnt); | ||
| 593 | nd->flags |= LOOKUP_REVAL; | 591 | nd->flags |= LOOKUP_REVAL; |
| 594 | result = __link_path_walk(name, nd); | 592 | result = __link_path_walk(name, nd); |
| 595 | } | 593 | } |
| @@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
| 1216 | nd->flags = flags; | 1214 | nd->flags = flags; |
| 1217 | nd->depth = 0; | 1215 | nd->depth = 0; |
| 1218 | 1216 | ||
| 1219 | nd->path.mnt = mntget(mnt); | 1217 | nd->path.dentry = dentry; |
| 1220 | nd->path.dentry = dget(dentry); | 1218 | nd->path.mnt = mnt; |
| 1219 | path_get(&nd->path); | ||
| 1221 | 1220 | ||
| 1222 | retval = path_walk(name, nd); | 1221 | retval = path_walk(name, nd); |
| 1223 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1222 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && |
| @@ -2857,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) | |||
| 2857 | { | 2856 | { |
| 2858 | struct nameidata nd; | 2857 | struct nameidata nd; |
| 2859 | void *cookie; | 2858 | void *cookie; |
| 2859 | int res; | ||
| 2860 | 2860 | ||
| 2861 | nd.depth = 0; | 2861 | nd.depth = 0; |
| 2862 | cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); | 2862 | cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); |
| 2863 | if (!IS_ERR(cookie)) { | 2863 | if (IS_ERR(cookie)) |
| 2864 | int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); | 2864 | return PTR_ERR(cookie); |
| 2865 | if (dentry->d_inode->i_op->put_link) | 2865 | |
| 2866 | dentry->d_inode->i_op->put_link(dentry, &nd, cookie); | 2866 | res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); |
| 2867 | cookie = ERR_PTR(res); | 2867 | if (dentry->d_inode->i_op->put_link) |
| 2868 | } | 2868 | dentry->d_inode->i_op->put_link(dentry, &nd, cookie); |
| 2869 | return PTR_ERR(cookie); | 2869 | return res; |
| 2870 | } | 2870 | } |
| 2871 | 2871 | ||
| 2872 | int vfs_follow_link(struct nameidata *nd, const char *link) | 2872 | int vfs_follow_link(struct nameidata *nd, const char *link) |
diff --git a/fs/namespace.c b/fs/namespace.c index 4fc302c2a0e0..4f6f7635b59c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -750,7 +750,7 @@ struct proc_fs_info { | |||
| 750 | const char *str; | 750 | const char *str; |
| 751 | }; | 751 | }; |
| 752 | 752 | ||
| 753 | static void show_sb_opts(struct seq_file *m, struct super_block *sb) | 753 | static int show_sb_opts(struct seq_file *m, struct super_block *sb) |
| 754 | { | 754 | { |
| 755 | static const struct proc_fs_info fs_info[] = { | 755 | static const struct proc_fs_info fs_info[] = { |
| 756 | { MS_SYNCHRONOUS, ",sync" }, | 756 | { MS_SYNCHRONOUS, ",sync" }, |
| @@ -764,6 +764,8 @@ static void show_sb_opts(struct seq_file *m, struct super_block *sb) | |||
| 764 | if (sb->s_flags & fs_infop->flag) | 764 | if (sb->s_flags & fs_infop->flag) |
| 765 | seq_puts(m, fs_infop->str); | 765 | seq_puts(m, fs_infop->str); |
| 766 | } | 766 | } |
| 767 | |||
| 768 | return security_sb_show_options(m, sb); | ||
| 767 | } | 769 | } |
| 768 | 770 | ||
| 769 | static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) | 771 | static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) |
| @@ -806,11 +808,14 @@ static int show_vfsmnt(struct seq_file *m, void *v) | |||
| 806 | seq_putc(m, ' '); | 808 | seq_putc(m, ' '); |
| 807 | show_type(m, mnt->mnt_sb); | 809 | show_type(m, mnt->mnt_sb); |
| 808 | seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); | 810 | seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); |
| 809 | show_sb_opts(m, mnt->mnt_sb); | 811 | err = show_sb_opts(m, mnt->mnt_sb); |
| 812 | if (err) | ||
| 813 | goto out; | ||
| 810 | show_mnt_opts(m, mnt); | 814 | show_mnt_opts(m, mnt); |
| 811 | if (mnt->mnt_sb->s_op->show_options) | 815 | if (mnt->mnt_sb->s_op->show_options) |
| 812 | err = mnt->mnt_sb->s_op->show_options(m, mnt); | 816 | err = mnt->mnt_sb->s_op->show_options(m, mnt); |
| 813 | seq_puts(m, " 0 0\n"); | 817 | seq_puts(m, " 0 0\n"); |
| 818 | out: | ||
| 814 | return err; | 819 | return err; |
| 815 | } | 820 | } |
| 816 | 821 | ||
| @@ -865,10 +870,13 @@ static int show_mountinfo(struct seq_file *m, void *v) | |||
| 865 | seq_putc(m, ' '); | 870 | seq_putc(m, ' '); |
| 866 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); | 871 | mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); |
| 867 | seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); | 872 | seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); |
| 868 | show_sb_opts(m, sb); | 873 | err = show_sb_opts(m, sb); |
| 874 | if (err) | ||
| 875 | goto out; | ||
| 869 | if (sb->s_op->show_options) | 876 | if (sb->s_op->show_options) |
| 870 | err = sb->s_op->show_options(m, mnt); | 877 | err = sb->s_op->show_options(m, mnt); |
| 871 | seq_putc(m, '\n'); | 878 | seq_putc(m, '\n'); |
| 879 | out: | ||
| 872 | return err; | 880 | return err; |
| 873 | } | 881 | } |
| 874 | 882 | ||
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 2b145de45b39..6a7d901f1936 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
| 19 | #include <linux/vmalloc.h> | 19 | #include <linux/vmalloc.h> |
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/smp_lock.h> | ||
| 21 | 22 | ||
| 22 | #include <linux/ncp_fs.h> | 23 | #include <linux/ncp_fs.h> |
| 23 | #include "ncplib_kernel.h" | 24 | #include "ncplib_kernel.h" |
| @@ -281,9 +282,18 @@ static int ncp_release(struct inode *inode, struct file *file) { | |||
| 281 | return 0; | 282 | return 0; |
| 282 | } | 283 | } |
| 283 | 284 | ||
| 285 | static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin) | ||
| 286 | { | ||
| 287 | loff_t ret; | ||
| 288 | lock_kernel(); | ||
| 289 | ret = generic_file_llseek_unlocked(file, offset, origin); | ||
| 290 | unlock_kernel(); | ||
| 291 | return ret; | ||
| 292 | } | ||
| 293 | |||
| 284 | const struct file_operations ncp_file_operations = | 294 | const struct file_operations ncp_file_operations = |
| 285 | { | 295 | { |
| 286 | .llseek = remote_llseek, | 296 | .llseek = ncp_remote_llseek, |
| 287 | .read = ncp_file_read, | 297 | .read = ncp_file_read, |
| 288 | .write = ncp_file_write, | 298 | .write = ncp_file_write, |
| 289 | .ioctl = ncp_ioctl, | 299 | .ioctl = ncp_ioctl, |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c1e7c8300629..f447f4b4476c 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | struct nfs_callback_data { | 28 | struct nfs_callback_data { |
| 29 | unsigned int users; | 29 | unsigned int users; |
| 30 | struct svc_serv *serv; | 30 | struct svc_rqst *rqst; |
| 31 | struct task_struct *task; | 31 | struct task_struct *task; |
| 32 | }; | 32 | }; |
| 33 | 33 | ||
| @@ -91,21 +91,17 @@ nfs_callback_svc(void *vrqstp) | |||
| 91 | svc_process(rqstp); | 91 | svc_process(rqstp); |
| 92 | } | 92 | } |
| 93 | unlock_kernel(); | 93 | unlock_kernel(); |
| 94 | nfs_callback_info.task = NULL; | ||
| 95 | svc_exit_thread(rqstp); | ||
| 96 | return 0; | 94 | return 0; |
| 97 | } | 95 | } |
| 98 | 96 | ||
| 99 | /* | 97 | /* |
| 100 | * Bring up the server process if it is not already up. | 98 | * Bring up the callback thread if it is not already up. |
| 101 | */ | 99 | */ |
| 102 | int nfs_callback_up(void) | 100 | int nfs_callback_up(void) |
| 103 | { | 101 | { |
| 104 | struct svc_serv *serv = NULL; | 102 | struct svc_serv *serv = NULL; |
| 105 | struct svc_rqst *rqstp; | ||
| 106 | int ret = 0; | 103 | int ret = 0; |
| 107 | 104 | ||
| 108 | lock_kernel(); | ||
| 109 | mutex_lock(&nfs_callback_mutex); | 105 | mutex_lock(&nfs_callback_mutex); |
| 110 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) | 106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) |
| 111 | goto out; | 107 | goto out; |
| @@ -121,22 +117,23 @@ int nfs_callback_up(void) | |||
| 121 | nfs_callback_tcpport = ret; | 117 | nfs_callback_tcpport = ret; |
| 122 | dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); | 118 | dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); |
| 123 | 119 | ||
| 124 | rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); | 120 | nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); |
| 125 | if (IS_ERR(rqstp)) { | 121 | if (IS_ERR(nfs_callback_info.rqst)) { |
| 126 | ret = PTR_ERR(rqstp); | 122 | ret = PTR_ERR(nfs_callback_info.rqst); |
| 123 | nfs_callback_info.rqst = NULL; | ||
| 127 | goto out_err; | 124 | goto out_err; |
| 128 | } | 125 | } |
| 129 | 126 | ||
| 130 | svc_sock_update_bufs(serv); | 127 | svc_sock_update_bufs(serv); |
| 131 | nfs_callback_info.serv = serv; | ||
| 132 | 128 | ||
| 133 | nfs_callback_info.task = kthread_run(nfs_callback_svc, rqstp, | 129 | nfs_callback_info.task = kthread_run(nfs_callback_svc, |
| 130 | nfs_callback_info.rqst, | ||
| 134 | "nfsv4-svc"); | 131 | "nfsv4-svc"); |
| 135 | if (IS_ERR(nfs_callback_info.task)) { | 132 | if (IS_ERR(nfs_callback_info.task)) { |
| 136 | ret = PTR_ERR(nfs_callback_info.task); | 133 | ret = PTR_ERR(nfs_callback_info.task); |
| 137 | nfs_callback_info.serv = NULL; | 134 | svc_exit_thread(nfs_callback_info.rqst); |
| 135 | nfs_callback_info.rqst = NULL; | ||
| 138 | nfs_callback_info.task = NULL; | 136 | nfs_callback_info.task = NULL; |
| 139 | svc_exit_thread(rqstp); | ||
| 140 | goto out_err; | 137 | goto out_err; |
| 141 | } | 138 | } |
| 142 | out: | 139 | out: |
| @@ -149,7 +146,6 @@ out: | |||
| 149 | if (serv) | 146 | if (serv) |
| 150 | svc_destroy(serv); | 147 | svc_destroy(serv); |
| 151 | mutex_unlock(&nfs_callback_mutex); | 148 | mutex_unlock(&nfs_callback_mutex); |
| 152 | unlock_kernel(); | ||
| 153 | return ret; | 149 | return ret; |
| 154 | out_err: | 150 | out_err: |
| 155 | dprintk("Couldn't create callback socket or server thread; err = %d\n", | 151 | dprintk("Couldn't create callback socket or server thread; err = %d\n", |
| @@ -159,17 +155,19 @@ out_err: | |||
| 159 | } | 155 | } |
| 160 | 156 | ||
| 161 | /* | 157 | /* |
| 162 | * Kill the server process if it is not already down. | 158 | * Kill the callback thread if it's no longer being used. |
| 163 | */ | 159 | */ |
| 164 | void nfs_callback_down(void) | 160 | void nfs_callback_down(void) |
| 165 | { | 161 | { |
| 166 | lock_kernel(); | ||
| 167 | mutex_lock(&nfs_callback_mutex); | 162 | mutex_lock(&nfs_callback_mutex); |
| 168 | nfs_callback_info.users--; | 163 | nfs_callback_info.users--; |
| 169 | if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) | 164 | if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) { |
| 170 | kthread_stop(nfs_callback_info.task); | 165 | kthread_stop(nfs_callback_info.task); |
| 166 | svc_exit_thread(nfs_callback_info.rqst); | ||
| 167 | nfs_callback_info.rqst = NULL; | ||
| 168 | nfs_callback_info.task = NULL; | ||
| 169 | } | ||
| 171 | mutex_unlock(&nfs_callback_mutex); | 170 | mutex_unlock(&nfs_callback_mutex); |
| 172 | unlock_kernel(); | ||
| 173 | } | 171 | } |
| 174 | 172 | ||
| 175 | static int nfs_callback_authenticate(struct svc_rqst *rqstp) | 173 | static int nfs_callback_authenticate(struct svc_rqst *rqstp) |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2a092ca69b5..5ee23e7058b3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
| @@ -431,14 +431,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
| 431 | { | 431 | { |
| 432 | to->to_initval = timeo * HZ / 10; | 432 | to->to_initval = timeo * HZ / 10; |
| 433 | to->to_retries = retrans; | 433 | to->to_retries = retrans; |
| 434 | if (!to->to_retries) | ||
| 435 | to->to_retries = 2; | ||
| 436 | 434 | ||
| 437 | switch (proto) { | 435 | switch (proto) { |
| 438 | case XPRT_TRANSPORT_TCP: | 436 | case XPRT_TRANSPORT_TCP: |
| 439 | case XPRT_TRANSPORT_RDMA: | 437 | case XPRT_TRANSPORT_RDMA: |
| 438 | if (to->to_retries == 0) | ||
| 439 | to->to_retries = NFS_DEF_TCP_RETRANS; | ||
| 440 | if (to->to_initval == 0) | 440 | if (to->to_initval == 0) |
| 441 | to->to_initval = 60 * HZ; | 441 | to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; |
| 442 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) | 442 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) |
| 443 | to->to_initval = NFS_MAX_TCP_TIMEOUT; | 443 | to->to_initval = NFS_MAX_TCP_TIMEOUT; |
| 444 | to->to_increment = to->to_initval; | 444 | to->to_increment = to->to_initval; |
| @@ -450,14 +450,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
| 450 | to->to_exponential = 0; | 450 | to->to_exponential = 0; |
| 451 | break; | 451 | break; |
| 452 | case XPRT_TRANSPORT_UDP: | 452 | case XPRT_TRANSPORT_UDP: |
| 453 | default: | 453 | if (to->to_retries == 0) |
| 454 | to->to_retries = NFS_DEF_UDP_RETRANS; | ||
| 454 | if (!to->to_initval) | 455 | if (!to->to_initval) |
| 455 | to->to_initval = 11 * HZ / 10; | 456 | to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; |
| 456 | if (to->to_initval > NFS_MAX_UDP_TIMEOUT) | 457 | if (to->to_initval > NFS_MAX_UDP_TIMEOUT) |
| 457 | to->to_initval = NFS_MAX_UDP_TIMEOUT; | 458 | to->to_initval = NFS_MAX_UDP_TIMEOUT; |
| 458 | to->to_maxval = NFS_MAX_UDP_TIMEOUT; | 459 | to->to_maxval = NFS_MAX_UDP_TIMEOUT; |
| 459 | to->to_exponential = 1; | 460 | to->to_exponential = 1; |
| 460 | break; | 461 | break; |
| 462 | default: | ||
| 463 | BUG(); | ||
| 461 | } | 464 | } |
| 462 | } | 465 | } |
| 463 | 466 | ||
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 58d43daec084..28a238dab23a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -133,13 +133,14 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
| 133 | { | 133 | { |
| 134 | int res; | 134 | int res; |
| 135 | 135 | ||
| 136 | dfprintk(VFS, "NFS: opendir(%s/%ld)\n", | 136 | dfprintk(FILE, "NFS: open dir(%s/%s)\n", |
| 137 | inode->i_sb->s_id, inode->i_ino); | 137 | filp->f_path.dentry->d_parent->d_name.name, |
| 138 | filp->f_path.dentry->d_name.name); | ||
| 139 | |||
| 140 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); | ||
| 138 | 141 | ||
| 139 | lock_kernel(); | ||
| 140 | /* Call generic open code in order to cache credentials */ | 142 | /* Call generic open code in order to cache credentials */ |
| 141 | res = nfs_open(inode, filp); | 143 | res = nfs_open(inode, filp); |
| 142 | unlock_kernel(); | ||
| 143 | return res; | 144 | return res; |
| 144 | } | 145 | } |
| 145 | 146 | ||
| @@ -204,7 +205,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 204 | * Note: assumes we have exclusive access to this mapping either | 205 | * Note: assumes we have exclusive access to this mapping either |
| 205 | * through inode->i_mutex or some other mechanism. | 206 | * through inode->i_mutex or some other mechanism. |
| 206 | */ | 207 | */ |
| 207 | if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) { | 208 | if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { |
| 208 | /* Should never happen */ | 209 | /* Should never happen */ |
| 209 | nfs_zap_mapping(inode, inode->i_mapping); | 210 | nfs_zap_mapping(inode, inode->i_mapping); |
| 210 | } | 211 | } |
| @@ -528,13 +529,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 528 | struct nfs_fattr fattr; | 529 | struct nfs_fattr fattr; |
| 529 | long res; | 530 | long res; |
| 530 | 531 | ||
| 531 | dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", | 532 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
| 532 | dentry->d_parent->d_name.name, dentry->d_name.name, | 533 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 533 | (long long)filp->f_pos); | 534 | (long long)filp->f_pos); |
| 534 | nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); | 535 | nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); |
| 535 | 536 | ||
| 536 | lock_kernel(); | ||
| 537 | |||
| 538 | /* | 537 | /* |
| 539 | * filp->f_pos points to the dirent entry number. | 538 | * filp->f_pos points to the dirent entry number. |
| 540 | * *desc->dir_cookie has the cookie for the next entry. We have | 539 | * *desc->dir_cookie has the cookie for the next entry. We have |
| @@ -592,10 +591,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 592 | } | 591 | } |
| 593 | out: | 592 | out: |
| 594 | nfs_unblock_sillyrename(dentry); | 593 | nfs_unblock_sillyrename(dentry); |
| 595 | unlock_kernel(); | ||
| 596 | if (res > 0) | 594 | if (res > 0) |
| 597 | res = 0; | 595 | res = 0; |
| 598 | dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", | 596 | dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", |
| 599 | dentry->d_parent->d_name.name, dentry->d_name.name, | 597 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 600 | res); | 598 | res); |
| 601 | return res; | 599 | return res; |
| @@ -603,7 +601,15 @@ out: | |||
| 603 | 601 | ||
| 604 | static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | 602 | static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) |
| 605 | { | 603 | { |
| 606 | mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); | 604 | struct dentry *dentry = filp->f_path.dentry; |
| 605 | struct inode *inode = dentry->d_inode; | ||
| 606 | |||
| 607 | dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", | ||
| 608 | dentry->d_parent->d_name.name, | ||
| 609 | dentry->d_name.name, | ||
| 610 | offset, origin); | ||
| 611 | |||
| 612 | mutex_lock(&inode->i_mutex); | ||
| 607 | switch (origin) { | 613 | switch (origin) { |
| 608 | case 1: | 614 | case 1: |
| 609 | offset += filp->f_pos; | 615 | offset += filp->f_pos; |
| @@ -619,7 +625,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | |||
| 619 | nfs_file_open_context(filp)->dir_cookie = 0; | 625 | nfs_file_open_context(filp)->dir_cookie = 0; |
| 620 | } | 626 | } |
| 621 | out: | 627 | out: |
| 622 | mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); | 628 | mutex_unlock(&inode->i_mutex); |
| 623 | return offset; | 629 | return offset; |
| 624 | } | 630 | } |
| 625 | 631 | ||
| @@ -629,10 +635,11 @@ out: | |||
| 629 | */ | 635 | */ |
| 630 | static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | 636 | static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) |
| 631 | { | 637 | { |
| 632 | dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", | 638 | dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", |
| 633 | dentry->d_parent->d_name.name, dentry->d_name.name, | 639 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 634 | datasync); | 640 | datasync); |
| 635 | 641 | ||
| 642 | nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); | ||
| 636 | return 0; | 643 | return 0; |
| 637 | } | 644 | } |
| 638 | 645 | ||
| @@ -767,7 +774,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
| 767 | struct nfs_fattr fattr; | 774 | struct nfs_fattr fattr; |
| 768 | 775 | ||
| 769 | parent = dget_parent(dentry); | 776 | parent = dget_parent(dentry); |
| 770 | lock_kernel(); | ||
| 771 | dir = parent->d_inode; | 777 | dir = parent->d_inode; |
| 772 | nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); | 778 | nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); |
| 773 | inode = dentry->d_inode; | 779 | inode = dentry->d_inode; |
| @@ -805,7 +811,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) | |||
| 805 | 811 | ||
| 806 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 812 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 807 | out_valid: | 813 | out_valid: |
| 808 | unlock_kernel(); | ||
| 809 | dput(parent); | 814 | dput(parent); |
| 810 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", | 815 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", |
| 811 | __func__, dentry->d_parent->d_name.name, | 816 | __func__, dentry->d_parent->d_name.name, |
| @@ -824,7 +829,6 @@ out_zap_parent: | |||
| 824 | shrink_dcache_parent(dentry); | 829 | shrink_dcache_parent(dentry); |
| 825 | } | 830 | } |
| 826 | d_drop(dentry); | 831 | d_drop(dentry); |
| 827 | unlock_kernel(); | ||
| 828 | dput(parent); | 832 | dput(parent); |
| 829 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", | 833 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", |
| 830 | __func__, dentry->d_parent->d_name.name, | 834 | __func__, dentry->d_parent->d_name.name, |
| @@ -858,6 +862,14 @@ static int nfs_dentry_delete(struct dentry *dentry) | |||
| 858 | 862 | ||
| 859 | } | 863 | } |
| 860 | 864 | ||
| 865 | static void nfs_drop_nlink(struct inode *inode) | ||
| 866 | { | ||
| 867 | spin_lock(&inode->i_lock); | ||
| 868 | if (inode->i_nlink > 0) | ||
| 869 | drop_nlink(inode); | ||
| 870 | spin_unlock(&inode->i_lock); | ||
| 871 | } | ||
| 872 | |||
| 861 | /* | 873 | /* |
| 862 | * Called when the dentry loses inode. | 874 | * Called when the dentry loses inode. |
| 863 | * We use it to clean up silly-renamed files. | 875 | * We use it to clean up silly-renamed files. |
| @@ -869,10 +881,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
| 869 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; | 881 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; |
| 870 | 882 | ||
| 871 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { | 883 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { |
| 872 | lock_kernel(); | ||
| 873 | drop_nlink(inode); | 884 | drop_nlink(inode); |
| 874 | nfs_complete_unlink(dentry, inode); | 885 | nfs_complete_unlink(dentry, inode); |
| 875 | unlock_kernel(); | ||
| 876 | } | 886 | } |
| 877 | iput(inode); | 887 | iput(inode); |
| 878 | } | 888 | } |
| @@ -903,8 +913,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru | |||
| 903 | res = ERR_PTR(-ENOMEM); | 913 | res = ERR_PTR(-ENOMEM); |
| 904 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; | 914 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; |
| 905 | 915 | ||
| 906 | lock_kernel(); | ||
| 907 | |||
| 908 | /* | 916 | /* |
| 909 | * If we're doing an exclusive create, optimize away the lookup | 917 | * If we're doing an exclusive create, optimize away the lookup |
| 910 | * but don't hash the dentry. | 918 | * but don't hash the dentry. |
| @@ -912,7 +920,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru | |||
| 912 | if (nfs_is_exclusive_create(dir, nd)) { | 920 | if (nfs_is_exclusive_create(dir, nd)) { |
| 913 | d_instantiate(dentry, NULL); | 921 | d_instantiate(dentry, NULL); |
| 914 | res = NULL; | 922 | res = NULL; |
| 915 | goto out_unlock; | 923 | goto out; |
| 916 | } | 924 | } |
| 917 | 925 | ||
| 918 | parent = dentry->d_parent; | 926 | parent = dentry->d_parent; |
| @@ -940,8 +948,6 @@ no_entry: | |||
| 940 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 948 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 941 | out_unblock_sillyrename: | 949 | out_unblock_sillyrename: |
| 942 | nfs_unblock_sillyrename(parent); | 950 | nfs_unblock_sillyrename(parent); |
| 943 | out_unlock: | ||
| 944 | unlock_kernel(); | ||
| 945 | out: | 951 | out: |
| 946 | return res; | 952 | return res; |
| 947 | } | 953 | } |
| @@ -999,9 +1005,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
| 999 | } | 1005 | } |
| 1000 | 1006 | ||
| 1001 | /* Open the file on the server */ | 1007 | /* Open the file on the server */ |
| 1002 | lock_kernel(); | ||
| 1003 | res = nfs4_atomic_open(dir, dentry, nd); | 1008 | res = nfs4_atomic_open(dir, dentry, nd); |
| 1004 | unlock_kernel(); | ||
| 1005 | if (IS_ERR(res)) { | 1009 | if (IS_ERR(res)) { |
| 1006 | error = PTR_ERR(res); | 1010 | error = PTR_ERR(res); |
| 1007 | switch (error) { | 1011 | switch (error) { |
| @@ -1063,9 +1067,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 1063 | * operations that change the directory. We therefore save the | 1067 | * operations that change the directory. We therefore save the |
| 1064 | * change attribute *before* we do the RPC call. | 1068 | * change attribute *before* we do the RPC call. |
| 1065 | */ | 1069 | */ |
| 1066 | lock_kernel(); | ||
| 1067 | ret = nfs4_open_revalidate(dir, dentry, openflags, nd); | 1070 | ret = nfs4_open_revalidate(dir, dentry, openflags, nd); |
| 1068 | unlock_kernel(); | ||
| 1069 | out: | 1071 | out: |
| 1070 | dput(parent); | 1072 | dput(parent); |
| 1071 | if (!ret) | 1073 | if (!ret) |
| @@ -1218,14 +1220,11 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 1218 | if ((nd->flags & LOOKUP_CREATE) != 0) | 1220 | if ((nd->flags & LOOKUP_CREATE) != 0) |
| 1219 | open_flags = nd->intent.open.flags; | 1221 | open_flags = nd->intent.open.flags; |
| 1220 | 1222 | ||
| 1221 | lock_kernel(); | ||
| 1222 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); | 1223 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); |
| 1223 | if (error != 0) | 1224 | if (error != 0) |
| 1224 | goto out_err; | 1225 | goto out_err; |
| 1225 | unlock_kernel(); | ||
| 1226 | return 0; | 1226 | return 0; |
| 1227 | out_err: | 1227 | out_err: |
| 1228 | unlock_kernel(); | ||
| 1229 | d_drop(dentry); | 1228 | d_drop(dentry); |
| 1230 | return error; | 1229 | return error; |
| 1231 | } | 1230 | } |
| @@ -1248,14 +1247,11 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) | |||
| 1248 | attr.ia_mode = mode; | 1247 | attr.ia_mode = mode; |
| 1249 | attr.ia_valid = ATTR_MODE; | 1248 | attr.ia_valid = ATTR_MODE; |
| 1250 | 1249 | ||
| 1251 | lock_kernel(); | ||
| 1252 | status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); | 1250 | status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); |
| 1253 | if (status != 0) | 1251 | if (status != 0) |
| 1254 | goto out_err; | 1252 | goto out_err; |
| 1255 | unlock_kernel(); | ||
| 1256 | return 0; | 1253 | return 0; |
| 1257 | out_err: | 1254 | out_err: |
| 1258 | unlock_kernel(); | ||
| 1259 | d_drop(dentry); | 1255 | d_drop(dentry); |
| 1260 | return status; | 1256 | return status; |
| 1261 | } | 1257 | } |
| @@ -1274,15 +1270,12 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 1274 | attr.ia_valid = ATTR_MODE; | 1270 | attr.ia_valid = ATTR_MODE; |
| 1275 | attr.ia_mode = mode | S_IFDIR; | 1271 | attr.ia_mode = mode | S_IFDIR; |
| 1276 | 1272 | ||
| 1277 | lock_kernel(); | ||
| 1278 | error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); | 1273 | error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); |
| 1279 | if (error != 0) | 1274 | if (error != 0) |
| 1280 | goto out_err; | 1275 | goto out_err; |
| 1281 | unlock_kernel(); | ||
| 1282 | return 0; | 1276 | return 0; |
| 1283 | out_err: | 1277 | out_err: |
| 1284 | d_drop(dentry); | 1278 | d_drop(dentry); |
| 1285 | unlock_kernel(); | ||
| 1286 | return error; | 1279 | return error; |
| 1287 | } | 1280 | } |
| 1288 | 1281 | ||
| @@ -1299,14 +1292,12 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 1299 | dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", | 1292 | dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", |
| 1300 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1293 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
| 1301 | 1294 | ||
| 1302 | lock_kernel(); | ||
| 1303 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | 1295 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); |
| 1304 | /* Ensure the VFS deletes this inode */ | 1296 | /* Ensure the VFS deletes this inode */ |
| 1305 | if (error == 0 && dentry->d_inode != NULL) | 1297 | if (error == 0 && dentry->d_inode != NULL) |
| 1306 | clear_nlink(dentry->d_inode); | 1298 | clear_nlink(dentry->d_inode); |
| 1307 | else if (error == -ENOENT) | 1299 | else if (error == -ENOENT) |
| 1308 | nfs_dentry_handle_enoent(dentry); | 1300 | nfs_dentry_handle_enoent(dentry); |
| 1309 | unlock_kernel(); | ||
| 1310 | 1301 | ||
| 1311 | return error; | 1302 | return error; |
| 1312 | } | 1303 | } |
| @@ -1408,7 +1399,7 @@ static int nfs_safe_remove(struct dentry *dentry) | |||
| 1408 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); | 1399 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); |
| 1409 | /* The VFS may want to delete this inode */ | 1400 | /* The VFS may want to delete this inode */ |
| 1410 | if (error == 0) | 1401 | if (error == 0) |
| 1411 | drop_nlink(inode); | 1402 | nfs_drop_nlink(inode); |
| 1412 | nfs_mark_for_revalidate(inode); | 1403 | nfs_mark_for_revalidate(inode); |
| 1413 | } else | 1404 | } else |
| 1414 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); | 1405 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); |
| @@ -1431,7 +1422,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 1431 | dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, | 1422 | dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, |
| 1432 | dir->i_ino, dentry->d_name.name); | 1423 | dir->i_ino, dentry->d_name.name); |
| 1433 | 1424 | ||
| 1434 | lock_kernel(); | ||
| 1435 | spin_lock(&dcache_lock); | 1425 | spin_lock(&dcache_lock); |
| 1436 | spin_lock(&dentry->d_lock); | 1426 | spin_lock(&dentry->d_lock); |
| 1437 | if (atomic_read(&dentry->d_count) > 1) { | 1427 | if (atomic_read(&dentry->d_count) > 1) { |
| @@ -1440,7 +1430,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 1440 | /* Start asynchronous writeout of the inode */ | 1430 | /* Start asynchronous writeout of the inode */ |
| 1441 | write_inode_now(dentry->d_inode, 0); | 1431 | write_inode_now(dentry->d_inode, 0); |
| 1442 | error = nfs_sillyrename(dir, dentry); | 1432 | error = nfs_sillyrename(dir, dentry); |
| 1443 | unlock_kernel(); | ||
| 1444 | return error; | 1433 | return error; |
| 1445 | } | 1434 | } |
| 1446 | if (!d_unhashed(dentry)) { | 1435 | if (!d_unhashed(dentry)) { |
| @@ -1454,7 +1443,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 1454 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1443 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 1455 | } else if (need_rehash) | 1444 | } else if (need_rehash) |
| 1456 | d_rehash(dentry); | 1445 | d_rehash(dentry); |
| 1457 | unlock_kernel(); | ||
| 1458 | return error; | 1446 | return error; |
| 1459 | } | 1447 | } |
| 1460 | 1448 | ||
| @@ -1491,13 +1479,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym | |||
| 1491 | attr.ia_mode = S_IFLNK | S_IRWXUGO; | 1479 | attr.ia_mode = S_IFLNK | S_IRWXUGO; |
| 1492 | attr.ia_valid = ATTR_MODE; | 1480 | attr.ia_valid = ATTR_MODE; |
| 1493 | 1481 | ||
| 1494 | lock_kernel(); | ||
| 1495 | |||
| 1496 | page = alloc_page(GFP_HIGHUSER); | 1482 | page = alloc_page(GFP_HIGHUSER); |
| 1497 | if (!page) { | 1483 | if (!page) |
| 1498 | unlock_kernel(); | ||
| 1499 | return -ENOMEM; | 1484 | return -ENOMEM; |
| 1500 | } | ||
| 1501 | 1485 | ||
| 1502 | kaddr = kmap_atomic(page, KM_USER0); | 1486 | kaddr = kmap_atomic(page, KM_USER0); |
| 1503 | memcpy(kaddr, symname, pathlen); | 1487 | memcpy(kaddr, symname, pathlen); |
| @@ -1512,7 +1496,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym | |||
| 1512 | dentry->d_name.name, symname, error); | 1496 | dentry->d_name.name, symname, error); |
| 1513 | d_drop(dentry); | 1497 | d_drop(dentry); |
| 1514 | __free_page(page); | 1498 | __free_page(page); |
| 1515 | unlock_kernel(); | ||
| 1516 | return error; | 1499 | return error; |
| 1517 | } | 1500 | } |
| 1518 | 1501 | ||
| @@ -1530,7 +1513,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym | |||
| 1530 | } else | 1513 | } else |
| 1531 | __free_page(page); | 1514 | __free_page(page); |
| 1532 | 1515 | ||
| 1533 | unlock_kernel(); | ||
| 1534 | return 0; | 1516 | return 0; |
| 1535 | } | 1517 | } |
| 1536 | 1518 | ||
| @@ -1544,14 +1526,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) | |||
| 1544 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1526 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
| 1545 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1527 | dentry->d_parent->d_name.name, dentry->d_name.name); |
| 1546 | 1528 | ||
| 1547 | lock_kernel(); | ||
| 1548 | d_drop(dentry); | 1529 | d_drop(dentry); |
| 1549 | error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); | 1530 | error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); |
| 1550 | if (error == 0) { | 1531 | if (error == 0) { |
| 1551 | atomic_inc(&inode->i_count); | 1532 | atomic_inc(&inode->i_count); |
| 1552 | d_add(dentry, inode); | 1533 | d_add(dentry, inode); |
| 1553 | } | 1534 | } |
| 1554 | unlock_kernel(); | ||
| 1555 | return error; | 1535 | return error; |
| 1556 | } | 1536 | } |
| 1557 | 1537 | ||
| @@ -1591,7 +1571,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1591 | * To prevent any new references to the target during the rename, | 1571 | * To prevent any new references to the target during the rename, |
| 1592 | * we unhash the dentry and free the inode in advance. | 1572 | * we unhash the dentry and free the inode in advance. |
| 1593 | */ | 1573 | */ |
| 1594 | lock_kernel(); | ||
| 1595 | if (!d_unhashed(new_dentry)) { | 1574 | if (!d_unhashed(new_dentry)) { |
| 1596 | d_drop(new_dentry); | 1575 | d_drop(new_dentry); |
| 1597 | rehash = new_dentry; | 1576 | rehash = new_dentry; |
| @@ -1635,7 +1614,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1635 | /* dentry still busy? */ | 1614 | /* dentry still busy? */ |
| 1636 | goto out; | 1615 | goto out; |
| 1637 | } else | 1616 | } else |
| 1638 | drop_nlink(new_inode); | 1617 | nfs_drop_nlink(new_inode); |
| 1639 | 1618 | ||
| 1640 | go_ahead: | 1619 | go_ahead: |
| 1641 | /* | 1620 | /* |
| @@ -1669,7 +1648,6 @@ out: | |||
| 1669 | /* new dentry created? */ | 1648 | /* new dentry created? */ |
| 1670 | if (dentry) | 1649 | if (dentry) |
| 1671 | dput(dentry); | 1650 | dput(dentry); |
| 1672 | unlock_kernel(); | ||
| 1673 | return error; | 1651 | return error; |
| 1674 | } | 1652 | } |
| 1675 | 1653 | ||
| @@ -1962,8 +1940,6 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 1962 | } | 1940 | } |
| 1963 | 1941 | ||
| 1964 | force_lookup: | 1942 | force_lookup: |
| 1965 | lock_kernel(); | ||
| 1966 | |||
| 1967 | if (!NFS_PROTO(inode)->access) | 1943 | if (!NFS_PROTO(inode)->access) |
| 1968 | goto out_notsup; | 1944 | goto out_notsup; |
| 1969 | 1945 | ||
| @@ -1973,7 +1949,6 @@ force_lookup: | |||
| 1973 | put_rpccred(cred); | 1949 | put_rpccred(cred); |
| 1974 | } else | 1950 | } else |
| 1975 | res = PTR_ERR(cred); | 1951 | res = PTR_ERR(cred); |
| 1976 | unlock_kernel(); | ||
| 1977 | out: | 1952 | out: |
| 1978 | dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", | 1953 | dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", |
| 1979 | inode->i_sb->s_id, inode->i_ino, mask, res); | 1954 | inode->i_sb->s_id, inode->i_ino, mask, res); |
| @@ -1982,7 +1957,6 @@ out_notsup: | |||
| 1982 | res = nfs_revalidate_inode(NFS_SERVER(inode), inode); | 1957 | res = nfs_revalidate_inode(NFS_SERVER(inode), inode); |
| 1983 | if (res == 0) | 1958 | if (res == 0) |
| 1984 | res = generic_permission(inode, mask, NULL); | 1959 | res = generic_permission(inode, mask, NULL); |
| 1985 | unlock_kernel(); | ||
| 1986 | goto out; | 1960 | goto out; |
| 1987 | } | 1961 | } |
| 1988 | 1962 | ||
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4757a2b326a1..08f6b040d289 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
| @@ -890,7 +890,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 890 | count = iov_length(iov, nr_segs); | 890 | count = iov_length(iov, nr_segs); |
| 891 | nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); | 891 | nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); |
| 892 | 892 | ||
| 893 | dprintk("nfs: direct read(%s/%s, %zd@%Ld)\n", | 893 | dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", |
| 894 | file->f_path.dentry->d_parent->d_name.name, | 894 | file->f_path.dentry->d_parent->d_name.name, |
| 895 | file->f_path.dentry->d_name.name, | 895 | file->f_path.dentry->d_name.name, |
| 896 | count, (long long) pos); | 896 | count, (long long) pos); |
| @@ -947,7 +947,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 947 | count = iov_length(iov, nr_segs); | 947 | count = iov_length(iov, nr_segs); |
| 948 | nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); | 948 | nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); |
| 949 | 949 | ||
| 950 | dfprintk(VFS, "nfs: direct write(%s/%s, %zd@%Ld)\n", | 950 | dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", |
| 951 | file->f_path.dentry->d_parent->d_name.name, | 951 | file->f_path.dentry->d_parent->d_name.name, |
| 952 | file->f_path.dentry->d_name.name, | 952 | file->f_path.dentry->d_name.name, |
| 953 | count, (long long) pos); | 953 | count, (long long) pos); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d84a3d8f32af..78460657f5cb 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -50,7 +50,7 @@ static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, | |||
| 50 | static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, | 50 | static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, |
| 51 | unsigned long nr_segs, loff_t pos); | 51 | unsigned long nr_segs, loff_t pos); |
| 52 | static int nfs_file_flush(struct file *, fl_owner_t id); | 52 | static int nfs_file_flush(struct file *, fl_owner_t id); |
| 53 | static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); | 53 | static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); |
| 54 | static int nfs_check_flags(int flags); | 54 | static int nfs_check_flags(int flags); |
| 55 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); | 55 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); |
| 56 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); | 56 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); |
| @@ -72,7 +72,7 @@ const struct file_operations nfs_file_operations = { | |||
| 72 | .open = nfs_file_open, | 72 | .open = nfs_file_open, |
| 73 | .flush = nfs_file_flush, | 73 | .flush = nfs_file_flush, |
| 74 | .release = nfs_file_release, | 74 | .release = nfs_file_release, |
| 75 | .fsync = nfs_fsync, | 75 | .fsync = nfs_file_fsync, |
| 76 | .lock = nfs_lock, | 76 | .lock = nfs_lock, |
| 77 | .flock = nfs_flock, | 77 | .flock = nfs_flock, |
| 78 | .splice_read = nfs_file_splice_read, | 78 | .splice_read = nfs_file_splice_read, |
| @@ -119,25 +119,33 @@ nfs_file_open(struct inode *inode, struct file *filp) | |||
| 119 | { | 119 | { |
| 120 | int res; | 120 | int res; |
| 121 | 121 | ||
| 122 | dprintk("NFS: open file(%s/%s)\n", | ||
| 123 | filp->f_path.dentry->d_parent->d_name.name, | ||
| 124 | filp->f_path.dentry->d_name.name); | ||
| 125 | |||
| 122 | res = nfs_check_flags(filp->f_flags); | 126 | res = nfs_check_flags(filp->f_flags); |
| 123 | if (res) | 127 | if (res) |
| 124 | return res; | 128 | return res; |
| 125 | 129 | ||
| 126 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); | 130 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); |
| 127 | lock_kernel(); | 131 | res = nfs_open(inode, filp); |
| 128 | res = NFS_PROTO(inode)->file_open(inode, filp); | ||
| 129 | unlock_kernel(); | ||
| 130 | return res; | 132 | return res; |
| 131 | } | 133 | } |
| 132 | 134 | ||
| 133 | static int | 135 | static int |
| 134 | nfs_file_release(struct inode *inode, struct file *filp) | 136 | nfs_file_release(struct inode *inode, struct file *filp) |
| 135 | { | 137 | { |
| 138 | struct dentry *dentry = filp->f_path.dentry; | ||
| 139 | |||
| 140 | dprintk("NFS: release(%s/%s)\n", | ||
| 141 | dentry->d_parent->d_name.name, | ||
| 142 | dentry->d_name.name); | ||
| 143 | |||
| 136 | /* Ensure that dirty pages are flushed out with the right creds */ | 144 | /* Ensure that dirty pages are flushed out with the right creds */ |
| 137 | if (filp->f_mode & FMODE_WRITE) | 145 | if (filp->f_mode & FMODE_WRITE) |
| 138 | nfs_wb_all(filp->f_path.dentry->d_inode); | 146 | nfs_wb_all(dentry->d_inode); |
| 139 | nfs_inc_stats(inode, NFSIOS_VFSRELEASE); | 147 | nfs_inc_stats(inode, NFSIOS_VFSRELEASE); |
| 140 | return NFS_PROTO(inode)->file_release(inode, filp); | 148 | return nfs_release(inode, filp); |
| 141 | } | 149 | } |
| 142 | 150 | ||
| 143 | /** | 151 | /** |
| @@ -170,6 +178,13 @@ force_reval: | |||
| 170 | 178 | ||
| 171 | static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | 179 | static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) |
| 172 | { | 180 | { |
| 181 | loff_t loff; | ||
| 182 | |||
| 183 | dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", | ||
| 184 | filp->f_path.dentry->d_parent->d_name.name, | ||
| 185 | filp->f_path.dentry->d_name.name, | ||
| 186 | offset, origin); | ||
| 187 | |||
| 173 | /* origin == SEEK_END => we must revalidate the cached file length */ | 188 | /* origin == SEEK_END => we must revalidate the cached file length */ |
| 174 | if (origin == SEEK_END) { | 189 | if (origin == SEEK_END) { |
| 175 | struct inode *inode = filp->f_mapping->host; | 190 | struct inode *inode = filp->f_mapping->host; |
| @@ -177,11 +192,14 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
| 177 | if (retval < 0) | 192 | if (retval < 0) |
| 178 | return (loff_t)retval; | 193 | return (loff_t)retval; |
| 179 | } | 194 | } |
| 180 | return remote_llseek(filp, offset, origin); | 195 | lock_kernel(); /* BKL needed? */ |
| 196 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
| 197 | unlock_kernel(); | ||
| 198 | return loff; | ||
| 181 | } | 199 | } |
| 182 | 200 | ||
| 183 | /* | 201 | /* |
| 184 | * Helper for nfs_file_flush() and nfs_fsync() | 202 | * Helper for nfs_file_flush() and nfs_file_fsync() |
| 185 | * | 203 | * |
| 186 | * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to | 204 | * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to |
| 187 | * disk, but it retrieves and clears ctx->error after synching, despite | 205 | * disk, but it retrieves and clears ctx->error after synching, despite |
| @@ -207,16 +225,18 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) | |||
| 207 | 225 | ||
| 208 | /* | 226 | /* |
| 209 | * Flush all dirty pages, and check for write errors. | 227 | * Flush all dirty pages, and check for write errors. |
| 210 | * | ||
| 211 | */ | 228 | */ |
| 212 | static int | 229 | static int |
| 213 | nfs_file_flush(struct file *file, fl_owner_t id) | 230 | nfs_file_flush(struct file *file, fl_owner_t id) |
| 214 | { | 231 | { |
| 215 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 232 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
| 216 | struct inode *inode = file->f_path.dentry->d_inode; | 233 | struct dentry *dentry = file->f_path.dentry; |
| 234 | struct inode *inode = dentry->d_inode; | ||
| 217 | int status; | 235 | int status; |
| 218 | 236 | ||
| 219 | dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); | 237 | dprintk("NFS: flush(%s/%s)\n", |
| 238 | dentry->d_parent->d_name.name, | ||
| 239 | dentry->d_name.name); | ||
| 220 | 240 | ||
| 221 | if ((file->f_mode & FMODE_WRITE) == 0) | 241 | if ((file->f_mode & FMODE_WRITE) == 0) |
| 222 | return 0; | 242 | return 0; |
| @@ -241,7 +261,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 241 | if (iocb->ki_filp->f_flags & O_DIRECT) | 261 | if (iocb->ki_filp->f_flags & O_DIRECT) |
| 242 | return nfs_file_direct_read(iocb, iov, nr_segs, pos); | 262 | return nfs_file_direct_read(iocb, iov, nr_segs, pos); |
| 243 | 263 | ||
| 244 | dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", | 264 | dprintk("NFS: read(%s/%s, %lu@%lu)\n", |
| 245 | dentry->d_parent->d_name.name, dentry->d_name.name, | 265 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 246 | (unsigned long) count, (unsigned long) pos); | 266 | (unsigned long) count, (unsigned long) pos); |
| 247 | 267 | ||
| @@ -261,7 +281,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, | |||
| 261 | struct inode *inode = dentry->d_inode; | 281 | struct inode *inode = dentry->d_inode; |
| 262 | ssize_t res; | 282 | ssize_t res; |
| 263 | 283 | ||
| 264 | dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n", | 284 | dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", |
| 265 | dentry->d_parent->d_name.name, dentry->d_name.name, | 285 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 266 | (unsigned long) count, (unsigned long long) *ppos); | 286 | (unsigned long) count, (unsigned long long) *ppos); |
| 267 | 287 | ||
| @@ -278,7 +298,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
| 278 | struct inode *inode = dentry->d_inode; | 298 | struct inode *inode = dentry->d_inode; |
| 279 | int status; | 299 | int status; |
| 280 | 300 | ||
| 281 | dfprintk(VFS, "nfs: mmap(%s/%s)\n", | 301 | dprintk("NFS: mmap(%s/%s)\n", |
| 282 | dentry->d_parent->d_name.name, dentry->d_name.name); | 302 | dentry->d_parent->d_name.name, dentry->d_name.name); |
| 283 | 303 | ||
| 284 | status = nfs_revalidate_mapping(inode, file->f_mapping); | 304 | status = nfs_revalidate_mapping(inode, file->f_mapping); |
| @@ -296,12 +316,14 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
| 296 | * whether any write errors occurred for this process. | 316 | * whether any write errors occurred for this process. |
| 297 | */ | 317 | */ |
| 298 | static int | 318 | static int |
| 299 | nfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 319 | nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) |
| 300 | { | 320 | { |
| 301 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 321 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
| 302 | struct inode *inode = dentry->d_inode; | 322 | struct inode *inode = dentry->d_inode; |
| 303 | 323 | ||
| 304 | dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); | 324 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", |
| 325 | dentry->d_parent->d_name.name, dentry->d_name.name, | ||
| 326 | datasync); | ||
| 305 | 327 | ||
| 306 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); | 328 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
| 307 | return nfs_do_fsync(ctx, inode); | 329 | return nfs_do_fsync(ctx, inode); |
| @@ -324,6 +346,11 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, | |||
| 324 | struct page *page; | 346 | struct page *page; |
| 325 | index = pos >> PAGE_CACHE_SHIFT; | 347 | index = pos >> PAGE_CACHE_SHIFT; |
| 326 | 348 | ||
| 349 | dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", | ||
| 350 | file->f_path.dentry->d_parent->d_name.name, | ||
| 351 | file->f_path.dentry->d_name.name, | ||
| 352 | mapping->host->i_ino, len, (long long) pos); | ||
| 353 | |||
| 327 | page = __grab_cache_page(mapping, index); | 354 | page = __grab_cache_page(mapping, index); |
| 328 | if (!page) | 355 | if (!page) |
| 329 | return -ENOMEM; | 356 | return -ENOMEM; |
| @@ -344,9 +371,32 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
| 344 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | 371 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); |
| 345 | int status; | 372 | int status; |
| 346 | 373 | ||
| 347 | lock_kernel(); | 374 | dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", |
| 375 | file->f_path.dentry->d_parent->d_name.name, | ||
| 376 | file->f_path.dentry->d_name.name, | ||
| 377 | mapping->host->i_ino, len, (long long) pos); | ||
| 378 | |||
| 379 | /* | ||
| 380 | * Zero any uninitialised parts of the page, and then mark the page | ||
| 381 | * as up to date if it turns out that we're extending the file. | ||
| 382 | */ | ||
| 383 | if (!PageUptodate(page)) { | ||
| 384 | unsigned pglen = nfs_page_length(page); | ||
| 385 | unsigned end = offset + len; | ||
| 386 | |||
| 387 | if (pglen == 0) { | ||
| 388 | zero_user_segments(page, 0, offset, | ||
| 389 | end, PAGE_CACHE_SIZE); | ||
| 390 | SetPageUptodate(page); | ||
| 391 | } else if (end >= pglen) { | ||
| 392 | zero_user_segment(page, end, PAGE_CACHE_SIZE); | ||
| 393 | if (offset == 0) | ||
| 394 | SetPageUptodate(page); | ||
| 395 | } else | ||
| 396 | zero_user_segment(page, pglen, PAGE_CACHE_SIZE); | ||
| 397 | } | ||
| 398 | |||
| 348 | status = nfs_updatepage(file, page, offset, copied); | 399 | status = nfs_updatepage(file, page, offset, copied); |
| 349 | unlock_kernel(); | ||
| 350 | 400 | ||
| 351 | unlock_page(page); | 401 | unlock_page(page); |
| 352 | page_cache_release(page); | 402 | page_cache_release(page); |
| @@ -358,6 +408,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
| 358 | 408 | ||
| 359 | static void nfs_invalidate_page(struct page *page, unsigned long offset) | 409 | static void nfs_invalidate_page(struct page *page, unsigned long offset) |
| 360 | { | 410 | { |
| 411 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); | ||
| 412 | |||
| 361 | if (offset != 0) | 413 | if (offset != 0) |
| 362 | return; | 414 | return; |
| 363 | /* Cancel any unstarted writes on this page */ | 415 | /* Cancel any unstarted writes on this page */ |
| @@ -366,13 +418,20 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) | |||
| 366 | 418 | ||
| 367 | static int nfs_release_page(struct page *page, gfp_t gfp) | 419 | static int nfs_release_page(struct page *page, gfp_t gfp) |
| 368 | { | 420 | { |
| 421 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); | ||
| 422 | |||
| 369 | /* If PagePrivate() is set, then the page is not freeable */ | 423 | /* If PagePrivate() is set, then the page is not freeable */ |
| 370 | return 0; | 424 | return 0; |
| 371 | } | 425 | } |
| 372 | 426 | ||
| 373 | static int nfs_launder_page(struct page *page) | 427 | static int nfs_launder_page(struct page *page) |
| 374 | { | 428 | { |
| 375 | return nfs_wb_page(page->mapping->host, page); | 429 | struct inode *inode = page->mapping->host; |
| 430 | |||
| 431 | dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", | ||
| 432 | inode->i_ino, (long long)page_offset(page)); | ||
| 433 | |||
| 434 | return nfs_wb_page(inode, page); | ||
| 376 | } | 435 | } |
| 377 | 436 | ||
| 378 | const struct address_space_operations nfs_file_aops = { | 437 | const struct address_space_operations nfs_file_aops = { |
| @@ -392,13 +451,19 @@ const struct address_space_operations nfs_file_aops = { | |||
| 392 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | 451 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) |
| 393 | { | 452 | { |
| 394 | struct file *filp = vma->vm_file; | 453 | struct file *filp = vma->vm_file; |
| 454 | struct dentry *dentry = filp->f_path.dentry; | ||
| 395 | unsigned pagelen; | 455 | unsigned pagelen; |
| 396 | int ret = -EINVAL; | 456 | int ret = -EINVAL; |
| 397 | struct address_space *mapping; | 457 | struct address_space *mapping; |
| 398 | 458 | ||
| 459 | dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", | ||
| 460 | dentry->d_parent->d_name.name, dentry->d_name.name, | ||
| 461 | filp->f_mapping->host->i_ino, | ||
| 462 | (long long)page_offset(page)); | ||
| 463 | |||
| 399 | lock_page(page); | 464 | lock_page(page); |
| 400 | mapping = page->mapping; | 465 | mapping = page->mapping; |
| 401 | if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) | 466 | if (mapping != dentry->d_inode->i_mapping) |
| 402 | goto out_unlock; | 467 | goto out_unlock; |
| 403 | 468 | ||
| 404 | ret = 0; | 469 | ret = 0; |
| @@ -446,9 +511,9 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 446 | if (iocb->ki_filp->f_flags & O_DIRECT) | 511 | if (iocb->ki_filp->f_flags & O_DIRECT) |
| 447 | return nfs_file_direct_write(iocb, iov, nr_segs, pos); | 512 | return nfs_file_direct_write(iocb, iov, nr_segs, pos); |
| 448 | 513 | ||
| 449 | dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", | 514 | dprintk("NFS: write(%s/%s, %lu@%Ld)\n", |
| 450 | dentry->d_parent->d_name.name, dentry->d_name.name, | 515 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 451 | inode->i_ino, (unsigned long) count, (long long) pos); | 516 | (unsigned long) count, (long long) pos); |
| 452 | 517 | ||
| 453 | result = -EBUSY; | 518 | result = -EBUSY; |
| 454 | if (IS_SWAPFILE(inode)) | 519 | if (IS_SWAPFILE(inode)) |
| @@ -582,7 +647,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) | |||
| 582 | * This makes locking act as a cache coherency point. | 647 | * This makes locking act as a cache coherency point. |
| 583 | */ | 648 | */ |
| 584 | nfs_sync_mapping(filp->f_mapping); | 649 | nfs_sync_mapping(filp->f_mapping); |
| 585 | nfs_zap_caches(inode); | 650 | if (!nfs_have_delegation(inode, FMODE_READ)) |
| 651 | nfs_zap_caches(inode); | ||
| 586 | out: | 652 | out: |
| 587 | return status; | 653 | return status; |
| 588 | } | 654 | } |
| @@ -592,23 +658,35 @@ out: | |||
| 592 | */ | 658 | */ |
| 593 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) | 659 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) |
| 594 | { | 660 | { |
| 595 | struct inode * inode = filp->f_mapping->host; | 661 | struct inode *inode = filp->f_mapping->host; |
| 662 | int ret = -ENOLCK; | ||
| 596 | 663 | ||
| 597 | dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", | 664 | dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", |
| 598 | inode->i_sb->s_id, inode->i_ino, | 665 | filp->f_path.dentry->d_parent->d_name.name, |
| 666 | filp->f_path.dentry->d_name.name, | ||
| 599 | fl->fl_type, fl->fl_flags, | 667 | fl->fl_type, fl->fl_flags, |
| 600 | (long long)fl->fl_start, (long long)fl->fl_end); | 668 | (long long)fl->fl_start, (long long)fl->fl_end); |
| 669 | |||
| 601 | nfs_inc_stats(inode, NFSIOS_VFSLOCK); | 670 | nfs_inc_stats(inode, NFSIOS_VFSLOCK); |
| 602 | 671 | ||
| 603 | /* No mandatory locks over NFS */ | 672 | /* No mandatory locks over NFS */ |
| 604 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) | 673 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
| 605 | return -ENOLCK; | 674 | goto out_err; |
| 675 | |||
| 676 | if (NFS_PROTO(inode)->lock_check_bounds != NULL) { | ||
| 677 | ret = NFS_PROTO(inode)->lock_check_bounds(fl); | ||
| 678 | if (ret < 0) | ||
| 679 | goto out_err; | ||
| 680 | } | ||
| 606 | 681 | ||
| 607 | if (IS_GETLK(cmd)) | 682 | if (IS_GETLK(cmd)) |
| 608 | return do_getlk(filp, cmd, fl); | 683 | ret = do_getlk(filp, cmd, fl); |
| 609 | if (fl->fl_type == F_UNLCK) | 684 | else if (fl->fl_type == F_UNLCK) |
| 610 | return do_unlk(filp, cmd, fl); | 685 | ret = do_unlk(filp, cmd, fl); |
| 611 | return do_setlk(filp, cmd, fl); | 686 | else |
| 687 | ret = do_setlk(filp, cmd, fl); | ||
| 688 | out_err: | ||
| 689 | return ret; | ||
| 612 | } | 690 | } |
| 613 | 691 | ||
| 614 | /* | 692 | /* |
| @@ -616,9 +694,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
| 616 | */ | 694 | */ |
| 617 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | 695 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) |
| 618 | { | 696 | { |
| 619 | dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", | 697 | dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", |
| 620 | filp->f_path.dentry->d_inode->i_sb->s_id, | 698 | filp->f_path.dentry->d_parent->d_name.name, |
| 621 | filp->f_path.dentry->d_inode->i_ino, | 699 | filp->f_path.dentry->d_name.name, |
| 622 | fl->fl_type, fl->fl_flags); | 700 | fl->fl_type, fl->fl_flags); |
| 623 | 701 | ||
| 624 | /* | 702 | /* |
| @@ -641,12 +719,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | |||
| 641 | return do_setlk(filp, cmd, fl); | 719 | return do_setlk(filp, cmd, fl); |
| 642 | } | 720 | } |
| 643 | 721 | ||
| 722 | /* | ||
| 723 | * There is no protocol support for leases, so we have no way to implement | ||
| 724 | * them correctly in the face of opens by other clients. | ||
| 725 | */ | ||
| 644 | static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) | 726 | static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) |
| 645 | { | 727 | { |
| 646 | /* | 728 | dprintk("NFS: setlease(%s/%s, arg=%ld)\n", |
| 647 | * There is no protocol support for leases, so we have no way | 729 | file->f_path.dentry->d_parent->d_name.name, |
| 648 | * to implement them correctly in the face of opens by other | 730 | file->f_path.dentry->d_name.name, arg); |
| 649 | * clients. | 731 | |
| 650 | */ | ||
| 651 | return -EINVAL; | 732 | return -EINVAL; |
| 652 | } | 733 | } |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 596c5d8e86f4..df23f987da6b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; | |||
| 57 | static void nfs_invalidate_inode(struct inode *); | 57 | static void nfs_invalidate_inode(struct inode *); |
| 58 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); | 58 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); |
| 59 | 59 | ||
| 60 | static void nfs_zap_acl_cache(struct inode *); | ||
| 61 | |||
| 62 | static struct kmem_cache * nfs_inode_cachep; | 60 | static struct kmem_cache * nfs_inode_cachep; |
| 63 | 61 | ||
| 64 | static inline unsigned long | 62 | static inline unsigned long |
| @@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) | |||
| 167 | } | 165 | } |
| 168 | } | 166 | } |
| 169 | 167 | ||
| 170 | static void nfs_zap_acl_cache(struct inode *inode) | 168 | void nfs_zap_acl_cache(struct inode *inode) |
| 171 | { | 169 | { |
| 172 | void (*clear_acl_cache)(struct inode *); | 170 | void (*clear_acl_cache)(struct inode *); |
| 173 | 171 | ||
| @@ -347,7 +345,7 @@ out_no_inode: | |||
| 347 | goto out; | 345 | goto out; |
| 348 | } | 346 | } |
| 349 | 347 | ||
| 350 | #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) | 348 | #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) |
| 351 | 349 | ||
| 352 | int | 350 | int |
| 353 | nfs_setattr(struct dentry *dentry, struct iattr *attr) | 351 | nfs_setattr(struct dentry *dentry, struct iattr *attr) |
| @@ -369,10 +367,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 369 | 367 | ||
| 370 | /* Optimization: if the end result is no change, don't RPC */ | 368 | /* Optimization: if the end result is no change, don't RPC */ |
| 371 | attr->ia_valid &= NFS_VALID_ATTRS; | 369 | attr->ia_valid &= NFS_VALID_ATTRS; |
| 372 | if (attr->ia_valid == 0) | 370 | if ((attr->ia_valid & ~ATTR_FILE) == 0) |
| 373 | return 0; | 371 | return 0; |
| 374 | 372 | ||
| 375 | lock_kernel(); | ||
| 376 | /* Write all dirty data */ | 373 | /* Write all dirty data */ |
| 377 | if (S_ISREG(inode->i_mode)) { | 374 | if (S_ISREG(inode->i_mode)) { |
| 378 | filemap_write_and_wait(inode->i_mapping); | 375 | filemap_write_and_wait(inode->i_mapping); |
| @@ -386,11 +383,66 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 386 | error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); | 383 | error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); |
| 387 | if (error == 0) | 384 | if (error == 0) |
| 388 | nfs_refresh_inode(inode, &fattr); | 385 | nfs_refresh_inode(inode, &fattr); |
| 389 | unlock_kernel(); | ||
| 390 | return error; | 386 | return error; |
| 391 | } | 387 | } |
| 392 | 388 | ||
| 393 | /** | 389 | /** |
| 390 | * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall | ||
| 391 | * @inode: inode of the file used | ||
| 392 | * @offset: file offset to start truncating | ||
| 393 | * | ||
| 394 | * This is a copy of the common vmtruncate, but with the locking | ||
| 395 | * corrected to take into account the fact that NFS requires | ||
| 396 | * inode->i_size to be updated under the inode->i_lock. | ||
| 397 | */ | ||
| 398 | static int nfs_vmtruncate(struct inode * inode, loff_t offset) | ||
| 399 | { | ||
| 400 | if (i_size_read(inode) < offset) { | ||
| 401 | unsigned long limit; | ||
| 402 | |||
| 403 | limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | ||
| 404 | if (limit != RLIM_INFINITY && offset > limit) | ||
| 405 | goto out_sig; | ||
| 406 | if (offset > inode->i_sb->s_maxbytes) | ||
| 407 | goto out_big; | ||
| 408 | spin_lock(&inode->i_lock); | ||
| 409 | i_size_write(inode, offset); | ||
| 410 | spin_unlock(&inode->i_lock); | ||
| 411 | } else { | ||
| 412 | struct address_space *mapping = inode->i_mapping; | ||
| 413 | |||
| 414 | /* | ||
| 415 | * truncation of in-use swapfiles is disallowed - it would | ||
| 416 | * cause subsequent swapout to scribble on the now-freed | ||
| 417 | * blocks. | ||
| 418 | */ | ||
| 419 | if (IS_SWAPFILE(inode)) | ||
| 420 | return -ETXTBSY; | ||
| 421 | spin_lock(&inode->i_lock); | ||
| 422 | i_size_write(inode, offset); | ||
| 423 | spin_unlock(&inode->i_lock); | ||
| 424 | |||
| 425 | /* | ||
| 426 | * unmap_mapping_range is called twice, first simply for | ||
| 427 | * efficiency so that truncate_inode_pages does fewer | ||
| 428 | * single-page unmaps. However after this first call, and | ||
| 429 | * before truncate_inode_pages finishes, it is possible for | ||
| 430 | * private pages to be COWed, which remain after | ||
| 431 | * truncate_inode_pages finishes, hence the second | ||
| 432 | * unmap_mapping_range call must be made for correctness. | ||
| 433 | */ | ||
| 434 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
| 435 | truncate_inode_pages(mapping, offset); | ||
| 436 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | ||
| 437 | } | ||
| 438 | return 0; | ||
| 439 | out_sig: | ||
| 440 | send_sig(SIGXFSZ, current, 0); | ||
| 441 | out_big: | ||
| 442 | return -EFBIG; | ||
| 443 | } | ||
| 444 | |||
| 445 | /** | ||
| 394 | * nfs_setattr_update_inode - Update inode metadata after a setattr call. | 446 | * nfs_setattr_update_inode - Update inode metadata after a setattr call. |
| 395 | * @inode: pointer to struct inode | 447 | * @inode: pointer to struct inode |
| 396 | * @attr: pointer to struct iattr | 448 | * @attr: pointer to struct iattr |
| @@ -416,8 +468,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 416 | } | 468 | } |
| 417 | if ((attr->ia_valid & ATTR_SIZE) != 0) { | 469 | if ((attr->ia_valid & ATTR_SIZE) != 0) { |
| 418 | nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); | 470 | nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); |
| 419 | inode->i_size = attr->ia_size; | 471 | nfs_vmtruncate(inode, attr->ia_size); |
| 420 | vmtruncate(inode, attr->ia_size); | ||
| 421 | } | 472 | } |
| 422 | } | 473 | } |
| 423 | 474 | ||
| @@ -647,7 +698,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 647 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 698 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
| 648 | 699 | ||
| 649 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | 700 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); |
| 650 | lock_kernel(); | ||
| 651 | if (is_bad_inode(inode)) | 701 | if (is_bad_inode(inode)) |
| 652 | goto out_nowait; | 702 | goto out_nowait; |
| 653 | if (NFS_STALE(inode)) | 703 | if (NFS_STALE(inode)) |
| @@ -696,7 +746,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 696 | nfs_wake_up_inode(inode); | 746 | nfs_wake_up_inode(inode); |
| 697 | 747 | ||
| 698 | out_nowait: | 748 | out_nowait: |
| 699 | unlock_kernel(); | ||
| 700 | return status; | 749 | return status; |
| 701 | } | 750 | } |
| 702 | 751 | ||
| @@ -831,9 +880,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 831 | if (S_ISDIR(inode->i_mode)) | 880 | if (S_ISDIR(inode->i_mode)) |
| 832 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 881 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
| 833 | } | 882 | } |
| 834 | if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) && | 883 | if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && |
| 835 | nfsi->npages == 0) | 884 | nfsi->npages == 0) |
| 836 | inode->i_size = nfs_size_to_loff_t(fattr->size); | 885 | i_size_write(inode, nfs_size_to_loff_t(fattr->size)); |
| 837 | } | 886 | } |
| 838 | } | 887 | } |
| 839 | 888 | ||
| @@ -974,7 +1023,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
| 974 | (fattr->valid & NFS_ATTR_WCC) == 0) { | 1023 | (fattr->valid & NFS_ATTR_WCC) == 0) { |
| 975 | memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); | 1024 | memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); |
| 976 | memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); | 1025 | memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); |
| 977 | fattr->pre_size = inode->i_size; | 1026 | fattr->pre_size = i_size_read(inode); |
| 978 | fattr->valid |= NFS_ATTR_WCC; | 1027 | fattr->valid |= NFS_ATTR_WCC; |
| 979 | } | 1028 | } |
| 980 | return nfs_post_op_update_inode(inode, fattr); | 1029 | return nfs_post_op_update_inode(inode, fattr); |
| @@ -1059,7 +1108,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1059 | /* Do we perhaps have any outstanding writes, or has | 1108 | /* Do we perhaps have any outstanding writes, or has |
| 1060 | * the file grown beyond our last write? */ | 1109 | * the file grown beyond our last write? */ |
| 1061 | if (nfsi->npages == 0 || new_isize > cur_isize) { | 1110 | if (nfsi->npages == 0 || new_isize > cur_isize) { |
| 1062 | inode->i_size = new_isize; | 1111 | i_size_write(inode, new_isize); |
| 1063 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1112 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
| 1064 | } | 1113 | } |
| 1065 | dprintk("NFS: isize change on server for file %s/%ld\n", | 1114 | dprintk("NFS: isize change on server for file %s/%ld\n", |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 04ae867dddba..24241fcbb98d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -150,6 +150,7 @@ extern void nfs_clear_inode(struct inode *); | |||
| 150 | #ifdef CONFIG_NFS_V4 | 150 | #ifdef CONFIG_NFS_V4 |
| 151 | extern void nfs4_clear_inode(struct inode *); | 151 | extern void nfs4_clear_inode(struct inode *); |
| 152 | #endif | 152 | #endif |
| 153 | void nfs_zap_acl_cache(struct inode *inode); | ||
| 153 | 154 | ||
| 154 | /* super.c */ | 155 | /* super.c */ |
| 155 | extern struct file_system_type nfs_xdev_fs_type; | 156 | extern struct file_system_type nfs_xdev_fs_type; |
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 6350ecbde589..a36952810032 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h | |||
| @@ -5,135 +5,41 @@ | |||
| 5 | * | 5 | * |
| 6 | * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com> | 6 | * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com> |
| 7 | * | 7 | * |
| 8 | * NFS client per-mount statistics provide information about the health of | ||
| 9 | * the NFS client and the health of each NFS mount point. Generally these | ||
| 10 | * are not for detailed problem diagnosis, but simply to indicate that there | ||
| 11 | * is a problem. | ||
| 12 | * | ||
| 13 | * These counters are not meant to be human-readable, but are meant to be | ||
| 14 | * integrated into system monitoring tools such as "sar" and "iostat". As | ||
| 15 | * such, the counters are sampled by the tools over time, and are never | ||
| 16 | * zeroed after a file system is mounted. Moving averages can be computed | ||
| 17 | * by the tools by taking the difference between two instantaneous samples | ||
| 18 | * and dividing that by the time between the samples. | ||
| 19 | */ | 8 | */ |
| 20 | 9 | ||
| 21 | #ifndef _NFS_IOSTAT | 10 | #ifndef _NFS_IOSTAT |
| 22 | #define _NFS_IOSTAT | 11 | #define _NFS_IOSTAT |
| 23 | 12 | ||
| 24 | #define NFS_IOSTAT_VERS "1.0" | ||
| 25 | |||
| 26 | /* | ||
| 27 | * NFS byte counters | ||
| 28 | * | ||
| 29 | * 1. SERVER - the number of payload bytes read from or written to the | ||
| 30 | * server by the NFS client via an NFS READ or WRITE request. | ||
| 31 | * | ||
| 32 | * 2. NORMAL - the number of bytes read or written by applications via | ||
| 33 | * the read(2) and write(2) system call interfaces. | ||
| 34 | * | ||
| 35 | * 3. DIRECT - the number of bytes read or written from files opened | ||
| 36 | * with the O_DIRECT flag. | ||
| 37 | * | ||
| 38 | * These counters give a view of the data throughput into and out of the NFS | ||
| 39 | * client. Comparing the number of bytes requested by an application with the | ||
| 40 | * number of bytes the client requests from the server can provide an | ||
| 41 | * indication of client efficiency (per-op, cache hits, etc). | ||
| 42 | * | ||
| 43 | * These counters can also help characterize which access methods are in | ||
| 44 | * use. DIRECT by itself shows whether there is any O_DIRECT traffic. | ||
| 45 | * NORMAL + DIRECT shows how much data is going through the system call | ||
| 46 | * interface. A large amount of SERVER traffic without much NORMAL or | ||
| 47 | * DIRECT traffic shows that applications are using mapped files. | ||
| 48 | * | ||
| 49 | * NFS page counters | ||
| 50 | * | ||
| 51 | * These count the number of pages read or written via nfs_readpage(), | ||
| 52 | * nfs_readpages(), or their write equivalents. | ||
| 53 | */ | ||
| 54 | enum nfs_stat_bytecounters { | ||
| 55 | NFSIOS_NORMALREADBYTES = 0, | ||
| 56 | NFSIOS_NORMALWRITTENBYTES, | ||
| 57 | NFSIOS_DIRECTREADBYTES, | ||
| 58 | NFSIOS_DIRECTWRITTENBYTES, | ||
| 59 | NFSIOS_SERVERREADBYTES, | ||
| 60 | NFSIOS_SERVERWRITTENBYTES, | ||
| 61 | NFSIOS_READPAGES, | ||
| 62 | NFSIOS_WRITEPAGES, | ||
| 63 | __NFSIOS_BYTESMAX, | ||
| 64 | }; | ||
| 65 | |||
| 66 | /* | ||
| 67 | * NFS event counters | ||
| 68 | * | ||
| 69 | * These counters provide a low-overhead way of monitoring client activity | ||
| 70 | * without enabling NFS trace debugging. The counters show the rate at | ||
| 71 | * which VFS requests are made, and how often the client invalidates its | ||
| 72 | * data and attribute caches. This allows system administrators to monitor | ||
| 73 | * such things as how close-to-open is working, and answer questions such | ||
| 74 | * as "why are there so many GETATTR requests on the wire?" | ||
| 75 | * | ||
| 76 | * They also count anamolous events such as short reads and writes, silly | ||
| 77 | * renames due to close-after-delete, and operations that change the size | ||
| 78 | * of a file (such operations can often be the source of data corruption | ||
| 79 | * if applications aren't using file locking properly). | ||
| 80 | */ | ||
| 81 | enum nfs_stat_eventcounters { | ||
| 82 | NFSIOS_INODEREVALIDATE = 0, | ||
| 83 | NFSIOS_DENTRYREVALIDATE, | ||
| 84 | NFSIOS_DATAINVALIDATE, | ||
| 85 | NFSIOS_ATTRINVALIDATE, | ||
| 86 | NFSIOS_VFSOPEN, | ||
| 87 | NFSIOS_VFSLOOKUP, | ||
| 88 | NFSIOS_VFSACCESS, | ||
| 89 | NFSIOS_VFSUPDATEPAGE, | ||
| 90 | NFSIOS_VFSREADPAGE, | ||
| 91 | NFSIOS_VFSREADPAGES, | ||
| 92 | NFSIOS_VFSWRITEPAGE, | ||
| 93 | NFSIOS_VFSWRITEPAGES, | ||
| 94 | NFSIOS_VFSGETDENTS, | ||
| 95 | NFSIOS_VFSSETATTR, | ||
| 96 | NFSIOS_VFSFLUSH, | ||
| 97 | NFSIOS_VFSFSYNC, | ||
| 98 | NFSIOS_VFSLOCK, | ||
| 99 | NFSIOS_VFSRELEASE, | ||
| 100 | NFSIOS_CONGESTIONWAIT, | ||
| 101 | NFSIOS_SETATTRTRUNC, | ||
| 102 | NFSIOS_EXTENDWRITE, | ||
| 103 | NFSIOS_SILLYRENAME, | ||
| 104 | NFSIOS_SHORTREAD, | ||
| 105 | NFSIOS_SHORTWRITE, | ||
| 106 | NFSIOS_DELAY, | ||
| 107 | __NFSIOS_COUNTSMAX, | ||
| 108 | }; | ||
| 109 | |||
| 110 | #ifdef __KERNEL__ | ||
| 111 | |||
| 112 | #include <linux/percpu.h> | 13 | #include <linux/percpu.h> |
| 113 | #include <linux/cache.h> | 14 | #include <linux/cache.h> |
| 15 | #include <linux/nfs_iostat.h> | ||
| 114 | 16 | ||
| 115 | struct nfs_iostats { | 17 | struct nfs_iostats { |
| 116 | unsigned long long bytes[__NFSIOS_BYTESMAX]; | 18 | unsigned long long bytes[__NFSIOS_BYTESMAX]; |
| 117 | unsigned long events[__NFSIOS_COUNTSMAX]; | 19 | unsigned long events[__NFSIOS_COUNTSMAX]; |
| 118 | } ____cacheline_aligned; | 20 | } ____cacheline_aligned; |
| 119 | 21 | ||
| 120 | static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) | 22 | static inline void nfs_inc_server_stats(const struct nfs_server *server, |
| 23 | enum nfs_stat_eventcounters stat) | ||
| 121 | { | 24 | { |
| 122 | struct nfs_iostats *iostats; | 25 | struct nfs_iostats *iostats; |
| 123 | int cpu; | 26 | int cpu; |
| 124 | 27 | ||
| 125 | cpu = get_cpu(); | 28 | cpu = get_cpu(); |
| 126 | iostats = per_cpu_ptr(server->io_stats, cpu); | 29 | iostats = per_cpu_ptr(server->io_stats, cpu); |
| 127 | iostats->events[stat] ++; | 30 | iostats->events[stat]++; |
| 128 | put_cpu_no_resched(); | 31 | put_cpu_no_resched(); |
| 129 | } | 32 | } |
| 130 | 33 | ||
| 131 | static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) | 34 | static inline void nfs_inc_stats(const struct inode *inode, |
| 35 | enum nfs_stat_eventcounters stat) | ||
| 132 | { | 36 | { |
| 133 | nfs_inc_server_stats(NFS_SERVER(inode), stat); | 37 | nfs_inc_server_stats(NFS_SERVER(inode), stat); |
| 134 | } | 38 | } |
| 135 | 39 | ||
| 136 | static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) | 40 | static inline void nfs_add_server_stats(const struct nfs_server *server, |
| 41 | enum nfs_stat_bytecounters stat, | ||
| 42 | unsigned long addend) | ||
| 137 | { | 43 | { |
| 138 | struct nfs_iostats *iostats; | 44 | struct nfs_iostats *iostats; |
| 139 | int cpu; | 45 | int cpu; |
| @@ -144,7 +50,9 @@ static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat | |||
| 144 | put_cpu_no_resched(); | 50 | put_cpu_no_resched(); |
| 145 | } | 51 | } |
| 146 | 52 | ||
| 147 | static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) | 53 | static inline void nfs_add_stats(const struct inode *inode, |
| 54 | enum nfs_stat_bytecounters stat, | ||
| 55 | unsigned long addend) | ||
| 148 | { | 56 | { |
| 149 | nfs_add_server_stats(NFS_SERVER(inode), stat, addend); | 57 | nfs_add_server_stats(NFS_SERVER(inode), stat, addend); |
| 150 | } | 58 | } |
| @@ -160,5 +68,4 @@ static inline void nfs_free_iostats(struct nfs_iostats *stats) | |||
| 160 | free_percpu(stats); | 68 | free_percpu(stats); |
| 161 | } | 69 | } |
| 162 | 70 | ||
| 163 | #endif | 71 | #endif /* _NFS_IOSTAT */ |
| 164 | #endif | ||
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 49c7cd0502cc..779d2eb649c5 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
| @@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, | |||
| 130 | struct mnt_fhstatus *res) | 130 | struct mnt_fhstatus *res) |
| 131 | { | 131 | { |
| 132 | struct nfs_fh *fh = res->fh; | 132 | struct nfs_fh *fh = res->fh; |
| 133 | unsigned size; | ||
| 133 | 134 | ||
| 134 | if ((res->status = ntohl(*p++)) == 0) { | 135 | if ((res->status = ntohl(*p++)) == 0) { |
| 135 | int size = ntohl(*p++); | 136 | size = ntohl(*p++); |
| 136 | if (size <= NFS3_FHSIZE) { | 137 | if (size <= NFS3_FHSIZE && size != 0) { |
| 137 | fh->size = size; | 138 | fh->size = size; |
| 138 | memcpy(fh->data, p, size); | 139 | memcpy(fh->data, p, size); |
| 139 | } else | 140 | } else |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9b7362565c0c..423842f51ac9 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <linux/posix_acl_xattr.h> | 5 | #include <linux/posix_acl_xattr.h> |
| 6 | #include <linux/nfsacl.h> | 6 | #include <linux/nfsacl.h> |
| 7 | 7 | ||
| 8 | #include "internal.h" | ||
| 9 | |||
| 8 | #define NFSDBG_FACILITY NFSDBG_PROC | 10 | #define NFSDBG_FACILITY NFSDBG_PROC |
| 9 | 11 | ||
| 10 | ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) | 12 | ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) |
| @@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | |||
| 205 | status = nfs_revalidate_inode(server, inode); | 207 | status = nfs_revalidate_inode(server, inode); |
| 206 | if (status < 0) | 208 | if (status < 0) |
| 207 | return ERR_PTR(status); | 209 | return ERR_PTR(status); |
| 210 | if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) | ||
| 211 | nfs_zap_acl_cache(inode); | ||
| 208 | acl = nfs3_get_cached_acl(inode, type); | 212 | acl = nfs3_get_cached_acl(inode, type); |
| 209 | if (acl != ERR_PTR(-EAGAIN)) | 213 | if (acl != ERR_PTR(-EAGAIN)) |
| 210 | return acl; | 214 | return acl; |
| @@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
| 319 | dprintk("NFS call setacl\n"); | 323 | dprintk("NFS call setacl\n"); |
| 320 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | 324 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; |
| 321 | status = rpc_call_sync(server->client_acl, &msg, 0); | 325 | status = rpc_call_sync(server->client_acl, &msg, 0); |
| 322 | spin_lock(&inode->i_lock); | 326 | nfs_access_zap_cache(inode); |
| 323 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; | 327 | nfs_zap_acl_cache(inode); |
| 324 | spin_unlock(&inode->i_lock); | ||
| 325 | dprintk("NFS reply setacl: %d\n", status); | 328 | dprintk("NFS reply setacl: %d\n", status); |
| 326 | 329 | ||
| 327 | /* pages may have been allocated at the xdr layer. */ | 330 | /* pages may have been allocated at the xdr layer. */ |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c3523ad03ed1..1e750e4574a9 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
| @@ -129,6 +129,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 129 | int status; | 129 | int status; |
| 130 | 130 | ||
| 131 | dprintk("NFS call setattr\n"); | 131 | dprintk("NFS call setattr\n"); |
| 132 | if (sattr->ia_valid & ATTR_FILE) | ||
| 133 | msg.rpc_cred = nfs_file_cred(sattr->ia_file); | ||
| 132 | nfs_fattr_init(fattr); | 134 | nfs_fattr_init(fattr); |
| 133 | status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); | 135 | status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); |
| 134 | if (status == 0) | 136 | if (status == 0) |
| @@ -248,6 +250,53 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, | |||
| 248 | return status; | 250 | return status; |
| 249 | } | 251 | } |
| 250 | 252 | ||
| 253 | struct nfs3_createdata { | ||
| 254 | struct rpc_message msg; | ||
| 255 | union { | ||
| 256 | struct nfs3_createargs create; | ||
| 257 | struct nfs3_mkdirargs mkdir; | ||
| 258 | struct nfs3_symlinkargs symlink; | ||
| 259 | struct nfs3_mknodargs mknod; | ||
| 260 | } arg; | ||
| 261 | struct nfs3_diropres res; | ||
| 262 | struct nfs_fh fh; | ||
| 263 | struct nfs_fattr fattr; | ||
| 264 | struct nfs_fattr dir_attr; | ||
| 265 | }; | ||
| 266 | |||
| 267 | static struct nfs3_createdata *nfs3_alloc_createdata(void) | ||
| 268 | { | ||
| 269 | struct nfs3_createdata *data; | ||
| 270 | |||
| 271 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 272 | if (data != NULL) { | ||
| 273 | data->msg.rpc_argp = &data->arg; | ||
| 274 | data->msg.rpc_resp = &data->res; | ||
| 275 | data->res.fh = &data->fh; | ||
| 276 | data->res.fattr = &data->fattr; | ||
| 277 | data->res.dir_attr = &data->dir_attr; | ||
| 278 | nfs_fattr_init(data->res.fattr); | ||
| 279 | nfs_fattr_init(data->res.dir_attr); | ||
| 280 | } | ||
| 281 | return data; | ||
| 282 | } | ||
| 283 | |||
| 284 | static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata *data) | ||
| 285 | { | ||
| 286 | int status; | ||
| 287 | |||
| 288 | status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); | ||
| 289 | nfs_post_op_update_inode(dir, data->res.dir_attr); | ||
| 290 | if (status == 0) | ||
| 291 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | ||
| 292 | return status; | ||
| 293 | } | ||
| 294 | |||
| 295 | static void nfs3_free_createdata(struct nfs3_createdata *data) | ||
| 296 | { | ||
| 297 | kfree(data); | ||
| 298 | } | ||
| 299 | |||
| 251 | /* | 300 | /* |
| 252 | * Create a regular file. | 301 | * Create a regular file. |
| 253 | * For now, we don't implement O_EXCL. | 302 | * For now, we don't implement O_EXCL. |
| @@ -256,70 +305,60 @@ static int | |||
| 256 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 305 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
| 257 | int flags, struct nameidata *nd) | 306 | int flags, struct nameidata *nd) |
| 258 | { | 307 | { |
| 259 | struct nfs_fh fhandle; | 308 | struct nfs3_createdata *data; |
| 260 | struct nfs_fattr fattr; | ||
| 261 | struct nfs_fattr dir_attr; | ||
| 262 | struct nfs3_createargs arg = { | ||
| 263 | .fh = NFS_FH(dir), | ||
| 264 | .name = dentry->d_name.name, | ||
| 265 | .len = dentry->d_name.len, | ||
| 266 | .sattr = sattr, | ||
| 267 | }; | ||
| 268 | struct nfs3_diropres res = { | ||
| 269 | .dir_attr = &dir_attr, | ||
| 270 | .fh = &fhandle, | ||
| 271 | .fattr = &fattr | ||
| 272 | }; | ||
| 273 | struct rpc_message msg = { | ||
| 274 | .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], | ||
| 275 | .rpc_argp = &arg, | ||
| 276 | .rpc_resp = &res, | ||
| 277 | }; | ||
| 278 | mode_t mode = sattr->ia_mode; | 309 | mode_t mode = sattr->ia_mode; |
| 279 | int status; | 310 | int status = -ENOMEM; |
| 280 | 311 | ||
| 281 | dprintk("NFS call create %s\n", dentry->d_name.name); | 312 | dprintk("NFS call create %s\n", dentry->d_name.name); |
| 282 | arg.createmode = NFS3_CREATE_UNCHECKED; | 313 | |
| 314 | data = nfs3_alloc_createdata(); | ||
| 315 | if (data == NULL) | ||
| 316 | goto out; | ||
| 317 | |||
| 318 | data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_CREATE]; | ||
| 319 | data->arg.create.fh = NFS_FH(dir); | ||
| 320 | data->arg.create.name = dentry->d_name.name; | ||
| 321 | data->arg.create.len = dentry->d_name.len; | ||
| 322 | data->arg.create.sattr = sattr; | ||
| 323 | |||
| 324 | data->arg.create.createmode = NFS3_CREATE_UNCHECKED; | ||
| 283 | if (flags & O_EXCL) { | 325 | if (flags & O_EXCL) { |
| 284 | arg.createmode = NFS3_CREATE_EXCLUSIVE; | 326 | data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE; |
| 285 | arg.verifier[0] = jiffies; | 327 | data->arg.create.verifier[0] = jiffies; |
| 286 | arg.verifier[1] = current->pid; | 328 | data->arg.create.verifier[1] = current->pid; |
| 287 | } | 329 | } |
| 288 | 330 | ||
| 289 | sattr->ia_mode &= ~current->fs->umask; | 331 | sattr->ia_mode &= ~current->fs->umask; |
| 290 | 332 | ||
| 291 | again: | 333 | for (;;) { |
| 292 | nfs_fattr_init(&dir_attr); | 334 | status = nfs3_do_create(dir, dentry, data); |
| 293 | nfs_fattr_init(&fattr); | ||
| 294 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | ||
| 295 | nfs_refresh_inode(dir, &dir_attr); | ||
| 296 | 335 | ||
| 297 | /* If the server doesn't support the exclusive creation semantics, | 336 | if (status != -ENOTSUPP) |
| 298 | * try again with simple 'guarded' mode. */ | 337 | break; |
| 299 | if (status == -ENOTSUPP) { | 338 | /* If the server doesn't support the exclusive creation |
| 300 | switch (arg.createmode) { | 339 | * semantics, try again with simple 'guarded' mode. */ |
| 340 | switch (data->arg.create.createmode) { | ||
| 301 | case NFS3_CREATE_EXCLUSIVE: | 341 | case NFS3_CREATE_EXCLUSIVE: |
| 302 | arg.createmode = NFS3_CREATE_GUARDED; | 342 | data->arg.create.createmode = NFS3_CREATE_GUARDED; |
| 303 | break; | 343 | break; |
| 304 | 344 | ||
| 305 | case NFS3_CREATE_GUARDED: | 345 | case NFS3_CREATE_GUARDED: |
| 306 | arg.createmode = NFS3_CREATE_UNCHECKED; | 346 | data->arg.create.createmode = NFS3_CREATE_UNCHECKED; |
| 307 | break; | 347 | break; |
| 308 | 348 | ||
| 309 | case NFS3_CREATE_UNCHECKED: | 349 | case NFS3_CREATE_UNCHECKED: |
| 310 | goto out; | 350 | goto out; |
| 311 | } | 351 | } |
| 312 | goto again; | 352 | nfs_fattr_init(data->res.dir_attr); |
| 353 | nfs_fattr_init(data->res.fattr); | ||
| 313 | } | 354 | } |
| 314 | 355 | ||
| 315 | if (status == 0) | ||
| 316 | status = nfs_instantiate(dentry, &fhandle, &fattr); | ||
| 317 | if (status != 0) | 356 | if (status != 0) |
| 318 | goto out; | 357 | goto out; |
| 319 | 358 | ||
| 320 | /* When we created the file with exclusive semantics, make | 359 | /* When we created the file with exclusive semantics, make |
| 321 | * sure we set the attributes afterwards. */ | 360 | * sure we set the attributes afterwards. */ |
| 322 | if (arg.createmode == NFS3_CREATE_EXCLUSIVE) { | 361 | if (data->arg.create.createmode == NFS3_CREATE_EXCLUSIVE) { |
| 323 | dprintk("NFS call setattr (post-create)\n"); | 362 | dprintk("NFS call setattr (post-create)\n"); |
| 324 | 363 | ||
| 325 | if (!(sattr->ia_valid & ATTR_ATIME_SET)) | 364 | if (!(sattr->ia_valid & ATTR_ATIME_SET)) |
| @@ -330,14 +369,15 @@ again: | |||
| 330 | /* Note: we could use a guarded setattr here, but I'm | 369 | /* Note: we could use a guarded setattr here, but I'm |
| 331 | * not sure this buys us anything (and I'd have | 370 | * not sure this buys us anything (and I'd have |
| 332 | * to revamp the NFSv3 XDR code) */ | 371 | * to revamp the NFSv3 XDR code) */ |
| 333 | status = nfs3_proc_setattr(dentry, &fattr, sattr); | 372 | status = nfs3_proc_setattr(dentry, data->res.fattr, sattr); |
| 334 | nfs_post_op_update_inode(dentry->d_inode, &fattr); | 373 | nfs_post_op_update_inode(dentry->d_inode, data->res.fattr); |
| 335 | dprintk("NFS reply setattr (post-create): %d\n", status); | 374 | dprintk("NFS reply setattr (post-create): %d\n", status); |
| 375 | if (status != 0) | ||
| 376 | goto out; | ||
| 336 | } | 377 | } |
| 337 | if (status != 0) | ||
| 338 | goto out; | ||
| 339 | status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); | 378 | status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); |
| 340 | out: | 379 | out: |
| 380 | nfs3_free_createdata(data); | ||
| 341 | dprintk("NFS reply create: %d\n", status); | 381 | dprintk("NFS reply create: %d\n", status); |
| 342 | return status; | 382 | return status; |
| 343 | } | 383 | } |
| @@ -452,40 +492,28 @@ static int | |||
| 452 | nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, | 492 | nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, |
| 453 | unsigned int len, struct iattr *sattr) | 493 | unsigned int len, struct iattr *sattr) |
| 454 | { | 494 | { |
| 455 | struct nfs_fh fhandle; | 495 | struct nfs3_createdata *data; |
| 456 | struct nfs_fattr fattr, dir_attr; | 496 | int status = -ENOMEM; |
| 457 | struct nfs3_symlinkargs arg = { | ||
| 458 | .fromfh = NFS_FH(dir), | ||
| 459 | .fromname = dentry->d_name.name, | ||
| 460 | .fromlen = dentry->d_name.len, | ||
| 461 | .pages = &page, | ||
| 462 | .pathlen = len, | ||
| 463 | .sattr = sattr | ||
| 464 | }; | ||
| 465 | struct nfs3_diropres res = { | ||
| 466 | .dir_attr = &dir_attr, | ||
| 467 | .fh = &fhandle, | ||
| 468 | .fattr = &fattr | ||
| 469 | }; | ||
| 470 | struct rpc_message msg = { | ||
| 471 | .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], | ||
| 472 | .rpc_argp = &arg, | ||
| 473 | .rpc_resp = &res, | ||
| 474 | }; | ||
| 475 | int status; | ||
| 476 | 497 | ||
| 477 | if (len > NFS3_MAXPATHLEN) | 498 | if (len > NFS3_MAXPATHLEN) |
| 478 | return -ENAMETOOLONG; | 499 | return -ENAMETOOLONG; |
| 479 | 500 | ||
| 480 | dprintk("NFS call symlink %s\n", dentry->d_name.name); | 501 | dprintk("NFS call symlink %s\n", dentry->d_name.name); |
| 481 | 502 | ||
| 482 | nfs_fattr_init(&dir_attr); | 503 | data = nfs3_alloc_createdata(); |
| 483 | nfs_fattr_init(&fattr); | 504 | if (data == NULL) |
| 484 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | ||
| 485 | nfs_post_op_update_inode(dir, &dir_attr); | ||
| 486 | if (status != 0) | ||
| 487 | goto out; | 505 | goto out; |
| 488 | status = nfs_instantiate(dentry, &fhandle, &fattr); | 506 | data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK]; |
| 507 | data->arg.symlink.fromfh = NFS_FH(dir); | ||
| 508 | data->arg.symlink.fromname = dentry->d_name.name; | ||
| 509 | data->arg.symlink.fromlen = dentry->d_name.len; | ||
| 510 | data->arg.symlink.pages = &page; | ||
| 511 | data->arg.symlink.pathlen = len; | ||
| 512 | data->arg.symlink.sattr = sattr; | ||
| 513 | |||
| 514 | status = nfs3_do_create(dir, dentry, data); | ||
| 515 | |||
| 516 | nfs3_free_createdata(data); | ||
| 489 | out: | 517 | out: |
| 490 | dprintk("NFS reply symlink: %d\n", status); | 518 | dprintk("NFS reply symlink: %d\n", status); |
| 491 | return status; | 519 | return status; |
| @@ -494,42 +522,31 @@ out: | |||
| 494 | static int | 522 | static int |
| 495 | nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) | 523 | nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) |
| 496 | { | 524 | { |
| 497 | struct nfs_fh fhandle; | 525 | struct nfs3_createdata *data; |
| 498 | struct nfs_fattr fattr, dir_attr; | ||
| 499 | struct nfs3_mkdirargs arg = { | ||
| 500 | .fh = NFS_FH(dir), | ||
| 501 | .name = dentry->d_name.name, | ||
| 502 | .len = dentry->d_name.len, | ||
| 503 | .sattr = sattr | ||
| 504 | }; | ||
| 505 | struct nfs3_diropres res = { | ||
| 506 | .dir_attr = &dir_attr, | ||
| 507 | .fh = &fhandle, | ||
| 508 | .fattr = &fattr | ||
| 509 | }; | ||
| 510 | struct rpc_message msg = { | ||
| 511 | .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], | ||
| 512 | .rpc_argp = &arg, | ||
| 513 | .rpc_resp = &res, | ||
| 514 | }; | ||
| 515 | int mode = sattr->ia_mode; | 526 | int mode = sattr->ia_mode; |
| 516 | int status; | 527 | int status = -ENOMEM; |
| 517 | 528 | ||
| 518 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); | 529 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); |
| 519 | 530 | ||
| 520 | sattr->ia_mode &= ~current->fs->umask; | 531 | sattr->ia_mode &= ~current->fs->umask; |
| 521 | 532 | ||
| 522 | nfs_fattr_init(&dir_attr); | 533 | data = nfs3_alloc_createdata(); |
| 523 | nfs_fattr_init(&fattr); | 534 | if (data == NULL) |
| 524 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | ||
| 525 | nfs_post_op_update_inode(dir, &dir_attr); | ||
| 526 | if (status != 0) | ||
| 527 | goto out; | 535 | goto out; |
| 528 | status = nfs_instantiate(dentry, &fhandle, &fattr); | 536 | |
| 537 | data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; | ||
| 538 | data->arg.mkdir.fh = NFS_FH(dir); | ||
| 539 | data->arg.mkdir.name = dentry->d_name.name; | ||
| 540 | data->arg.mkdir.len = dentry->d_name.len; | ||
| 541 | data->arg.mkdir.sattr = sattr; | ||
| 542 | |||
| 543 | status = nfs3_do_create(dir, dentry, data); | ||
| 529 | if (status != 0) | 544 | if (status != 0) |
| 530 | goto out; | 545 | goto out; |
| 546 | |||
| 531 | status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); | 547 | status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); |
| 532 | out: | 548 | out: |
| 549 | nfs3_free_createdata(data); | ||
| 533 | dprintk("NFS reply mkdir: %d\n", status); | 550 | dprintk("NFS reply mkdir: %d\n", status); |
| 534 | return status; | 551 | return status; |
| 535 | } | 552 | } |
| @@ -615,52 +632,50 @@ static int | |||
| 615 | nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 632 | nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
| 616 | dev_t rdev) | 633 | dev_t rdev) |
| 617 | { | 634 | { |
| 618 | struct nfs_fh fh; | 635 | struct nfs3_createdata *data; |
| 619 | struct nfs_fattr fattr, dir_attr; | ||
| 620 | struct nfs3_mknodargs arg = { | ||
| 621 | .fh = NFS_FH(dir), | ||
| 622 | .name = dentry->d_name.name, | ||
| 623 | .len = dentry->d_name.len, | ||
| 624 | .sattr = sattr, | ||
| 625 | .rdev = rdev | ||
| 626 | }; | ||
| 627 | struct nfs3_diropres res = { | ||
| 628 | .dir_attr = &dir_attr, | ||
| 629 | .fh = &fh, | ||
| 630 | .fattr = &fattr | ||
| 631 | }; | ||
| 632 | struct rpc_message msg = { | ||
| 633 | .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], | ||
| 634 | .rpc_argp = &arg, | ||
| 635 | .rpc_resp = &res, | ||
| 636 | }; | ||
| 637 | mode_t mode = sattr->ia_mode; | 636 | mode_t mode = sattr->ia_mode; |
| 638 | int status; | 637 | int status = -ENOMEM; |
| 639 | |||
| 640 | switch (sattr->ia_mode & S_IFMT) { | ||
| 641 | case S_IFBLK: arg.type = NF3BLK; break; | ||
| 642 | case S_IFCHR: arg.type = NF3CHR; break; | ||
| 643 | case S_IFIFO: arg.type = NF3FIFO; break; | ||
| 644 | case S_IFSOCK: arg.type = NF3SOCK; break; | ||
| 645 | default: return -EINVAL; | ||
| 646 | } | ||
| 647 | 638 | ||
| 648 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, | 639 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, |
| 649 | MAJOR(rdev), MINOR(rdev)); | 640 | MAJOR(rdev), MINOR(rdev)); |
| 650 | 641 | ||
| 651 | sattr->ia_mode &= ~current->fs->umask; | 642 | sattr->ia_mode &= ~current->fs->umask; |
| 652 | 643 | ||
| 653 | nfs_fattr_init(&dir_attr); | 644 | data = nfs3_alloc_createdata(); |
| 654 | nfs_fattr_init(&fattr); | 645 | if (data == NULL) |
| 655 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | ||
| 656 | nfs_post_op_update_inode(dir, &dir_attr); | ||
| 657 | if (status != 0) | ||
| 658 | goto out; | 646 | goto out; |
| 659 | status = nfs_instantiate(dentry, &fh, &fattr); | 647 | |
| 648 | data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD]; | ||
| 649 | data->arg.mknod.fh = NFS_FH(dir); | ||
| 650 | data->arg.mknod.name = dentry->d_name.name; | ||
| 651 | data->arg.mknod.len = dentry->d_name.len; | ||
| 652 | data->arg.mknod.sattr = sattr; | ||
| 653 | data->arg.mknod.rdev = rdev; | ||
| 654 | |||
| 655 | switch (sattr->ia_mode & S_IFMT) { | ||
| 656 | case S_IFBLK: | ||
| 657 | data->arg.mknod.type = NF3BLK; | ||
| 658 | break; | ||
| 659 | case S_IFCHR: | ||
| 660 | data->arg.mknod.type = NF3CHR; | ||
| 661 | break; | ||
| 662 | case S_IFIFO: | ||
| 663 | data->arg.mknod.type = NF3FIFO; | ||
| 664 | break; | ||
| 665 | case S_IFSOCK: | ||
| 666 | data->arg.mknod.type = NF3SOCK; | ||
| 667 | break; | ||
| 668 | default: | ||
| 669 | status = -EINVAL; | ||
| 670 | goto out; | ||
| 671 | } | ||
| 672 | |||
| 673 | status = nfs3_do_create(dir, dentry, data); | ||
| 660 | if (status != 0) | 674 | if (status != 0) |
| 661 | goto out; | 675 | goto out; |
| 662 | status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); | 676 | status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); |
| 663 | out: | 677 | out: |
| 678 | nfs3_free_createdata(data); | ||
| 664 | dprintk("NFS reply mknod: %d\n", status); | 679 | dprintk("NFS reply mknod: %d\n", status); |
| 665 | return status; | 680 | return status; |
| 666 | } | 681 | } |
| @@ -801,8 +816,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
| 801 | .write_done = nfs3_write_done, | 816 | .write_done = nfs3_write_done, |
| 802 | .commit_setup = nfs3_proc_commit_setup, | 817 | .commit_setup = nfs3_proc_commit_setup, |
| 803 | .commit_done = nfs3_commit_done, | 818 | .commit_done = nfs3_commit_done, |
| 804 | .file_open = nfs_open, | ||
| 805 | .file_release = nfs_release, | ||
| 806 | .lock = nfs3_proc_lock, | 819 | .lock = nfs3_proc_lock, |
| 807 | .clear_acl_cache = nfs3_forget_cached_acls, | 820 | .clear_acl_cache = nfs3_forget_cached_acls, |
| 808 | }; | 821 | }; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1293e0acd82b..c910413eaeca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -451,9 +451,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
| 451 | /* Save the delegation */ | 451 | /* Save the delegation */ |
| 452 | memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); | 452 | memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); |
| 453 | rcu_read_unlock(); | 453 | rcu_read_unlock(); |
| 454 | lock_kernel(); | ||
| 455 | ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); | 454 | ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); |
| 456 | unlock_kernel(); | ||
| 457 | if (ret != 0) | 455 | if (ret != 0) |
| 458 | goto out; | 456 | goto out; |
| 459 | ret = -EAGAIN; | 457 | ret = -EAGAIN; |
| @@ -1139,8 +1137,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int | |||
| 1139 | return res; | 1137 | return res; |
| 1140 | } | 1138 | } |
| 1141 | 1139 | ||
| 1142 | static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, | 1140 | static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
| 1143 | struct iattr *sattr, struct nfs4_state *state) | 1141 | struct nfs_fattr *fattr, struct iattr *sattr, |
| 1142 | struct nfs4_state *state) | ||
| 1144 | { | 1143 | { |
| 1145 | struct nfs_server *server = NFS_SERVER(inode); | 1144 | struct nfs_server *server = NFS_SERVER(inode); |
| 1146 | struct nfs_setattrargs arg = { | 1145 | struct nfs_setattrargs arg = { |
| @@ -1154,9 +1153,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, | |||
| 1154 | .server = server, | 1153 | .server = server, |
| 1155 | }; | 1154 | }; |
| 1156 | struct rpc_message msg = { | 1155 | struct rpc_message msg = { |
| 1157 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], | 1156 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], |
| 1158 | .rpc_argp = &arg, | 1157 | .rpc_argp = &arg, |
| 1159 | .rpc_resp = &res, | 1158 | .rpc_resp = &res, |
| 1159 | .rpc_cred = cred, | ||
| 1160 | }; | 1160 | }; |
| 1161 | unsigned long timestamp = jiffies; | 1161 | unsigned long timestamp = jiffies; |
| 1162 | int status; | 1162 | int status; |
| @@ -1166,7 +1166,6 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, | |||
| 1166 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { | 1166 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { |
| 1167 | /* Use that stateid */ | 1167 | /* Use that stateid */ |
| 1168 | } else if (state != NULL) { | 1168 | } else if (state != NULL) { |
| 1169 | msg.rpc_cred = state->owner->so_cred; | ||
| 1170 | nfs4_copy_stateid(&arg.stateid, state, current->files); | 1169 | nfs4_copy_stateid(&arg.stateid, state, current->files); |
| 1171 | } else | 1170 | } else |
| 1172 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); | 1171 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); |
| @@ -1177,15 +1176,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, | |||
| 1177 | return status; | 1176 | return status; |
| 1178 | } | 1177 | } |
| 1179 | 1178 | ||
| 1180 | static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, | 1179 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
| 1181 | struct iattr *sattr, struct nfs4_state *state) | 1180 | struct nfs_fattr *fattr, struct iattr *sattr, |
| 1181 | struct nfs4_state *state) | ||
| 1182 | { | 1182 | { |
| 1183 | struct nfs_server *server = NFS_SERVER(inode); | 1183 | struct nfs_server *server = NFS_SERVER(inode); |
| 1184 | struct nfs4_exception exception = { }; | 1184 | struct nfs4_exception exception = { }; |
| 1185 | int err; | 1185 | int err; |
| 1186 | do { | 1186 | do { |
| 1187 | err = nfs4_handle_exception(server, | 1187 | err = nfs4_handle_exception(server, |
| 1188 | _nfs4_do_setattr(inode, fattr, sattr, state), | 1188 | _nfs4_do_setattr(inode, cred, fattr, sattr, state), |
| 1189 | &exception); | 1189 | &exception); |
| 1190 | } while (exception.retry); | 1190 | } while (exception.retry); |
| 1191 | return err; | 1191 | return err; |
| @@ -1647,29 +1647,25 @@ static int | |||
| 1647 | nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | 1647 | nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, |
| 1648 | struct iattr *sattr) | 1648 | struct iattr *sattr) |
| 1649 | { | 1649 | { |
| 1650 | struct rpc_cred *cred; | ||
| 1651 | struct inode *inode = dentry->d_inode; | 1650 | struct inode *inode = dentry->d_inode; |
| 1652 | struct nfs_open_context *ctx; | 1651 | struct rpc_cred *cred = NULL; |
| 1653 | struct nfs4_state *state = NULL; | 1652 | struct nfs4_state *state = NULL; |
| 1654 | int status; | 1653 | int status; |
| 1655 | 1654 | ||
| 1656 | nfs_fattr_init(fattr); | 1655 | nfs_fattr_init(fattr); |
| 1657 | 1656 | ||
| 1658 | cred = rpc_lookup_cred(); | ||
| 1659 | if (IS_ERR(cred)) | ||
| 1660 | return PTR_ERR(cred); | ||
| 1661 | |||
| 1662 | /* Search for an existing open(O_WRITE) file */ | 1657 | /* Search for an existing open(O_WRITE) file */ |
| 1663 | ctx = nfs_find_open_context(inode, cred, FMODE_WRITE); | 1658 | if (sattr->ia_valid & ATTR_FILE) { |
| 1664 | if (ctx != NULL) | 1659 | struct nfs_open_context *ctx; |
| 1660 | |||
| 1661 | ctx = nfs_file_open_context(sattr->ia_file); | ||
| 1662 | cred = ctx->cred; | ||
| 1665 | state = ctx->state; | 1663 | state = ctx->state; |
| 1664 | } | ||
| 1666 | 1665 | ||
| 1667 | status = nfs4_do_setattr(inode, fattr, sattr, state); | 1666 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); |
| 1668 | if (status == 0) | 1667 | if (status == 0) |
| 1669 | nfs_setattr_update_inode(inode, sattr); | 1668 | nfs_setattr_update_inode(inode, sattr); |
| 1670 | if (ctx != NULL) | ||
| 1671 | put_nfs_open_context(ctx); | ||
| 1672 | put_rpccred(cred); | ||
| 1673 | return status; | 1669 | return status; |
| 1674 | } | 1670 | } |
| 1675 | 1671 | ||
| @@ -1897,17 +1893,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
| 1897 | goto out; | 1893 | goto out; |
| 1898 | } | 1894 | } |
| 1899 | state = nfs4_do_open(dir, &path, flags, sattr, cred); | 1895 | state = nfs4_do_open(dir, &path, flags, sattr, cred); |
| 1900 | put_rpccred(cred); | ||
| 1901 | d_drop(dentry); | 1896 | d_drop(dentry); |
| 1902 | if (IS_ERR(state)) { | 1897 | if (IS_ERR(state)) { |
| 1903 | status = PTR_ERR(state); | 1898 | status = PTR_ERR(state); |
| 1904 | goto out; | 1899 | goto out_putcred; |
| 1905 | } | 1900 | } |
| 1906 | d_add(dentry, igrab(state->inode)); | 1901 | d_add(dentry, igrab(state->inode)); |
| 1907 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1902 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
| 1908 | if (flags & O_EXCL) { | 1903 | if (flags & O_EXCL) { |
| 1909 | struct nfs_fattr fattr; | 1904 | struct nfs_fattr fattr; |
| 1910 | status = nfs4_do_setattr(state->inode, &fattr, sattr, state); | 1905 | status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state); |
| 1911 | if (status == 0) | 1906 | if (status == 0) |
| 1912 | nfs_setattr_update_inode(state->inode, sattr); | 1907 | nfs_setattr_update_inode(state->inode, sattr); |
| 1913 | nfs_post_op_update_inode(state->inode, &fattr); | 1908 | nfs_post_op_update_inode(state->inode, &fattr); |
| @@ -1916,6 +1911,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
| 1916 | status = nfs4_intent_set_file(nd, &path, state); | 1911 | status = nfs4_intent_set_file(nd, &path, state); |
| 1917 | else | 1912 | else |
| 1918 | nfs4_close_sync(&path, state, flags); | 1913 | nfs4_close_sync(&path, state, flags); |
| 1914 | out_putcred: | ||
| 1915 | put_rpccred(cred); | ||
| 1919 | out: | 1916 | out: |
| 1920 | return status; | 1917 | return status; |
| 1921 | } | 1918 | } |
| @@ -2079,47 +2076,81 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n | |||
| 2079 | return err; | 2076 | return err; |
| 2080 | } | 2077 | } |
| 2081 | 2078 | ||
| 2079 | struct nfs4_createdata { | ||
| 2080 | struct rpc_message msg; | ||
| 2081 | struct nfs4_create_arg arg; | ||
| 2082 | struct nfs4_create_res res; | ||
| 2083 | struct nfs_fh fh; | ||
| 2084 | struct nfs_fattr fattr; | ||
| 2085 | struct nfs_fattr dir_fattr; | ||
| 2086 | }; | ||
| 2087 | |||
| 2088 | static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | ||
| 2089 | struct qstr *name, struct iattr *sattr, u32 ftype) | ||
| 2090 | { | ||
| 2091 | struct nfs4_createdata *data; | ||
| 2092 | |||
| 2093 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 2094 | if (data != NULL) { | ||
| 2095 | struct nfs_server *server = NFS_SERVER(dir); | ||
| 2096 | |||
| 2097 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; | ||
| 2098 | data->msg.rpc_argp = &data->arg; | ||
| 2099 | data->msg.rpc_resp = &data->res; | ||
| 2100 | data->arg.dir_fh = NFS_FH(dir); | ||
| 2101 | data->arg.server = server; | ||
| 2102 | data->arg.name = name; | ||
| 2103 | data->arg.attrs = sattr; | ||
| 2104 | data->arg.ftype = ftype; | ||
| 2105 | data->arg.bitmask = server->attr_bitmask; | ||
| 2106 | data->res.server = server; | ||
| 2107 | data->res.fh = &data->fh; | ||
| 2108 | data->res.fattr = &data->fattr; | ||
| 2109 | data->res.dir_fattr = &data->dir_fattr; | ||
| 2110 | nfs_fattr_init(data->res.fattr); | ||
| 2111 | nfs_fattr_init(data->res.dir_fattr); | ||
| 2112 | } | ||
| 2113 | return data; | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) | ||
| 2117 | { | ||
| 2118 | int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); | ||
| 2119 | if (status == 0) { | ||
| 2120 | update_changeattr(dir, &data->res.dir_cinfo); | ||
| 2121 | nfs_post_op_update_inode(dir, data->res.dir_fattr); | ||
| 2122 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | ||
| 2123 | } | ||
| 2124 | return status; | ||
| 2125 | } | ||
| 2126 | |||
| 2127 | static void nfs4_free_createdata(struct nfs4_createdata *data) | ||
| 2128 | { | ||
| 2129 | kfree(data); | ||
| 2130 | } | ||
| 2131 | |||
| 2082 | static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | 2132 | static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, |
| 2083 | struct page *page, unsigned int len, struct iattr *sattr) | 2133 | struct page *page, unsigned int len, struct iattr *sattr) |
| 2084 | { | 2134 | { |
| 2085 | struct nfs_server *server = NFS_SERVER(dir); | 2135 | struct nfs4_createdata *data; |
| 2086 | struct nfs_fh fhandle; | 2136 | int status = -ENAMETOOLONG; |
| 2087 | struct nfs_fattr fattr, dir_fattr; | ||
| 2088 | struct nfs4_create_arg arg = { | ||
| 2089 | .dir_fh = NFS_FH(dir), | ||
| 2090 | .server = server, | ||
| 2091 | .name = &dentry->d_name, | ||
| 2092 | .attrs = sattr, | ||
| 2093 | .ftype = NF4LNK, | ||
| 2094 | .bitmask = server->attr_bitmask, | ||
| 2095 | }; | ||
| 2096 | struct nfs4_create_res res = { | ||
| 2097 | .server = server, | ||
| 2098 | .fh = &fhandle, | ||
| 2099 | .fattr = &fattr, | ||
| 2100 | .dir_fattr = &dir_fattr, | ||
| 2101 | }; | ||
| 2102 | struct rpc_message msg = { | ||
| 2103 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], | ||
| 2104 | .rpc_argp = &arg, | ||
| 2105 | .rpc_resp = &res, | ||
| 2106 | }; | ||
| 2107 | int status; | ||
| 2108 | 2137 | ||
| 2109 | if (len > NFS4_MAXPATHLEN) | 2138 | if (len > NFS4_MAXPATHLEN) |
| 2110 | return -ENAMETOOLONG; | 2139 | goto out; |
| 2111 | 2140 | ||
| 2112 | arg.u.symlink.pages = &page; | 2141 | status = -ENOMEM; |
| 2113 | arg.u.symlink.len = len; | 2142 | data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4LNK); |
| 2114 | nfs_fattr_init(&fattr); | 2143 | if (data == NULL) |
| 2115 | nfs_fattr_init(&dir_fattr); | 2144 | goto out; |
| 2145 | |||
| 2146 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; | ||
| 2147 | data->arg.u.symlink.pages = &page; | ||
| 2148 | data->arg.u.symlink.len = len; | ||
| 2116 | 2149 | ||
| 2117 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 2150 | status = nfs4_do_create(dir, dentry, data); |
| 2118 | if (!status) { | 2151 | |
| 2119 | update_changeattr(dir, &res.dir_cinfo); | 2152 | nfs4_free_createdata(data); |
| 2120 | nfs_post_op_update_inode(dir, res.dir_fattr); | 2153 | out: |
| 2121 | status = nfs_instantiate(dentry, &fhandle, &fattr); | ||
| 2122 | } | ||
| 2123 | return status; | 2154 | return status; |
| 2124 | } | 2155 | } |
| 2125 | 2156 | ||
| @@ -2140,39 +2171,17 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | |||
| 2140 | static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | 2171 | static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, |
| 2141 | struct iattr *sattr) | 2172 | struct iattr *sattr) |
| 2142 | { | 2173 | { |
| 2143 | struct nfs_server *server = NFS_SERVER(dir); | 2174 | struct nfs4_createdata *data; |
| 2144 | struct nfs_fh fhandle; | 2175 | int status = -ENOMEM; |
| 2145 | struct nfs_fattr fattr, dir_fattr; | ||
| 2146 | struct nfs4_create_arg arg = { | ||
| 2147 | .dir_fh = NFS_FH(dir), | ||
| 2148 | .server = server, | ||
| 2149 | .name = &dentry->d_name, | ||
| 2150 | .attrs = sattr, | ||
| 2151 | .ftype = NF4DIR, | ||
| 2152 | .bitmask = server->attr_bitmask, | ||
| 2153 | }; | ||
| 2154 | struct nfs4_create_res res = { | ||
| 2155 | .server = server, | ||
| 2156 | .fh = &fhandle, | ||
| 2157 | .fattr = &fattr, | ||
| 2158 | .dir_fattr = &dir_fattr, | ||
| 2159 | }; | ||
| 2160 | struct rpc_message msg = { | ||
| 2161 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], | ||
| 2162 | .rpc_argp = &arg, | ||
| 2163 | .rpc_resp = &res, | ||
| 2164 | }; | ||
| 2165 | int status; | ||
| 2166 | 2176 | ||
| 2167 | nfs_fattr_init(&fattr); | 2177 | data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); |
| 2168 | nfs_fattr_init(&dir_fattr); | 2178 | if (data == NULL) |
| 2169 | 2179 | goto out; | |
| 2170 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 2180 | |
| 2171 | if (!status) { | 2181 | status = nfs4_do_create(dir, dentry, data); |
| 2172 | update_changeattr(dir, &res.dir_cinfo); | 2182 | |
| 2173 | nfs_post_op_update_inode(dir, res.dir_fattr); | 2183 | nfs4_free_createdata(data); |
| 2174 | status = nfs_instantiate(dentry, &fhandle, &fattr); | 2184 | out: |
| 2175 | } | ||
| 2176 | return status; | 2185 | return status; |
| 2177 | } | 2186 | } |
| 2178 | 2187 | ||
| @@ -2242,56 +2251,34 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
| 2242 | static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | 2251 | static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, |
| 2243 | struct iattr *sattr, dev_t rdev) | 2252 | struct iattr *sattr, dev_t rdev) |
| 2244 | { | 2253 | { |
| 2245 | struct nfs_server *server = NFS_SERVER(dir); | 2254 | struct nfs4_createdata *data; |
| 2246 | struct nfs_fh fh; | 2255 | int mode = sattr->ia_mode; |
| 2247 | struct nfs_fattr fattr, dir_fattr; | 2256 | int status = -ENOMEM; |
| 2248 | struct nfs4_create_arg arg = { | ||
| 2249 | .dir_fh = NFS_FH(dir), | ||
| 2250 | .server = server, | ||
| 2251 | .name = &dentry->d_name, | ||
| 2252 | .attrs = sattr, | ||
| 2253 | .bitmask = server->attr_bitmask, | ||
| 2254 | }; | ||
| 2255 | struct nfs4_create_res res = { | ||
| 2256 | .server = server, | ||
| 2257 | .fh = &fh, | ||
| 2258 | .fattr = &fattr, | ||
| 2259 | .dir_fattr = &dir_fattr, | ||
| 2260 | }; | ||
| 2261 | struct rpc_message msg = { | ||
| 2262 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], | ||
| 2263 | .rpc_argp = &arg, | ||
| 2264 | .rpc_resp = &res, | ||
| 2265 | }; | ||
| 2266 | int status; | ||
| 2267 | int mode = sattr->ia_mode; | ||
| 2268 | |||
| 2269 | nfs_fattr_init(&fattr); | ||
| 2270 | nfs_fattr_init(&dir_fattr); | ||
| 2271 | 2257 | ||
| 2272 | BUG_ON(!(sattr->ia_valid & ATTR_MODE)); | 2258 | BUG_ON(!(sattr->ia_valid & ATTR_MODE)); |
| 2273 | BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); | 2259 | BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); |
| 2260 | |||
| 2261 | data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); | ||
| 2262 | if (data == NULL) | ||
| 2263 | goto out; | ||
| 2264 | |||
| 2274 | if (S_ISFIFO(mode)) | 2265 | if (S_ISFIFO(mode)) |
| 2275 | arg.ftype = NF4FIFO; | 2266 | data->arg.ftype = NF4FIFO; |
| 2276 | else if (S_ISBLK(mode)) { | 2267 | else if (S_ISBLK(mode)) { |
| 2277 | arg.ftype = NF4BLK; | 2268 | data->arg.ftype = NF4BLK; |
| 2278 | arg.u.device.specdata1 = MAJOR(rdev); | 2269 | data->arg.u.device.specdata1 = MAJOR(rdev); |
| 2279 | arg.u.device.specdata2 = MINOR(rdev); | 2270 | data->arg.u.device.specdata2 = MINOR(rdev); |
| 2280 | } | 2271 | } |
| 2281 | else if (S_ISCHR(mode)) { | 2272 | else if (S_ISCHR(mode)) { |
| 2282 | arg.ftype = NF4CHR; | 2273 | data->arg.ftype = NF4CHR; |
| 2283 | arg.u.device.specdata1 = MAJOR(rdev); | 2274 | data->arg.u.device.specdata1 = MAJOR(rdev); |
| 2284 | arg.u.device.specdata2 = MINOR(rdev); | 2275 | data->arg.u.device.specdata2 = MINOR(rdev); |
| 2285 | } | 2276 | } |
| 2286 | else | ||
| 2287 | arg.ftype = NF4SOCK; | ||
| 2288 | 2277 | ||
| 2289 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 2278 | status = nfs4_do_create(dir, dentry, data); |
| 2290 | if (status == 0) { | 2279 | |
| 2291 | update_changeattr(dir, &res.dir_cinfo); | 2280 | nfs4_free_createdata(data); |
| 2292 | nfs_post_op_update_inode(dir, res.dir_fattr); | 2281 | out: |
| 2293 | status = nfs_instantiate(dentry, &fh, &fattr); | ||
| 2294 | } | ||
| 2295 | return status; | 2282 | return status; |
| 2296 | } | 2283 | } |
| 2297 | 2284 | ||
| @@ -2706,6 +2693,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | |||
| 2706 | ret = nfs_revalidate_inode(server, inode); | 2693 | ret = nfs_revalidate_inode(server, inode); |
| 2707 | if (ret < 0) | 2694 | if (ret < 0) |
| 2708 | return ret; | 2695 | return ret; |
| 2696 | if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) | ||
| 2697 | nfs_zap_acl_cache(inode); | ||
| 2709 | ret = nfs4_read_cached_acl(inode, buf, buflen); | 2698 | ret = nfs4_read_cached_acl(inode, buf, buflen); |
| 2710 | if (ret != -ENOENT) | 2699 | if (ret != -ENOENT) |
| 2711 | return ret; | 2700 | return ret; |
| @@ -2733,7 +2722,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl | |||
| 2733 | nfs_inode_return_delegation(inode); | 2722 | nfs_inode_return_delegation(inode); |
| 2734 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); | 2723 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); |
| 2735 | ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); | 2724 | ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); |
| 2736 | nfs_zap_caches(inode); | 2725 | nfs_access_zap_cache(inode); |
| 2726 | nfs_zap_acl_cache(inode); | ||
| 2737 | return ret; | 2727 | return ret; |
| 2738 | } | 2728 | } |
| 2739 | 2729 | ||
| @@ -2767,8 +2757,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) | |||
| 2767 | task->tk_status = 0; | 2757 | task->tk_status = 0; |
| 2768 | return -EAGAIN; | 2758 | return -EAGAIN; |
| 2769 | case -NFS4ERR_DELAY: | 2759 | case -NFS4ERR_DELAY: |
| 2770 | nfs_inc_server_stats((struct nfs_server *) server, | 2760 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
| 2771 | NFSIOS_DELAY); | ||
| 2772 | case -NFS4ERR_GRACE: | 2761 | case -NFS4ERR_GRACE: |
| 2773 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 2762 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
| 2774 | task->tk_status = 0; | 2763 | task->tk_status = 0; |
| @@ -2933,7 +2922,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre | |||
| 2933 | 2922 | ||
| 2934 | int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) | 2923 | int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) |
| 2935 | { | 2924 | { |
| 2936 | long timeout; | 2925 | long timeout = 0; |
| 2937 | int err; | 2926 | int err; |
| 2938 | do { | 2927 | do { |
| 2939 | err = _nfs4_proc_setclientid_confirm(clp, cred); | 2928 | err = _nfs4_proc_setclientid_confirm(clp, cred); |
| @@ -3725,8 +3714,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
| 3725 | .write_done = nfs4_write_done, | 3714 | .write_done = nfs4_write_done, |
| 3726 | .commit_setup = nfs4_proc_commit_setup, | 3715 | .commit_setup = nfs4_proc_commit_setup, |
| 3727 | .commit_done = nfs4_commit_done, | 3716 | .commit_done = nfs4_commit_done, |
| 3728 | .file_open = nfs_open, | ||
| 3729 | .file_release = nfs_release, | ||
| 3730 | .lock = nfs4_proc_lock, | 3717 | .lock = nfs4_proc_lock, |
| 3731 | .clear_acl_cache = nfs4_zap_acl_attr, | 3718 | .clear_acl_cache = nfs4_zap_acl_attr, |
| 3732 | }; | 3719 | }; |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 856a8934f610..401ef8b28f97 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
| @@ -940,7 +940,6 @@ static int reclaimer(void *ptr) | |||
| 940 | allow_signal(SIGKILL); | 940 | allow_signal(SIGKILL); |
| 941 | 941 | ||
| 942 | /* Ensure exclusive access to NFSv4 state */ | 942 | /* Ensure exclusive access to NFSv4 state */ |
| 943 | lock_kernel(); | ||
| 944 | down_write(&clp->cl_sem); | 943 | down_write(&clp->cl_sem); |
| 945 | /* Are there any NFS mounts out there? */ | 944 | /* Are there any NFS mounts out there? */ |
| 946 | if (list_empty(&clp->cl_superblocks)) | 945 | if (list_empty(&clp->cl_superblocks)) |
| @@ -1000,7 +999,6 @@ restart_loop: | |||
| 1000 | nfs_delegation_reap_unclaimed(clp); | 999 | nfs_delegation_reap_unclaimed(clp); |
| 1001 | out: | 1000 | out: |
| 1002 | up_write(&clp->cl_sem); | 1001 | up_write(&clp->cl_sem); |
| 1003 | unlock_kernel(); | ||
| 1004 | if (status == -NFS4ERR_CB_PATH_DOWN) | 1002 | if (status == -NFS4ERR_CB_PATH_DOWN) |
| 1005 | nfs_handle_cb_pathdown(clp); | 1003 | nfs_handle_cb_pathdown(clp); |
| 1006 | nfs4_clear_recover_bit(clp); | 1004 | nfs4_clear_recover_bit(clp); |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 531379d36823..46763d1cd397 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
| @@ -1,6 +1,4 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $ | ||
| 3 | * | ||
| 4 | * Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de> | 2 | * Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de> |
| 5 | * | 3 | * |
| 6 | * Allow an NFS filesystem to be mounted as root. The way this works is: | 4 | * Allow an NFS filesystem to be mounted as root. The way this works is: |
| @@ -297,10 +295,10 @@ static int __init root_nfs_name(char *name) | |||
| 297 | nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ | 295 | nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ |
| 298 | nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; | 296 | nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; |
| 299 | nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; | 297 | nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; |
| 300 | nfs_data.acregmin = 3; | 298 | nfs_data.acregmin = NFS_DEF_ACREGMIN; |
| 301 | nfs_data.acregmax = 60; | 299 | nfs_data.acregmax = NFS_DEF_ACREGMAX; |
| 302 | nfs_data.acdirmin = 30; | 300 | nfs_data.acdirmin = NFS_DEF_ACDIRMIN; |
| 303 | nfs_data.acdirmax = 60; | 301 | nfs_data.acdirmax = NFS_DEF_ACDIRMAX; |
| 304 | strcpy(buf, NFS_ROOT); | 302 | strcpy(buf, NFS_ROOT); |
| 305 | 303 | ||
| 306 | /* Process options received from the remote server */ | 304 | /* Process options received from the remote server */ |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 03599bfe81cf..4dbb84df1b68 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
| @@ -129,6 +129,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 129 | sattr->ia_mode &= S_IALLUGO; | 129 | sattr->ia_mode &= S_IALLUGO; |
| 130 | 130 | ||
| 131 | dprintk("NFS call setattr\n"); | 131 | dprintk("NFS call setattr\n"); |
| 132 | if (sattr->ia_valid & ATTR_FILE) | ||
| 133 | msg.rpc_cred = nfs_file_cred(sattr->ia_file); | ||
| 132 | nfs_fattr_init(fattr); | 134 | nfs_fattr_init(fattr); |
| 133 | status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); | 135 | status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); |
| 134 | if (status == 0) | 136 | if (status == 0) |
| @@ -598,6 +600,29 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
| 598 | return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); | 600 | return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); |
| 599 | } | 601 | } |
| 600 | 602 | ||
| 603 | /* Helper functions for NFS lock bounds checking */ | ||
| 604 | #define NFS_LOCK32_OFFSET_MAX ((__s32)0x7fffffffUL) | ||
| 605 | static int nfs_lock_check_bounds(const struct file_lock *fl) | ||
| 606 | { | ||
| 607 | __s32 start, end; | ||
| 608 | |||
| 609 | start = (__s32)fl->fl_start; | ||
| 610 | if ((loff_t)start != fl->fl_start) | ||
| 611 | goto out_einval; | ||
| 612 | |||
| 613 | if (fl->fl_end != OFFSET_MAX) { | ||
| 614 | end = (__s32)fl->fl_end; | ||
| 615 | if ((loff_t)end != fl->fl_end) | ||
| 616 | goto out_einval; | ||
| 617 | } else | ||
| 618 | end = NFS_LOCK32_OFFSET_MAX; | ||
| 619 | |||
| 620 | if (start < 0 || start > end) | ||
| 621 | goto out_einval; | ||
| 622 | return 0; | ||
| 623 | out_einval: | ||
| 624 | return -EINVAL; | ||
| 625 | } | ||
| 601 | 626 | ||
| 602 | const struct nfs_rpc_ops nfs_v2_clientops = { | 627 | const struct nfs_rpc_ops nfs_v2_clientops = { |
| 603 | .version = 2, /* protocol version */ | 628 | .version = 2, /* protocol version */ |
| @@ -630,7 +655,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
| 630 | .write_setup = nfs_proc_write_setup, | 655 | .write_setup = nfs_proc_write_setup, |
| 631 | .write_done = nfs_write_done, | 656 | .write_done = nfs_write_done, |
| 632 | .commit_setup = nfs_proc_commit_setup, | 657 | .commit_setup = nfs_proc_commit_setup, |
| 633 | .file_open = nfs_open, | ||
| 634 | .file_release = nfs_release, | ||
| 635 | .lock = nfs_proc_lock, | 658 | .lock = nfs_proc_lock, |
| 659 | .lock_check_bounds = nfs_lock_check_bounds, | ||
| 636 | }; | 660 | }; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2a4a024a4e7b..1b94e3650f5c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include <linux/inet.h> | 47 | #include <linux/inet.h> |
| 48 | #include <linux/in6.h> | 48 | #include <linux/in6.h> |
| 49 | #include <net/ipv6.h> | 49 | #include <net/ipv6.h> |
| 50 | #include <linux/netdevice.h> | ||
| 50 | #include <linux/nfs_xdr.h> | 51 | #include <linux/nfs_xdr.h> |
| 51 | #include <linux/magic.h> | 52 | #include <linux/magic.h> |
| 52 | #include <linux/parser.h> | 53 | #include <linux/parser.h> |
| @@ -65,7 +66,6 @@ | |||
| 65 | enum { | 66 | enum { |
| 66 | /* Mount options that take no arguments */ | 67 | /* Mount options that take no arguments */ |
| 67 | Opt_soft, Opt_hard, | 68 | Opt_soft, Opt_hard, |
| 68 | Opt_intr, Opt_nointr, | ||
| 69 | Opt_posix, Opt_noposix, | 69 | Opt_posix, Opt_noposix, |
| 70 | Opt_cto, Opt_nocto, | 70 | Opt_cto, Opt_nocto, |
| 71 | Opt_ac, Opt_noac, | 71 | Opt_ac, Opt_noac, |
| @@ -92,8 +92,8 @@ enum { | |||
| 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
| 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
| 94 | 94 | ||
| 95 | /* Mount options that are ignored */ | 95 | /* Special mount options */ |
| 96 | Opt_userspace, Opt_deprecated, | 96 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
| 97 | 97 | ||
| 98 | Opt_err | 98 | Opt_err |
| 99 | }; | 99 | }; |
| @@ -101,10 +101,14 @@ enum { | |||
| 101 | static match_table_t nfs_mount_option_tokens = { | 101 | static match_table_t nfs_mount_option_tokens = { |
| 102 | { Opt_userspace, "bg" }, | 102 | { Opt_userspace, "bg" }, |
| 103 | { Opt_userspace, "fg" }, | 103 | { Opt_userspace, "fg" }, |
| 104 | { Opt_userspace, "retry=%s" }, | ||
| 105 | |||
| 106 | { Opt_sloppy, "sloppy" }, | ||
| 107 | |||
| 104 | { Opt_soft, "soft" }, | 108 | { Opt_soft, "soft" }, |
| 105 | { Opt_hard, "hard" }, | 109 | { Opt_hard, "hard" }, |
| 106 | { Opt_intr, "intr" }, | 110 | { Opt_deprecated, "intr" }, |
| 107 | { Opt_nointr, "nointr" }, | 111 | { Opt_deprecated, "nointr" }, |
| 108 | { Opt_posix, "posix" }, | 112 | { Opt_posix, "posix" }, |
| 109 | { Opt_noposix, "noposix" }, | 113 | { Opt_noposix, "noposix" }, |
| 110 | { Opt_cto, "cto" }, | 114 | { Opt_cto, "cto" }, |
| @@ -136,7 +140,6 @@ static match_table_t nfs_mount_option_tokens = { | |||
| 136 | { Opt_acdirmin, "acdirmin=%u" }, | 140 | { Opt_acdirmin, "acdirmin=%u" }, |
| 137 | { Opt_acdirmax, "acdirmax=%u" }, | 141 | { Opt_acdirmax, "acdirmax=%u" }, |
| 138 | { Opt_actimeo, "actimeo=%u" }, | 142 | { Opt_actimeo, "actimeo=%u" }, |
| 139 | { Opt_userspace, "retry=%u" }, | ||
| 140 | { Opt_namelen, "namlen=%u" }, | 143 | { Opt_namelen, "namlen=%u" }, |
| 141 | { Opt_mountport, "mountport=%u" }, | 144 | { Opt_mountport, "mountport=%u" }, |
| 142 | { Opt_mountvers, "mountvers=%u" }, | 145 | { Opt_mountvers, "mountvers=%u" }, |
| @@ -207,6 +210,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, | |||
| 207 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 210 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
| 208 | static void nfs_kill_super(struct super_block *); | 211 | static void nfs_kill_super(struct super_block *); |
| 209 | static void nfs_put_super(struct super_block *); | 212 | static void nfs_put_super(struct super_block *); |
| 213 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | ||
| 210 | 214 | ||
| 211 | static struct file_system_type nfs_fs_type = { | 215 | static struct file_system_type nfs_fs_type = { |
| 212 | .owner = THIS_MODULE, | 216 | .owner = THIS_MODULE, |
| @@ -234,6 +238,7 @@ static const struct super_operations nfs_sops = { | |||
| 234 | .umount_begin = nfs_umount_begin, | 238 | .umount_begin = nfs_umount_begin, |
| 235 | .show_options = nfs_show_options, | 239 | .show_options = nfs_show_options, |
| 236 | .show_stats = nfs_show_stats, | 240 | .show_stats = nfs_show_stats, |
| 241 | .remount_fs = nfs_remount, | ||
| 237 | }; | 242 | }; |
| 238 | 243 | ||
| 239 | #ifdef CONFIG_NFS_V4 | 244 | #ifdef CONFIG_NFS_V4 |
| @@ -278,6 +283,7 @@ static const struct super_operations nfs4_sops = { | |||
| 278 | .umount_begin = nfs_umount_begin, | 283 | .umount_begin = nfs_umount_begin, |
| 279 | .show_options = nfs_show_options, | 284 | .show_options = nfs_show_options, |
| 280 | .show_stats = nfs_show_stats, | 285 | .show_stats = nfs_show_stats, |
| 286 | .remount_fs = nfs_remount, | ||
| 281 | }; | 287 | }; |
| 282 | #endif | 288 | #endif |
| 283 | 289 | ||
| @@ -368,8 +374,6 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 368 | }; | 374 | }; |
| 369 | int error; | 375 | int error; |
| 370 | 376 | ||
| 371 | lock_kernel(); | ||
| 372 | |||
| 373 | error = server->nfs_client->rpc_ops->statfs(server, fh, &res); | 377 | error = server->nfs_client->rpc_ops->statfs(server, fh, &res); |
| 374 | if (error < 0) | 378 | if (error < 0) |
| 375 | goto out_err; | 379 | goto out_err; |
| @@ -401,12 +405,10 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 401 | 405 | ||
| 402 | buf->f_namelen = server->namelen; | 406 | buf->f_namelen = server->namelen; |
| 403 | 407 | ||
| 404 | unlock_kernel(); | ||
| 405 | return 0; | 408 | return 0; |
| 406 | 409 | ||
| 407 | out_err: | 410 | out_err: |
| 408 | dprintk("%s: statfs error = %d\n", __func__, -error); | 411 | dprintk("%s: statfs error = %d\n", __func__, -error); |
| 409 | unlock_kernel(); | ||
| 410 | return error; | 412 | return error; |
| 411 | } | 413 | } |
| 412 | 414 | ||
| @@ -514,13 +516,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
| 514 | if (nfss->bsize != 0) | 516 | if (nfss->bsize != 0) |
| 515 | seq_printf(m, ",bsize=%u", nfss->bsize); | 517 | seq_printf(m, ",bsize=%u", nfss->bsize); |
| 516 | seq_printf(m, ",namlen=%u", nfss->namelen); | 518 | seq_printf(m, ",namlen=%u", nfss->namelen); |
| 517 | if (nfss->acregmin != 3*HZ || showdefaults) | 519 | if (nfss->acregmin != NFS_DEF_ACREGMIN*HZ || showdefaults) |
| 518 | seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); | 520 | seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); |
| 519 | if (nfss->acregmax != 60*HZ || showdefaults) | 521 | if (nfss->acregmax != NFS_DEF_ACREGMAX*HZ || showdefaults) |
| 520 | seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); | 522 | seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); |
| 521 | if (nfss->acdirmin != 30*HZ || showdefaults) | 523 | if (nfss->acdirmin != NFS_DEF_ACDIRMIN*HZ || showdefaults) |
| 522 | seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); | 524 | seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); |
| 523 | if (nfss->acdirmax != 60*HZ || showdefaults) | 525 | if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults) |
| 524 | seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); | 526 | seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); |
| 525 | for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { | 527 | for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { |
| 526 | if (nfss->flags & nfs_infop->flag) | 528 | if (nfss->flags & nfs_infop->flag) |
| @@ -702,49 +704,233 @@ static int nfs_verify_server_address(struct sockaddr *addr) | |||
| 702 | return 0; | 704 | return 0; |
| 703 | } | 705 | } |
| 704 | 706 | ||
| 707 | static void nfs_parse_ipv4_address(char *string, size_t str_len, | ||
| 708 | struct sockaddr *sap, size_t *addr_len) | ||
| 709 | { | ||
| 710 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
| 711 | u8 *addr = (u8 *)&sin->sin_addr.s_addr; | ||
| 712 | |||
| 713 | if (str_len <= INET_ADDRSTRLEN) { | ||
| 714 | dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", | ||
| 715 | (int)str_len, string); | ||
| 716 | |||
| 717 | sin->sin_family = AF_INET; | ||
| 718 | *addr_len = sizeof(*sin); | ||
| 719 | if (in4_pton(string, str_len, addr, '\0', NULL)) | ||
| 720 | return; | ||
| 721 | } | ||
| 722 | |||
| 723 | sap->sa_family = AF_UNSPEC; | ||
| 724 | *addr_len = 0; | ||
| 725 | } | ||
| 726 | |||
| 727 | #define IPV6_SCOPE_DELIMITER '%' | ||
| 728 | |||
| 729 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
| 730 | static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | ||
| 731 | const char *delim, | ||
| 732 | struct sockaddr_in6 *sin6) | ||
| 733 | { | ||
| 734 | char *p; | ||
| 735 | size_t len; | ||
| 736 | |||
| 737 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | ||
| 738 | return ; | ||
| 739 | if (*delim != IPV6_SCOPE_DELIMITER) | ||
| 740 | return; | ||
| 741 | |||
| 742 | len = (string + str_len) - delim - 1; | ||
| 743 | p = kstrndup(delim + 1, len, GFP_KERNEL); | ||
| 744 | if (p) { | ||
| 745 | unsigned long scope_id = 0; | ||
| 746 | struct net_device *dev; | ||
| 747 | |||
| 748 | dev = dev_get_by_name(&init_net, p); | ||
| 749 | if (dev != NULL) { | ||
| 750 | scope_id = dev->ifindex; | ||
| 751 | dev_put(dev); | ||
| 752 | } else { | ||
| 753 | /* scope_id is set to zero on error */ | ||
| 754 | strict_strtoul(p, 10, &scope_id); | ||
| 755 | } | ||
| 756 | |||
| 757 | kfree(p); | ||
| 758 | sin6->sin6_scope_id = scope_id; | ||
| 759 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); | ||
| 760 | } | ||
| 761 | } | ||
| 762 | |||
| 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | ||
| 764 | struct sockaddr *sap, size_t *addr_len) | ||
| 765 | { | ||
| 766 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
| 767 | u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; | ||
| 768 | const char *delim; | ||
| 769 | |||
| 770 | if (str_len <= INET6_ADDRSTRLEN) { | ||
| 771 | dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", | ||
| 772 | (int)str_len, string); | ||
| 773 | |||
| 774 | sin6->sin6_family = AF_INET6; | ||
| 775 | *addr_len = sizeof(*sin6); | ||
| 776 | if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { | ||
| 777 | nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); | ||
| 778 | return; | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 782 | sap->sa_family = AF_UNSPEC; | ||
| 783 | *addr_len = 0; | ||
| 784 | } | ||
| 785 | #else | ||
| 786 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | ||
| 787 | struct sockaddr *sap, size_t *addr_len) | ||
| 788 | { | ||
| 789 | sap->sa_family = AF_UNSPEC; | ||
| 790 | *addr_len = 0; | ||
| 791 | } | ||
| 792 | #endif | ||
| 793 | |||
| 705 | /* | 794 | /* |
| 706 | * Parse string addresses passed in via a mount option, | 795 | * Construct a sockaddr based on the contents of a string that contains |
| 707 | * and construct a sockaddr based on the result. | 796 | * an IP address in presentation format. |
| 708 | * | 797 | * |
| 709 | * If address parsing fails, set the sockaddr's address | 798 | * If there is a problem constructing the new sockaddr, set the address |
| 710 | * family to AF_UNSPEC to force nfs_verify_server_address() | 799 | * family to AF_UNSPEC. |
| 711 | * to punt the mount. | ||
| 712 | */ | 800 | */ |
| 713 | static void nfs_parse_server_address(char *value, | 801 | static void nfs_parse_ip_address(char *string, size_t str_len, |
| 714 | struct sockaddr *sap, | 802 | struct sockaddr *sap, size_t *addr_len) |
| 715 | size_t *len) | ||
| 716 | { | 803 | { |
| 717 | if (strchr(value, ':')) { | 804 | unsigned int i, colons; |
| 718 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
| 719 | u8 *addr = (u8 *)&ap->sin6_addr.in6_u; | ||
| 720 | 805 | ||
| 721 | ap->sin6_family = AF_INET6; | 806 | colons = 0; |
| 722 | *len = sizeof(*ap); | 807 | for (i = 0; i < str_len; i++) |
| 723 | if (in6_pton(value, -1, addr, '\0', NULL)) | 808 | if (string[i] == ':') |
| 724 | return; | 809 | colons++; |
| 725 | } else { | ||
| 726 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
| 727 | u8 *addr = (u8 *)&ap->sin_addr.s_addr; | ||
| 728 | 810 | ||
| 729 | ap->sin_family = AF_INET; | 811 | if (colons >= 2) |
| 730 | *len = sizeof(*ap); | 812 | nfs_parse_ipv6_address(string, str_len, sap, addr_len); |
| 731 | if (in4_pton(value, -1, addr, '\0', NULL)) | 813 | else |
| 814 | nfs_parse_ipv4_address(string, str_len, sap, addr_len); | ||
| 815 | } | ||
| 816 | |||
| 817 | /* | ||
| 818 | * Sanity check the NFS transport protocol. | ||
| 819 | * | ||
| 820 | */ | ||
| 821 | static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) | ||
| 822 | { | ||
| 823 | switch (mnt->nfs_server.protocol) { | ||
| 824 | case XPRT_TRANSPORT_UDP: | ||
| 825 | case XPRT_TRANSPORT_TCP: | ||
| 826 | case XPRT_TRANSPORT_RDMA: | ||
| 827 | break; | ||
| 828 | default: | ||
| 829 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | |||
| 833 | /* | ||
| 834 | * For text based NFSv2/v3 mounts, the mount protocol transport default | ||
| 835 | * settings should depend upon the specified NFS transport. | ||
| 836 | */ | ||
| 837 | static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) | ||
| 838 | { | ||
| 839 | nfs_validate_transport_protocol(mnt); | ||
| 840 | |||
| 841 | if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || | ||
| 842 | mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) | ||
| 732 | return; | 843 | return; |
| 844 | switch (mnt->nfs_server.protocol) { | ||
| 845 | case XPRT_TRANSPORT_UDP: | ||
| 846 | mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; | ||
| 847 | break; | ||
| 848 | case XPRT_TRANSPORT_TCP: | ||
| 849 | case XPRT_TRANSPORT_RDMA: | ||
| 850 | mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; | ||
| 733 | } | 851 | } |
| 852 | } | ||
| 734 | 853 | ||
| 735 | sap->sa_family = AF_UNSPEC; | 854 | /* |
| 736 | *len = 0; | 855 | * Parse the value of the 'sec=' option. |
| 856 | * | ||
| 857 | * The flavor_len setting is for v4 mounts. | ||
| 858 | */ | ||
| 859 | static int nfs_parse_security_flavors(char *value, | ||
| 860 | struct nfs_parsed_mount_data *mnt) | ||
| 861 | { | ||
| 862 | substring_t args[MAX_OPT_ARGS]; | ||
| 863 | |||
| 864 | dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); | ||
| 865 | |||
| 866 | switch (match_token(value, nfs_secflavor_tokens, args)) { | ||
| 867 | case Opt_sec_none: | ||
| 868 | mnt->auth_flavor_len = 0; | ||
| 869 | mnt->auth_flavors[0] = RPC_AUTH_NULL; | ||
| 870 | break; | ||
| 871 | case Opt_sec_sys: | ||
| 872 | mnt->auth_flavor_len = 0; | ||
| 873 | mnt->auth_flavors[0] = RPC_AUTH_UNIX; | ||
| 874 | break; | ||
| 875 | case Opt_sec_krb5: | ||
| 876 | mnt->auth_flavor_len = 1; | ||
| 877 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; | ||
| 878 | break; | ||
| 879 | case Opt_sec_krb5i: | ||
| 880 | mnt->auth_flavor_len = 1; | ||
| 881 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; | ||
| 882 | break; | ||
| 883 | case Opt_sec_krb5p: | ||
| 884 | mnt->auth_flavor_len = 1; | ||
| 885 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; | ||
| 886 | break; | ||
| 887 | case Opt_sec_lkey: | ||
| 888 | mnt->auth_flavor_len = 1; | ||
| 889 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; | ||
| 890 | break; | ||
| 891 | case Opt_sec_lkeyi: | ||
| 892 | mnt->auth_flavor_len = 1; | ||
| 893 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; | ||
| 894 | break; | ||
| 895 | case Opt_sec_lkeyp: | ||
| 896 | mnt->auth_flavor_len = 1; | ||
| 897 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; | ||
| 898 | break; | ||
| 899 | case Opt_sec_spkm: | ||
| 900 | mnt->auth_flavor_len = 1; | ||
| 901 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; | ||
| 902 | break; | ||
| 903 | case Opt_sec_spkmi: | ||
| 904 | mnt->auth_flavor_len = 1; | ||
| 905 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; | ||
| 906 | break; | ||
| 907 | case Opt_sec_spkmp: | ||
| 908 | mnt->auth_flavor_len = 1; | ||
| 909 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; | ||
| 910 | break; | ||
| 911 | default: | ||
| 912 | return 0; | ||
| 913 | } | ||
| 914 | |||
| 915 | return 1; | ||
| 916 | } | ||
| 917 | |||
| 918 | static void nfs_parse_invalid_value(const char *option) | ||
| 919 | { | ||
| 920 | dfprintk(MOUNT, "NFS: bad value specified for %s option\n", option); | ||
| 737 | } | 921 | } |
| 738 | 922 | ||
| 739 | /* | 923 | /* |
| 740 | * Error-check and convert a string of mount options from user space into | 924 | * Error-check and convert a string of mount options from user space into |
| 741 | * a data structure | 925 | * a data structure. The whole mount string is processed; bad options are |
| 926 | * skipped as they are encountered. If there were no errors, return 1; | ||
| 927 | * otherwise return 0 (zero). | ||
| 742 | */ | 928 | */ |
| 743 | static int nfs_parse_mount_options(char *raw, | 929 | static int nfs_parse_mount_options(char *raw, |
| 744 | struct nfs_parsed_mount_data *mnt) | 930 | struct nfs_parsed_mount_data *mnt) |
| 745 | { | 931 | { |
| 746 | char *p, *string, *secdata; | 932 | char *p, *string, *secdata; |
| 747 | int rc; | 933 | int rc, sloppy = 0, errors = 0; |
| 748 | 934 | ||
| 749 | if (!raw) { | 935 | if (!raw) { |
| 750 | dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); | 936 | dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); |
| @@ -777,15 +963,16 @@ static int nfs_parse_mount_options(char *raw, | |||
| 777 | 963 | ||
| 778 | token = match_token(p, nfs_mount_option_tokens, args); | 964 | token = match_token(p, nfs_mount_option_tokens, args); |
| 779 | switch (token) { | 965 | switch (token) { |
| 966 | |||
| 967 | /* | ||
| 968 | * boolean options: foo/nofoo | ||
| 969 | */ | ||
| 780 | case Opt_soft: | 970 | case Opt_soft: |
| 781 | mnt->flags |= NFS_MOUNT_SOFT; | 971 | mnt->flags |= NFS_MOUNT_SOFT; |
| 782 | break; | 972 | break; |
| 783 | case Opt_hard: | 973 | case Opt_hard: |
| 784 | mnt->flags &= ~NFS_MOUNT_SOFT; | 974 | mnt->flags &= ~NFS_MOUNT_SOFT; |
| 785 | break; | 975 | break; |
| 786 | case Opt_intr: | ||
| 787 | case Opt_nointr: | ||
| 788 | break; | ||
| 789 | case Opt_posix: | 976 | case Opt_posix: |
| 790 | mnt->flags |= NFS_MOUNT_POSIX; | 977 | mnt->flags |= NFS_MOUNT_POSIX; |
| 791 | break; | 978 | break; |
| @@ -819,20 +1006,14 @@ static int nfs_parse_mount_options(char *raw, | |||
| 819 | case Opt_udp: | 1006 | case Opt_udp: |
| 820 | mnt->flags &= ~NFS_MOUNT_TCP; | 1007 | mnt->flags &= ~NFS_MOUNT_TCP; |
| 821 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; | 1008 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
| 822 | mnt->timeo = 7; | ||
| 823 | mnt->retrans = 5; | ||
| 824 | break; | 1009 | break; |
| 825 | case Opt_tcp: | 1010 | case Opt_tcp: |
| 826 | mnt->flags |= NFS_MOUNT_TCP; | 1011 | mnt->flags |= NFS_MOUNT_TCP; |
| 827 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 1012 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
| 828 | mnt->timeo = 600; | ||
| 829 | mnt->retrans = 2; | ||
| 830 | break; | 1013 | break; |
| 831 | case Opt_rdma: | 1014 | case Opt_rdma: |
| 832 | mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ | 1015 | mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ |
| 833 | mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; | 1016 | mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; |
| 834 | mnt->timeo = 600; | ||
| 835 | mnt->retrans = 2; | ||
| 836 | break; | 1017 | break; |
| 837 | case Opt_acl: | 1018 | case Opt_acl: |
| 838 | mnt->flags &= ~NFS_MOUNT_NOACL; | 1019 | mnt->flags &= ~NFS_MOUNT_NOACL; |
| @@ -853,165 +1034,144 @@ static int nfs_parse_mount_options(char *raw, | |||
| 853 | mnt->flags |= NFS_MOUNT_UNSHARED; | 1034 | mnt->flags |= NFS_MOUNT_UNSHARED; |
| 854 | break; | 1035 | break; |
| 855 | 1036 | ||
| 1037 | /* | ||
| 1038 | * options that take numeric values | ||
| 1039 | */ | ||
| 856 | case Opt_port: | 1040 | case Opt_port: |
| 857 | if (match_int(args, &option)) | 1041 | if (match_int(args, &option) || |
| 858 | return 0; | 1042 | option < 0 || option > USHORT_MAX) { |
| 859 | if (option < 0 || option > 65535) | 1043 | errors++; |
| 860 | return 0; | 1044 | nfs_parse_invalid_value("port"); |
| 861 | mnt->nfs_server.port = option; | 1045 | } else |
| 1046 | mnt->nfs_server.port = option; | ||
| 862 | break; | 1047 | break; |
| 863 | case Opt_rsize: | 1048 | case Opt_rsize: |
| 864 | if (match_int(args, &mnt->rsize)) | 1049 | if (match_int(args, &option) || option < 0) { |
| 865 | return 0; | 1050 | errors++; |
| 1051 | nfs_parse_invalid_value("rsize"); | ||
| 1052 | } else | ||
| 1053 | mnt->rsize = option; | ||
| 866 | break; | 1054 | break; |
| 867 | case Opt_wsize: | 1055 | case Opt_wsize: |
| 868 | if (match_int(args, &mnt->wsize)) | 1056 | if (match_int(args, &option) || option < 0) { |
| 869 | return 0; | 1057 | errors++; |
| 1058 | nfs_parse_invalid_value("wsize"); | ||
| 1059 | } else | ||
| 1060 | mnt->wsize = option; | ||
| 870 | break; | 1061 | break; |
| 871 | case Opt_bsize: | 1062 | case Opt_bsize: |
| 872 | if (match_int(args, &option)) | 1063 | if (match_int(args, &option) || option < 0) { |
| 873 | return 0; | 1064 | errors++; |
| 874 | if (option < 0) | 1065 | nfs_parse_invalid_value("bsize"); |
| 875 | return 0; | 1066 | } else |
| 876 | mnt->bsize = option; | 1067 | mnt->bsize = option; |
| 877 | break; | 1068 | break; |
| 878 | case Opt_timeo: | 1069 | case Opt_timeo: |
| 879 | if (match_int(args, &mnt->timeo)) | 1070 | if (match_int(args, &option) || option <= 0) { |
| 880 | return 0; | 1071 | errors++; |
| 1072 | nfs_parse_invalid_value("timeo"); | ||
| 1073 | } else | ||
| 1074 | mnt->timeo = option; | ||
| 881 | break; | 1075 | break; |
| 882 | case Opt_retrans: | 1076 | case Opt_retrans: |
| 883 | if (match_int(args, &mnt->retrans)) | 1077 | if (match_int(args, &option) || option <= 0) { |
| 884 | return 0; | 1078 | errors++; |
| 1079 | nfs_parse_invalid_value("retrans"); | ||
| 1080 | } else | ||
| 1081 | mnt->retrans = option; | ||
| 885 | break; | 1082 | break; |
| 886 | case Opt_acregmin: | 1083 | case Opt_acregmin: |
| 887 | if (match_int(args, &mnt->acregmin)) | 1084 | if (match_int(args, &option) || option < 0) { |
| 888 | return 0; | 1085 | errors++; |
| 1086 | nfs_parse_invalid_value("acregmin"); | ||
| 1087 | } else | ||
| 1088 | mnt->acregmin = option; | ||
| 889 | break; | 1089 | break; |
| 890 | case Opt_acregmax: | 1090 | case Opt_acregmax: |
| 891 | if (match_int(args, &mnt->acregmax)) | 1091 | if (match_int(args, &option) || option < 0) { |
| 892 | return 0; | 1092 | errors++; |
| 1093 | nfs_parse_invalid_value("acregmax"); | ||
| 1094 | } else | ||
| 1095 | mnt->acregmax = option; | ||
| 893 | break; | 1096 | break; |
| 894 | case Opt_acdirmin: | 1097 | case Opt_acdirmin: |
| 895 | if (match_int(args, &mnt->acdirmin)) | 1098 | if (match_int(args, &option) || option < 0) { |
| 896 | return 0; | 1099 | errors++; |
| 1100 | nfs_parse_invalid_value("acdirmin"); | ||
| 1101 | } else | ||
| 1102 | mnt->acdirmin = option; | ||
| 897 | break; | 1103 | break; |
| 898 | case Opt_acdirmax: | 1104 | case Opt_acdirmax: |
| 899 | if (match_int(args, &mnt->acdirmax)) | 1105 | if (match_int(args, &option) || option < 0) { |
| 900 | return 0; | 1106 | errors++; |
| 1107 | nfs_parse_invalid_value("acdirmax"); | ||
| 1108 | } else | ||
| 1109 | mnt->acdirmax = option; | ||
| 901 | break; | 1110 | break; |
| 902 | case Opt_actimeo: | 1111 | case Opt_actimeo: |
| 903 | if (match_int(args, &option)) | 1112 | if (match_int(args, &option) || option < 0) { |
| 904 | return 0; | 1113 | errors++; |
| 905 | if (option < 0) | 1114 | nfs_parse_invalid_value("actimeo"); |
| 906 | return 0; | 1115 | } else |
| 907 | mnt->acregmin = | 1116 | mnt->acregmin = mnt->acregmax = |
| 908 | mnt->acregmax = | 1117 | mnt->acdirmin = mnt->acdirmax = option; |
| 909 | mnt->acdirmin = | ||
| 910 | mnt->acdirmax = option; | ||
| 911 | break; | 1118 | break; |
| 912 | case Opt_namelen: | 1119 | case Opt_namelen: |
| 913 | if (match_int(args, &mnt->namlen)) | 1120 | if (match_int(args, &option) || option < 0) { |
| 914 | return 0; | 1121 | errors++; |
| 1122 | nfs_parse_invalid_value("namlen"); | ||
| 1123 | } else | ||
| 1124 | mnt->namlen = option; | ||
| 915 | break; | 1125 | break; |
| 916 | case Opt_mountport: | 1126 | case Opt_mountport: |
| 917 | if (match_int(args, &option)) | 1127 | if (match_int(args, &option) || |
| 918 | return 0; | 1128 | option < 0 || option > USHORT_MAX) { |
| 919 | if (option < 0 || option > 65535) | 1129 | errors++; |
| 920 | return 0; | 1130 | nfs_parse_invalid_value("mountport"); |
| 921 | mnt->mount_server.port = option; | 1131 | } else |
| 1132 | mnt->mount_server.port = option; | ||
| 922 | break; | 1133 | break; |
| 923 | case Opt_mountvers: | 1134 | case Opt_mountvers: |
| 924 | if (match_int(args, &option)) | 1135 | if (match_int(args, &option) || |
| 925 | return 0; | 1136 | option < NFS_MNT_VERSION || |
| 926 | if (option < 0) | 1137 | option > NFS_MNT3_VERSION) { |
| 927 | return 0; | 1138 | errors++; |
| 928 | mnt->mount_server.version = option; | 1139 | nfs_parse_invalid_value("mountvers"); |
| 1140 | } else | ||
| 1141 | mnt->mount_server.version = option; | ||
| 929 | break; | 1142 | break; |
| 930 | case Opt_nfsvers: | 1143 | case Opt_nfsvers: |
| 931 | if (match_int(args, &option)) | 1144 | if (match_int(args, &option)) { |
| 932 | return 0; | 1145 | errors++; |
| 1146 | nfs_parse_invalid_value("nfsvers"); | ||
| 1147 | break; | ||
| 1148 | } | ||
| 933 | switch (option) { | 1149 | switch (option) { |
| 934 | case 2: | 1150 | case NFS2_VERSION: |
| 935 | mnt->flags &= ~NFS_MOUNT_VER3; | 1151 | mnt->flags &= ~NFS_MOUNT_VER3; |
| 936 | break; | 1152 | break; |
| 937 | case 3: | 1153 | case NFS3_VERSION: |
| 938 | mnt->flags |= NFS_MOUNT_VER3; | 1154 | mnt->flags |= NFS_MOUNT_VER3; |
| 939 | break; | 1155 | break; |
| 940 | default: | 1156 | default: |
| 941 | goto out_unrec_vers; | 1157 | errors++; |
| 1158 | nfs_parse_invalid_value("nfsvers"); | ||
| 942 | } | 1159 | } |
| 943 | break; | 1160 | break; |
| 944 | 1161 | ||
| 1162 | /* | ||
| 1163 | * options that take text values | ||
| 1164 | */ | ||
| 945 | case Opt_sec: | 1165 | case Opt_sec: |
| 946 | string = match_strdup(args); | 1166 | string = match_strdup(args); |
| 947 | if (string == NULL) | 1167 | if (string == NULL) |
| 948 | goto out_nomem; | 1168 | goto out_nomem; |
| 949 | token = match_token(string, nfs_secflavor_tokens, args); | 1169 | rc = nfs_parse_security_flavors(string, mnt); |
| 950 | kfree(string); | 1170 | kfree(string); |
| 951 | 1171 | if (!rc) { | |
| 952 | /* | 1172 | errors++; |
| 953 | * The flags setting is for v2/v3. The flavor_len | 1173 | dfprintk(MOUNT, "NFS: unrecognized " |
| 954 | * setting is for v4. v2/v3 also need to know the | 1174 | "security flavor\n"); |
| 955 | * difference between NULL and UNIX. | ||
| 956 | */ | ||
| 957 | switch (token) { | ||
| 958 | case Opt_sec_none: | ||
| 959 | mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; | ||
| 960 | mnt->auth_flavor_len = 0; | ||
| 961 | mnt->auth_flavors[0] = RPC_AUTH_NULL; | ||
| 962 | break; | ||
| 963 | case Opt_sec_sys: | ||
| 964 | mnt->flags &= ~NFS_MOUNT_SECFLAVOUR; | ||
| 965 | mnt->auth_flavor_len = 0; | ||
| 966 | mnt->auth_flavors[0] = RPC_AUTH_UNIX; | ||
| 967 | break; | ||
| 968 | case Opt_sec_krb5: | ||
| 969 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 970 | mnt->auth_flavor_len = 1; | ||
| 971 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; | ||
| 972 | break; | ||
| 973 | case Opt_sec_krb5i: | ||
| 974 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 975 | mnt->auth_flavor_len = 1; | ||
| 976 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; | ||
| 977 | break; | ||
| 978 | case Opt_sec_krb5p: | ||
| 979 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 980 | mnt->auth_flavor_len = 1; | ||
| 981 | mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; | ||
| 982 | break; | ||
| 983 | case Opt_sec_lkey: | ||
| 984 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 985 | mnt->auth_flavor_len = 1; | ||
| 986 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; | ||
| 987 | break; | ||
| 988 | case Opt_sec_lkeyi: | ||
| 989 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 990 | mnt->auth_flavor_len = 1; | ||
| 991 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; | ||
| 992 | break; | ||
| 993 | case Opt_sec_lkeyp: | ||
| 994 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 995 | mnt->auth_flavor_len = 1; | ||
| 996 | mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; | ||
| 997 | break; | ||
| 998 | case Opt_sec_spkm: | ||
| 999 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 1000 | mnt->auth_flavor_len = 1; | ||
| 1001 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; | ||
| 1002 | break; | ||
| 1003 | case Opt_sec_spkmi: | ||
| 1004 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 1005 | mnt->auth_flavor_len = 1; | ||
| 1006 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; | ||
| 1007 | break; | ||
| 1008 | case Opt_sec_spkmp: | ||
| 1009 | mnt->flags |= NFS_MOUNT_SECFLAVOUR; | ||
| 1010 | mnt->auth_flavor_len = 1; | ||
| 1011 | mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; | ||
| 1012 | break; | ||
| 1013 | default: | ||
| 1014 | goto out_unrec_sec; | ||
| 1015 | } | 1175 | } |
| 1016 | break; | 1176 | break; |
| 1017 | case Opt_proto: | 1177 | case Opt_proto: |
| @@ -1026,24 +1186,20 @@ static int nfs_parse_mount_options(char *raw, | |||
| 1026 | case Opt_xprt_udp: | 1186 | case Opt_xprt_udp: |
| 1027 | mnt->flags &= ~NFS_MOUNT_TCP; | 1187 | mnt->flags &= ~NFS_MOUNT_TCP; |
| 1028 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; | 1188 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
| 1029 | mnt->timeo = 7; | ||
| 1030 | mnt->retrans = 5; | ||
| 1031 | break; | 1189 | break; |
| 1032 | case Opt_xprt_tcp: | 1190 | case Opt_xprt_tcp: |
| 1033 | mnt->flags |= NFS_MOUNT_TCP; | 1191 | mnt->flags |= NFS_MOUNT_TCP; |
| 1034 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 1192 | mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
| 1035 | mnt->timeo = 600; | ||
| 1036 | mnt->retrans = 2; | ||
| 1037 | break; | 1193 | break; |
| 1038 | case Opt_xprt_rdma: | 1194 | case Opt_xprt_rdma: |
| 1039 | /* vector side protocols to TCP */ | 1195 | /* vector side protocols to TCP */ |
| 1040 | mnt->flags |= NFS_MOUNT_TCP; | 1196 | mnt->flags |= NFS_MOUNT_TCP; |
| 1041 | mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; | 1197 | mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; |
| 1042 | mnt->timeo = 600; | ||
| 1043 | mnt->retrans = 2; | ||
| 1044 | break; | 1198 | break; |
| 1045 | default: | 1199 | default: |
| 1046 | goto out_unrec_xprt; | 1200 | errors++; |
| 1201 | dfprintk(MOUNT, "NFS: unrecognized " | ||
| 1202 | "transport protocol\n"); | ||
| 1047 | } | 1203 | } |
| 1048 | break; | 1204 | break; |
| 1049 | case Opt_mountproto: | 1205 | case Opt_mountproto: |
| @@ -1063,16 +1219,19 @@ static int nfs_parse_mount_options(char *raw, | |||
| 1063 | break; | 1219 | break; |
| 1064 | case Opt_xprt_rdma: /* not used for side protocols */ | 1220 | case Opt_xprt_rdma: /* not used for side protocols */ |
| 1065 | default: | 1221 | default: |
| 1066 | goto out_unrec_xprt; | 1222 | errors++; |
| 1223 | dfprintk(MOUNT, "NFS: unrecognized " | ||
| 1224 | "transport protocol\n"); | ||
| 1067 | } | 1225 | } |
| 1068 | break; | 1226 | break; |
| 1069 | case Opt_addr: | 1227 | case Opt_addr: |
| 1070 | string = match_strdup(args); | 1228 | string = match_strdup(args); |
| 1071 | if (string == NULL) | 1229 | if (string == NULL) |
| 1072 | goto out_nomem; | 1230 | goto out_nomem; |
| 1073 | nfs_parse_server_address(string, (struct sockaddr *) | 1231 | nfs_parse_ip_address(string, strlen(string), |
| 1074 | &mnt->nfs_server.address, | 1232 | (struct sockaddr *) |
| 1075 | &mnt->nfs_server.addrlen); | 1233 | &mnt->nfs_server.address, |
| 1234 | &mnt->nfs_server.addrlen); | ||
| 1076 | kfree(string); | 1235 | kfree(string); |
| 1077 | break; | 1236 | break; |
| 1078 | case Opt_clientaddr: | 1237 | case Opt_clientaddr: |
| @@ -1093,24 +1252,33 @@ static int nfs_parse_mount_options(char *raw, | |||
| 1093 | string = match_strdup(args); | 1252 | string = match_strdup(args); |
| 1094 | if (string == NULL) | 1253 | if (string == NULL) |
| 1095 | goto out_nomem; | 1254 | goto out_nomem; |
| 1096 | nfs_parse_server_address(string, (struct sockaddr *) | 1255 | nfs_parse_ip_address(string, strlen(string), |
| 1097 | &mnt->mount_server.address, | 1256 | (struct sockaddr *) |
| 1098 | &mnt->mount_server.addrlen); | 1257 | &mnt->mount_server.address, |
| 1258 | &mnt->mount_server.addrlen); | ||
| 1099 | kfree(string); | 1259 | kfree(string); |
| 1100 | break; | 1260 | break; |
| 1101 | 1261 | ||
| 1262 | /* | ||
| 1263 | * Special options | ||
| 1264 | */ | ||
| 1265 | case Opt_sloppy: | ||
| 1266 | sloppy = 1; | ||
| 1267 | dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); | ||
| 1268 | break; | ||
| 1102 | case Opt_userspace: | 1269 | case Opt_userspace: |
| 1103 | case Opt_deprecated: | 1270 | case Opt_deprecated: |
| 1271 | dfprintk(MOUNT, "NFS: ignoring mount option " | ||
| 1272 | "'%s'\n", p); | ||
| 1104 | break; | 1273 | break; |
| 1105 | 1274 | ||
| 1106 | default: | 1275 | default: |
| 1107 | goto out_unknown; | 1276 | errors++; |
| 1277 | dfprintk(MOUNT, "NFS: unrecognized mount option " | ||
| 1278 | "'%s'\n", p); | ||
| 1108 | } | 1279 | } |
| 1109 | } | 1280 | } |
| 1110 | 1281 | ||
| 1111 | nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, | ||
| 1112 | mnt->nfs_server.port); | ||
| 1113 | |||
| 1114 | return 1; | 1282 | return 1; |
| 1115 | 1283 | ||
| 1116 | out_nomem: | 1284 | out_nomem: |
| @@ -1120,21 +1288,6 @@ out_security_failure: | |||
| 1120 | free_secdata(secdata); | 1288 | free_secdata(secdata); |
| 1121 | printk(KERN_INFO "NFS: security options invalid: %d\n", rc); | 1289 | printk(KERN_INFO "NFS: security options invalid: %d\n", rc); |
| 1122 | return 0; | 1290 | return 0; |
| 1123 | out_unrec_vers: | ||
| 1124 | printk(KERN_INFO "NFS: unrecognized NFS version number\n"); | ||
| 1125 | return 0; | ||
| 1126 | |||
| 1127 | out_unrec_xprt: | ||
| 1128 | printk(KERN_INFO "NFS: unrecognized transport protocol\n"); | ||
| 1129 | return 0; | ||
| 1130 | |||
| 1131 | out_unrec_sec: | ||
| 1132 | printk(KERN_INFO "NFS: unrecognized security flavor\n"); | ||
| 1133 | return 0; | ||
| 1134 | |||
| 1135 | out_unknown: | ||
| 1136 | printk(KERN_INFO "NFS: unknown mount option: %s\n", p); | ||
| 1137 | return 0; | ||
| 1138 | } | 1291 | } |
| 1139 | 1292 | ||
| 1140 | /* | 1293 | /* |
| @@ -1188,11 +1341,146 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
| 1188 | if (status == 0) | 1341 | if (status == 0) |
| 1189 | return 0; | 1342 | return 0; |
| 1190 | 1343 | ||
| 1191 | dfprintk(MOUNT, "NFS: unable to mount server %s, error %d", | 1344 | dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", |
| 1192 | hostname, status); | 1345 | hostname, status); |
| 1193 | return status; | 1346 | return status; |
| 1194 | } | 1347 | } |
| 1195 | 1348 | ||
| 1349 | static int nfs_parse_simple_hostname(const char *dev_name, | ||
| 1350 | char **hostname, size_t maxnamlen, | ||
| 1351 | char **export_path, size_t maxpathlen) | ||
| 1352 | { | ||
| 1353 | size_t len; | ||
| 1354 | char *colon, *comma; | ||
| 1355 | |||
| 1356 | colon = strchr(dev_name, ':'); | ||
| 1357 | if (colon == NULL) | ||
| 1358 | goto out_bad_devname; | ||
| 1359 | |||
| 1360 | len = colon - dev_name; | ||
| 1361 | if (len > maxnamlen) | ||
| 1362 | goto out_hostname; | ||
| 1363 | |||
| 1364 | /* N.B. caller will free nfs_server.hostname in all cases */ | ||
| 1365 | *hostname = kstrndup(dev_name, len, GFP_KERNEL); | ||
| 1366 | if (!*hostname) | ||
| 1367 | goto out_nomem; | ||
| 1368 | |||
| 1369 | /* kill possible hostname list: not supported */ | ||
| 1370 | comma = strchr(*hostname, ','); | ||
| 1371 | if (comma != NULL) { | ||
| 1372 | if (comma == *hostname) | ||
| 1373 | goto out_bad_devname; | ||
| 1374 | *comma = '\0'; | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | colon++; | ||
| 1378 | len = strlen(colon); | ||
| 1379 | if (len > maxpathlen) | ||
| 1380 | goto out_path; | ||
| 1381 | *export_path = kstrndup(colon, len, GFP_KERNEL); | ||
| 1382 | if (!*export_path) | ||
| 1383 | goto out_nomem; | ||
| 1384 | |||
| 1385 | dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); | ||
| 1386 | return 0; | ||
| 1387 | |||
| 1388 | out_bad_devname: | ||
| 1389 | dfprintk(MOUNT, "NFS: device name not in host:path format\n"); | ||
| 1390 | return -EINVAL; | ||
| 1391 | |||
| 1392 | out_nomem: | ||
| 1393 | dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); | ||
| 1394 | return -ENOMEM; | ||
| 1395 | |||
| 1396 | out_hostname: | ||
| 1397 | dfprintk(MOUNT, "NFS: server hostname too long\n"); | ||
| 1398 | return -ENAMETOOLONG; | ||
| 1399 | |||
| 1400 | out_path: | ||
| 1401 | dfprintk(MOUNT, "NFS: export pathname too long\n"); | ||
| 1402 | return -ENAMETOOLONG; | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | /* | ||
| 1406 | * Hostname has square brackets around it because it contains one or | ||
| 1407 | * more colons. We look for the first closing square bracket, and a | ||
| 1408 | * colon must follow it. | ||
| 1409 | */ | ||
| 1410 | static int nfs_parse_protected_hostname(const char *dev_name, | ||
| 1411 | char **hostname, size_t maxnamlen, | ||
| 1412 | char **export_path, size_t maxpathlen) | ||
| 1413 | { | ||
| 1414 | size_t len; | ||
| 1415 | char *start, *end; | ||
| 1416 | |||
| 1417 | start = (char *)(dev_name + 1); | ||
| 1418 | |||
| 1419 | end = strchr(start, ']'); | ||
| 1420 | if (end == NULL) | ||
| 1421 | goto out_bad_devname; | ||
| 1422 | if (*(end + 1) != ':') | ||
| 1423 | goto out_bad_devname; | ||
| 1424 | |||
| 1425 | len = end - start; | ||
| 1426 | if (len > maxnamlen) | ||
| 1427 | goto out_hostname; | ||
| 1428 | |||
| 1429 | /* N.B. caller will free nfs_server.hostname in all cases */ | ||
| 1430 | *hostname = kstrndup(start, len, GFP_KERNEL); | ||
| 1431 | if (*hostname == NULL) | ||
| 1432 | goto out_nomem; | ||
| 1433 | |||
| 1434 | end += 2; | ||
| 1435 | len = strlen(end); | ||
| 1436 | if (len > maxpathlen) | ||
| 1437 | goto out_path; | ||
| 1438 | *export_path = kstrndup(end, len, GFP_KERNEL); | ||
| 1439 | if (!*export_path) | ||
| 1440 | goto out_nomem; | ||
| 1441 | |||
| 1442 | return 0; | ||
| 1443 | |||
| 1444 | out_bad_devname: | ||
| 1445 | dfprintk(MOUNT, "NFS: device name not in host:path format\n"); | ||
| 1446 | return -EINVAL; | ||
| 1447 | |||
| 1448 | out_nomem: | ||
| 1449 | dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); | ||
| 1450 | return -ENOMEM; | ||
| 1451 | |||
| 1452 | out_hostname: | ||
| 1453 | dfprintk(MOUNT, "NFS: server hostname too long\n"); | ||
| 1454 | return -ENAMETOOLONG; | ||
| 1455 | |||
| 1456 | out_path: | ||
| 1457 | dfprintk(MOUNT, "NFS: export pathname too long\n"); | ||
| 1458 | return -ENAMETOOLONG; | ||
| 1459 | } | ||
| 1460 | |||
| 1461 | /* | ||
| 1462 | * Split "dev_name" into "hostname:export_path". | ||
| 1463 | * | ||
| 1464 | * The leftmost colon demarks the split between the server's hostname | ||
| 1465 | * and the export path. If the hostname starts with a left square | ||
| 1466 | * bracket, then it may contain colons. | ||
| 1467 | * | ||
| 1468 | * Note: caller frees hostname and export path, even on error. | ||
| 1469 | */ | ||
| 1470 | static int nfs_parse_devname(const char *dev_name, | ||
| 1471 | char **hostname, size_t maxnamlen, | ||
| 1472 | char **export_path, size_t maxpathlen) | ||
| 1473 | { | ||
| 1474 | if (*dev_name == '[') | ||
| 1475 | return nfs_parse_protected_hostname(dev_name, | ||
| 1476 | hostname, maxnamlen, | ||
| 1477 | export_path, maxpathlen); | ||
| 1478 | |||
| 1479 | return nfs_parse_simple_hostname(dev_name, | ||
| 1480 | hostname, maxnamlen, | ||
| 1481 | export_path, maxpathlen); | ||
| 1482 | } | ||
| 1483 | |||
| 1196 | /* | 1484 | /* |
| 1197 | * Validate the NFS2/NFS3 mount data | 1485 | * Validate the NFS2/NFS3 mount data |
| 1198 | * - fills in the mount root filehandle | 1486 | * - fills in the mount root filehandle |
| @@ -1216,24 +1504,20 @@ static int nfs_validate_mount_data(void *options, | |||
| 1216 | { | 1504 | { |
| 1217 | struct nfs_mount_data *data = (struct nfs_mount_data *)options; | 1505 | struct nfs_mount_data *data = (struct nfs_mount_data *)options; |
| 1218 | 1506 | ||
| 1219 | memset(args, 0, sizeof(*args)); | ||
| 1220 | |||
| 1221 | if (data == NULL) | 1507 | if (data == NULL) |
| 1222 | goto out_no_data; | 1508 | goto out_no_data; |
| 1223 | 1509 | ||
| 1224 | args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); | 1510 | args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); |
| 1225 | args->rsize = NFS_MAX_FILE_IO_SIZE; | 1511 | args->rsize = NFS_MAX_FILE_IO_SIZE; |
| 1226 | args->wsize = NFS_MAX_FILE_IO_SIZE; | 1512 | args->wsize = NFS_MAX_FILE_IO_SIZE; |
| 1227 | args->timeo = 600; | 1513 | args->acregmin = NFS_DEF_ACREGMIN; |
| 1228 | args->retrans = 2; | 1514 | args->acregmax = NFS_DEF_ACREGMAX; |
| 1229 | args->acregmin = 3; | 1515 | args->acdirmin = NFS_DEF_ACDIRMIN; |
| 1230 | args->acregmax = 60; | 1516 | args->acdirmax = NFS_DEF_ACDIRMAX; |
| 1231 | args->acdirmin = 30; | ||
| 1232 | args->acdirmax = 60; | ||
| 1233 | args->mount_server.port = 0; /* autobind unless user sets port */ | 1517 | args->mount_server.port = 0; /* autobind unless user sets port */ |
| 1234 | args->mount_server.protocol = XPRT_TRANSPORT_UDP; | ||
| 1235 | args->nfs_server.port = 0; /* autobind unless user sets port */ | 1518 | args->nfs_server.port = 0; /* autobind unless user sets port */ |
| 1236 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 1519 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
| 1520 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
| 1237 | 1521 | ||
| 1238 | switch (data->version) { | 1522 | switch (data->version) { |
| 1239 | case 1: | 1523 | case 1: |
| @@ -1251,13 +1535,13 @@ static int nfs_validate_mount_data(void *options, | |||
| 1251 | case 5: | 1535 | case 5: |
| 1252 | memset(data->context, 0, sizeof(data->context)); | 1536 | memset(data->context, 0, sizeof(data->context)); |
| 1253 | case 6: | 1537 | case 6: |
| 1254 | if (data->flags & NFS_MOUNT_VER3) | 1538 | if (data->flags & NFS_MOUNT_VER3) { |
| 1539 | if (data->root.size > NFS3_FHSIZE || data->root.size == 0) | ||
| 1540 | goto out_invalid_fh; | ||
| 1255 | mntfh->size = data->root.size; | 1541 | mntfh->size = data->root.size; |
| 1256 | else | 1542 | } else |
| 1257 | mntfh->size = NFS2_FHSIZE; | 1543 | mntfh->size = NFS2_FHSIZE; |
| 1258 | 1544 | ||
| 1259 | if (mntfh->size > sizeof(mntfh->data)) | ||
| 1260 | goto out_invalid_fh; | ||
| 1261 | 1545 | ||
| 1262 | memcpy(mntfh->data, data->root.data, mntfh->size); | 1546 | memcpy(mntfh->data, data->root.data, mntfh->size); |
| 1263 | if (mntfh->size < sizeof(mntfh->data)) | 1547 | if (mntfh->size < sizeof(mntfh->data)) |
| @@ -1291,7 +1575,9 @@ static int nfs_validate_mount_data(void *options, | |||
| 1291 | args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); | 1575 | args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); |
| 1292 | args->namlen = data->namlen; | 1576 | args->namlen = data->namlen; |
| 1293 | args->bsize = data->bsize; | 1577 | args->bsize = data->bsize; |
| 1294 | args->auth_flavors[0] = data->pseudoflavor; | 1578 | |
| 1579 | if (data->flags & NFS_MOUNT_SECFLAVOUR) | ||
| 1580 | args->auth_flavors[0] = data->pseudoflavor; | ||
| 1295 | if (!args->nfs_server.hostname) | 1581 | if (!args->nfs_server.hostname) |
| 1296 | goto out_nomem; | 1582 | goto out_nomem; |
| 1297 | 1583 | ||
| @@ -1323,8 +1609,6 @@ static int nfs_validate_mount_data(void *options, | |||
| 1323 | 1609 | ||
| 1324 | break; | 1610 | break; |
| 1325 | default: { | 1611 | default: { |
| 1326 | unsigned int len; | ||
| 1327 | char *c; | ||
| 1328 | int status; | 1612 | int status; |
| 1329 | 1613 | ||
| 1330 | if (nfs_parse_mount_options((char *)options, args) == 0) | 1614 | if (nfs_parse_mount_options((char *)options, args) == 0) |
| @@ -1334,21 +1618,22 @@ static int nfs_validate_mount_data(void *options, | |||
| 1334 | &args->nfs_server.address)) | 1618 | &args->nfs_server.address)) |
| 1335 | goto out_no_address; | 1619 | goto out_no_address; |
| 1336 | 1620 | ||
| 1337 | c = strchr(dev_name, ':'); | 1621 | nfs_set_port((struct sockaddr *)&args->nfs_server.address, |
| 1338 | if (c == NULL) | 1622 | args->nfs_server.port); |
| 1339 | return -EINVAL; | ||
| 1340 | len = c - dev_name; | ||
| 1341 | /* N.B. caller will free nfs_server.hostname in all cases */ | ||
| 1342 | args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); | ||
| 1343 | if (!args->nfs_server.hostname) | ||
| 1344 | goto out_nomem; | ||
| 1345 | 1623 | ||
| 1346 | c++; | 1624 | nfs_set_mount_transport_protocol(args); |
| 1347 | if (strlen(c) > NFS_MAXPATHLEN) | 1625 | |
| 1348 | return -ENAMETOOLONG; | 1626 | status = nfs_parse_devname(dev_name, |
| 1349 | args->nfs_server.export_path = c; | 1627 | &args->nfs_server.hostname, |
| 1628 | PAGE_SIZE, | ||
| 1629 | &args->nfs_server.export_path, | ||
| 1630 | NFS_MAXPATHLEN); | ||
| 1631 | if (!status) | ||
| 1632 | status = nfs_try_mount(args, mntfh); | ||
| 1633 | |||
| 1634 | kfree(args->nfs_server.export_path); | ||
| 1635 | args->nfs_server.export_path = NULL; | ||
| 1350 | 1636 | ||
| 1351 | status = nfs_try_mount(args, mntfh); | ||
| 1352 | if (status) | 1637 | if (status) |
| 1353 | return status; | 1638 | return status; |
| 1354 | 1639 | ||
| @@ -1356,9 +1641,6 @@ static int nfs_validate_mount_data(void *options, | |||
| 1356 | } | 1641 | } |
| 1357 | } | 1642 | } |
| 1358 | 1643 | ||
| 1359 | if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) | ||
| 1360 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
| 1361 | |||
| 1362 | #ifndef CONFIG_NFS_V3 | 1644 | #ifndef CONFIG_NFS_V3 |
| 1363 | if (args->flags & NFS_MOUNT_VER3) | 1645 | if (args->flags & NFS_MOUNT_VER3) |
| 1364 | goto out_v3_not_compiled; | 1646 | goto out_v3_not_compiled; |
| @@ -1398,6 +1680,80 @@ out_invalid_fh: | |||
| 1398 | return -EINVAL; | 1680 | return -EINVAL; |
| 1399 | } | 1681 | } |
| 1400 | 1682 | ||
| 1683 | static int | ||
| 1684 | nfs_compare_remount_data(struct nfs_server *nfss, | ||
| 1685 | struct nfs_parsed_mount_data *data) | ||
| 1686 | { | ||
| 1687 | if (data->flags != nfss->flags || | ||
| 1688 | data->rsize != nfss->rsize || | ||
| 1689 | data->wsize != nfss->wsize || | ||
| 1690 | data->retrans != nfss->client->cl_timeout->to_retries || | ||
| 1691 | data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || | ||
| 1692 | data->acregmin != nfss->acregmin / HZ || | ||
| 1693 | data->acregmax != nfss->acregmax / HZ || | ||
| 1694 | data->acdirmin != nfss->acdirmin / HZ || | ||
| 1695 | data->acdirmax != nfss->acdirmax / HZ || | ||
| 1696 | data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || | ||
| 1697 | data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || | ||
| 1698 | memcmp(&data->nfs_server.address, &nfss->nfs_client->cl_addr, | ||
| 1699 | data->nfs_server.addrlen) != 0) | ||
| 1700 | return -EINVAL; | ||
| 1701 | |||
| 1702 | return 0; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | static int | ||
| 1706 | nfs_remount(struct super_block *sb, int *flags, char *raw_data) | ||
| 1707 | { | ||
| 1708 | int error; | ||
| 1709 | struct nfs_server *nfss = sb->s_fs_info; | ||
| 1710 | struct nfs_parsed_mount_data *data; | ||
| 1711 | struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; | ||
| 1712 | struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; | ||
| 1713 | u32 nfsvers = nfss->nfs_client->rpc_ops->version; | ||
| 1714 | |||
| 1715 | /* | ||
| 1716 | * Userspace mount programs that send binary options generally send | ||
| 1717 | * them populated with default values. We have no way to know which | ||
| 1718 | * ones were explicitly specified. Fall back to legacy behavior and | ||
| 1719 | * just return success. | ||
| 1720 | */ | ||
| 1721 | if ((nfsvers == 4 && options4->version == 1) || | ||
| 1722 | (nfsvers <= 3 && options->version >= 1 && | ||
| 1723 | options->version <= 6)) | ||
| 1724 | return 0; | ||
| 1725 | |||
| 1726 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 1727 | if (data == NULL) | ||
| 1728 | return -ENOMEM; | ||
| 1729 | |||
| 1730 | /* fill out struct with values from existing mount */ | ||
| 1731 | data->flags = nfss->flags; | ||
| 1732 | data->rsize = nfss->rsize; | ||
| 1733 | data->wsize = nfss->wsize; | ||
| 1734 | data->retrans = nfss->client->cl_timeout->to_retries; | ||
| 1735 | data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; | ||
| 1736 | data->acregmin = nfss->acregmin / HZ; | ||
| 1737 | data->acregmax = nfss->acregmax / HZ; | ||
| 1738 | data->acdirmin = nfss->acdirmin / HZ; | ||
| 1739 | data->acdirmax = nfss->acdirmax / HZ; | ||
| 1740 | data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; | ||
| 1741 | data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; | ||
| 1742 | memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, | ||
| 1743 | data->nfs_server.addrlen); | ||
| 1744 | |||
| 1745 | /* overwrite those values with any that were specified */ | ||
| 1746 | error = nfs_parse_mount_options((char *)options, data); | ||
| 1747 | if (error < 0) | ||
| 1748 | goto out; | ||
| 1749 | |||
| 1750 | /* compare new mount options with old ones */ | ||
| 1751 | error = nfs_compare_remount_data(nfss, data); | ||
| 1752 | out: | ||
| 1753 | kfree(data); | ||
| 1754 | return error; | ||
| 1755 | } | ||
| 1756 | |||
| 1401 | /* | 1757 | /* |
| 1402 | * Initialise the common bits of the superblock | 1758 | * Initialise the common bits of the superblock |
| 1403 | */ | 1759 | */ |
| @@ -1585,24 +1941,29 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
| 1585 | { | 1941 | { |
| 1586 | struct nfs_server *server = NULL; | 1942 | struct nfs_server *server = NULL; |
| 1587 | struct super_block *s; | 1943 | struct super_block *s; |
| 1588 | struct nfs_fh mntfh; | 1944 | struct nfs_parsed_mount_data *data; |
| 1589 | struct nfs_parsed_mount_data data; | 1945 | struct nfs_fh *mntfh; |
| 1590 | struct dentry *mntroot; | 1946 | struct dentry *mntroot; |
| 1591 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; | 1947 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; |
| 1592 | struct nfs_sb_mountdata sb_mntdata = { | 1948 | struct nfs_sb_mountdata sb_mntdata = { |
| 1593 | .mntflags = flags, | 1949 | .mntflags = flags, |
| 1594 | }; | 1950 | }; |
| 1595 | int error; | 1951 | int error = -ENOMEM; |
| 1596 | 1952 | ||
| 1597 | security_init_mnt_opts(&data.lsm_opts); | 1953 | data = kzalloc(sizeof(*data), GFP_KERNEL); |
| 1954 | mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); | ||
| 1955 | if (data == NULL || mntfh == NULL) | ||
| 1956 | goto out_free_fh; | ||
| 1957 | |||
| 1958 | security_init_mnt_opts(&data->lsm_opts); | ||
| 1598 | 1959 | ||
| 1599 | /* Validate the mount data */ | 1960 | /* Validate the mount data */ |
| 1600 | error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); | 1961 | error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); |
| 1601 | if (error < 0) | 1962 | if (error < 0) |
| 1602 | goto out; | 1963 | goto out; |
| 1603 | 1964 | ||
| 1604 | /* Get a volume representation */ | 1965 | /* Get a volume representation */ |
| 1605 | server = nfs_create_server(&data, &mntfh); | 1966 | server = nfs_create_server(data, mntfh); |
| 1606 | if (IS_ERR(server)) { | 1967 | if (IS_ERR(server)) { |
| 1607 | error = PTR_ERR(server); | 1968 | error = PTR_ERR(server); |
| 1608 | goto out; | 1969 | goto out; |
| @@ -1630,16 +1991,16 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
| 1630 | 1991 | ||
| 1631 | if (!s->s_root) { | 1992 | if (!s->s_root) { |
| 1632 | /* initial superblock/root creation */ | 1993 | /* initial superblock/root creation */ |
| 1633 | nfs_fill_super(s, &data); | 1994 | nfs_fill_super(s, data); |
| 1634 | } | 1995 | } |
| 1635 | 1996 | ||
| 1636 | mntroot = nfs_get_root(s, &mntfh); | 1997 | mntroot = nfs_get_root(s, mntfh); |
| 1637 | if (IS_ERR(mntroot)) { | 1998 | if (IS_ERR(mntroot)) { |
| 1638 | error = PTR_ERR(mntroot); | 1999 | error = PTR_ERR(mntroot); |
| 1639 | goto error_splat_super; | 2000 | goto error_splat_super; |
| 1640 | } | 2001 | } |
| 1641 | 2002 | ||
| 1642 | error = security_sb_set_mnt_opts(s, &data.lsm_opts); | 2003 | error = security_sb_set_mnt_opts(s, &data->lsm_opts); |
| 1643 | if (error) | 2004 | if (error) |
| 1644 | goto error_splat_root; | 2005 | goto error_splat_root; |
| 1645 | 2006 | ||
| @@ -1649,9 +2010,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
| 1649 | error = 0; | 2010 | error = 0; |
| 1650 | 2011 | ||
| 1651 | out: | 2012 | out: |
| 1652 | kfree(data.nfs_server.hostname); | 2013 | kfree(data->nfs_server.hostname); |
| 1653 | kfree(data.mount_server.hostname); | 2014 | kfree(data->mount_server.hostname); |
| 1654 | security_free_mnt_opts(&data.lsm_opts); | 2015 | security_free_mnt_opts(&data->lsm_opts); |
| 2016 | out_free_fh: | ||
| 2017 | kfree(mntfh); | ||
| 2018 | kfree(data); | ||
| 1655 | return error; | 2019 | return error; |
| 1656 | 2020 | ||
| 1657 | out_err_nosb: | 2021 | out_err_nosb: |
| @@ -1800,21 +2164,18 @@ static int nfs4_validate_mount_data(void *options, | |||
| 1800 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; | 2164 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; |
| 1801 | char *c; | 2165 | char *c; |
| 1802 | 2166 | ||
| 1803 | memset(args, 0, sizeof(*args)); | ||
| 1804 | |||
| 1805 | if (data == NULL) | 2167 | if (data == NULL) |
| 1806 | goto out_no_data; | 2168 | goto out_no_data; |
| 1807 | 2169 | ||
| 1808 | args->rsize = NFS_MAX_FILE_IO_SIZE; | 2170 | args->rsize = NFS_MAX_FILE_IO_SIZE; |
| 1809 | args->wsize = NFS_MAX_FILE_IO_SIZE; | 2171 | args->wsize = NFS_MAX_FILE_IO_SIZE; |
| 1810 | args->timeo = 600; | 2172 | args->acregmin = NFS_DEF_ACREGMIN; |
| 1811 | args->retrans = 2; | 2173 | args->acregmax = NFS_DEF_ACREGMAX; |
| 1812 | args->acregmin = 3; | 2174 | args->acdirmin = NFS_DEF_ACDIRMIN; |
| 1813 | args->acregmax = 60; | 2175 | args->acdirmax = NFS_DEF_ACDIRMAX; |
| 1814 | args->acdirmin = 30; | ||
| 1815 | args->acdirmax = 60; | ||
| 1816 | args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ | 2176 | args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ |
| 1817 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 2177 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
| 2178 | args->auth_flavor_len = 0; | ||
| 1818 | 2179 | ||
| 1819 | switch (data->version) { | 2180 | switch (data->version) { |
| 1820 | case 1: | 2181 | case 1: |
| @@ -1830,18 +2191,13 @@ static int nfs4_validate_mount_data(void *options, | |||
| 1830 | &args->nfs_server.address)) | 2191 | &args->nfs_server.address)) |
| 1831 | goto out_no_address; | 2192 | goto out_no_address; |
| 1832 | 2193 | ||
| 1833 | switch (data->auth_flavourlen) { | 2194 | if (data->auth_flavourlen) { |
| 1834 | case 0: | 2195 | if (data->auth_flavourlen > 1) |
| 1835 | args->auth_flavors[0] = RPC_AUTH_UNIX; | 2196 | goto out_inval_auth; |
| 1836 | break; | ||
| 1837 | case 1: | ||
| 1838 | if (copy_from_user(&args->auth_flavors[0], | 2197 | if (copy_from_user(&args->auth_flavors[0], |
| 1839 | data->auth_flavours, | 2198 | data->auth_flavours, |
| 1840 | sizeof(args->auth_flavors[0]))) | 2199 | sizeof(args->auth_flavors[0]))) |
| 1841 | return -EFAULT; | 2200 | return -EFAULT; |
| 1842 | break; | ||
| 1843 | default: | ||
| 1844 | goto out_inval_auth; | ||
| 1845 | } | 2201 | } |
| 1846 | 2202 | ||
| 1847 | c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); | 2203 | c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); |
| @@ -1875,10 +2231,11 @@ static int nfs4_validate_mount_data(void *options, | |||
| 1875 | args->acdirmin = data->acdirmin; | 2231 | args->acdirmin = data->acdirmin; |
| 1876 | args->acdirmax = data->acdirmax; | 2232 | args->acdirmax = data->acdirmax; |
| 1877 | args->nfs_server.protocol = data->proto; | 2233 | args->nfs_server.protocol = data->proto; |
| 2234 | nfs_validate_transport_protocol(args); | ||
| 1878 | 2235 | ||
| 1879 | break; | 2236 | break; |
| 1880 | default: { | 2237 | default: { |
| 1881 | unsigned int len; | 2238 | int status; |
| 1882 | 2239 | ||
| 1883 | if (nfs_parse_mount_options((char *)options, args) == 0) | 2240 | if (nfs_parse_mount_options((char *)options, args) == 0) |
| 1884 | return -EINVAL; | 2241 | return -EINVAL; |
| @@ -1887,44 +2244,25 @@ static int nfs4_validate_mount_data(void *options, | |||
| 1887 | &args->nfs_server.address)) | 2244 | &args->nfs_server.address)) |
| 1888 | return -EINVAL; | 2245 | return -EINVAL; |
| 1889 | 2246 | ||
| 1890 | switch (args->auth_flavor_len) { | 2247 | nfs_set_port((struct sockaddr *)&args->nfs_server.address, |
| 1891 | case 0: | 2248 | args->nfs_server.port); |
| 1892 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
| 1893 | break; | ||
| 1894 | case 1: | ||
| 1895 | break; | ||
| 1896 | default: | ||
| 1897 | goto out_inval_auth; | ||
| 1898 | } | ||
| 1899 | |||
| 1900 | /* | ||
| 1901 | * Split "dev_name" into "hostname:mntpath". | ||
| 1902 | */ | ||
| 1903 | c = strchr(dev_name, ':'); | ||
| 1904 | if (c == NULL) | ||
| 1905 | return -EINVAL; | ||
| 1906 | /* while calculating len, pretend ':' is '\0' */ | ||
| 1907 | len = c - dev_name; | ||
| 1908 | if (len > NFS4_MAXNAMLEN) | ||
| 1909 | return -ENAMETOOLONG; | ||
| 1910 | /* N.B. caller will free nfs_server.hostname in all cases */ | ||
| 1911 | args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); | ||
| 1912 | if (!args->nfs_server.hostname) | ||
| 1913 | goto out_nomem; | ||
| 1914 | 2249 | ||
| 1915 | c++; /* step over the ':' */ | 2250 | nfs_validate_transport_protocol(args); |
| 1916 | len = strlen(c); | ||
| 1917 | if (len > NFS4_MAXPATHLEN) | ||
| 1918 | return -ENAMETOOLONG; | ||
| 1919 | args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); | ||
| 1920 | if (!args->nfs_server.export_path) | ||
| 1921 | goto out_nomem; | ||
| 1922 | 2251 | ||
| 1923 | dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); | 2252 | if (args->auth_flavor_len > 1) |
| 2253 | goto out_inval_auth; | ||
| 1924 | 2254 | ||
| 1925 | if (args->client_address == NULL) | 2255 | if (args->client_address == NULL) |
| 1926 | goto out_no_client_address; | 2256 | goto out_no_client_address; |
| 1927 | 2257 | ||
| 2258 | status = nfs_parse_devname(dev_name, | ||
| 2259 | &args->nfs_server.hostname, | ||
| 2260 | NFS4_MAXNAMLEN, | ||
| 2261 | &args->nfs_server.export_path, | ||
| 2262 | NFS4_MAXPATHLEN); | ||
| 2263 | if (status < 0) | ||
| 2264 | return status; | ||
| 2265 | |||
| 1928 | break; | 2266 | break; |
| 1929 | } | 2267 | } |
| 1930 | } | 2268 | } |
| @@ -1940,10 +2278,6 @@ out_inval_auth: | |||
| 1940 | data->auth_flavourlen); | 2278 | data->auth_flavourlen); |
| 1941 | return -EINVAL; | 2279 | return -EINVAL; |
| 1942 | 2280 | ||
| 1943 | out_nomem: | ||
| 1944 | dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n"); | ||
| 1945 | return -ENOMEM; | ||
| 1946 | |||
| 1947 | out_no_address: | 2281 | out_no_address: |
| 1948 | dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); | 2282 | dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); |
| 1949 | return -EINVAL; | 2283 | return -EINVAL; |
| @@ -1959,26 +2293,31 @@ out_no_client_address: | |||
| 1959 | static int nfs4_get_sb(struct file_system_type *fs_type, | 2293 | static int nfs4_get_sb(struct file_system_type *fs_type, |
| 1960 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) | 2294 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) |
| 1961 | { | 2295 | { |
| 1962 | struct nfs_parsed_mount_data data; | 2296 | struct nfs_parsed_mount_data *data; |
| 1963 | struct super_block *s; | 2297 | struct super_block *s; |
| 1964 | struct nfs_server *server; | 2298 | struct nfs_server *server; |
| 1965 | struct nfs_fh mntfh; | 2299 | struct nfs_fh *mntfh; |
| 1966 | struct dentry *mntroot; | 2300 | struct dentry *mntroot; |
| 1967 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; | 2301 | int (*compare_super)(struct super_block *, void *) = nfs_compare_super; |
| 1968 | struct nfs_sb_mountdata sb_mntdata = { | 2302 | struct nfs_sb_mountdata sb_mntdata = { |
| 1969 | .mntflags = flags, | 2303 | .mntflags = flags, |
| 1970 | }; | 2304 | }; |
| 1971 | int error; | 2305 | int error = -ENOMEM; |
| 2306 | |||
| 2307 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 2308 | mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); | ||
| 2309 | if (data == NULL || mntfh == NULL) | ||
| 2310 | goto out_free_fh; | ||
| 1972 | 2311 | ||
| 1973 | security_init_mnt_opts(&data.lsm_opts); | 2312 | security_init_mnt_opts(&data->lsm_opts); |
| 1974 | 2313 | ||
| 1975 | /* Validate the mount data */ | 2314 | /* Validate the mount data */ |
| 1976 | error = nfs4_validate_mount_data(raw_data, &data, dev_name); | 2315 | error = nfs4_validate_mount_data(raw_data, data, dev_name); |
| 1977 | if (error < 0) | 2316 | if (error < 0) |
| 1978 | goto out; | 2317 | goto out; |
| 1979 | 2318 | ||
| 1980 | /* Get a volume representation */ | 2319 | /* Get a volume representation */ |
| 1981 | server = nfs4_create_server(&data, &mntfh); | 2320 | server = nfs4_create_server(data, mntfh); |
| 1982 | if (IS_ERR(server)) { | 2321 | if (IS_ERR(server)) { |
| 1983 | error = PTR_ERR(server); | 2322 | error = PTR_ERR(server); |
| 1984 | goto out; | 2323 | goto out; |
| @@ -2009,13 +2348,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
| 2009 | nfs4_fill_super(s); | 2348 | nfs4_fill_super(s); |
| 2010 | } | 2349 | } |
| 2011 | 2350 | ||
| 2012 | mntroot = nfs4_get_root(s, &mntfh); | 2351 | mntroot = nfs4_get_root(s, mntfh); |
| 2013 | if (IS_ERR(mntroot)) { | 2352 | if (IS_ERR(mntroot)) { |
| 2014 | error = PTR_ERR(mntroot); | 2353 | error = PTR_ERR(mntroot); |
| 2015 | goto error_splat_super; | 2354 | goto error_splat_super; |
| 2016 | } | 2355 | } |
| 2017 | 2356 | ||
| 2018 | error = security_sb_set_mnt_opts(s, &data.lsm_opts); | 2357 | error = security_sb_set_mnt_opts(s, &data->lsm_opts); |
| 2019 | if (error) | 2358 | if (error) |
| 2020 | goto error_splat_root; | 2359 | goto error_splat_root; |
| 2021 | 2360 | ||
| @@ -2025,10 +2364,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
| 2025 | error = 0; | 2364 | error = 0; |
| 2026 | 2365 | ||
| 2027 | out: | 2366 | out: |
| 2028 | kfree(data.client_address); | 2367 | kfree(data->client_address); |
| 2029 | kfree(data.nfs_server.export_path); | 2368 | kfree(data->nfs_server.export_path); |
| 2030 | kfree(data.nfs_server.hostname); | 2369 | kfree(data->nfs_server.hostname); |
| 2031 | security_free_mnt_opts(&data.lsm_opts); | 2370 | security_free_mnt_opts(&data->lsm_opts); |
| 2371 | out_free_fh: | ||
| 2372 | kfree(mntfh); | ||
| 2373 | kfree(data); | ||
| 2032 | return error; | 2374 | return error; |
| 2033 | 2375 | ||
| 2034 | out_free: | 2376 | out_free: |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6d8ace3e3259..3229e217c773 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -34,9 +34,6 @@ | |||
| 34 | /* | 34 | /* |
| 35 | * Local function declarations | 35 | * Local function declarations |
| 36 | */ | 36 | */ |
| 37 | static struct nfs_page * nfs_update_request(struct nfs_open_context*, | ||
| 38 | struct page *, | ||
| 39 | unsigned int, unsigned int); | ||
| 40 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, | 37 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, |
| 41 | struct inode *inode, int ioflags); | 38 | struct inode *inode, int ioflags); |
| 42 | static void nfs_redirty_request(struct nfs_page *req); | 39 | static void nfs_redirty_request(struct nfs_page *req); |
| @@ -136,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page) | |||
| 136 | static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) | 133 | static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) |
| 137 | { | 134 | { |
| 138 | struct inode *inode = page->mapping->host; | 135 | struct inode *inode = page->mapping->host; |
| 139 | loff_t end, i_size = i_size_read(inode); | 136 | loff_t end, i_size; |
| 140 | pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | 137 | pgoff_t end_index; |
| 141 | 138 | ||
| 139 | spin_lock(&inode->i_lock); | ||
| 140 | i_size = i_size_read(inode); | ||
| 141 | end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | ||
| 142 | if (i_size > 0 && page->index < end_index) | 142 | if (i_size > 0 && page->index < end_index) |
| 143 | return; | 143 | goto out; |
| 144 | end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); | 144 | end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); |
| 145 | if (i_size >= end) | 145 | if (i_size >= end) |
| 146 | return; | 146 | goto out; |
| 147 | nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); | ||
| 148 | i_size_write(inode, end); | 147 | i_size_write(inode, end); |
| 148 | nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); | ||
| 149 | out: | ||
| 150 | spin_unlock(&inode->i_lock); | ||
| 149 | } | 151 | } |
| 150 | 152 | ||
| 151 | /* A writeback failed: mark the page as bad, and invalidate the page cache */ | 153 | /* A writeback failed: mark the page as bad, and invalidate the page cache */ |
| @@ -169,29 +171,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int | |||
| 169 | SetPageUptodate(page); | 171 | SetPageUptodate(page); |
| 170 | } | 172 | } |
| 171 | 173 | ||
| 172 | static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | ||
| 173 | unsigned int offset, unsigned int count) | ||
| 174 | { | ||
| 175 | struct nfs_page *req; | ||
| 176 | int ret; | ||
| 177 | |||
| 178 | for (;;) { | ||
| 179 | req = nfs_update_request(ctx, page, offset, count); | ||
| 180 | if (!IS_ERR(req)) | ||
| 181 | break; | ||
| 182 | ret = PTR_ERR(req); | ||
| 183 | if (ret != -EBUSY) | ||
| 184 | return ret; | ||
| 185 | ret = nfs_wb_page(page->mapping->host, page); | ||
| 186 | if (ret != 0) | ||
| 187 | return ret; | ||
| 188 | } | ||
| 189 | /* Update file length */ | ||
| 190 | nfs_grow_file(page, offset, count); | ||
| 191 | nfs_clear_page_tag_locked(req); | ||
| 192 | return 0; | ||
| 193 | } | ||
| 194 | |||
| 195 | static int wb_priority(struct writeback_control *wbc) | 174 | static int wb_priority(struct writeback_control *wbc) |
| 196 | { | 175 | { |
| 197 | if (wbc->for_reclaim) | 176 | if (wbc->for_reclaim) |
| @@ -268,12 +247,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
| 268 | return ret; | 247 | return ret; |
| 269 | spin_lock(&inode->i_lock); | 248 | spin_lock(&inode->i_lock); |
| 270 | } | 249 | } |
| 271 | if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 250 | if (test_bit(PG_CLEAN, &req->wb_flags)) { |
| 272 | /* This request is marked for commit */ | ||
| 273 | spin_unlock(&inode->i_lock); | 251 | spin_unlock(&inode->i_lock); |
| 274 | nfs_clear_page_tag_locked(req); | 252 | BUG(); |
| 275 | nfs_pageio_complete(pgio); | ||
| 276 | return 0; | ||
| 277 | } | 253 | } |
| 278 | if (nfs_set_page_writeback(page) != 0) { | 254 | if (nfs_set_page_writeback(page) != 0) { |
| 279 | spin_unlock(&inode->i_lock); | 255 | spin_unlock(&inode->i_lock); |
| @@ -355,11 +331,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
| 355 | /* | 331 | /* |
| 356 | * Insert a write request into an inode | 332 | * Insert a write request into an inode |
| 357 | */ | 333 | */ |
| 358 | static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | 334 | static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) |
| 359 | { | 335 | { |
| 360 | struct nfs_inode *nfsi = NFS_I(inode); | 336 | struct nfs_inode *nfsi = NFS_I(inode); |
| 361 | int error; | 337 | int error; |
| 362 | 338 | ||
| 339 | error = radix_tree_preload(GFP_NOFS); | ||
| 340 | if (error != 0) | ||
| 341 | goto out; | ||
| 342 | |||
| 343 | /* Lock the request! */ | ||
| 344 | nfs_lock_request_dontget(req); | ||
| 345 | |||
| 346 | spin_lock(&inode->i_lock); | ||
| 363 | error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); | 347 | error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); |
| 364 | BUG_ON(error); | 348 | BUG_ON(error); |
| 365 | if (!nfsi->npages) { | 349 | if (!nfsi->npages) { |
| @@ -373,6 +357,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
| 373 | kref_get(&req->wb_kref); | 357 | kref_get(&req->wb_kref); |
| 374 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, | 358 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, |
| 375 | NFS_PAGE_TAG_LOCKED); | 359 | NFS_PAGE_TAG_LOCKED); |
| 360 | spin_unlock(&inode->i_lock); | ||
| 361 | radix_tree_preload_end(); | ||
| 362 | out: | ||
| 363 | return error; | ||
| 376 | } | 364 | } |
| 377 | 365 | ||
| 378 | /* | 366 | /* |
| @@ -405,19 +393,6 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
| 405 | __set_page_dirty_nobuffers(req->wb_page); | 393 | __set_page_dirty_nobuffers(req->wb_page); |
| 406 | } | 394 | } |
| 407 | 395 | ||
| 408 | /* | ||
| 409 | * Check if a request is dirty | ||
| 410 | */ | ||
| 411 | static inline int | ||
| 412 | nfs_dirty_request(struct nfs_page *req) | ||
| 413 | { | ||
| 414 | struct page *page = req->wb_page; | ||
| 415 | |||
| 416 | if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) | ||
| 417 | return 0; | ||
| 418 | return !PageWriteback(page); | ||
| 419 | } | ||
| 420 | |||
| 421 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 396 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
| 422 | /* | 397 | /* |
| 423 | * Add a request to the inode's commit list. | 398 | * Add a request to the inode's commit list. |
| @@ -430,7 +405,7 @@ nfs_mark_request_commit(struct nfs_page *req) | |||
| 430 | 405 | ||
| 431 | spin_lock(&inode->i_lock); | 406 | spin_lock(&inode->i_lock); |
| 432 | nfsi->ncommit++; | 407 | nfsi->ncommit++; |
| 433 | set_bit(PG_NEED_COMMIT, &(req)->wb_flags); | 408 | set_bit(PG_CLEAN, &(req)->wb_flags); |
| 434 | radix_tree_tag_set(&nfsi->nfs_page_tree, | 409 | radix_tree_tag_set(&nfsi->nfs_page_tree, |
| 435 | req->wb_index, | 410 | req->wb_index, |
| 436 | NFS_PAGE_TAG_COMMIT); | 411 | NFS_PAGE_TAG_COMMIT); |
| @@ -440,6 +415,19 @@ nfs_mark_request_commit(struct nfs_page *req) | |||
| 440 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 415 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
| 441 | } | 416 | } |
| 442 | 417 | ||
| 418 | static int | ||
| 419 | nfs_clear_request_commit(struct nfs_page *req) | ||
| 420 | { | ||
| 421 | struct page *page = req->wb_page; | ||
| 422 | |||
| 423 | if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { | ||
| 424 | dec_zone_page_state(page, NR_UNSTABLE_NFS); | ||
| 425 | dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); | ||
| 426 | return 1; | ||
| 427 | } | ||
| 428 | return 0; | ||
| 429 | } | ||
| 430 | |||
| 443 | static inline | 431 | static inline |
| 444 | int nfs_write_need_commit(struct nfs_write_data *data) | 432 | int nfs_write_need_commit(struct nfs_write_data *data) |
| 445 | { | 433 | { |
| @@ -449,7 +437,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) | |||
| 449 | static inline | 437 | static inline |
| 450 | int nfs_reschedule_unstable_write(struct nfs_page *req) | 438 | int nfs_reschedule_unstable_write(struct nfs_page *req) |
| 451 | { | 439 | { |
| 452 | if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 440 | if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { |
| 453 | nfs_mark_request_commit(req); | 441 | nfs_mark_request_commit(req); |
| 454 | return 1; | 442 | return 1; |
| 455 | } | 443 | } |
| @@ -465,6 +453,12 @@ nfs_mark_request_commit(struct nfs_page *req) | |||
| 465 | { | 453 | { |
| 466 | } | 454 | } |
| 467 | 455 | ||
| 456 | static inline int | ||
| 457 | nfs_clear_request_commit(struct nfs_page *req) | ||
| 458 | { | ||
| 459 | return 0; | ||
| 460 | } | ||
| 461 | |||
| 468 | static inline | 462 | static inline |
| 469 | int nfs_write_need_commit(struct nfs_write_data *data) | 463 | int nfs_write_need_commit(struct nfs_write_data *data) |
| 470 | { | 464 | { |
| @@ -522,11 +516,8 @@ static void nfs_cancel_commit_list(struct list_head *head) | |||
| 522 | 516 | ||
| 523 | while(!list_empty(head)) { | 517 | while(!list_empty(head)) { |
| 524 | req = nfs_list_entry(head->next); | 518 | req = nfs_list_entry(head->next); |
| 525 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
| 526 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, | ||
| 527 | BDI_RECLAIMABLE); | ||
| 528 | nfs_list_remove_request(req); | 519 | nfs_list_remove_request(req); |
| 529 | clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); | 520 | nfs_clear_request_commit(req); |
| 530 | nfs_inode_remove_request(req); | 521 | nfs_inode_remove_request(req); |
| 531 | nfs_unlock_request(req); | 522 | nfs_unlock_request(req); |
| 532 | } | 523 | } |
| @@ -564,110 +555,124 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg | |||
| 564 | #endif | 555 | #endif |
| 565 | 556 | ||
| 566 | /* | 557 | /* |
| 567 | * Try to update any existing write request, or create one if there is none. | 558 | * Search for an existing write request, and attempt to update |
| 568 | * In order to match, the request's credentials must match those of | 559 | * it to reflect a new dirty region on a given page. |
| 569 | * the calling process. | ||
| 570 | * | 560 | * |
| 571 | * Note: Should always be called with the Page Lock held! | 561 | * If the attempt fails, then the existing request is flushed out |
| 562 | * to disk. | ||
| 572 | */ | 563 | */ |
| 573 | static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, | 564 | static struct nfs_page *nfs_try_to_update_request(struct inode *inode, |
| 574 | struct page *page, unsigned int offset, unsigned int bytes) | 565 | struct page *page, |
| 566 | unsigned int offset, | ||
| 567 | unsigned int bytes) | ||
| 575 | { | 568 | { |
| 576 | struct address_space *mapping = page->mapping; | 569 | struct nfs_page *req; |
| 577 | struct inode *inode = mapping->host; | 570 | unsigned int rqend; |
| 578 | struct nfs_page *req, *new = NULL; | 571 | unsigned int end; |
| 579 | pgoff_t rqend, end; | 572 | int error; |
| 573 | |||
| 574 | if (!PagePrivate(page)) | ||
| 575 | return NULL; | ||
| 580 | 576 | ||
| 581 | end = offset + bytes; | 577 | end = offset + bytes; |
| 578 | spin_lock(&inode->i_lock); | ||
| 582 | 579 | ||
| 583 | for (;;) { | 580 | for (;;) { |
| 584 | /* Loop over all inode entries and see if we find | 581 | req = nfs_page_find_request_locked(page); |
| 585 | * A request for the page we wish to update | 582 | if (req == NULL) |
| 583 | goto out_unlock; | ||
| 584 | |||
| 585 | rqend = req->wb_offset + req->wb_bytes; | ||
| 586 | /* | ||
| 587 | * Tell the caller to flush out the request if | ||
| 588 | * the offsets are non-contiguous. | ||
| 589 | * Note: nfs_flush_incompatible() will already | ||
| 590 | * have flushed out requests having wrong owners. | ||
| 586 | */ | 591 | */ |
| 587 | if (new) { | 592 | if (offset > rqend |
| 588 | if (radix_tree_preload(GFP_NOFS)) { | 593 | || end < req->wb_offset) |
| 589 | nfs_release_request(new); | 594 | goto out_flushme; |
| 590 | return ERR_PTR(-ENOMEM); | ||
| 591 | } | ||
| 592 | } | ||
| 593 | 595 | ||
| 594 | spin_lock(&inode->i_lock); | 596 | if (nfs_set_page_tag_locked(req)) |
| 595 | req = nfs_page_find_request_locked(page); | ||
| 596 | if (req) { | ||
| 597 | if (!nfs_set_page_tag_locked(req)) { | ||
| 598 | int error; | ||
| 599 | |||
| 600 | spin_unlock(&inode->i_lock); | ||
| 601 | error = nfs_wait_on_request(req); | ||
| 602 | nfs_release_request(req); | ||
| 603 | if (error < 0) { | ||
| 604 | if (new) { | ||
| 605 | radix_tree_preload_end(); | ||
| 606 | nfs_release_request(new); | ||
| 607 | } | ||
| 608 | return ERR_PTR(error); | ||
| 609 | } | ||
| 610 | continue; | ||
| 611 | } | ||
| 612 | spin_unlock(&inode->i_lock); | ||
| 613 | if (new) { | ||
| 614 | radix_tree_preload_end(); | ||
| 615 | nfs_release_request(new); | ||
| 616 | } | ||
| 617 | break; | 597 | break; |
| 618 | } | ||
| 619 | 598 | ||
| 620 | if (new) { | 599 | /* The request is locked, so wait and then retry */ |
| 621 | nfs_lock_request_dontget(new); | ||
| 622 | nfs_inode_add_request(inode, new); | ||
| 623 | spin_unlock(&inode->i_lock); | ||
| 624 | radix_tree_preload_end(); | ||
| 625 | req = new; | ||
| 626 | goto zero_page; | ||
| 627 | } | ||
| 628 | spin_unlock(&inode->i_lock); | 600 | spin_unlock(&inode->i_lock); |
| 629 | 601 | error = nfs_wait_on_request(req); | |
| 630 | new = nfs_create_request(ctx, inode, page, offset, bytes); | 602 | nfs_release_request(req); |
| 631 | if (IS_ERR(new)) | 603 | if (error != 0) |
| 632 | return new; | 604 | goto out_err; |
| 605 | spin_lock(&inode->i_lock); | ||
| 633 | } | 606 | } |
| 634 | 607 | ||
| 635 | /* We have a request for our page. | 608 | if (nfs_clear_request_commit(req)) |
| 636 | * If the creds don't match, or the | 609 | radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, |
| 637 | * page addresses don't match, | 610 | req->wb_index, NFS_PAGE_TAG_COMMIT); |
| 638 | * tell the caller to wait on the conflicting | ||
| 639 | * request. | ||
| 640 | */ | ||
| 641 | rqend = req->wb_offset + req->wb_bytes; | ||
| 642 | if (req->wb_context != ctx | ||
| 643 | || req->wb_page != page | ||
| 644 | || !nfs_dirty_request(req) | ||
| 645 | || offset > rqend || end < req->wb_offset) { | ||
| 646 | nfs_clear_page_tag_locked(req); | ||
| 647 | return ERR_PTR(-EBUSY); | ||
| 648 | } | ||
| 649 | 611 | ||
| 650 | /* Okay, the request matches. Update the region */ | 612 | /* Okay, the request matches. Update the region */ |
| 651 | if (offset < req->wb_offset) { | 613 | if (offset < req->wb_offset) { |
| 652 | req->wb_offset = offset; | 614 | req->wb_offset = offset; |
| 653 | req->wb_pgbase = offset; | 615 | req->wb_pgbase = offset; |
| 654 | req->wb_bytes = max(end, rqend) - req->wb_offset; | ||
| 655 | goto zero_page; | ||
| 656 | } | 616 | } |
| 657 | |||
| 658 | if (end > rqend) | 617 | if (end > rqend) |
| 659 | req->wb_bytes = end - req->wb_offset; | 618 | req->wb_bytes = end - req->wb_offset; |
| 660 | 619 | else | |
| 620 | req->wb_bytes = rqend - req->wb_offset; | ||
| 621 | out_unlock: | ||
| 622 | spin_unlock(&inode->i_lock); | ||
| 661 | return req; | 623 | return req; |
| 662 | zero_page: | 624 | out_flushme: |
| 663 | /* If this page might potentially be marked as up to date, | 625 | spin_unlock(&inode->i_lock); |
| 664 | * then we need to zero any uninitalised data. */ | 626 | nfs_release_request(req); |
| 665 | if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE | 627 | error = nfs_wb_page(inode, page); |
| 666 | && !PageUptodate(req->wb_page)) | 628 | out_err: |
| 667 | zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); | 629 | return ERR_PTR(error); |
| 630 | } | ||
| 631 | |||
| 632 | /* | ||
| 633 | * Try to update an existing write request, or create one if there is none. | ||
| 634 | * | ||
| 635 | * Note: Should always be called with the Page Lock held to prevent races | ||
| 636 | * if we have to add a new request. Also assumes that the caller has | ||
| 637 | * already called nfs_flush_incompatible() if necessary. | ||
| 638 | */ | ||
| 639 | static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, | ||
| 640 | struct page *page, unsigned int offset, unsigned int bytes) | ||
| 641 | { | ||
| 642 | struct inode *inode = page->mapping->host; | ||
| 643 | struct nfs_page *req; | ||
| 644 | int error; | ||
| 645 | |||
| 646 | req = nfs_try_to_update_request(inode, page, offset, bytes); | ||
| 647 | if (req != NULL) | ||
| 648 | goto out; | ||
| 649 | req = nfs_create_request(ctx, inode, page, offset, bytes); | ||
| 650 | if (IS_ERR(req)) | ||
| 651 | goto out; | ||
| 652 | error = nfs_inode_add_request(inode, req); | ||
| 653 | if (error != 0) { | ||
| 654 | nfs_release_request(req); | ||
| 655 | req = ERR_PTR(error); | ||
| 656 | } | ||
| 657 | out: | ||
| 668 | return req; | 658 | return req; |
| 669 | } | 659 | } |
| 670 | 660 | ||
| 661 | static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | ||
| 662 | unsigned int offset, unsigned int count) | ||
| 663 | { | ||
| 664 | struct nfs_page *req; | ||
| 665 | |||
| 666 | req = nfs_setup_write_request(ctx, page, offset, count); | ||
| 667 | if (IS_ERR(req)) | ||
| 668 | return PTR_ERR(req); | ||
| 669 | /* Update file length */ | ||
| 670 | nfs_grow_file(page, offset, count); | ||
| 671 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | ||
| 672 | nfs_clear_page_tag_locked(req); | ||
| 673 | return 0; | ||
| 674 | } | ||
| 675 | |||
| 671 | int nfs_flush_incompatible(struct file *file, struct page *page) | 676 | int nfs_flush_incompatible(struct file *file, struct page *page) |
| 672 | { | 677 | { |
| 673 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 678 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
| @@ -685,8 +690,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
| 685 | req = nfs_page_find_request(page); | 690 | req = nfs_page_find_request(page); |
| 686 | if (req == NULL) | 691 | if (req == NULL) |
| 687 | return 0; | 692 | return 0; |
| 688 | do_flush = req->wb_page != page || req->wb_context != ctx | 693 | do_flush = req->wb_page != page || req->wb_context != ctx; |
| 689 | || !nfs_dirty_request(req); | ||
| 690 | nfs_release_request(req); | 694 | nfs_release_request(req); |
| 691 | if (!do_flush) | 695 | if (!do_flush) |
| 692 | return 0; | 696 | return 0; |
| @@ -721,10 +725,10 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
| 721 | 725 | ||
| 722 | nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); | 726 | nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); |
| 723 | 727 | ||
| 724 | dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", | 728 | dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", |
| 725 | file->f_path.dentry->d_parent->d_name.name, | 729 | file->f_path.dentry->d_parent->d_name.name, |
| 726 | file->f_path.dentry->d_name.name, count, | 730 | file->f_path.dentry->d_name.name, count, |
| 727 | (long long)(page_offset(page) +offset)); | 731 | (long long)(page_offset(page) + offset)); |
| 728 | 732 | ||
| 729 | /* If we're not using byte range locks, and we know the page | 733 | /* If we're not using byte range locks, and we know the page |
| 730 | * is up to date, it may be more efficient to extend the write | 734 | * is up to date, it may be more efficient to extend the write |
| @@ -739,24 +743,20 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
| 739 | } | 743 | } |
| 740 | 744 | ||
| 741 | status = nfs_writepage_setup(ctx, page, offset, count); | 745 | status = nfs_writepage_setup(ctx, page, offset, count); |
| 742 | __set_page_dirty_nobuffers(page); | ||
| 743 | |||
| 744 | dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", | ||
| 745 | status, (long long)i_size_read(inode)); | ||
| 746 | if (status < 0) | 746 | if (status < 0) |
| 747 | nfs_set_pageerror(page); | 747 | nfs_set_pageerror(page); |
| 748 | else | ||
| 749 | __set_page_dirty_nobuffers(page); | ||
| 750 | |||
| 751 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", | ||
| 752 | status, (long long)i_size_read(inode)); | ||
| 748 | return status; | 753 | return status; |
| 749 | } | 754 | } |
| 750 | 755 | ||
| 751 | static void nfs_writepage_release(struct nfs_page *req) | 756 | static void nfs_writepage_release(struct nfs_page *req) |
| 752 | { | 757 | { |
| 753 | 758 | ||
| 754 | if (PageError(req->wb_page)) { | 759 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { |
| 755 | nfs_end_page_writeback(req->wb_page); | ||
| 756 | nfs_inode_remove_request(req); | ||
| 757 | } else if (!nfs_reschedule_unstable_write(req)) { | ||
| 758 | /* Set the PG_uptodate flag */ | ||
| 759 | nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); | ||
| 760 | nfs_end_page_writeback(req->wb_page); | 760 | nfs_end_page_writeback(req->wb_page); |
| 761 | nfs_inode_remove_request(req); | 761 | nfs_inode_remove_request(req); |
| 762 | } else | 762 | } else |
| @@ -833,7 +833,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
| 833 | NFS_PROTO(inode)->write_setup(data, &msg); | 833 | NFS_PROTO(inode)->write_setup(data, &msg); |
| 834 | 834 | ||
| 835 | dprintk("NFS: %5u initiated write call " | 835 | dprintk("NFS: %5u initiated write call " |
| 836 | "(req %s/%Ld, %u bytes @ offset %Lu)\n", | 836 | "(req %s/%lld, %u bytes @ offset %llu)\n", |
| 837 | data->task.tk_pid, | 837 | data->task.tk_pid, |
| 838 | inode->i_sb->s_id, | 838 | inode->i_sb->s_id, |
| 839 | (long long)NFS_FILEID(inode), | 839 | (long long)NFS_FILEID(inode), |
| @@ -977,13 +977,13 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | |||
| 977 | static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) | 977 | static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) |
| 978 | { | 978 | { |
| 979 | struct nfs_write_data *data = calldata; | 979 | struct nfs_write_data *data = calldata; |
| 980 | struct nfs_page *req = data->req; | ||
| 981 | 980 | ||
| 982 | dprintk("NFS: write (%s/%Ld %d@%Ld)", | 981 | dprintk("NFS: %5u write(%s/%lld %d@%lld)", |
| 983 | req->wb_context->path.dentry->d_inode->i_sb->s_id, | 982 | task->tk_pid, |
| 984 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 983 | data->req->wb_context->path.dentry->d_inode->i_sb->s_id, |
| 985 | req->wb_bytes, | 984 | (long long) |
| 986 | (long long)req_offset(req)); | 985 | NFS_FILEID(data->req->wb_context->path.dentry->d_inode), |
| 986 | data->req->wb_bytes, (long long)req_offset(data->req)); | ||
| 987 | 987 | ||
| 988 | nfs_writeback_done(task, data); | 988 | nfs_writeback_done(task, data); |
| 989 | } | 989 | } |
| @@ -1057,7 +1057,8 @@ static void nfs_writeback_release_full(void *calldata) | |||
| 1057 | 1057 | ||
| 1058 | nfs_list_remove_request(req); | 1058 | nfs_list_remove_request(req); |
| 1059 | 1059 | ||
| 1060 | dprintk("NFS: write (%s/%Ld %d@%Ld)", | 1060 | dprintk("NFS: %5u write (%s/%lld %d@%lld)", |
| 1061 | data->task.tk_pid, | ||
| 1061 | req->wb_context->path.dentry->d_inode->i_sb->s_id, | 1062 | req->wb_context->path.dentry->d_inode->i_sb->s_id, |
| 1062 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 1063 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), |
| 1063 | req->wb_bytes, | 1064 | req->wb_bytes, |
| @@ -1077,8 +1078,6 @@ static void nfs_writeback_release_full(void *calldata) | |||
| 1077 | dprintk(" marked for commit\n"); | 1078 | dprintk(" marked for commit\n"); |
| 1078 | goto next; | 1079 | goto next; |
| 1079 | } | 1080 | } |
| 1080 | /* Set the PG_uptodate flag? */ | ||
| 1081 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | ||
| 1082 | dprintk(" OK\n"); | 1081 | dprintk(" OK\n"); |
| 1083 | remove_request: | 1082 | remove_request: |
| 1084 | nfs_end_page_writeback(page); | 1083 | nfs_end_page_writeback(page); |
| @@ -1132,7 +1131,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 1132 | static unsigned long complain; | 1131 | static unsigned long complain; |
| 1133 | 1132 | ||
| 1134 | if (time_before(complain, jiffies)) { | 1133 | if (time_before(complain, jiffies)) { |
| 1135 | dprintk("NFS: faulty NFS server %s:" | 1134 | dprintk("NFS: faulty NFS server %s:" |
| 1136 | " (committed = %d) != (stable = %d)\n", | 1135 | " (committed = %d) != (stable = %d)\n", |
| 1137 | NFS_SERVER(data->inode)->nfs_client->cl_hostname, | 1136 | NFS_SERVER(data->inode)->nfs_client->cl_hostname, |
| 1138 | resp->verf->committed, argp->stable); | 1137 | resp->verf->committed, argp->stable); |
| @@ -1296,12 +1295,9 @@ static void nfs_commit_release(void *calldata) | |||
| 1296 | while (!list_empty(&data->pages)) { | 1295 | while (!list_empty(&data->pages)) { |
| 1297 | req = nfs_list_entry(data->pages.next); | 1296 | req = nfs_list_entry(data->pages.next); |
| 1298 | nfs_list_remove_request(req); | 1297 | nfs_list_remove_request(req); |
| 1299 | clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); | 1298 | nfs_clear_request_commit(req); |
| 1300 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
| 1301 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, | ||
| 1302 | BDI_RECLAIMABLE); | ||
| 1303 | 1299 | ||
| 1304 | dprintk("NFS: commit (%s/%Ld %d@%Ld)", | 1300 | dprintk("NFS: commit (%s/%lld %d@%lld)", |
| 1305 | req->wb_context->path.dentry->d_inode->i_sb->s_id, | 1301 | req->wb_context->path.dentry->d_inode->i_sb->s_id, |
| 1306 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 1302 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), |
| 1307 | req->wb_bytes, | 1303 | req->wb_bytes, |
| @@ -1317,9 +1313,6 @@ static void nfs_commit_release(void *calldata) | |||
| 1317 | * returned by the server against all stored verfs. */ | 1313 | * returned by the server against all stored verfs. */ |
| 1318 | if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { | 1314 | if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { |
| 1319 | /* We have a match */ | 1315 | /* We have a match */ |
| 1320 | /* Set the PG_uptodate flag */ | ||
| 1321 | nfs_mark_uptodate(req->wb_page, req->wb_pgbase, | ||
| 1322 | req->wb_bytes); | ||
| 1323 | nfs_inode_remove_request(req); | 1316 | nfs_inode_remove_request(req); |
| 1324 | dprintk(" OK\n"); | 1317 | dprintk(" OK\n"); |
| 1325 | goto next; | 1318 | goto next; |
| @@ -1478,7 +1471,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
| 1478 | req = nfs_page_find_request(page); | 1471 | req = nfs_page_find_request(page); |
| 1479 | if (req == NULL) | 1472 | if (req == NULL) |
| 1480 | goto out; | 1473 | goto out; |
| 1481 | if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 1474 | if (test_bit(PG_CLEAN, &req->wb_flags)) { |
| 1482 | nfs_release_request(req); | 1475 | nfs_release_request(req); |
| 1483 | break; | 1476 | break; |
| 1484 | } | 1477 | } |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4d4760e687c3..702fa577aa6e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
| @@ -381,7 +381,7 @@ static int do_probe_callback(void *data) | |||
| 381 | .program = &cb_program, | 381 | .program = &cb_program, |
| 382 | .version = nfs_cb_version[1]->number, | 382 | .version = nfs_cb_version[1]->number, |
| 383 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 383 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
| 384 | .flags = (RPC_CLNT_CREATE_NOPING), | 384 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
| 385 | }; | 385 | }; |
| 386 | struct rpc_message msg = { | 386 | struct rpc_message msg = { |
| 387 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | 387 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cf9401e8cd0b..cfdb08b484ed 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | 21 | ||
| 22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
| 23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
| 24 | #include <linux/sysctl.h> | ||
| 25 | #include <linux/configfs.h> | 24 | #include <linux/configfs.h> |
| 26 | 25 | ||
| 27 | #include "tcp.h" | 26 | #include "tcp.h" |
| @@ -36,65 +35,6 @@ | |||
| 36 | * cluster references throughout where nodes are looked up */ | 35 | * cluster references throughout where nodes are looked up */ |
| 37 | struct o2nm_cluster *o2nm_single_cluster = NULL; | 36 | struct o2nm_cluster *o2nm_single_cluster = NULL; |
| 38 | 37 | ||
| 39 | #define OCFS2_MAX_HB_CTL_PATH 256 | ||
| 40 | static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; | ||
| 41 | |||
| 42 | static ctl_table ocfs2_nm_table[] = { | ||
| 43 | { | ||
| 44 | .ctl_name = 1, | ||
| 45 | .procname = "hb_ctl_path", | ||
| 46 | .data = ocfs2_hb_ctl_path, | ||
| 47 | .maxlen = OCFS2_MAX_HB_CTL_PATH, | ||
| 48 | .mode = 0644, | ||
| 49 | .proc_handler = &proc_dostring, | ||
| 50 | .strategy = &sysctl_string, | ||
| 51 | }, | ||
| 52 | { .ctl_name = 0 } | ||
| 53 | }; | ||
| 54 | |||
| 55 | static ctl_table ocfs2_mod_table[] = { | ||
| 56 | { | ||
| 57 | .ctl_name = FS_OCFS2_NM, | ||
| 58 | .procname = "nm", | ||
| 59 | .data = NULL, | ||
| 60 | .maxlen = 0, | ||
| 61 | .mode = 0555, | ||
| 62 | .child = ocfs2_nm_table | ||
| 63 | }, | ||
| 64 | { .ctl_name = 0} | ||
| 65 | }; | ||
| 66 | |||
| 67 | static ctl_table ocfs2_kern_table[] = { | ||
| 68 | { | ||
| 69 | .ctl_name = FS_OCFS2, | ||
| 70 | .procname = "ocfs2", | ||
| 71 | .data = NULL, | ||
| 72 | .maxlen = 0, | ||
| 73 | .mode = 0555, | ||
| 74 | .child = ocfs2_mod_table | ||
| 75 | }, | ||
| 76 | { .ctl_name = 0} | ||
| 77 | }; | ||
| 78 | |||
| 79 | static ctl_table ocfs2_root_table[] = { | ||
| 80 | { | ||
| 81 | .ctl_name = CTL_FS, | ||
| 82 | .procname = "fs", | ||
| 83 | .data = NULL, | ||
| 84 | .maxlen = 0, | ||
| 85 | .mode = 0555, | ||
| 86 | .child = ocfs2_kern_table | ||
| 87 | }, | ||
| 88 | { .ctl_name = 0 } | ||
| 89 | }; | ||
| 90 | |||
| 91 | static struct ctl_table_header *ocfs2_table_header = NULL; | ||
| 92 | |||
| 93 | const char *o2nm_get_hb_ctl_path(void) | ||
| 94 | { | ||
| 95 | return ocfs2_hb_ctl_path; | ||
| 96 | } | ||
| 97 | EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); | ||
| 98 | 38 | ||
| 99 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) | 39 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) |
| 100 | { | 40 | { |
| @@ -941,9 +881,6 @@ void o2nm_undepend_this_node(void) | |||
| 941 | 881 | ||
| 942 | static void __exit exit_o2nm(void) | 882 | static void __exit exit_o2nm(void) |
| 943 | { | 883 | { |
| 944 | if (ocfs2_table_header) | ||
| 945 | unregister_sysctl_table(ocfs2_table_header); | ||
| 946 | |||
| 947 | /* XXX sync with hb callbacks and shut down hb? */ | 884 | /* XXX sync with hb callbacks and shut down hb? */ |
| 948 | o2net_unregister_hb_callbacks(); | 885 | o2net_unregister_hb_callbacks(); |
| 949 | configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); | 886 | configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); |
| @@ -964,16 +901,9 @@ static int __init init_o2nm(void) | |||
| 964 | if (ret) | 901 | if (ret) |
| 965 | goto out; | 902 | goto out; |
| 966 | 903 | ||
| 967 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); | ||
| 968 | if (!ocfs2_table_header) { | ||
| 969 | printk(KERN_ERR "nodemanager: unable to register sysctl\n"); | ||
| 970 | ret = -ENOMEM; /* or something. */ | ||
| 971 | goto out_o2net; | ||
| 972 | } | ||
| 973 | |||
| 974 | ret = o2net_register_hb_callbacks(); | 904 | ret = o2net_register_hb_callbacks(); |
| 975 | if (ret) | 905 | if (ret) |
| 976 | goto out_sysctl; | 906 | goto out_o2net; |
| 977 | 907 | ||
| 978 | config_group_init(&o2nm_cluster_group.cs_subsys.su_group); | 908 | config_group_init(&o2nm_cluster_group.cs_subsys.su_group); |
| 979 | mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); | 909 | mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); |
| @@ -990,8 +920,6 @@ static int __init init_o2nm(void) | |||
| 990 | configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); | 920 | configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); |
| 991 | out_callbacks: | 921 | out_callbacks: |
| 992 | o2net_unregister_hb_callbacks(); | 922 | o2net_unregister_hb_callbacks(); |
| 993 | out_sysctl: | ||
| 994 | unregister_sysctl_table(ocfs2_table_header); | ||
| 995 | out_o2net: | 923 | out_o2net: |
| 996 | o2net_exit(); | 924 | o2net_exit(); |
| 997 | out: | 925 | out: |
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 7c860361b8dd..c992ea0da4ad 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h | |||
| @@ -33,10 +33,6 @@ | |||
| 33 | #include <linux/configfs.h> | 33 | #include <linux/configfs.h> |
| 34 | #include <linux/rbtree.h> | 34 | #include <linux/rbtree.h> |
| 35 | 35 | ||
| 36 | #define FS_OCFS2_NM 1 | ||
| 37 | |||
| 38 | const char *o2nm_get_hb_ctl_path(void); | ||
| 39 | |||
| 40 | struct o2nm_node { | 36 | struct o2nm_node { |
| 41 | spinlock_t nd_lock; | 37 | spinlock_t nd_lock; |
| 42 | struct config_item nd_item; | 38 | struct config_item nd_item; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index efc015c6128a..44f87caf3683 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -606,7 +606,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 606 | 606 | ||
| 607 | res->last_used = 0; | 607 | res->last_used = 0; |
| 608 | 608 | ||
| 609 | spin_lock(&dlm->spinlock); | ||
| 609 | list_add_tail(&res->tracking, &dlm->tracking_list); | 610 | list_add_tail(&res->tracking, &dlm->tracking_list); |
| 611 | spin_unlock(&dlm->spinlock); | ||
| 610 | 612 | ||
| 611 | memset(res->lvb, 0, DLM_LVB_LEN); | 613 | memset(res->lvb, 0, DLM_LVB_LEN); |
| 612 | memset(res->refmap, 0, sizeof(res->refmap)); | 614 | memset(res->refmap, 0, sizeof(res->refmap)); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 394d25a131a5..80e20d9f2780 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -1554,8 +1554,8 @@ out: | |||
| 1554 | */ | 1554 | */ |
| 1555 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | 1555 | int ocfs2_file_lock(struct file *file, int ex, int trylock) |
| 1556 | { | 1556 | { |
| 1557 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | 1557 | int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1558 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | 1558 | unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; |
| 1559 | unsigned long flags; | 1559 | unsigned long flags; |
| 1560 | struct ocfs2_file_private *fp = file->private_data; | 1560 | struct ocfs2_file_private *fp = file->private_data; |
| 1561 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1561 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
| @@ -1582,7 +1582,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1582 | * Get the lock at NLMODE to start - that way we | 1582 | * Get the lock at NLMODE to start - that way we |
| 1583 | * can cancel the upconvert request if need be. | 1583 | * can cancel the upconvert request if need be. |
| 1584 | */ | 1584 | */ |
| 1585 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | 1585 | ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); |
| 1586 | if (ret < 0) { | 1586 | if (ret < 0) { |
| 1587 | mlog_errno(ret); | 1587 | mlog_errno(ret); |
| 1588 | goto out; | 1588 | goto out; |
| @@ -1597,7 +1597,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1597 | } | 1597 | } |
| 1598 | 1598 | ||
| 1599 | lockres->l_action = OCFS2_AST_CONVERT; | 1599 | lockres->l_action = OCFS2_AST_CONVERT; |
| 1600 | lkm_flags |= LKM_CONVERT; | 1600 | lkm_flags |= DLM_LKF_CONVERT; |
| 1601 | lockres->l_requested = level; | 1601 | lockres->l_requested = level; |
| 1602 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1602 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 1603 | 1603 | ||
| @@ -1664,7 +1664,7 @@ void ocfs2_file_unlock(struct file *file) | |||
| 1664 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | 1664 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
| 1665 | return; | 1665 | return; |
| 1666 | 1666 | ||
| 1667 | if (lockres->l_level == LKM_NLMODE) | 1667 | if (lockres->l_level == DLM_LOCK_NL) |
| 1668 | return; | 1668 | return; |
| 1669 | 1669 | ||
| 1670 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | 1670 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", |
| @@ -1678,11 +1678,11 @@ void ocfs2_file_unlock(struct file *file) | |||
| 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
| 1679 | lockres->l_blocking = DLM_LOCK_EX; | 1679 | lockres->l_blocking = DLM_LOCK_EX; |
| 1680 | 1680 | ||
| 1681 | gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | 1681 | gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); |
| 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
| 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1684 | 1684 | ||
| 1685 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); | 1685 | ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); |
| 1686 | if (ret) { | 1686 | if (ret) { |
| 1687 | mlog_errno(ret); | 1687 | mlog_errno(ret); |
| 1688 | return; | 1688 | return; |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index bbd1667aa7d3..fcd120f1493a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -317,8 +317,7 @@ out: | |||
| 317 | return rc; | 317 | return rc; |
| 318 | } | 318 | } |
| 319 | 319 | ||
| 320 | static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, | 320 | static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) |
| 321 | int hangup_pending) | ||
| 322 | { | 321 | { |
| 323 | struct dlm_ctxt *dlm = conn->cc_lockspace; | 322 | struct dlm_ctxt *dlm = conn->cc_lockspace; |
| 324 | struct o2dlm_private *priv = conn->cc_private; | 323 | struct o2dlm_private *priv = conn->cc_private; |
| @@ -333,43 +332,6 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, | |||
| 333 | return 0; | 332 | return 0; |
| 334 | } | 333 | } |
| 335 | 334 | ||
| 336 | static void o2hb_stop(const char *group) | ||
| 337 | { | ||
| 338 | int ret; | ||
| 339 | char *argv[5], *envp[3]; | ||
| 340 | |||
| 341 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
| 342 | argv[1] = "-K"; | ||
| 343 | argv[2] = "-u"; | ||
| 344 | argv[3] = (char *)group; | ||
| 345 | argv[4] = NULL; | ||
| 346 | |||
| 347 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
| 348 | |||
| 349 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
| 350 | envp[0] = "HOME=/"; | ||
| 351 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
| 352 | envp[2] = NULL; | ||
| 353 | |||
| 354 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 355 | if (ret < 0) | ||
| 356 | mlog_errno(ret); | ||
| 357 | } | ||
| 358 | |||
| 359 | /* | ||
| 360 | * Hangup is a hack for tools compatibility. Older ocfs2-tools software | ||
| 361 | * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This | ||
| 362 | * happens regardless of whether the DLM got started, so we can't do it | ||
| 363 | * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into | ||
| 364 | * the glue and provide a "hangup" API for super.c to call. | ||
| 365 | * | ||
| 366 | * Other stacks will eventually provide a NULL ->hangup() pointer. | ||
| 367 | */ | ||
| 368 | static void o2cb_cluster_hangup(const char *group, int grouplen) | ||
| 369 | { | ||
| 370 | o2hb_stop(group); | ||
| 371 | } | ||
| 372 | |||
| 373 | static int o2cb_cluster_this_node(unsigned int *node) | 335 | static int o2cb_cluster_this_node(unsigned int *node) |
| 374 | { | 336 | { |
| 375 | int node_num; | 337 | int node_num; |
| @@ -388,7 +350,6 @@ static int o2cb_cluster_this_node(unsigned int *node) | |||
| 388 | static struct ocfs2_stack_operations o2cb_stack_ops = { | 350 | static struct ocfs2_stack_operations o2cb_stack_ops = { |
| 389 | .connect = o2cb_cluster_connect, | 351 | .connect = o2cb_cluster_connect, |
| 390 | .disconnect = o2cb_cluster_disconnect, | 352 | .disconnect = o2cb_cluster_disconnect, |
| 391 | .hangup = o2cb_cluster_hangup, | ||
| 392 | .this_node = o2cb_cluster_this_node, | 353 | .this_node = o2cb_cluster_this_node, |
| 393 | .dlm_lock = o2cb_dlm_lock, | 354 | .dlm_lock = o2cb_dlm_lock, |
| 394 | .dlm_unlock = o2cb_dlm_unlock, | 355 | .dlm_unlock = o2cb_dlm_unlock, |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 6b97d11f6bf8..bd7e0f3acfc7 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
| 22 | #include <linux/miscdevice.h> | 22 | #include <linux/miscdevice.h> |
| 23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
| 24 | #include <linux/smp_lock.h> | ||
| 24 | #include <linux/reboot.h> | 25 | #include <linux/reboot.h> |
| 25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
| 26 | 27 | ||
| @@ -619,10 +620,12 @@ static int ocfs2_control_open(struct inode *inode, struct file *file) | |||
| 619 | return -ENOMEM; | 620 | return -ENOMEM; |
| 620 | p->op_this_node = -1; | 621 | p->op_this_node = -1; |
| 621 | 622 | ||
| 623 | lock_kernel(); | ||
| 622 | mutex_lock(&ocfs2_control_lock); | 624 | mutex_lock(&ocfs2_control_lock); |
| 623 | file->private_data = p; | 625 | file->private_data = p; |
| 624 | list_add(&p->op_list, &ocfs2_control_private_list); | 626 | list_add(&p->op_list, &ocfs2_control_private_list); |
| 625 | mutex_unlock(&ocfs2_control_lock); | 627 | mutex_unlock(&ocfs2_control_lock); |
| 628 | unlock_kernel(); | ||
| 626 | 629 | ||
| 627 | return 0; | 630 | return 0; |
| 628 | } | 631 | } |
| @@ -816,8 +819,7 @@ out: | |||
| 816 | return rc; | 819 | return rc; |
| 817 | } | 820 | } |
| 818 | 821 | ||
| 819 | static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, | 822 | static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) |
| 820 | int hangup_pending) | ||
| 821 | { | 823 | { |
| 822 | dlm_release_lockspace(conn->cc_lockspace, 2); | 824 | dlm_release_lockspace(conn->cc_lockspace, 2); |
| 823 | conn->cc_lockspace = NULL; | 825 | conn->cc_lockspace = NULL; |
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 119f60cea9cc..10e149ae5e3a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
| 27 | #include <linux/kobject.h> | 27 | #include <linux/kobject.h> |
| 28 | #include <linux/sysfs.h> | 28 | #include <linux/sysfs.h> |
| 29 | #include <linux/sysctl.h> | ||
| 29 | 30 | ||
| 30 | #include "ocfs2_fs.h" | 31 | #include "ocfs2_fs.h" |
| 31 | 32 | ||
| @@ -33,11 +34,13 @@ | |||
| 33 | 34 | ||
| 34 | #define OCFS2_STACK_PLUGIN_O2CB "o2cb" | 35 | #define OCFS2_STACK_PLUGIN_O2CB "o2cb" |
| 35 | #define OCFS2_STACK_PLUGIN_USER "user" | 36 | #define OCFS2_STACK_PLUGIN_USER "user" |
| 37 | #define OCFS2_MAX_HB_CTL_PATH 256 | ||
| 36 | 38 | ||
| 37 | static struct ocfs2_locking_protocol *lproto; | 39 | static struct ocfs2_locking_protocol *lproto; |
| 38 | static DEFINE_SPINLOCK(ocfs2_stack_lock); | 40 | static DEFINE_SPINLOCK(ocfs2_stack_lock); |
| 39 | static LIST_HEAD(ocfs2_stack_list); | 41 | static LIST_HEAD(ocfs2_stack_list); |
| 40 | static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; | 42 | static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; |
| 43 | static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; | ||
| 41 | 44 | ||
| 42 | /* | 45 | /* |
| 43 | * The stack currently in use. If not null, active_stack->sp_count > 0, | 46 | * The stack currently in use. If not null, active_stack->sp_count > 0, |
| @@ -349,7 +352,7 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | |||
| 349 | 352 | ||
| 350 | BUG_ON(conn == NULL); | 353 | BUG_ON(conn == NULL); |
| 351 | 354 | ||
| 352 | ret = active_stack->sp_ops->disconnect(conn, hangup_pending); | 355 | ret = active_stack->sp_ops->disconnect(conn); |
| 353 | 356 | ||
| 354 | /* XXX Should we free it anyway? */ | 357 | /* XXX Should we free it anyway? */ |
| 355 | if (!ret) { | 358 | if (!ret) { |
| @@ -362,13 +365,48 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | |||
| 362 | } | 365 | } |
| 363 | EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); | 366 | EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); |
| 364 | 367 | ||
| 368 | /* | ||
| 369 | * Leave the group for this filesystem. This is executed by a userspace | ||
| 370 | * program (stored in ocfs2_hb_ctl_path). | ||
| 371 | */ | ||
| 372 | static void ocfs2_leave_group(const char *group) | ||
| 373 | { | ||
| 374 | int ret; | ||
| 375 | char *argv[5], *envp[3]; | ||
| 376 | |||
| 377 | argv[0] = ocfs2_hb_ctl_path; | ||
| 378 | argv[1] = "-K"; | ||
| 379 | argv[2] = "-u"; | ||
| 380 | argv[3] = (char *)group; | ||
| 381 | argv[4] = NULL; | ||
| 382 | |||
| 383 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
| 384 | envp[0] = "HOME=/"; | ||
| 385 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
| 386 | envp[2] = NULL; | ||
| 387 | |||
| 388 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 389 | if (ret < 0) { | ||
| 390 | printk(KERN_ERR | ||
| 391 | "ocfs2: Error %d running user helper " | ||
| 392 | "\"%s %s %s %s\"\n", | ||
| 393 | ret, argv[0], argv[1], argv[2], argv[3]); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | |||
| 397 | /* | ||
| 398 | * Hangup is a required post-umount. ocfs2-tools software expects the | ||
| 399 | * filesystem to call "ocfs2_hb_ctl" during unmount. This happens | ||
| 400 | * regardless of whether the DLM got started, so we can't do it | ||
| 401 | * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does | ||
| 402 | * the actual work. | ||
| 403 | */ | ||
| 365 | void ocfs2_cluster_hangup(const char *group, int grouplen) | 404 | void ocfs2_cluster_hangup(const char *group, int grouplen) |
| 366 | { | 405 | { |
| 367 | BUG_ON(group == NULL); | 406 | BUG_ON(group == NULL); |
| 368 | BUG_ON(group[grouplen] != '\0'); | 407 | BUG_ON(group[grouplen] != '\0'); |
| 369 | 408 | ||
| 370 | if (active_stack->sp_ops->hangup) | 409 | ocfs2_leave_group(group); |
| 371 | active_stack->sp_ops->hangup(group, grouplen); | ||
| 372 | 410 | ||
| 373 | /* cluster_disconnect() was called with hangup_pending==1 */ | 411 | /* cluster_disconnect() was called with hangup_pending==1 */ |
| 374 | ocfs2_stack_driver_put(); | 412 | ocfs2_stack_driver_put(); |
| @@ -548,10 +586,83 @@ error: | |||
| 548 | return ret; | 586 | return ret; |
| 549 | } | 587 | } |
| 550 | 588 | ||
| 589 | /* | ||
| 590 | * Sysctl bits | ||
| 591 | * | ||
| 592 | * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't | ||
| 593 | * make as much sense in a multiple cluster stack world, but it's safer | ||
| 594 | * and easier to preserve the name. | ||
| 595 | */ | ||
| 596 | |||
| 597 | #define FS_OCFS2_NM 1 | ||
| 598 | |||
| 599 | static ctl_table ocfs2_nm_table[] = { | ||
| 600 | { | ||
| 601 | .ctl_name = 1, | ||
| 602 | .procname = "hb_ctl_path", | ||
| 603 | .data = ocfs2_hb_ctl_path, | ||
| 604 | .maxlen = OCFS2_MAX_HB_CTL_PATH, | ||
| 605 | .mode = 0644, | ||
| 606 | .proc_handler = &proc_dostring, | ||
| 607 | .strategy = &sysctl_string, | ||
| 608 | }, | ||
| 609 | { .ctl_name = 0 } | ||
| 610 | }; | ||
| 611 | |||
| 612 | static ctl_table ocfs2_mod_table[] = { | ||
| 613 | { | ||
| 614 | .ctl_name = FS_OCFS2_NM, | ||
| 615 | .procname = "nm", | ||
| 616 | .data = NULL, | ||
| 617 | .maxlen = 0, | ||
| 618 | .mode = 0555, | ||
| 619 | .child = ocfs2_nm_table | ||
| 620 | }, | ||
| 621 | { .ctl_name = 0} | ||
| 622 | }; | ||
| 623 | |||
| 624 | static ctl_table ocfs2_kern_table[] = { | ||
| 625 | { | ||
| 626 | .ctl_name = FS_OCFS2, | ||
| 627 | .procname = "ocfs2", | ||
| 628 | .data = NULL, | ||
| 629 | .maxlen = 0, | ||
| 630 | .mode = 0555, | ||
| 631 | .child = ocfs2_mod_table | ||
| 632 | }, | ||
| 633 | { .ctl_name = 0} | ||
| 634 | }; | ||
| 635 | |||
| 636 | static ctl_table ocfs2_root_table[] = { | ||
| 637 | { | ||
| 638 | .ctl_name = CTL_FS, | ||
| 639 | .procname = "fs", | ||
| 640 | .data = NULL, | ||
| 641 | .maxlen = 0, | ||
| 642 | .mode = 0555, | ||
| 643 | .child = ocfs2_kern_table | ||
| 644 | }, | ||
| 645 | { .ctl_name = 0 } | ||
| 646 | }; | ||
| 647 | |||
| 648 | static struct ctl_table_header *ocfs2_table_header = NULL; | ||
| 649 | |||
| 650 | |||
| 651 | /* | ||
| 652 | * Initialization | ||
| 653 | */ | ||
| 654 | |||
| 551 | static int __init ocfs2_stack_glue_init(void) | 655 | static int __init ocfs2_stack_glue_init(void) |
| 552 | { | 656 | { |
| 553 | strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); | 657 | strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); |
| 554 | 658 | ||
| 659 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); | ||
| 660 | if (!ocfs2_table_header) { | ||
| 661 | printk(KERN_ERR | ||
| 662 | "ocfs2 stack glue: unable to register sysctl\n"); | ||
| 663 | return -ENOMEM; /* or something. */ | ||
| 664 | } | ||
| 665 | |||
| 555 | return ocfs2_sysfs_init(); | 666 | return ocfs2_sysfs_init(); |
| 556 | } | 667 | } |
| 557 | 668 | ||
| @@ -559,6 +670,8 @@ static void __exit ocfs2_stack_glue_exit(void) | |||
| 559 | { | 670 | { |
| 560 | lproto = NULL; | 671 | lproto = NULL; |
| 561 | ocfs2_sysfs_exit(); | 672 | ocfs2_sysfs_exit(); |
| 673 | if (ocfs2_table_header) | ||
| 674 | unregister_sysctl_table(ocfs2_table_header); | ||
| 562 | } | 675 | } |
| 563 | 676 | ||
| 564 | MODULE_AUTHOR("Oracle"); | 677 | MODULE_AUTHOR("Oracle"); |
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 005e4f170e0f..db56281dd1be 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
| @@ -134,22 +134,10 @@ struct ocfs2_stack_operations { | |||
| 134 | * be freed. Thus, a stack must not return from ->disconnect() | 134 | * be freed. Thus, a stack must not return from ->disconnect() |
| 135 | * until it will no longer reference the conn pointer. | 135 | * until it will no longer reference the conn pointer. |
| 136 | * | 136 | * |
| 137 | * If hangup_pending is zero, ocfs2_cluster_disconnect() will also | 137 | * Once this call returns, the stack glue will be dropping this |
| 138 | * be dropping the reference on the module. | 138 | * connection's reference on the module. |
| 139 | */ | 139 | */ |
| 140 | int (*disconnect)(struct ocfs2_cluster_connection *conn, | 140 | int (*disconnect)(struct ocfs2_cluster_connection *conn); |
| 141 | int hangup_pending); | ||
| 142 | |||
| 143 | /* | ||
| 144 | * ocfs2_cluster_hangup() exists for compatibility with older | ||
| 145 | * ocfs2 tools. Only the classic stack really needs it. As such | ||
| 146 | * ->hangup() is not required of all stacks. See the comment by | ||
| 147 | * ocfs2_cluster_hangup() for more details. | ||
| 148 | * | ||
| 149 | * Note that ocfs2_cluster_hangup() can only be called if | ||
| 150 | * hangup_pending was passed to ocfs2_cluster_disconnect(). | ||
| 151 | */ | ||
| 152 | void (*hangup)(const char *group, int grouplen); | ||
| 153 | 141 | ||
| 154 | /* | 142 | /* |
| 155 | * ->this_node() returns the cluster's unique identifier for the | 143 | * ->this_node() returns the cluster's unique identifier for the |
| @@ -258,4 +246,5 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) | |||
| 258 | /* Used by stack plugins */ | 246 | /* Used by stack plugins */ |
| 259 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); | 247 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); |
| 260 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); | 248 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); |
| 249 | |||
| 261 | #endif /* STACKGLUE_H */ | 250 | #endif /* STACKGLUE_H */ |
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
| 17 | #include <linux/backing-dev.h> | 17 | #include <linux/backing-dev.h> |
| 18 | #include <linux/capability.h> | 18 | #include <linux/capability.h> |
| 19 | #include <linux/securebits.h> | ||
| 19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
| 20 | #include <linux/mount.h> | 21 | #include <linux/mount.h> |
| 21 | #include <linux/vfs.h> | 22 | #include <linux/vfs.h> |
| @@ -425,7 +426,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) | |||
| 425 | { | 426 | { |
| 426 | struct nameidata nd; | 427 | struct nameidata nd; |
| 427 | int old_fsuid, old_fsgid; | 428 | int old_fsuid, old_fsgid; |
| 428 | kernel_cap_t old_cap; | 429 | kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ |
| 429 | int res; | 430 | int res; |
| 430 | 431 | ||
| 431 | if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ | 432 | if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ |
| @@ -433,23 +434,27 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) | |||
| 433 | 434 | ||
| 434 | old_fsuid = current->fsuid; | 435 | old_fsuid = current->fsuid; |
| 435 | old_fsgid = current->fsgid; | 436 | old_fsgid = current->fsgid; |
| 436 | old_cap = current->cap_effective; | ||
| 437 | 437 | ||
| 438 | current->fsuid = current->uid; | 438 | current->fsuid = current->uid; |
| 439 | current->fsgid = current->gid; | 439 | current->fsgid = current->gid; |
| 440 | 440 | ||
| 441 | /* | 441 | if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
| 442 | * Clear the capabilities if we switch to a non-root user | 442 | /* |
| 443 | * | 443 | * Clear the capabilities if we switch to a non-root user |
| 444 | * FIXME: There is a race here against sys_capset. The | 444 | */ |
| 445 | * capabilities can change yet we will restore the old | 445 | #ifndef CONFIG_SECURITY_FILE_CAPABILITIES |
| 446 | * value below. We should hold task_capabilities_lock, | 446 | /* |
| 447 | * but we cannot because user_path_walk can sleep. | 447 | * FIXME: There is a race here against sys_capset. The |
| 448 | */ | 448 | * capabilities can change yet we will restore the old |
| 449 | if (current->uid) | 449 | * value below. We should hold task_capabilities_lock, |
| 450 | cap_clear(current->cap_effective); | 450 | * but we cannot because user_path_walk can sleep. |
| 451 | else | 451 | */ |
| 452 | current->cap_effective = current->cap_permitted; | 452 | #endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ |
| 453 | if (current->uid) | ||
| 454 | old_cap = cap_set_effective(__cap_empty_set); | ||
| 455 | else | ||
| 456 | old_cap = cap_set_effective(current->cap_permitted); | ||
| 457 | } | ||
| 453 | 458 | ||
| 454 | res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); | 459 | res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); |
| 455 | if (res) | 460 | if (res) |
| @@ -478,7 +483,9 @@ out_path_release: | |||
| 478 | out: | 483 | out: |
| 479 | current->fsuid = old_fsuid; | 484 | current->fsuid = old_fsuid; |
| 480 | current->fsgid = old_fsgid; | 485 | current->fsgid = old_fsgid; |
| 481 | current->cap_effective = old_cap; | 486 | |
| 487 | if (!issecure(SECURE_NO_SETUID_FIXUP)) | ||
| 488 | cap_set_effective(old_cap); | ||
| 482 | 489 | ||
| 483 | return res; | 490 | return res; |
| 484 | } | 491 | } |
| @@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void) | |||
| 1003 | void free_write_pipe(struct file *f) | 1003 | void free_write_pipe(struct file *f) |
| 1004 | { | 1004 | { |
| 1005 | free_pipe_info(f->f_dentry->d_inode); | 1005 | free_pipe_info(f->f_dentry->d_inode); |
| 1006 | dput(f->f_path.dentry); | 1006 | path_put(&f->f_path); |
| 1007 | mntput(f->f_path.mnt); | ||
| 1008 | put_filp(f); | 1007 | put_filp(f); |
| 1009 | } | 1008 | } |
| 1010 | 1009 | ||
| @@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf) | |||
| 1015 | return ERR_PTR(-ENFILE); | 1014 | return ERR_PTR(-ENFILE); |
| 1016 | 1015 | ||
| 1017 | /* Grab pipe from the writer */ | 1016 | /* Grab pipe from the writer */ |
| 1018 | f->f_path.mnt = mntget(wrf->f_path.mnt); | 1017 | f->f_path = wrf->f_path; |
| 1019 | f->f_path.dentry = dget(wrf->f_path.dentry); | 1018 | path_get(&wrf->f_path); |
| 1020 | f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; | 1019 | f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; |
| 1021 | 1020 | ||
| 1022 | f->f_pos = 0; | 1021 | f->f_pos = 0; |
| @@ -1068,8 +1067,7 @@ int do_pipe(int *fd) | |||
| 1068 | err_fdr: | 1067 | err_fdr: |
| 1069 | put_unused_fd(fdr); | 1068 | put_unused_fd(fdr); |
| 1070 | err_read_pipe: | 1069 | err_read_pipe: |
| 1071 | dput(fr->f_dentry); | 1070 | path_put(&fr->f_path); |
| 1072 | mntput(fr->f_vfsmnt); | ||
| 1073 | put_filp(fr); | 1071 | put_filp(fr); |
| 1074 | err_write_pipe: | 1072 | err_write_pipe: |
| 1075 | free_write_pipe(fw); | 1073 | free_write_pipe(fw); |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b455371e7ff..58c3e6a8e15e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -233,7 +233,7 @@ static int check_mem_permission(struct task_struct *task) | |||
| 233 | */ | 233 | */ |
| 234 | if (task->parent == current && (task->ptrace & PT_PTRACED) && | 234 | if (task->parent == current && (task->ptrace & PT_PTRACED) && |
| 235 | task_is_stopped_or_traced(task) && | 235 | task_is_stopped_or_traced(task) && |
| 236 | ptrace_may_attach(task)) | 236 | ptrace_may_access(task, PTRACE_MODE_ATTACH)) |
| 237 | return 0; | 237 | return 0; |
| 238 | 238 | ||
| 239 | /* | 239 | /* |
| @@ -251,7 +251,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task) | |||
| 251 | task_lock(task); | 251 | task_lock(task); |
| 252 | if (task->mm != mm) | 252 | if (task->mm != mm) |
| 253 | goto out; | 253 | goto out; |
| 254 | if (task->mm != current->mm && __ptrace_may_attach(task) < 0) | 254 | if (task->mm != current->mm && |
| 255 | __ptrace_may_access(task, PTRACE_MODE_READ) < 0) | ||
| 255 | goto out; | 256 | goto out; |
| 256 | task_unlock(task); | 257 | task_unlock(task); |
| 257 | return mm; | 258 | return mm; |
| @@ -518,7 +519,7 @@ static int proc_fd_access_allowed(struct inode *inode) | |||
| 518 | */ | 519 | */ |
| 519 | task = get_proc_task(inode); | 520 | task = get_proc_task(inode); |
| 520 | if (task) { | 521 | if (task) { |
| 521 | allowed = ptrace_may_attach(task); | 522 | allowed = ptrace_may_access(task, PTRACE_MODE_READ); |
| 522 | put_task_struct(task); | 523 | put_task_struct(task); |
| 523 | } | 524 | } |
| 524 | return allowed; | 525 | return allowed; |
| @@ -904,7 +905,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, | |||
| 904 | if (!task) | 905 | if (!task) |
| 905 | goto out_no_task; | 906 | goto out_no_task; |
| 906 | 907 | ||
| 907 | if (!ptrace_may_attach(task)) | 908 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 908 | goto out; | 909 | goto out; |
| 909 | 910 | ||
| 910 | ret = -ENOMEM; | 911 | ret = -ENOMEM; |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 7e277f2ad466..c652d469dc08 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
| @@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off, | |||
| 123 | return proc_calc_metrics(page, start, off, count, eof, len); | 123 | return proc_calc_metrics(page, start, off, count, eof, len); |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | int __attribute__((weak)) arch_report_meminfo(char *page) | ||
| 127 | { | ||
| 128 | return 0; | ||
| 129 | } | ||
| 130 | |||
| 126 | static int meminfo_read_proc(char *page, char **start, off_t off, | 131 | static int meminfo_read_proc(char *page, char **start, off_t off, |
| 127 | int count, int *eof, void *data) | 132 | int count, int *eof, void *data) |
| 128 | { | 133 | { |
| @@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 221 | 226 | ||
| 222 | len += hugetlb_report_meminfo(page + len); | 227 | len += hugetlb_report_meminfo(page + len); |
| 223 | 228 | ||
| 229 | len += arch_report_meminfo(page + len); | ||
| 230 | |||
| 224 | return proc_calc_metrics(page, start, off, count, eof, len); | 231 | return proc_calc_metrics(page, start, off, count, eof, len); |
| 225 | #undef K | 232 | #undef K |
| 226 | } | 233 | } |
| @@ -472,6 +479,13 @@ static const struct file_operations proc_vmalloc_operations = { | |||
| 472 | }; | 479 | }; |
| 473 | #endif | 480 | #endif |
| 474 | 481 | ||
| 482 | #ifndef arch_irq_stat_cpu | ||
| 483 | #define arch_irq_stat_cpu(cpu) 0 | ||
| 484 | #endif | ||
| 485 | #ifndef arch_irq_stat | ||
| 486 | #define arch_irq_stat() 0 | ||
| 487 | #endif | ||
| 488 | |||
| 475 | static int show_stat(struct seq_file *p, void *v) | 489 | static int show_stat(struct seq_file *p, void *v) |
| 476 | { | 490 | { |
| 477 | int i; | 491 | int i; |
| @@ -509,7 +523,9 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 509 | sum += temp; | 523 | sum += temp; |
| 510 | per_irq_sum[j] += temp; | 524 | per_irq_sum[j] += temp; |
| 511 | } | 525 | } |
| 526 | sum += arch_irq_stat_cpu(i); | ||
| 512 | } | 527 | } |
| 528 | sum += arch_irq_stat(); | ||
| 513 | 529 | ||
| 514 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | 530 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", |
| 515 | (unsigned long long)cputime64_to_clock_t(user), | 531 | (unsigned long long)cputime64_to_clock_t(user), |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 17403629e330..164bd9f9ede3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -210,7 +210,7 @@ static int show_map(struct seq_file *m, void *v) | |||
| 210 | dev_t dev = 0; | 210 | dev_t dev = 0; |
| 211 | int len; | 211 | int len; |
| 212 | 212 | ||
| 213 | if (maps_protect && !ptrace_may_attach(task)) | 213 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) |
| 214 | return -EACCES; | 214 | return -EACCES; |
| 215 | 215 | ||
| 216 | if (file) { | 216 | if (file) { |
| @@ -315,9 +315,9 @@ struct mem_size_stats { | |||
| 315 | }; | 315 | }; |
| 316 | 316 | ||
| 317 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 317 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
| 318 | void *private) | 318 | struct mm_walk *walk) |
| 319 | { | 319 | { |
| 320 | struct mem_size_stats *mss = private; | 320 | struct mem_size_stats *mss = walk->private; |
| 321 | struct vm_area_struct *vma = mss->vma; | 321 | struct vm_area_struct *vma = mss->vma; |
| 322 | pte_t *pte, ptent; | 322 | pte_t *pte, ptent; |
| 323 | spinlock_t *ptl; | 323 | spinlock_t *ptl; |
| @@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 365 | return 0; | 365 | return 0; |
| 366 | } | 366 | } |
| 367 | 367 | ||
| 368 | static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; | ||
| 369 | |||
| 370 | static int show_smap(struct seq_file *m, void *v) | 368 | static int show_smap(struct seq_file *m, void *v) |
| 371 | { | 369 | { |
| 372 | struct vm_area_struct *vma = v; | 370 | struct vm_area_struct *vma = v; |
| 373 | struct mem_size_stats mss; | 371 | struct mem_size_stats mss; |
| 374 | int ret; | 372 | int ret; |
| 373 | struct mm_walk smaps_walk = { | ||
| 374 | .pmd_entry = smaps_pte_range, | ||
| 375 | .mm = vma->vm_mm, | ||
| 376 | .private = &mss, | ||
| 377 | }; | ||
| 375 | 378 | ||
| 376 | memset(&mss, 0, sizeof mss); | 379 | memset(&mss, 0, sizeof mss); |
| 377 | mss.vma = vma; | 380 | mss.vma = vma; |
| 378 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 381 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
| 379 | walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, | 382 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
| 380 | &smaps_walk, &mss); | ||
| 381 | 383 | ||
| 382 | ret = show_map(m, v); | 384 | ret = show_map(m, v); |
| 383 | if (ret) | 385 | if (ret) |
| @@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = { | |||
| 426 | }; | 428 | }; |
| 427 | 429 | ||
| 428 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 430 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
| 429 | unsigned long end, void *private) | 431 | unsigned long end, struct mm_walk *walk) |
| 430 | { | 432 | { |
| 431 | struct vm_area_struct *vma = private; | 433 | struct vm_area_struct *vma = walk->private; |
| 432 | pte_t *pte, ptent; | 434 | pte_t *pte, ptent; |
| 433 | spinlock_t *ptl; | 435 | spinlock_t *ptl; |
| 434 | struct page *page; | 436 | struct page *page; |
| @@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
| 452 | return 0; | 454 | return 0; |
| 453 | } | 455 | } |
| 454 | 456 | ||
| 455 | static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; | ||
| 456 | |||
| 457 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 457 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
| 458 | size_t count, loff_t *ppos) | 458 | size_t count, loff_t *ppos) |
| 459 | { | 459 | { |
| @@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
| 476 | return -ESRCH; | 476 | return -ESRCH; |
| 477 | mm = get_task_mm(task); | 477 | mm = get_task_mm(task); |
| 478 | if (mm) { | 478 | if (mm) { |
| 479 | struct mm_walk clear_refs_walk = { | ||
| 480 | .pmd_entry = clear_refs_pte_range, | ||
| 481 | .mm = mm, | ||
| 482 | }; | ||
| 479 | down_read(&mm->mmap_sem); | 483 | down_read(&mm->mmap_sem); |
| 480 | for (vma = mm->mmap; vma; vma = vma->vm_next) | 484 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
| 485 | clear_refs_walk.private = vma; | ||
| 481 | if (!is_vm_hugetlb_page(vma)) | 486 | if (!is_vm_hugetlb_page(vma)) |
| 482 | walk_page_range(mm, vma->vm_start, vma->vm_end, | 487 | walk_page_range(vma->vm_start, vma->vm_end, |
| 483 | &clear_refs_walk, vma); | 488 | &clear_refs_walk); |
| 489 | } | ||
| 484 | flush_tlb_mm(mm); | 490 | flush_tlb_mm(mm); |
| 485 | up_read(&mm->mmap_sem); | 491 | up_read(&mm->mmap_sem); |
| 486 | mmput(mm); | 492 | mmput(mm); |
| @@ -528,9 +534,9 @@ static int add_to_pagemap(unsigned long addr, u64 pfn, | |||
| 528 | } | 534 | } |
| 529 | 535 | ||
| 530 | static int pagemap_pte_hole(unsigned long start, unsigned long end, | 536 | static int pagemap_pte_hole(unsigned long start, unsigned long end, |
| 531 | void *private) | 537 | struct mm_walk *walk) |
| 532 | { | 538 | { |
| 533 | struct pagemapread *pm = private; | 539 | struct pagemapread *pm = walk->private; |
| 534 | unsigned long addr; | 540 | unsigned long addr; |
| 535 | int err = 0; | 541 | int err = 0; |
| 536 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 542 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
| @@ -547,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) | |||
| 547 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); | 553 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); |
| 548 | } | 554 | } |
| 549 | 555 | ||
| 556 | static unsigned long pte_to_pagemap_entry(pte_t pte) | ||
| 557 | { | ||
| 558 | unsigned long pme = 0; | ||
| 559 | if (is_swap_pte(pte)) | ||
| 560 | pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | ||
| 561 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | ||
| 562 | else if (pte_present(pte)) | ||
| 563 | pme = PM_PFRAME(pte_pfn(pte)) | ||
| 564 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | ||
| 565 | return pme; | ||
| 566 | } | ||
| 567 | |||
| 550 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 568 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
| 551 | void *private) | 569 | struct mm_walk *walk) |
| 552 | { | 570 | { |
| 553 | struct pagemapread *pm = private; | 571 | struct vm_area_struct *vma; |
| 572 | struct pagemapread *pm = walk->private; | ||
| 554 | pte_t *pte; | 573 | pte_t *pte; |
| 555 | int err = 0; | 574 | int err = 0; |
| 556 | 575 | ||
| 576 | /* find the first VMA at or above 'addr' */ | ||
| 577 | vma = find_vma(walk->mm, addr); | ||
| 557 | for (; addr != end; addr += PAGE_SIZE) { | 578 | for (; addr != end; addr += PAGE_SIZE) { |
| 558 | u64 pfn = PM_NOT_PRESENT; | 579 | u64 pfn = PM_NOT_PRESENT; |
| 559 | pte = pte_offset_map(pmd, addr); | 580 | |
| 560 | if (is_swap_pte(*pte)) | 581 | /* check to see if we've left 'vma' behind |
| 561 | pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte)) | 582 | * and need a new, higher one */ |
| 562 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | 583 | if (vma && (addr >= vma->vm_end)) |
| 563 | else if (pte_present(*pte)) | 584 | vma = find_vma(walk->mm, addr); |
| 564 | pfn = PM_PFRAME(pte_pfn(*pte)) | 585 | |
| 565 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | 586 | /* check that 'vma' actually covers this address, |
| 566 | /* unmap so we're not in atomic when we copy to userspace */ | 587 | * and that it isn't a huge page vma */ |
| 567 | pte_unmap(pte); | 588 | if (vma && (vma->vm_start <= addr) && |
| 589 | !is_vm_hugetlb_page(vma)) { | ||
| 590 | pte = pte_offset_map(pmd, addr); | ||
| 591 | pfn = pte_to_pagemap_entry(*pte); | ||
| 592 | /* unmap before userspace copy */ | ||
| 593 | pte_unmap(pte); | ||
| 594 | } | ||
| 568 | err = add_to_pagemap(addr, pfn, pm); | 595 | err = add_to_pagemap(addr, pfn, pm); |
| 569 | if (err) | 596 | if (err) |
| 570 | return err; | 597 | return err; |
| @@ -575,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 575 | return err; | 602 | return err; |
| 576 | } | 603 | } |
| 577 | 604 | ||
| 578 | static struct mm_walk pagemap_walk = { | ||
| 579 | .pmd_entry = pagemap_pte_range, | ||
| 580 | .pte_hole = pagemap_pte_hole | ||
| 581 | }; | ||
| 582 | |||
| 583 | /* | 605 | /* |
| 584 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 606 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
| 585 | * | 607 | * |
| @@ -614,12 +636,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
| 614 | struct pagemapread pm; | 636 | struct pagemapread pm; |
| 615 | int pagecount; | 637 | int pagecount; |
| 616 | int ret = -ESRCH; | 638 | int ret = -ESRCH; |
| 639 | struct mm_walk pagemap_walk; | ||
| 640 | unsigned long src; | ||
| 641 | unsigned long svpfn; | ||
| 642 | unsigned long start_vaddr; | ||
| 643 | unsigned long end_vaddr; | ||
| 617 | 644 | ||
| 618 | if (!task) | 645 | if (!task) |
| 619 | goto out; | 646 | goto out; |
| 620 | 647 | ||
| 621 | ret = -EACCES; | 648 | ret = -EACCES; |
| 622 | if (!ptrace_may_attach(task)) | 649 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 623 | goto out_task; | 650 | goto out_task; |
| 624 | 651 | ||
| 625 | ret = -EINVAL; | 652 | ret = -EINVAL; |
| @@ -632,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
| 632 | if (!mm) | 659 | if (!mm) |
| 633 | goto out_task; | 660 | goto out_task; |
| 634 | 661 | ||
| 635 | ret = -ENOMEM; | 662 | |
| 636 | uaddr = (unsigned long)buf & PAGE_MASK; | 663 | uaddr = (unsigned long)buf & PAGE_MASK; |
| 637 | uend = (unsigned long)(buf + count); | 664 | uend = (unsigned long)(buf + count); |
| 638 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; | 665 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; |
| 639 | pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); | 666 | ret = 0; |
| 667 | if (pagecount == 0) | ||
| 668 | goto out_mm; | ||
| 669 | pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); | ||
| 670 | ret = -ENOMEM; | ||
| 640 | if (!pages) | 671 | if (!pages) |
| 641 | goto out_mm; | 672 | goto out_mm; |
| 642 | 673 | ||
| @@ -657,33 +688,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
| 657 | pm.out = (u64 *)buf; | 688 | pm.out = (u64 *)buf; |
| 658 | pm.end = (u64 *)(buf + count); | 689 | pm.end = (u64 *)(buf + count); |
| 659 | 690 | ||
| 660 | if (!ptrace_may_attach(task)) { | 691 | pagemap_walk.pmd_entry = pagemap_pte_range; |
| 661 | ret = -EIO; | 692 | pagemap_walk.pte_hole = pagemap_pte_hole; |
| 662 | } else { | 693 | pagemap_walk.mm = mm; |
| 663 | unsigned long src = *ppos; | 694 | pagemap_walk.private = ± |
| 664 | unsigned long svpfn = src / PM_ENTRY_BYTES; | 695 | |
| 665 | unsigned long start_vaddr = svpfn << PAGE_SHIFT; | 696 | src = *ppos; |
| 666 | unsigned long end_vaddr = TASK_SIZE_OF(task); | 697 | svpfn = src / PM_ENTRY_BYTES; |
| 667 | 698 | start_vaddr = svpfn << PAGE_SHIFT; | |
| 668 | /* watch out for wraparound */ | 699 | end_vaddr = TASK_SIZE_OF(task); |
| 669 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | 700 | |
| 670 | start_vaddr = end_vaddr; | 701 | /* watch out for wraparound */ |
| 671 | 702 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | |
| 672 | /* | 703 | start_vaddr = end_vaddr; |
| 673 | * The odds are that this will stop walking way | 704 | |
| 674 | * before end_vaddr, because the length of the | 705 | /* |
| 675 | * user buffer is tracked in "pm", and the walk | 706 | * The odds are that this will stop walking way |
| 676 | * will stop when we hit the end of the buffer. | 707 | * before end_vaddr, because the length of the |
| 677 | */ | 708 | * user buffer is tracked in "pm", and the walk |
| 678 | ret = walk_page_range(mm, start_vaddr, end_vaddr, | 709 | * will stop when we hit the end of the buffer. |
| 679 | &pagemap_walk, &pm); | 710 | */ |
| 680 | if (ret == PM_END_OF_BUFFER) | 711 | ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); |
| 681 | ret = 0; | 712 | if (ret == PM_END_OF_BUFFER) |
| 682 | /* don't need mmap_sem for these, but this looks cleaner */ | 713 | ret = 0; |
| 683 | *ppos += (char *)pm.out - buf; | 714 | /* don't need mmap_sem for these, but this looks cleaner */ |
| 684 | if (!ret) | 715 | *ppos += (char *)pm.out - buf; |
| 685 | ret = (char *)pm.out - buf; | 716 | if (!ret) |
| 686 | } | 717 | ret = (char *)pm.out - buf; |
| 687 | 718 | ||
| 688 | out_pages: | 719 | out_pages: |
| 689 | for (; pagecount; pagecount--) { | 720 | for (; pagecount; pagecount--) { |
| @@ -716,7 +747,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v) | |||
| 716 | struct proc_maps_private *priv = m->private; | 747 | struct proc_maps_private *priv = m->private; |
| 717 | struct task_struct *task = priv->task; | 748 | struct task_struct *task = priv->task; |
| 718 | 749 | ||
| 719 | if (maps_protect && !ptrace_may_attach(task)) | 750 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) |
| 720 | return -EACCES; | 751 | return -EACCES; |
| 721 | 752 | ||
| 722 | return show_numa_map(m, v); | 753 | return show_numa_map(m, v); |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 4b4f9cc2f186..5d84e7121df8 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
| @@ -113,7 +113,7 @@ static int show_map(struct seq_file *m, void *_vml) | |||
| 113 | struct proc_maps_private *priv = m->private; | 113 | struct proc_maps_private *priv = m->private; |
| 114 | struct task_struct *task = priv->task; | 114 | struct task_struct *task = priv->task; |
| 115 | 115 | ||
| 116 | if (maps_protect && !ptrace_may_attach(task)) | 116 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) |
| 117 | return -EACCES; | 117 | return -EACCES; |
| 118 | 118 | ||
| 119 | return nommu_vma_show(m, vml->vma); | 119 | return nommu_vma_show(m, vml->vma); |
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 9590b9024300..78f613cb9c76 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c | |||
| @@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = { | |||
| 45 | .mmap = generic_file_mmap, | 45 | .mmap = generic_file_mmap, |
| 46 | .fsync = simple_sync_file, | 46 | .fsync = simple_sync_file, |
| 47 | .splice_read = generic_file_splice_read, | 47 | .splice_read = generic_file_splice_read, |
| 48 | .splice_write = generic_file_splice_write, | ||
| 48 | .llseek = generic_file_llseek, | 49 | .llseek = generic_file_llseek, |
| 49 | }; | 50 | }; |
| 50 | 51 | ||
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 0989bc2c2f69..52312ec93ff4 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
| @@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = { | |||
| 43 | .aio_write = generic_file_aio_write, | 43 | .aio_write = generic_file_aio_write, |
| 44 | .fsync = simple_sync_file, | 44 | .fsync = simple_sync_file, |
| 45 | .splice_read = generic_file_splice_read, | 45 | .splice_read = generic_file_splice_read, |
| 46 | .splice_write = generic_file_splice_write, | ||
| 46 | .llseek = generic_file_llseek, | 47 | .llseek = generic_file_llseek, |
| 47 | }; | 48 | }; |
| 48 | 49 | ||
diff --git a/fs/read_write.c b/fs/read_write.c index f0d1240a5c69..9ba495d5a29b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -31,12 +31,12 @@ const struct file_operations generic_ro_fops = { | |||
| 31 | 31 | ||
| 32 | EXPORT_SYMBOL(generic_ro_fops); | 32 | EXPORT_SYMBOL(generic_ro_fops); |
| 33 | 33 | ||
| 34 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | 34 | loff_t |
| 35 | generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | ||
| 35 | { | 36 | { |
| 36 | loff_t retval; | 37 | loff_t retval; |
| 37 | struct inode *inode = file->f_mapping->host; | 38 | struct inode *inode = file->f_mapping->host; |
| 38 | 39 | ||
| 39 | mutex_lock(&inode->i_mutex); | ||
| 40 | switch (origin) { | 40 | switch (origin) { |
| 41 | case SEEK_END: | 41 | case SEEK_END: |
| 42 | offset += inode->i_size; | 42 | offset += inode->i_size; |
| @@ -46,42 +46,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | |||
| 46 | } | 46 | } |
| 47 | retval = -EINVAL; | 47 | retval = -EINVAL; |
| 48 | if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { | 48 | if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { |
| 49 | /* Special lock needed here? */ | ||
| 49 | if (offset != file->f_pos) { | 50 | if (offset != file->f_pos) { |
| 50 | file->f_pos = offset; | 51 | file->f_pos = offset; |
| 51 | file->f_version = 0; | 52 | file->f_version = 0; |
| 52 | } | 53 | } |
| 53 | retval = offset; | 54 | retval = offset; |
| 54 | } | 55 | } |
| 55 | mutex_unlock(&inode->i_mutex); | ||
| 56 | return retval; | 56 | return retval; |
| 57 | } | 57 | } |
| 58 | EXPORT_SYMBOL(generic_file_llseek_unlocked); | ||
| 58 | 59 | ||
| 59 | EXPORT_SYMBOL(generic_file_llseek); | 60 | loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) |
| 60 | |||
| 61 | loff_t remote_llseek(struct file *file, loff_t offset, int origin) | ||
| 62 | { | 61 | { |
| 63 | loff_t retval; | 62 | loff_t n; |
| 64 | 63 | mutex_lock(&file->f_dentry->d_inode->i_mutex); | |
| 65 | lock_kernel(); | 64 | n = generic_file_llseek_unlocked(file, offset, origin); |
| 66 | switch (origin) { | 65 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); |
| 67 | case SEEK_END: | 66 | return n; |
| 68 | offset += i_size_read(file->f_path.dentry->d_inode); | ||
| 69 | break; | ||
| 70 | case SEEK_CUR: | ||
| 71 | offset += file->f_pos; | ||
| 72 | } | ||
| 73 | retval = -EINVAL; | ||
| 74 | if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) { | ||
| 75 | if (offset != file->f_pos) { | ||
| 76 | file->f_pos = offset; | ||
| 77 | file->f_version = 0; | ||
| 78 | } | ||
| 79 | retval = offset; | ||
| 80 | } | ||
| 81 | unlock_kernel(); | ||
| 82 | return retval; | ||
| 83 | } | 67 | } |
| 84 | EXPORT_SYMBOL(remote_llseek); | 68 | EXPORT_SYMBOL(generic_file_llseek); |
| 85 | 69 | ||
| 86 | loff_t no_llseek(struct file *file, loff_t offset, int origin) | 70 | loff_t no_llseek(struct file *file, loff_t offset, int origin) |
| 87 | { | 71 | { |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 57917932212e..192269698a8a 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -45,6 +45,8 @@ void reiserfs_delete_inode(struct inode *inode) | |||
| 45 | goto out; | 45 | goto out; |
| 46 | reiserfs_update_inode_transaction(inode); | 46 | reiserfs_update_inode_transaction(inode); |
| 47 | 47 | ||
| 48 | reiserfs_discard_prealloc(&th, inode); | ||
| 49 | |||
| 48 | err = reiserfs_delete_object(&th, inode); | 50 | err = reiserfs_delete_object(&th, inode); |
| 49 | 51 | ||
| 50 | /* Do quota update inside a transaction for journaled quotas. We must do that | 52 | /* Do quota update inside a transaction for journaled quotas. We must do that |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index ed424d708e69..1d40f2bd1970 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
| @@ -2165,8 +2165,10 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
| 2165 | blk++; | 2165 | blk++; |
| 2166 | } | 2166 | } |
| 2167 | out: | 2167 | out: |
| 2168 | if (len == towrite) | 2168 | if (len == towrite) { |
| 2169 | mutex_unlock(&inode->i_mutex); | ||
| 2169 | return err; | 2170 | return err; |
| 2171 | } | ||
| 2170 | if (inode->i_size < off + len - towrite) | 2172 | if (inode->i_size < off + len - towrite) |
| 2171 | i_size_write(inode, off + len - towrite); | 2173 | i_size_write(inode, off + len - towrite); |
| 2172 | inode->i_version++; | 2174 | inode->i_version++; |
diff --git a/fs/select.c b/fs/select.c index 8dda969614a9..da0e88201c3a 100644 --- a/fs/select.c +++ b/fs/select.c | |||
| @@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
| 249 | retval++; | 249 | retval++; |
| 250 | } | 250 | } |
| 251 | } | 251 | } |
| 252 | cond_resched(); | ||
| 253 | } | 252 | } |
| 254 | if (res_in) | 253 | if (res_in) |
| 255 | *rinp = res_in; | 254 | *rinp = res_in; |
| @@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) | |||
| 257 | *routp = res_out; | 256 | *routp = res_out; |
| 258 | if (res_ex) | 257 | if (res_ex) |
| 259 | *rexp = res_ex; | 258 | *rexp = res_ex; |
| 259 | cond_resched(); | ||
| 260 | } | 260 | } |
| 261 | wait = NULL; | 261 | wait = NULL; |
| 262 | if (retval || !*timeout || signal_pending(current)) | 262 | if (retval || !*timeout || signal_pending(current)) |
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index efbe29af3d7a..2294783320cb 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c | |||
| @@ -422,9 +422,18 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 422 | return error; | 422 | return error; |
| 423 | } | 423 | } |
| 424 | 424 | ||
| 425 | static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin) | ||
| 426 | { | ||
| 427 | loff_t ret; | ||
| 428 | lock_kernel(); | ||
| 429 | ret = generic_file_llseek_unlocked(file, offset, origin); | ||
| 430 | unlock_kernel(); | ||
| 431 | return ret; | ||
| 432 | } | ||
| 433 | |||
| 425 | const struct file_operations smb_file_operations = | 434 | const struct file_operations smb_file_operations = |
| 426 | { | 435 | { |
| 427 | .llseek = remote_llseek, | 436 | .llseek = smb_remote_llseek, |
| 428 | .read = do_sync_read, | 437 | .read = do_sync_read, |
| 429 | .aio_read = smb_file_aio_read, | 438 | .aio_read = smb_file_aio_read, |
| 430 | .write = do_sync_write, | 439 | .write = do_sync_write, |
diff --git a/fs/splice.c b/fs/splice.c index aa5f6f60b305..399442179d89 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
| 379 | lock_page(page); | 379 | lock_page(page); |
| 380 | 380 | ||
| 381 | /* | 381 | /* |
| 382 | * page was truncated, stop here. if this isn't the | 382 | * Page was truncated, or invalidated by the |
| 383 | * first page, we'll just complete what we already | 383 | * filesystem. Redo the find/create, but this time the |
| 384 | * added | 384 | * page is kept locked, so there's no chance of another |
| 385 | * race with truncate/invalidate. | ||
| 385 | */ | 386 | */ |
| 386 | if (!page->mapping) { | 387 | if (!page->mapping) { |
| 387 | unlock_page(page); | 388 | unlock_page(page); |
| 388 | break; | 389 | page = find_or_create_page(mapping, index, |
| 390 | mapping_gfp_mask(mapping)); | ||
| 391 | |||
| 392 | if (!page) { | ||
| 393 | error = -ENOMEM; | ||
| 394 | break; | ||
| 395 | } | ||
| 396 | page_cache_release(pages[page_nr]); | ||
| 397 | pages[page_nr] = page; | ||
| 389 | } | 398 | } |
| 390 | /* | 399 | /* |
| 391 | * page was already under io and is now done, great | 400 | * page was already under io and is now done, great |
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig new file mode 100644 index 000000000000..91ceeda7e5bf --- /dev/null +++ b/fs/ubifs/Kconfig | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | config UBIFS_FS | ||
| 2 | tristate "UBIFS file system support" | ||
| 3 | select CRC16 | ||
| 4 | select CRC32 | ||
| 5 | select CRYPTO if UBIFS_FS_ADVANCED_COMPR | ||
| 6 | select CRYPTO if UBIFS_FS_LZO | ||
| 7 | select CRYPTO if UBIFS_FS_ZLIB | ||
| 8 | select CRYPTO_LZO if UBIFS_FS_LZO | ||
| 9 | select CRYPTO_DEFLATE if UBIFS_FS_ZLIB | ||
| 10 | depends on MTD_UBI | ||
| 11 | help | ||
| 12 | UBIFS is a file system for flash devices which works on top of UBI. | ||
| 13 | |||
| 14 | config UBIFS_FS_XATTR | ||
| 15 | bool "Extended attributes support" | ||
| 16 | depends on UBIFS_FS | ||
| 17 | help | ||
| 18 | This option enables support of extended attributes. | ||
| 19 | |||
| 20 | config UBIFS_FS_ADVANCED_COMPR | ||
| 21 | bool "Advanced compression options" | ||
| 22 | depends on UBIFS_FS | ||
| 23 | help | ||
| 24 | This option allows to explicitly choose which compressions, if any, | ||
| 25 | are enabled in UBIFS. Removing compressors means inbility to read | ||
| 26 | existing file systems. | ||
| 27 | |||
| 28 | If unsure, say 'N'. | ||
| 29 | |||
| 30 | config UBIFS_FS_LZO | ||
| 31 | bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR | ||
| 32 | depends on UBIFS_FS | ||
| 33 | default y | ||
| 34 | help | ||
| 35 | LZO compressor is generally faster then zlib but compresses worse. | ||
| 36 | Say 'Y' if unsure. | ||
| 37 | |||
| 38 | config UBIFS_FS_ZLIB | ||
| 39 | bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR | ||
| 40 | depends on UBIFS_FS | ||
| 41 | default y | ||
| 42 | help | ||
| 43 | Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. | ||
| 44 | |||
| 45 | # Debugging-related stuff | ||
| 46 | config UBIFS_FS_DEBUG | ||
| 47 | bool "Enable debugging" | ||
| 48 | depends on UBIFS_FS | ||
| 49 | select DEBUG_FS | ||
| 50 | select KALLSYMS_ALL | ||
| 51 | help | ||
| 52 | This option enables UBIFS debugging. | ||
| 53 | |||
| 54 | config UBIFS_FS_DEBUG_MSG_LVL | ||
| 55 | int "Default message level (0 = no extra messages, 3 = lots)" | ||
| 56 | depends on UBIFS_FS_DEBUG | ||
| 57 | default "0" | ||
| 58 | help | ||
| 59 | This controls the amount of debugging messages produced by UBIFS. | ||
| 60 | If reporting bugs, please try to have available a full dump of the | ||
| 61 | messages at level 1 while the misbehaviour was occurring. Level 2 | ||
| 62 | may become necessary if level 1 messages were not enough to find the | ||
| 63 | bug. Generally Level 3 should be avoided. | ||
| 64 | |||
| 65 | config UBIFS_FS_DEBUG_CHKS | ||
| 66 | bool "Enable extra checks" | ||
| 67 | depends on UBIFS_FS_DEBUG | ||
| 68 | help | ||
| 69 | If extra checks are enabled UBIFS will check the consistency of its | ||
| 70 | internal data structures during operation. However, UBIFS performance | ||
| 71 | is dramatically slower when this option is selected especially if the | ||
| 72 | file system is large. | ||
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile new file mode 100644 index 000000000000..80e93c35e496 --- /dev/null +++ b/fs/ubifs/Makefile | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | obj-$(CONFIG_UBIFS_FS) += ubifs.o | ||
| 2 | |||
| 3 | ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o | ||
| 4 | ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o | ||
| 5 | ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o | ||
| 6 | ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o | ||
| 7 | |||
| 8 | ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o | ||
| 9 | ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o | ||
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c new file mode 100644 index 000000000000..d81fb9ed2b8e --- /dev/null +++ b/fs/ubifs/budget.c | |||
| @@ -0,0 +1,731 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements the budgeting sub-system which is responsible for UBIFS | ||
| 25 | * space management. | ||
| 26 | * | ||
| 27 | * Factors such as compression, wasted space at the ends of LEBs, space in other | ||
| 28 | * journal heads, the effect of updates on the index, and so on, make it | ||
| 29 | * impossible to accurately predict the amount of space needed. Consequently | ||
| 30 | * approximations are used. | ||
| 31 | */ | ||
| 32 | |||
| 33 | #include "ubifs.h" | ||
| 34 | #include <linux/writeback.h> | ||
| 35 | #include <asm/div64.h> | ||
| 36 | |||
| 37 | /* | ||
| 38 | * When pessimistic budget calculations say that there is no enough space, | ||
| 39 | * UBIFS starts writing back dirty inodes and pages, doing garbage collection, | ||
| 40 | * or committing. The below constants define maximum number of times UBIFS | ||
| 41 | * repeats the operations. | ||
| 42 | */ | ||
| 43 | #define MAX_SHRINK_RETRIES 8 | ||
| 44 | #define MAX_GC_RETRIES 4 | ||
| 45 | #define MAX_CMT_RETRIES 2 | ||
| 46 | #define MAX_NOSPC_RETRIES 1 | ||
| 47 | |||
| 48 | /* | ||
| 49 | * The below constant defines amount of dirty pages which should be written | ||
| 50 | * back at when trying to shrink the liability. | ||
| 51 | */ | ||
| 52 | #define NR_TO_WRITE 16 | ||
| 53 | |||
| 54 | /** | ||
| 55 | * struct retries_info - information about re-tries while making free space. | ||
| 56 | * @prev_liability: previous liability | ||
| 57 | * @shrink_cnt: how many times the liability was shrinked | ||
| 58 | * @shrink_retries: count of liability shrink re-tries (increased when | ||
| 59 | * liability does not shrink) | ||
| 60 | * @try_gc: GC should be tried first | ||
| 61 | * @gc_retries: how many times GC was run | ||
| 62 | * @cmt_retries: how many times commit has been done | ||
| 63 | * @nospc_retries: how many times GC returned %-ENOSPC | ||
| 64 | * | ||
| 65 | * Since we consider budgeting to be the fast-path, and this structure has to | ||
| 66 | * be allocated on stack and zeroed out, we make it smaller using bit-fields. | ||
| 67 | */ | ||
| 68 | struct retries_info { | ||
| 69 | long long prev_liability; | ||
| 70 | unsigned int shrink_cnt; | ||
| 71 | unsigned int shrink_retries:5; | ||
| 72 | unsigned int try_gc:1; | ||
| 73 | unsigned int gc_retries:4; | ||
| 74 | unsigned int cmt_retries:3; | ||
| 75 | unsigned int nospc_retries:1; | ||
| 76 | }; | ||
| 77 | |||
| 78 | /** | ||
| 79 | * shrink_liability - write-back some dirty pages/inodes. | ||
| 80 | * @c: UBIFS file-system description object | ||
| 81 | * @nr_to_write: how many dirty pages to write-back | ||
| 82 | * | ||
| 83 | * This function shrinks UBIFS liability by means of writing back some amount | ||
| 84 | * of dirty inodes and their pages. Returns the amount of pages which were | ||
| 85 | * written back. The returned value does not include dirty inodes which were | ||
| 86 | * synchronized. | ||
| 87 | * | ||
| 88 | * Note, this function synchronizes even VFS inodes which are locked | ||
| 89 | * (@i_mutex) by the caller of the budgeting function, because write-back does | ||
| 90 | * not touch @i_mutex. | ||
| 91 | */ | ||
| 92 | static int shrink_liability(struct ubifs_info *c, int nr_to_write) | ||
| 93 | { | ||
| 94 | int nr_written; | ||
| 95 | struct writeback_control wbc = { | ||
| 96 | .sync_mode = WB_SYNC_NONE, | ||
| 97 | .range_end = LLONG_MAX, | ||
| 98 | .nr_to_write = nr_to_write, | ||
| 99 | }; | ||
| 100 | |||
| 101 | generic_sync_sb_inodes(c->vfs_sb, &wbc); | ||
| 102 | nr_written = nr_to_write - wbc.nr_to_write; | ||
| 103 | |||
| 104 | if (!nr_written) { | ||
| 105 | /* | ||
| 106 | * Re-try again but wait on pages/inodes which are being | ||
| 107 | * written-back concurrently (e.g., by pdflush). | ||
| 108 | */ | ||
| 109 | memset(&wbc, 0, sizeof(struct writeback_control)); | ||
| 110 | wbc.sync_mode = WB_SYNC_ALL; | ||
| 111 | wbc.range_end = LLONG_MAX; | ||
| 112 | wbc.nr_to_write = nr_to_write; | ||
| 113 | generic_sync_sb_inodes(c->vfs_sb, &wbc); | ||
| 114 | nr_written = nr_to_write - wbc.nr_to_write; | ||
| 115 | } | ||
| 116 | |||
| 117 | dbg_budg("%d pages were written back", nr_written); | ||
| 118 | return nr_written; | ||
| 119 | } | ||
| 120 | |||
| 121 | |||
| 122 | /** | ||
| 123 | * run_gc - run garbage collector. | ||
| 124 | * @c: UBIFS file-system description object | ||
| 125 | * | ||
| 126 | * This function runs garbage collector to make some more free space. Returns | ||
| 127 | * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a | ||
| 128 | * negative error code in case of failure. | ||
| 129 | */ | ||
| 130 | static int run_gc(struct ubifs_info *c) | ||
| 131 | { | ||
| 132 | int err, lnum; | ||
| 133 | |||
| 134 | /* Make some free space by garbage-collecting dirty space */ | ||
| 135 | down_read(&c->commit_sem); | ||
| 136 | lnum = ubifs_garbage_collect(c, 1); | ||
| 137 | up_read(&c->commit_sem); | ||
| 138 | if (lnum < 0) | ||
| 139 | return lnum; | ||
| 140 | |||
| 141 | /* GC freed one LEB, return it to lprops */ | ||
| 142 | dbg_budg("GC freed LEB %d", lnum); | ||
| 143 | err = ubifs_return_leb(c, lnum); | ||
| 144 | if (err) | ||
| 145 | return err; | ||
| 146 | return 0; | ||
| 147 | } | ||
| 148 | |||
| 149 | /** | ||
| 150 | * make_free_space - make more free space on the file-system. | ||
| 151 | * @c: UBIFS file-system description object | ||
| 152 | * @ri: information about previous invocations of this function | ||
| 153 | * | ||
| 154 | * This function is called when an operation cannot be budgeted because there | ||
| 155 | * is supposedly no free space. But in most cases there is some free space: | ||
| 156 | * o budgeting is pessimistic, so it always budgets more then it is actually | ||
| 157 | * needed, so shrinking the liability is one way to make free space - the | ||
| 158 | * cached data will take less space then it was budgeted for; | ||
| 159 | * o GC may turn some dark space into free space (budgeting treats dark space | ||
| 160 | * as not available); | ||
| 161 | * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs. | ||
| 162 | * | ||
| 163 | * So this function tries to do the above. Returns %-EAGAIN if some free space | ||
| 164 | * was presumably made and the caller has to re-try budgeting the operation. | ||
| 165 | * Returns %-ENOSPC if it couldn't do more free space, and other negative error | ||
| 166 | * codes on failures. | ||
| 167 | */ | ||
| 168 | static int make_free_space(struct ubifs_info *c, struct retries_info *ri) | ||
| 169 | { | ||
| 170 | int err; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * If we have some dirty pages and inodes (liability), try to write | ||
| 174 | * them back unless this was tried too many times without effect | ||
| 175 | * already. | ||
| 176 | */ | ||
| 177 | if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) { | ||
| 178 | long long liability; | ||
| 179 | |||
| 180 | spin_lock(&c->space_lock); | ||
| 181 | liability = c->budg_idx_growth + c->budg_data_growth + | ||
| 182 | c->budg_dd_growth; | ||
| 183 | spin_unlock(&c->space_lock); | ||
| 184 | |||
| 185 | if (ri->prev_liability >= liability) { | ||
| 186 | /* Liability does not shrink, next time try GC then */ | ||
| 187 | ri->shrink_retries += 1; | ||
| 188 | if (ri->gc_retries < MAX_GC_RETRIES) | ||
| 189 | ri->try_gc = 1; | ||
| 190 | dbg_budg("liability did not shrink: retries %d of %d", | ||
| 191 | ri->shrink_retries, MAX_SHRINK_RETRIES); | ||
| 192 | } | ||
| 193 | |||
| 194 | dbg_budg("force write-back (count %d)", ri->shrink_cnt); | ||
| 195 | shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); | ||
| 196 | |||
| 197 | ri->prev_liability = liability; | ||
| 198 | ri->shrink_cnt += 1; | ||
| 199 | return -EAGAIN; | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | ||
| 203 | * Try to run garbage collector unless it was already tried too many | ||
| 204 | * times. | ||
| 205 | */ | ||
| 206 | if (ri->gc_retries < MAX_GC_RETRIES) { | ||
| 207 | ri->gc_retries += 1; | ||
| 208 | dbg_budg("run GC, retries %d of %d", | ||
| 209 | ri->gc_retries, MAX_GC_RETRIES); | ||
| 210 | |||
| 211 | ri->try_gc = 0; | ||
| 212 | err = run_gc(c); | ||
| 213 | if (!err) | ||
| 214 | return -EAGAIN; | ||
| 215 | |||
| 216 | if (err == -EAGAIN) { | ||
| 217 | dbg_budg("GC asked to commit"); | ||
| 218 | err = ubifs_run_commit(c); | ||
| 219 | if (err) | ||
| 220 | return err; | ||
| 221 | return -EAGAIN; | ||
| 222 | } | ||
| 223 | |||
| 224 | if (err != -ENOSPC) | ||
| 225 | return err; | ||
| 226 | |||
| 227 | /* | ||
| 228 | * GC could not make any progress. If this is the first time, | ||
| 229 | * then it makes sense to try to commit, because it might make | ||
| 230 | * some dirty space. | ||
| 231 | */ | ||
| 232 | dbg_budg("GC returned -ENOSPC, retries %d", | ||
| 233 | ri->nospc_retries); | ||
| 234 | if (ri->nospc_retries >= MAX_NOSPC_RETRIES) | ||
| 235 | return err; | ||
| 236 | ri->nospc_retries += 1; | ||
| 237 | } | ||
| 238 | |||
| 239 | /* Neither GC nor write-back helped, try to commit */ | ||
| 240 | if (ri->cmt_retries < MAX_CMT_RETRIES) { | ||
| 241 | ri->cmt_retries += 1; | ||
| 242 | dbg_budg("run commit, retries %d of %d", | ||
| 243 | ri->cmt_retries, MAX_CMT_RETRIES); | ||
| 244 | err = ubifs_run_commit(c); | ||
| 245 | if (err) | ||
| 246 | return err; | ||
| 247 | return -EAGAIN; | ||
| 248 | } | ||
| 249 | return -ENOSPC; | ||
| 250 | } | ||
| 251 | |||
| 252 | /** | ||
| 253 | * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. | ||
| 254 | * @c: UBIFS file-system description object | ||
| 255 | * | ||
| 256 | * This function calculates and returns the number of eraseblocks which should | ||
| 257 | * be kept for index usage. | ||
| 258 | */ | ||
| 259 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | ||
| 260 | { | ||
| 261 | int ret; | ||
| 262 | uint64_t idx_size; | ||
| 263 | |||
| 264 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | ||
| 265 | |||
| 266 | /* And make sure we have twice the index size of space reserved */ | ||
| 267 | idx_size <<= 1; | ||
| 268 | |||
| 269 | /* | ||
| 270 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' | ||
| 271 | * pair, nor similarly the two variables for the new index size, so we | ||
| 272 | * have to do this costly 64-bit division on fast-path. | ||
| 273 | */ | ||
| 274 | if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) | ||
| 275 | ret = idx_size + 1; | ||
| 276 | else | ||
| 277 | ret = idx_size; | ||
| 278 | /* | ||
| 279 | * The index head is not available for the in-the-gaps method, so add an | ||
| 280 | * extra LEB to compensate. | ||
| 281 | */ | ||
| 282 | ret += 1; | ||
| 283 | /* | ||
| 284 | * At present the index needs at least 2 LEBs: one for the index head | ||
| 285 | * and one for in-the-gaps method (which currently does not cater for | ||
| 286 | * the index head and so excludes it from consideration). | ||
| 287 | */ | ||
| 288 | if (ret < 2) | ||
| 289 | ret = 2; | ||
| 290 | return ret; | ||
| 291 | } | ||
| 292 | |||
| 293 | /** | ||
| 294 | * ubifs_calc_available - calculate available FS space. | ||
| 295 | * @c: UBIFS file-system description object | ||
| 296 | * @min_idx_lebs: minimum number of LEBs reserved for the index | ||
| 297 | * | ||
| 298 | * This function calculates and returns amount of FS space available for use. | ||
| 299 | */ | ||
| 300 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | ||
| 301 | { | ||
| 302 | int subtract_lebs; | ||
| 303 | long long available; | ||
| 304 | |||
| 305 | /* | ||
| 306 | * Force the amount available to the total size reported if the used | ||
| 307 | * space is zero. | ||
| 308 | */ | ||
| 309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
| 310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
| 311 | /* Do the same calculation as for c->block_cnt */ | ||
| 312 | available = c->main_lebs - 2; | ||
| 313 | available *= c->leb_size - c->dark_wm; | ||
| 314 | return available; | ||
| 315 | } | ||
| 316 | |||
| 317 | available = c->main_bytes - c->lst.total_used; | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Now 'available' contains theoretically available flash space | ||
| 321 | * assuming there is no index, so we have to subtract the space which | ||
| 322 | * is reserved for the index. | ||
| 323 | */ | ||
| 324 | subtract_lebs = min_idx_lebs; | ||
| 325 | |||
| 326 | /* Take into account that GC reserves one LEB for its own needs */ | ||
| 327 | subtract_lebs += 1; | ||
| 328 | |||
| 329 | /* | ||
| 330 | * The GC journal head LEB is not really accessible. And since | ||
| 331 | * different write types go to different heads, we may count only on | ||
| 332 | * one head's space. | ||
| 333 | */ | ||
| 334 | subtract_lebs += c->jhead_cnt - 1; | ||
| 335 | |||
| 336 | /* We also reserve one LEB for deletions, which bypass budgeting */ | ||
| 337 | subtract_lebs += 1; | ||
| 338 | |||
| 339 | available -= (long long)subtract_lebs * c->leb_size; | ||
| 340 | |||
| 341 | /* Subtract the dead space which is not available for use */ | ||
| 342 | available -= c->lst.total_dead; | ||
| 343 | |||
| 344 | /* | ||
| 345 | * Subtract dark space, which might or might not be usable - it depends | ||
| 346 | * on the data which we have on the media and which will be written. If | ||
| 347 | * this is a lot of uncompressed or not-compressible data, the dark | ||
| 348 | * space cannot be used. | ||
| 349 | */ | ||
| 350 | available -= c->lst.total_dark; | ||
| 351 | |||
| 352 | /* | ||
| 353 | * However, there is more dark space. The index may be bigger than | ||
| 354 | * @min_idx_lebs. Those extra LEBs are assumed to be available, but | ||
| 355 | * their dark space is not included in total_dark, so it is subtracted | ||
| 356 | * here. | ||
| 357 | */ | ||
| 358 | if (c->lst.idx_lebs > min_idx_lebs) { | ||
| 359 | subtract_lebs = c->lst.idx_lebs - min_idx_lebs; | ||
| 360 | available -= subtract_lebs * c->dark_wm; | ||
| 361 | } | ||
| 362 | |||
| 363 | /* The calculations are rough and may end up with a negative number */ | ||
| 364 | return available > 0 ? available : 0; | ||
| 365 | } | ||
| 366 | |||
| 367 | /** | ||
| 368 | * can_use_rp - check whether the user is allowed to use reserved pool. | ||
| 369 | * @c: UBIFS file-system description object | ||
| 370 | * | ||
| 371 | * UBIFS has so-called "reserved pool" which is flash space reserved | ||
| 372 | * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock. | ||
| 373 | * This function checks whether current user is allowed to use reserved pool. | ||
| 374 | * Returns %1 current user is allowed to use reserved pool and %0 otherwise. | ||
| 375 | */ | ||
| 376 | static int can_use_rp(struct ubifs_info *c) | ||
| 377 | { | ||
| 378 | if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || | ||
| 379 | (c->rp_gid != 0 && in_group_p(c->rp_gid))) | ||
| 380 | return 1; | ||
| 381 | return 0; | ||
| 382 | } | ||
| 383 | |||
| 384 | /** | ||
| 385 | * do_budget_space - reserve flash space for index and data growth. | ||
| 386 | * @c: UBIFS file-system description object | ||
| 387 | * | ||
| 388 | * This function makes sure UBIFS has enough free eraseblocks for index growth | ||
| 389 | * and data. | ||
| 390 | * | ||
| 391 | * When budgeting index space, UBIFS reserves twice as more LEBs as the index | ||
| 392 | * would take if it was consolidated and written to the flash. This guarantees | ||
| 393 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always | ||
| 394 | * be able to commit dirty index. So this function basically adds amount of | ||
| 395 | * budgeted index space to the size of the current index, multiplies this by 2, | ||
| 396 | * and makes sure this does not exceed the amount of free eraseblocks. | ||
| 397 | * | ||
| 398 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: | ||
| 399 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might | ||
| 400 | * be large, because UBIFS does not do any index consolidation as long as | ||
| 401 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs | ||
| 402 | * will contain a lot of dirt. | ||
| 403 | * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be | ||
| 404 | * consolidated to take up to @c->min_idx_lebs LEBs. | ||
| 405 | * | ||
| 406 | * This function returns zero in case of success, and %-ENOSPC in case of | ||
| 407 | * failure. | ||
| 408 | */ | ||
| 409 | static int do_budget_space(struct ubifs_info *c) | ||
| 410 | { | ||
| 411 | long long outstanding, available; | ||
| 412 | int lebs, rsvd_idx_lebs, min_idx_lebs; | ||
| 413 | |||
| 414 | /* First budget index space */ | ||
| 415 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | ||
| 416 | |||
| 417 | /* Now 'min_idx_lebs' contains number of LEBs to reserve */ | ||
| 418 | if (min_idx_lebs > c->lst.idx_lebs) | ||
| 419 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
| 420 | else | ||
| 421 | rsvd_idx_lebs = 0; | ||
| 422 | |||
| 423 | /* | ||
| 424 | * The number of LEBs that are available to be used by the index is: | ||
| 425 | * | ||
| 426 | * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - | ||
| 427 | * @c->lst.taken_empty_lebs | ||
| 428 | * | ||
| 429 | * @empty_lebs are available because they are empty. @freeable_cnt are | ||
| 430 | * available because they contain only free and dirty space and the | ||
| 431 | * index allocation always occurs after wbufs are synch'ed. | ||
| 432 | * @idx_gc_cnt are available because they are index LEBs that have been | ||
| 433 | * garbage collected (including trivial GC) and are awaiting the commit | ||
| 434 | * before they can be unmapped - note that the in-the-gaps method will | ||
| 435 | * grab these if it needs them. @taken_empty_lebs are empty_lebs that | ||
| 436 | * have already been allocated for some purpose (also includes those | ||
| 437 | * LEBs on the @idx_gc list). | ||
| 438 | * | ||
| 439 | * Note, @taken_empty_lebs may temporarily be higher by one because of | ||
| 440 | * the way we serialize LEB allocations and budgeting. See a comment in | ||
| 441 | * 'ubifs_find_free_space()'. | ||
| 442 | */ | ||
| 443 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
| 444 | c->lst.taken_empty_lebs; | ||
| 445 | if (unlikely(rsvd_idx_lebs > lebs)) { | ||
| 446 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " | ||
| 447 | "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, | ||
| 448 | rsvd_idx_lebs); | ||
| 449 | return -ENOSPC; | ||
| 450 | } | ||
| 451 | |||
| 452 | available = ubifs_calc_available(c, min_idx_lebs); | ||
| 453 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
| 454 | |||
| 455 | if (unlikely(available < outstanding)) { | ||
| 456 | dbg_budg("out of data space: available %lld, outstanding %lld", | ||
| 457 | available, outstanding); | ||
| 458 | return -ENOSPC; | ||
| 459 | } | ||
| 460 | |||
| 461 | if (available - outstanding <= c->rp_size && !can_use_rp(c)) | ||
| 462 | return -ENOSPC; | ||
| 463 | |||
| 464 | c->min_idx_lebs = min_idx_lebs; | ||
| 465 | return 0; | ||
| 466 | } | ||
| 467 | |||
| 468 | /** | ||
| 469 | * calc_idx_growth - calculate approximate index growth from budgeting request. | ||
| 470 | * @c: UBIFS file-system description object | ||
| 471 | * @req: budgeting request | ||
| 472 | * | ||
| 473 | * For now we assume each new node adds one znode. But this is rather poor | ||
| 474 | * approximation, though. | ||
| 475 | */ | ||
| 476 | static int calc_idx_growth(const struct ubifs_info *c, | ||
| 477 | const struct ubifs_budget_req *req) | ||
| 478 | { | ||
| 479 | int znodes; | ||
| 480 | |||
| 481 | znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) + | ||
| 482 | req->new_dent; | ||
| 483 | return znodes * c->max_idx_node_sz; | ||
| 484 | } | ||
| 485 | |||
| 486 | /** | ||
| 487 | * calc_data_growth - calculate approximate amount of new data from budgeting | ||
| 488 | * request. | ||
| 489 | * @c: UBIFS file-system description object | ||
| 490 | * @req: budgeting request | ||
| 491 | */ | ||
| 492 | static int calc_data_growth(const struct ubifs_info *c, | ||
| 493 | const struct ubifs_budget_req *req) | ||
| 494 | { | ||
| 495 | int data_growth; | ||
| 496 | |||
| 497 | data_growth = req->new_ino ? c->inode_budget : 0; | ||
| 498 | if (req->new_page) | ||
| 499 | data_growth += c->page_budget; | ||
| 500 | if (req->new_dent) | ||
| 501 | data_growth += c->dent_budget; | ||
| 502 | data_growth += req->new_ino_d; | ||
| 503 | return data_growth; | ||
| 504 | } | ||
| 505 | |||
| 506 | /** | ||
| 507 | * calc_dd_growth - calculate approximate amount of data which makes other data | ||
| 508 | * dirty from budgeting request. | ||
| 509 | * @c: UBIFS file-system description object | ||
| 510 | * @req: budgeting request | ||
| 511 | */ | ||
| 512 | static int calc_dd_growth(const struct ubifs_info *c, | ||
| 513 | const struct ubifs_budget_req *req) | ||
| 514 | { | ||
| 515 | int dd_growth; | ||
| 516 | |||
| 517 | dd_growth = req->dirtied_page ? c->page_budget : 0; | ||
| 518 | |||
| 519 | if (req->dirtied_ino) | ||
| 520 | dd_growth += c->inode_budget << (req->dirtied_ino - 1); | ||
| 521 | if (req->mod_dent) | ||
| 522 | dd_growth += c->dent_budget; | ||
| 523 | dd_growth += req->dirtied_ino_d; | ||
| 524 | return dd_growth; | ||
| 525 | } | ||
| 526 | |||
| 527 | /** | ||
| 528 | * ubifs_budget_space - ensure there is enough space to complete an operation. | ||
| 529 | * @c: UBIFS file-system description object | ||
| 530 | * @req: budget request | ||
| 531 | * | ||
| 532 | * This function allocates budget for an operation. It uses pessimistic | ||
| 533 | * approximation of how much flash space the operation needs. The goal of this | ||
| 534 | * function is to make sure UBIFS always has flash space to flush all dirty | ||
| 535 | * pages, dirty inodes, and dirty znodes (liability). This function may force | ||
| 536 | * commit, garbage-collection or write-back. Returns zero in case of success, | ||
| 537 | * %-ENOSPC if there is no free space and other negative error codes in case of | ||
| 538 | * failures. | ||
| 539 | */ | ||
| 540 | int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) | ||
| 541 | { | ||
| 542 | int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); | ||
| 543 | int err, idx_growth, data_growth, dd_growth; | ||
| 544 | struct retries_info ri; | ||
| 545 | |||
| 546 | ubifs_assert(req->dirtied_ino <= 4); | ||
| 547 | ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); | ||
| 548 | |||
| 549 | data_growth = calc_data_growth(c, req); | ||
| 550 | dd_growth = calc_dd_growth(c, req); | ||
| 551 | if (!data_growth && !dd_growth) | ||
| 552 | return 0; | ||
| 553 | idx_growth = calc_idx_growth(c, req); | ||
| 554 | memset(&ri, 0, sizeof(struct retries_info)); | ||
| 555 | |||
| 556 | again: | ||
| 557 | spin_lock(&c->space_lock); | ||
| 558 | ubifs_assert(c->budg_idx_growth >= 0); | ||
| 559 | ubifs_assert(c->budg_data_growth >= 0); | ||
| 560 | ubifs_assert(c->budg_dd_growth >= 0); | ||
| 561 | |||
| 562 | if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { | ||
| 563 | dbg_budg("no space"); | ||
| 564 | spin_unlock(&c->space_lock); | ||
| 565 | return -ENOSPC; | ||
| 566 | } | ||
| 567 | |||
| 568 | c->budg_idx_growth += idx_growth; | ||
| 569 | c->budg_data_growth += data_growth; | ||
| 570 | c->budg_dd_growth += dd_growth; | ||
| 571 | |||
| 572 | err = do_budget_space(c); | ||
| 573 | if (likely(!err)) { | ||
| 574 | req->idx_growth = idx_growth; | ||
| 575 | req->data_growth = data_growth; | ||
| 576 | req->dd_growth = dd_growth; | ||
| 577 | spin_unlock(&c->space_lock); | ||
| 578 | return 0; | ||
| 579 | } | ||
| 580 | |||
| 581 | /* Restore the old values */ | ||
| 582 | c->budg_idx_growth -= idx_growth; | ||
| 583 | c->budg_data_growth -= data_growth; | ||
| 584 | c->budg_dd_growth -= dd_growth; | ||
| 585 | spin_unlock(&c->space_lock); | ||
| 586 | |||
| 587 | if (req->fast) { | ||
| 588 | dbg_budg("no space for fast budgeting"); | ||
| 589 | return err; | ||
| 590 | } | ||
| 591 | |||
| 592 | err = make_free_space(c, &ri); | ||
| 593 | if (err == -EAGAIN) { | ||
| 594 | dbg_budg("try again"); | ||
| 595 | cond_resched(); | ||
| 596 | goto again; | ||
| 597 | } else if (err == -ENOSPC) { | ||
| 598 | dbg_budg("FS is full, -ENOSPC"); | ||
| 599 | c->nospace = 1; | ||
| 600 | if (can_use_rp(c) || c->rp_size == 0) | ||
| 601 | c->nospace_rp = 1; | ||
| 602 | smp_wmb(); | ||
| 603 | } else | ||
| 604 | ubifs_err("cannot budget space, error %d", err); | ||
| 605 | return err; | ||
| 606 | } | ||
| 607 | |||
| 608 | /** | ||
| 609 | * ubifs_release_budget - release budgeted free space. | ||
| 610 | * @c: UBIFS file-system description object | ||
| 611 | * @req: budget request | ||
| 612 | * | ||
| 613 | * This function releases the space budgeted by 'ubifs_budget_space()'. Note, | ||
| 614 | * since the index changes (which were budgeted for in @req->idx_growth) will | ||
| 615 | * only be written to the media on commit, this function moves the index budget | ||
| 616 | * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be | ||
| 617 | * zeroed by the commit operation. | ||
| 618 | */ | ||
| 619 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | ||
| 620 | { | ||
| 621 | ubifs_assert(req->dirtied_ino <= 4); | ||
| 622 | ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); | ||
| 623 | if (!req->recalculate) { | ||
| 624 | ubifs_assert(req->idx_growth >= 0); | ||
| 625 | ubifs_assert(req->data_growth >= 0); | ||
| 626 | ubifs_assert(req->dd_growth >= 0); | ||
| 627 | } | ||
| 628 | |||
| 629 | if (req->recalculate) { | ||
| 630 | req->data_growth = calc_data_growth(c, req); | ||
| 631 | req->dd_growth = calc_dd_growth(c, req); | ||
| 632 | req->idx_growth = calc_idx_growth(c, req); | ||
| 633 | } | ||
| 634 | |||
| 635 | if (!req->data_growth && !req->dd_growth) | ||
| 636 | return; | ||
| 637 | |||
| 638 | c->nospace = c->nospace_rp = 0; | ||
| 639 | smp_wmb(); | ||
| 640 | |||
| 641 | spin_lock(&c->space_lock); | ||
| 642 | c->budg_idx_growth -= req->idx_growth; | ||
| 643 | c->budg_uncommitted_idx += req->idx_growth; | ||
| 644 | c->budg_data_growth -= req->data_growth; | ||
| 645 | c->budg_dd_growth -= req->dd_growth; | ||
| 646 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | ||
| 647 | |||
| 648 | ubifs_assert(c->budg_idx_growth >= 0); | ||
| 649 | ubifs_assert(c->budg_data_growth >= 0); | ||
| 650 | ubifs_assert(c->min_idx_lebs < c->main_lebs); | ||
| 651 | spin_unlock(&c->space_lock); | ||
| 652 | } | ||
| 653 | |||
| 654 | /** | ||
| 655 | * ubifs_convert_page_budget - convert budget of a new page. | ||
| 656 | * @c: UBIFS file-system description object | ||
| 657 | * | ||
| 658 | * This function converts budget which was allocated for a new page of data to | ||
| 659 | * the budget of changing an existing page of data. The latter is smaller then | ||
| 660 | * the former, so this function only does simple re-calculation and does not | ||
| 661 | * involve any write-back. | ||
| 662 | */ | ||
| 663 | void ubifs_convert_page_budget(struct ubifs_info *c) | ||
| 664 | { | ||
| 665 | spin_lock(&c->space_lock); | ||
| 666 | /* Release the index growth reservation */ | ||
| 667 | c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
| 668 | /* Release the data growth reservation */ | ||
| 669 | c->budg_data_growth -= c->page_budget; | ||
| 670 | /* Increase the dirty data growth reservation instead */ | ||
| 671 | c->budg_dd_growth += c->page_budget; | ||
| 672 | /* And re-calculate the indexing space reservation */ | ||
| 673 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | ||
| 674 | spin_unlock(&c->space_lock); | ||
| 675 | } | ||
| 676 | |||
| 677 | /** | ||
| 678 | * ubifs_release_dirty_inode_budget - release dirty inode budget. | ||
| 679 | * @c: UBIFS file-system description object | ||
| 680 | * @ui: UBIFS inode to release the budget for | ||
| 681 | * | ||
| 682 | * This function releases budget corresponding to a dirty inode. It is usually | ||
| 683 | * called when after the inode has been written to the media and marked as | ||
| 684 | * clean. | ||
| 685 | */ | ||
| 686 | void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | ||
| 687 | struct ubifs_inode *ui) | ||
| 688 | { | ||
| 689 | struct ubifs_budget_req req = {.dd_growth = c->inode_budget, | ||
| 690 | .dirtied_ino_d = ui->data_len}; | ||
| 691 | |||
| 692 | ubifs_release_budget(c, &req); | ||
| 693 | } | ||
| 694 | |||
/**
 * ubifs_budg_get_free_space - return amount of free space.
 * @c: UBIFS file-system description object
 *
 * This function returns amount of free space on the file-system. It mirrors
 * the checks in 'do_budget_space()' so that the value it reports is the
 * amount a subsequent budget request could actually obtain, minus the
 * already-outstanding (budgeted but unwritten) data.
 */
long long ubifs_budg_get_free_space(struct ubifs_info *c)
{
	int min_idx_lebs, rsvd_idx_lebs;
	long long available, outstanding, free;

	/* Do exactly the same calculations as in 'do_budget_space()' */
	spin_lock(&c->space_lock);
	min_idx_lebs = ubifs_calc_min_idx_lebs(c);

	/* LEBs that must still be reserved for the index */
	if (min_idx_lebs > c->lst.idx_lebs)
		rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
	else
		rsvd_idx_lebs = 0;

	/* If the index reservation cannot be met, there is no free space */
	if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
				- c->lst.taken_empty_lebs) {
		spin_unlock(&c->space_lock);
		return 0;
	}

	available = ubifs_calc_available(c, min_idx_lebs);
	outstanding = c->budg_data_growth + c->budg_dd_growth;
	c->min_idx_lebs = min_idx_lebs;
	spin_unlock(&c->space_lock);

	/* Convert raw available space into the user-visible figure */
	if (available > outstanding)
		free = ubifs_reported_space(c, available - outstanding);
	else
		free = 0;
	return free;
}
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c new file mode 100644 index 000000000000..3b516316c9b3 --- /dev/null +++ b/fs/ubifs/commit.c | |||
| @@ -0,0 +1,677 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements functions that manage the running of the commit process. | ||
| 25 | * Each affected module has its own functions to accomplish their part in the | ||
| 26 | * commit and those functions are called here. | ||
| 27 | * | ||
| 28 | * The commit is the process whereby all updates to the index and LEB properties | ||
| 29 | * are written out together and the journal becomes empty. This keeps the | ||
| 30 | * file system consistent - at all times the state can be recreated by reading | ||
| 31 | * the index and LEB properties and then replaying the journal. | ||
| 32 | * | ||
| 33 | * The commit is split into two parts named "commit start" and "commit end". | ||
| 34 | * During commit start, the commit process has exclusive access to the journal | ||
| 35 | * by holding the commit semaphore down for writing. As few I/O operations as | ||
| 36 | * possible are performed during commit start, instead the nodes that are to be | ||
| 37 | * written are merely identified. During commit end, the commit semaphore is no | ||
| 38 | * longer held and the journal is again in operation, allowing users to continue | ||
| 39 | * to use the file system while the bulk of the commit I/O is performed. The | ||
| 40 | * purpose of this two-step approach is to prevent the commit from causing any | ||
| 41 | * latency blips. Note that in any case, the commit does not prevent lookups | ||
| 42 | * (as permitted by the TNC mutex), or access to VFS data structures e.g. page | ||
| 43 | * cache. | ||
| 44 | */ | ||
| 45 | |||
| 46 | #include <linux/freezer.h> | ||
| 47 | #include <linux/kthread.h> | ||
| 48 | #include "ubifs.h" | ||
| 49 | |||
/**
 * do_commit - commit the journal.
 * @c: UBIFS file-system description object
 *
 * This function implements UBIFS commit. It has to be called with commit lock
 * locked. Returns zero in case of success and a negative error code in case of
 * failure.
 *
 * The caller must hold @c->commit_sem for writing; it is released here -
 * either after "commit start" finishes (so the journal can operate again
 * while "commit end" I/O proceeds), or on the early error paths.
 */
static int do_commit(struct ubifs_info *c)
{
	int err, new_ltail_lnum, old_ltail_lnum, i;
	struct ubifs_zbranch zroot;
	struct ubifs_lp_stats lst;

	dbg_cmt("start");
	if (c->ro_media) {
		err = -EROFS;
		goto out_up;
	}

	/* Sync all write buffers (necessary for recovery) */
	for (i = 0; i < c->jhead_cnt; i++) {
		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
		if (err)
			goto out_up;
	}

	/*
	 * "Commit start": each subsystem identifies what it has to write
	 * while we still have exclusive access to the journal.
	 */
	err = ubifs_gc_start_commit(c);
	if (err)
		goto out_up;
	err = dbg_check_lprops(c);
	if (err)
		goto out_up;
	err = ubifs_log_start_commit(c, &new_ltail_lnum);
	if (err)
		goto out_up;
	err = ubifs_tnc_start_commit(c, &zroot);
	if (err)
		goto out_up;
	err = ubifs_lpt_start_commit(c);
	if (err)
		goto out_up;
	err = ubifs_orphan_start_commit(c);
	if (err)
		goto out_up;

	ubifs_get_lp_stats(c, &lst);

	/* "Commit start" is over - let the journal run during "commit end" */
	up_write(&c->commit_sem);

	err = ubifs_tnc_end_commit(c);
	if (err)
		goto out;
	err = ubifs_lpt_end_commit(c);
	if (err)
		goto out;
	err = ubifs_orphan_end_commit(c);
	if (err)
		goto out;
	old_ltail_lnum = c->ltail_lnum;
	err = ubifs_log_end_commit(c, new_ltail_lnum);
	if (err)
		goto out;
	err = dbg_check_old_index(c, &zroot);
	if (err)
		goto out;

	/* Record the newly-committed state in the master node */
	mutex_lock(&c->mst_mutex);
	c->mst_node->cmt_no      = cpu_to_le64(++c->cmt_no);
	c->mst_node->log_lnum    = cpu_to_le32(new_ltail_lnum);
	c->mst_node->root_lnum   = cpu_to_le32(zroot.lnum);
	c->mst_node->root_offs   = cpu_to_le32(zroot.offs);
	c->mst_node->root_len    = cpu_to_le32(zroot.len);
	c->mst_node->ihead_lnum  = cpu_to_le32(c->ihead_lnum);
	c->mst_node->ihead_offs  = cpu_to_le32(c->ihead_offs);
	c->mst_node->index_size  = cpu_to_le64(c->old_idx_sz);
	c->mst_node->lpt_lnum    = cpu_to_le32(c->lpt_lnum);
	c->mst_node->lpt_offs    = cpu_to_le32(c->lpt_offs);
	c->mst_node->nhead_lnum  = cpu_to_le32(c->nhead_lnum);
	c->mst_node->nhead_offs  = cpu_to_le32(c->nhead_offs);
	c->mst_node->ltab_lnum   = cpu_to_le32(c->ltab_lnum);
	c->mst_node->ltab_offs   = cpu_to_le32(c->ltab_offs);
	c->mst_node->lsave_lnum  = cpu_to_le32(c->lsave_lnum);
	c->mst_node->lsave_offs  = cpu_to_le32(c->lsave_offs);
	c->mst_node->lscan_lnum  = cpu_to_le32(c->lscan_lnum);
	c->mst_node->empty_lebs  = cpu_to_le32(lst.empty_lebs);
	c->mst_node->idx_lebs    = cpu_to_le32(lst.idx_lebs);
	c->mst_node->total_free  = cpu_to_le64(lst.total_free);
	c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
	c->mst_node->total_used  = cpu_to_le64(lst.total_used);
	c->mst_node->total_dead  = cpu_to_le64(lst.total_dead);
	c->mst_node->total_dark  = cpu_to_le64(lst.total_dark);
	if (c->no_orphs)
		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
	else
		c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
	err = ubifs_write_master(c);
	mutex_unlock(&c->mst_mutex);
	if (err)
		goto out;

	/* Post-commit cleanup: old log tail, GC and LPT housekeeping */
	err = ubifs_log_post_commit(c, old_ltail_lnum);
	if (err)
		goto out;
	err = ubifs_gc_end_commit(c);
	if (err)
		goto out;
	err = ubifs_lpt_post_commit(c);
	if (err)
		goto out;

	spin_lock(&c->cs_lock);
	c->cmt_state = COMMIT_RESTING;
	wake_up(&c->cmt_wq);
	dbg_cmt("commit end");
	spin_unlock(&c->cs_lock);

	return 0;

out_up:
	/* Early failure: commit_sem is still held, release it first */
	up_write(&c->commit_sem);
out:
	/* A failed commit is fatal: mark broken and force read-only mode */
	ubifs_err("commit failed, error %d", err);
	spin_lock(&c->cs_lock);
	c->cmt_state = COMMIT_BROKEN;
	wake_up(&c->cmt_wq);
	spin_unlock(&c->cs_lock);
	ubifs_ro_mode(c, err);
	return err;
}
| 180 | |||
/**
 * run_bg_commit - run background commit if it is needed.
 * @c: UBIFS file-system description object
 *
 * This function runs background commit if it is needed. Returns zero in case
 * of success and a negative error code in case of failure.
 *
 * The commit state is checked twice: once cheaply before taking
 * @c->commit_sem, and again after, because the state may change while the
 * semaphore is being acquired (e.g. another task may have run the commit).
 */
static int run_bg_commit(struct ubifs_info *c)
{
	spin_lock(&c->cs_lock);
	/*
	 * Run background commit only if background commit was requested or if
	 * commit is required.
	 */
	if (c->cmt_state != COMMIT_BACKGROUND &&
	    c->cmt_state != COMMIT_REQUIRED)
		goto out;
	spin_unlock(&c->cs_lock);

	down_write(&c->commit_sem);
	spin_lock(&c->cs_lock);
	/* Re-check: the state may have changed while we slept on the rwsem */
	if (c->cmt_state == COMMIT_REQUIRED)
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
	else if (c->cmt_state == COMMIT_BACKGROUND)
		c->cmt_state = COMMIT_RUNNING_BACKGROUND;
	else
		goto out_cmt_unlock;
	spin_unlock(&c->cs_lock);

	/* do_commit() releases @c->commit_sem */
	return do_commit(c);

out_cmt_unlock:
	up_write(&c->commit_sem);
out:
	spin_unlock(&c->cs_lock);
	return 0;
}
| 218 | |||
/**
 * ubifs_bg_thread - UBIFS background thread function.
 * @info: points to the file-system description object
 *
 * This function implements various file-system background activities:
 * o when a write-buffer timer expires it synchronizes the appropriate
 *   write-buffer;
 * o when the journal is about to be full, it starts in-advance commit.
 *
 * Note, other stuff like background garbage collection may be added here in
 * future.
 */
int ubifs_bg_thread(void *info)
{
	int err;
	struct ubifs_info *c = info;

	ubifs_msg("background thread \"%s\" started, PID %d",
		  c->bgt_name, current->pid);
	/* Allow this kthread to be frozen during system suspend */
	set_freezable();

	while (1) {
		if (kthread_should_stop())
			break;

		if (try_to_freeze())
			continue;

		/*
		 * Set TASK_INTERRUPTIBLE *before* testing @c->need_bgt so a
		 * wakeup between the test and schedule() is not lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		/* Check if there is something to do */
		if (!c->need_bgt) {
			/*
			 * Nothing prevents us from going sleep now and
			 * be never woken up and block the task which
			 * could wait in 'kthread_stop()' forever.
			 */
			if (kthread_should_stop())
				break;
			schedule();
			continue;
		} else
			__set_current_state(TASK_RUNNING);

		c->need_bgt = 0;
		/* Sync timed-out write-buffers; failure forces read-only */
		err = ubifs_bg_wbufs_sync(c);
		if (err)
			ubifs_ro_mode(c, err);

		run_bg_commit(c);
		cond_resched();
	}

	dbg_msg("background thread \"%s\" stops", c->bgt_name);
	return 0;
}
| 274 | |||
/**
 * ubifs_commit_required - set commit state to "required".
 * @c: UBIFS file-system description object
 *
 * This function is called if a commit is required but cannot be done from the
 * calling function, so it is just flagged instead.
 */
void ubifs_commit_required(struct ubifs_info *c)
{
	spin_lock(&c->cs_lock);
	switch (c->cmt_state) {
	case COMMIT_RESTING:
	case COMMIT_BACKGROUND:
		/* No commit running yet - request one */
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(COMMIT_REQUIRED));
		c->cmt_state = COMMIT_REQUIRED;
		break;
	case COMMIT_RUNNING_BACKGROUND:
		/* A background commit is in flight - escalate its urgency */
		dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
			dbg_cstate(COMMIT_RUNNING_REQUIRED));
		c->cmt_state = COMMIT_RUNNING_REQUIRED;
		break;
	case COMMIT_REQUIRED:
	case COMMIT_RUNNING_REQUIRED:
	case COMMIT_BROKEN:
		/* Already required, already escalated, or unrecoverable */
		break;
	}
	spin_unlock(&c->cs_lock);
}
| 304 | |||
| 305 | /** | ||
| 306 | * ubifs_request_bg_commit - notify the background thread to do a commit. | ||
| 307 | * @c: UBIFS file-system description object | ||
| 308 | * | ||
| 309 | * This function is called if the journal is full enough to make a commit | ||
| 310 | * worthwhile, so background thread is kicked to start it. | ||
| 311 | */ | ||
| 312 | void ubifs_request_bg_commit(struct ubifs_info *c) | ||
| 313 | { | ||
| 314 | spin_lock(&c->cs_lock); | ||
| 315 | if (c->cmt_state == COMMIT_RESTING) { | ||
| 316 | dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), | ||
| 317 | dbg_cstate(COMMIT_BACKGROUND)); | ||
| 318 | c->cmt_state = COMMIT_BACKGROUND; | ||
| 319 | spin_unlock(&c->cs_lock); | ||
| 320 | ubifs_wake_up_bgt(c); | ||
| 321 | } else | ||
| 322 | spin_unlock(&c->cs_lock); | ||
| 323 | } | ||
| 324 | |||
/**
 * wait_for_commit - wait for commit.
 * @c: UBIFS file-system description object
 *
 * This function sleeps until the commit operation is no longer running.
 * Returns %0 always.
 */
static int wait_for_commit(struct ubifs_info *c)
{
	dbg_cmt("pid %d goes sleep", current->pid);

	/*
	 * The following sleeps if the condition is false, and will be woken
	 * when the commit ends. It is possible, although very unlikely, that we
	 * will wake up and see the subsequent commit running, rather than the
	 * one we were waiting for, and go back to sleep. However, we will be
	 * woken again, so there is no danger of sleeping forever.
	 */
	wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND &&
			      c->cmt_state != COMMIT_RUNNING_REQUIRED);
	dbg_cmt("commit finished, pid %d woke up", current->pid);
	return 0;
}
| 347 | |||
/**
 * ubifs_run_commit - run or wait for commit.
 * @c: UBIFS file-system description object
 *
 * This function runs commit and returns zero in case of success and a negative
 * error code in case of failure. If a commit is already running, this function
 * escalates it to "required" urgency and waits for it to finish instead of
 * starting a second one.
 */
int ubifs_run_commit(struct ubifs_info *c)
{
	int err = 0;

	spin_lock(&c->cs_lock);
	if (c->cmt_state == COMMIT_BROKEN) {
		err = -EINVAL;
		goto out;
	}

	if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
		/*
		 * We set the commit state to 'running required' to indicate
		 * that we want it to complete as quickly as possible.
		 */
		c->cmt_state = COMMIT_RUNNING_REQUIRED;

	if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
		spin_unlock(&c->cs_lock);
		return wait_for_commit(c);
	}
	spin_unlock(&c->cs_lock);

	/* Ok, the commit is indeed needed */

	down_write(&c->commit_sem);
	spin_lock(&c->cs_lock);
	/*
	 * Since we unlocked 'c->cs_lock', the state may have changed, so
	 * re-check it.
	 */
	if (c->cmt_state == COMMIT_BROKEN) {
		err = -EINVAL;
		goto out_cmt_unlock;
	}

	if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
		c->cmt_state = COMMIT_RUNNING_REQUIRED;

	if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
		/* Someone else is committing - drop the rwsem and wait */
		up_write(&c->commit_sem);
		spin_unlock(&c->cs_lock);
		return wait_for_commit(c);
	}
	c->cmt_state = COMMIT_RUNNING_REQUIRED;
	spin_unlock(&c->cs_lock);

	/* do_commit() releases @c->commit_sem */
	err = do_commit(c);
	return err;

out_cmt_unlock:
	up_write(&c->commit_sem);
out:
	spin_unlock(&c->cs_lock);
	return err;
}
| 411 | |||
| 412 | /** | ||
| 413 | * ubifs_gc_should_commit - determine if it is time for GC to run commit. | ||
| 414 | * @c: UBIFS file-system description object | ||
| 415 | * | ||
| 416 | * This function is called by garbage collection to determine if commit should | ||
| 417 | * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal | ||
| 418 | * is full enough to start commit, this function returns true. It is not | ||
| 419 | * absolutely necessary to commit yet, but it feels like this should be better | ||
| 420 | * then to keep doing GC. This function returns %1 if GC has to initiate commit | ||
| 421 | * and %0 if not. | ||
| 422 | */ | ||
| 423 | int ubifs_gc_should_commit(struct ubifs_info *c) | ||
| 424 | { | ||
| 425 | int ret = 0; | ||
| 426 | |||
| 427 | spin_lock(&c->cs_lock); | ||
| 428 | if (c->cmt_state == COMMIT_BACKGROUND) { | ||
| 429 | dbg_cmt("commit required now"); | ||
| 430 | c->cmt_state = COMMIT_REQUIRED; | ||
| 431 | } else | ||
| 432 | dbg_cmt("commit not requested"); | ||
| 433 | if (c->cmt_state == COMMIT_REQUIRED) | ||
| 434 | ret = 1; | ||
| 435 | spin_unlock(&c->cs_lock); | ||
| 436 | return ret; | ||
| 437 | } | ||
| 438 | |||
| 439 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 440 | |||
/**
 * struct idx_node - hold index nodes during index tree traversal.
 * @list: list
 * @iip: index in parent (slot number of this indexing node in the parent
 *       indexing node)
 * @upper_key: all keys in this indexing node have to be less or equivalent to
 *             this key
 * @idx: index node (8-byte aligned because all node structures must be 8-byte
 *       aligned)
 *
 * Note: @idx must remain the last member - allocations over-size the struct
 * by 'ubifs_idx_node_sz(c, c->fanout) - UBIFS_IDX_NODE_SZ' bytes so the
 * branches fit after it (see 'dbg_check_old_index()').
 */
struct idx_node {
	struct list_head list;
	int iip;
	union ubifs_key upper_key;
	struct ubifs_idx_node idx __attribute__((aligned(8)));
};
| 457 | |||
| 458 | /** | ||
| 459 | * dbg_old_index_check_init - get information for the next old index check. | ||
| 460 | * @c: UBIFS file-system description object | ||
| 461 | * @zroot: root of the index | ||
| 462 | * | ||
| 463 | * This function records information about the index that will be needed for the | ||
| 464 | * next old index check i.e. 'dbg_check_old_index()'. | ||
| 465 | * | ||
| 466 | * This function returns %0 on success and a negative error code on failure. | ||
| 467 | */ | ||
| 468 | int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) | ||
| 469 | { | ||
| 470 | struct ubifs_idx_node *idx; | ||
| 471 | int lnum, offs, len, err = 0; | ||
| 472 | |||
| 473 | c->old_zroot = *zroot; | ||
| 474 | |||
| 475 | lnum = c->old_zroot.lnum; | ||
| 476 | offs = c->old_zroot.offs; | ||
| 477 | len = c->old_zroot.len; | ||
| 478 | |||
| 479 | idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); | ||
| 480 | if (!idx) | ||
| 481 | return -ENOMEM; | ||
| 482 | |||
| 483 | err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); | ||
| 484 | if (err) | ||
| 485 | goto out; | ||
| 486 | |||
| 487 | c->old_zroot_level = le16_to_cpu(idx->level); | ||
| 488 | c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); | ||
| 489 | out: | ||
| 490 | kfree(idx); | ||
| 491 | return err; | ||
| 492 | } | ||
| 493 | |||
| 494 | /** | ||
| 495 | * dbg_check_old_index - check the old copy of the index. | ||
| 496 | * @c: UBIFS file-system description object | ||
| 497 | * @zroot: root of the new index | ||
| 498 | * | ||
| 499 | * In order to be able to recover from an unclean unmount, a complete copy of | ||
| 500 | * the index must exist on flash. This is the "old" index. The commit process | ||
| 501 | * must write the "new" index to flash without overwriting or destroying any | ||
| 502 | * part of the old index. This function is run at commit end in order to check | ||
| 503 | * that the old index does indeed exist completely intact. | ||
| 504 | * | ||
| 505 | * This function returns %0 on success and a negative error code on failure. | ||
| 506 | */ | ||
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
	int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
	int first = 1, iip;
	union ubifs_key lower_key, upper_key, l_key, u_key;
	unsigned long long uninitialized_var(last_sqnum);
	struct ubifs_idx_node *idx;
	struct list_head list;
	struct idx_node *i;
	size_t sz;

	/* This check is optional - skip unless explicitly enabled */
	if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
		goto out;

	INIT_LIST_HEAD(&list);

	/*
	 * Allocation size for one list element: the 'struct idx_node' wrapper
	 * already contains space for one index node header, so subtract
	 * UBIFS_IDX_NODE_SZ to avoid counting it twice.
	 */
	sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
	     UBIFS_IDX_NODE_SZ;

	/* Start at the old zroot */
	lnum = c->old_zroot.lnum;
	offs = c->old_zroot.offs;
	len = c->old_zroot.len;
	iip = 0;

	/*
	 * Traverse the index tree preorder depth-first i.e. do a node and then
	 * its subtrees from left to right. The list acts as an explicit stack
	 * of ancestors (root at list head, current node at list tail).
	 */
	while (1) {
		struct ubifs_branch *br;

		/* Get the next index node */
		i = kmalloc(sz, GFP_NOFS);
		if (!i) {
			err = -ENOMEM;
			goto out_free;
		}
		i->iip = iip;
		/* Keep the index nodes on our path in a linked list */
		list_add_tail(&i->list, &list);
		/* Read the index node */
		idx = &i->idx;
		err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
		if (err)
			goto out_free;
		/*
		 * Validate index node. Positive 'err' values below are small
		 * check identifiers (converted to -EINVAL on exit), which
		 * makes the failed check easy to identify in the error print.
		 */
		child_cnt = le16_to_cpu(idx->child_cnt);
		if (child_cnt < 1 || child_cnt > c->fanout) {
			err = 1;
			goto out_dump;
		}
		if (first) {
			first = 0;
			/* Check root level and sqnum */
			if (le16_to_cpu(idx->level) != c->old_zroot_level) {
				err = 2;
				goto out_dump;
			}
			if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
				err = 3;
				goto out_dump;
			}
			/* Set last values as though root had a parent */
			last_level = le16_to_cpu(idx->level) + 1;
			last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
			key_read(c, ubifs_idx_key(c, idx), &lower_key);
			highest_ino_key(c, &upper_key, INUM_WATERMARK);
		}
		key_copy(c, &upper_key, &i->upper_key);
		/*
		 * NOTE(review): check identifier 3 is reused here (it is also
		 * used for the root sqnum check above), so an "error 3" print
		 * is ambiguous - consider renumbering upstream.
		 */
		if (le16_to_cpu(idx->level) != last_level - 1) {
			err = 3;
			goto out_dump;
		}
		/*
		 * The index is always written bottom up hence a child's sqnum
		 * is always less than the parents.
		 */
		if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
			err = 4;
			goto out_dump;
		}
		/* Check key range: all keys must lie in [lower_key, upper_key] */
		key_read(c, ubifs_idx_key(c, idx), &l_key);
		br = ubifs_idx_branch(c, idx, child_cnt - 1);
		key_read(c, &br->key, &u_key);
		if (keys_cmp(c, &lower_key, &l_key) > 0) {
			err = 5;
			goto out_dump;
		}
		if (keys_cmp(c, &upper_key, &u_key) < 0) {
			err = 6;
			goto out_dump;
		}
		/* Equality at the upper bound is only legal for hashed keys */
		if (keys_cmp(c, &upper_key, &u_key) == 0)
			if (!is_hash_key(c, &u_key)) {
				err = 7;
				goto out_dump;
			}
		/* Go to next index node */
		if (le16_to_cpu(idx->level) == 0) {
			/* At the bottom, so go up until can go right */
			while (1) {
				/* Drop the bottom of the list */
				list_del(&i->list);
				kfree(i);
				/* No more list means we are done */
				if (list_empty(&list))
					goto out;
				/* Look at the new bottom */
				i = list_entry(list.prev, struct idx_node,
					       list);
				idx = &i->idx;
				/* Can we go right */
				if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
					iip = iip + 1;
					break;
				} else
					/* Nope, so go up again */
					iip = i->iip;
			}
		} else
			/* Go down left */
			iip = 0;
		/*
		 * We have the parent in 'idx' and now we set up for reading the
		 * child pointed to by slot 'iip'.
		 */
		last_level = le16_to_cpu(idx->level);
		last_sqnum = le64_to_cpu(idx->ch.sqnum);
		br = ubifs_idx_branch(c, idx, iip);
		lnum = le32_to_cpu(br->lnum);
		offs = le32_to_cpu(br->offs);
		len = le32_to_cpu(br->len);
		key_read(c, &br->key, &lower_key);
		if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
			br = ubifs_idx_branch(c, idx, iip + 1);
			key_read(c, &br->key, &upper_key);
		} else
			/* Rightmost child inherits the parent's upper bound */
			key_copy(c, &i->upper_key, &upper_key);
	}
out:
	/* Record the new zroot for the next commit's old-index check */
	err = dbg_old_index_check_init(c, zroot);
	if (err)
		goto out_free;

	return 0;

out_dump:
	dbg_err("dumping index node (iip=%d)", i->iip);
	dbg_dump_node(c, idx);
	list_del(&i->list);
	kfree(i);
	if (!list_empty(&list)) {
		i = list_entry(list.prev, struct idx_node, list);
		dbg_err("dumping parent index node");
		dbg_dump_node(c, &i->idx);
	}
out_free:
	/* Release any ancestors still on the traversal stack */
	while (!list_empty(&list)) {
		i = list_entry(list.next, struct idx_node, list);
		list_del(&i->list);
		kfree(i);
	}
	ubifs_err("failed, error %d", err);
	/* Positive values are internal check identifiers, not errnos */
	if (err > 0)
		err = -EINVAL;
	return err;
}
| 676 | |||
| 677 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c new file mode 100644 index 000000000000..5bb51dac3c16 --- /dev/null +++ b/fs/ubifs/compress.c | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * Copyright (C) 2006, 2007 University of Szeged, Hungary | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify it | ||
| 8 | * under the terms of the GNU General Public License version 2 as published by | ||
| 9 | * the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 14 | * more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License along with | ||
| 17 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 18 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | * | ||
| 20 | * Authors: Adrian Hunter | ||
| 21 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 22 | * Zoltan Sogor | ||
| 23 | */ | ||
| 24 | |||
| 25 | /* | ||
| 26 | * This file provides a single place to access to compression and | ||
| 27 | * decompression. | ||
| 28 | */ | ||
| 29 | |||
| 30 | #include <linux/crypto.h> | ||
| 31 | #include "ubifs.h" | ||
| 32 | |||
| 33 | /* Fake description object for the "none" compressor */ | ||
| 34 | static struct ubifs_compressor none_compr = { | ||
| 35 | .compr_type = UBIFS_COMPR_NONE, | ||
| 36 | .name = "no compression", | ||
| 37 | .capi_name = "", | ||
| 38 | }; | ||
| 39 | |||
| 40 | #ifdef CONFIG_UBIFS_FS_LZO | ||
| 41 | static DEFINE_MUTEX(lzo_mutex); | ||
| 42 | |||
| 43 | static struct ubifs_compressor lzo_compr = { | ||
| 44 | .compr_type = UBIFS_COMPR_LZO, | ||
| 45 | .comp_mutex = &lzo_mutex, | ||
| 46 | .name = "LZO", | ||
| 47 | .capi_name = "lzo", | ||
| 48 | }; | ||
| 49 | #else | ||
| 50 | static struct ubifs_compressor lzo_compr = { | ||
| 51 | .compr_type = UBIFS_COMPR_LZO, | ||
| 52 | .name = "LZO", | ||
| 53 | }; | ||
| 54 | #endif | ||
| 55 | |||
| 56 | #ifdef CONFIG_UBIFS_FS_ZLIB | ||
| 57 | static DEFINE_MUTEX(deflate_mutex); | ||
| 58 | static DEFINE_MUTEX(inflate_mutex); | ||
| 59 | |||
| 60 | static struct ubifs_compressor zlib_compr = { | ||
| 61 | .compr_type = UBIFS_COMPR_ZLIB, | ||
| 62 | .comp_mutex = &deflate_mutex, | ||
| 63 | .decomp_mutex = &inflate_mutex, | ||
| 64 | .name = "zlib", | ||
| 65 | .capi_name = "deflate", | ||
| 66 | }; | ||
| 67 | #else | ||
| 68 | static struct ubifs_compressor zlib_compr = { | ||
| 69 | .compr_type = UBIFS_COMPR_ZLIB, | ||
| 70 | .name = "zlib", | ||
| 71 | }; | ||
| 72 | #endif | ||
| 73 | |||
| 74 | /* All UBIFS compressors */ | ||
| 75 | struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; | ||
| 76 | |||
/**
 * ubifs_compress - compress data.
 * @in_buf: data to compress
 * @in_len: length of the data to compress
 * @out_buf: output buffer where compressed data should be stored
 * @out_len: output buffer length is returned here
 * @compr_type: type of compression to use on enter, actually used compression
 *              type on exit
 *
 * This function compresses input buffer @in_buf of length @in_len and stores
 * the result in the output buffer @out_buf and the resulting length in
 * @out_len. If the input buffer does not compress, it is just copied to the
 * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if
 * compression error occurred.
 *
 * Note, if the input buffer was not compressed, it is copied to the output
 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
 *
 * This function never fails: on any compression problem the data is stored
 * uncompressed and @compr_type reports %UBIFS_COMPR_NONE.
 */
void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
		    int *compr_type)
{
	int err;
	struct ubifs_compressor *compr = ubifs_compressors[*compr_type];

	if (*compr_type == UBIFS_COMPR_NONE)
		goto no_compr;

	/* If the input data is small, do not even try to compress it */
	if (in_len < UBIFS_MIN_COMPR_LEN)
		goto no_compr;

	/* Serialize users of this compressor's crypto transform, if required */
	if (compr->comp_mutex)
		mutex_lock(compr->comp_mutex);
	err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
				   out_len);
	if (compr->comp_mutex)
		mutex_unlock(compr->comp_mutex);
	if (unlikely(err)) {
		ubifs_warn("cannot compress %d bytes, compressor %s, "
			   "error %d, leave data uncompressed",
			   in_len, compr->name, err);
		goto no_compr;
	}

	/*
	 * Presently, we just require that compression results in less data,
	 * rather than any defined minimum compression ratio or amount.
	 */
	if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8))
		goto no_compr;

	return;

no_compr:
	memcpy(out_buf, in_buf, in_len);
	*out_len = in_len;
	*compr_type = UBIFS_COMPR_NONE;
}
| 137 | |||
/**
 * ubifs_decompress - decompress data.
 * @in_buf: data to decompress
 * @in_len: length of the data to decompress
 * @out_buf: output buffer where decompressed data should be stored
 * @out_len: output length is returned here
 * @compr_type: type of compression
 *
 * This function decompresses data from buffer @in_buf into buffer @out_buf.
 * The length of the uncompressed data is returned in @out_len. This function
 * returns %0 on success or a negative error code on failure.
 */
int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
		     int *out_len, int compr_type)
{
	int err;
	struct ubifs_compressor *compr;

	/* @compr_type comes from on-flash data, so range-check it */
	if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
		ubifs_err("invalid compression type %d", compr_type);
		return -EINVAL;
	}

	compr = ubifs_compressors[compr_type];

	/*
	 * Only compiled-out compressors have a NULL capi_name; the "none"
	 * compressor uses an empty (non-NULL) string and passes this check.
	 */
	if (unlikely(!compr->capi_name)) {
		ubifs_err("%s compression is not compiled in", compr->name);
		return -EINVAL;
	}

	if (compr_type == UBIFS_COMPR_NONE) {
		memcpy(out_buf, in_buf, in_len);
		*out_len = in_len;
		return 0;
	}

	/* Serialize users of this compressor's crypto transform, if required */
	if (compr->decomp_mutex)
		mutex_lock(compr->decomp_mutex);
	err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
				     out_len);
	if (compr->decomp_mutex)
		mutex_unlock(compr->decomp_mutex);
	if (err)
		ubifs_err("cannot decompress %d bytes, compressor %s, "
			  "error %d", in_len, compr->name, err);

	return err;
}
| 186 | |||
| 187 | /** | ||
| 188 | * compr_init - initialize a compressor. | ||
| 189 | * @compr: compressor description object | ||
| 190 | * | ||
| 191 | * This function initializes the requested compressor and returns zero in case | ||
| 192 | * of success or a negative error code in case of failure. | ||
| 193 | */ | ||
| 194 | static int __init compr_init(struct ubifs_compressor *compr) | ||
| 195 | { | ||
| 196 | if (compr->capi_name) { | ||
| 197 | compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); | ||
| 198 | if (IS_ERR(compr->cc)) { | ||
| 199 | ubifs_err("cannot initialize compressor %s, error %ld", | ||
| 200 | compr->name, PTR_ERR(compr->cc)); | ||
| 201 | return PTR_ERR(compr->cc); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | ubifs_compressors[compr->compr_type] = compr; | ||
| 206 | return 0; | ||
| 207 | } | ||
| 208 | |||
| 209 | /** | ||
| 210 | * compr_exit - de-initialize a compressor. | ||
| 211 | * @compr: compressor description object | ||
| 212 | */ | ||
| 213 | static void compr_exit(struct ubifs_compressor *compr) | ||
| 214 | { | ||
| 215 | if (compr->capi_name) | ||
| 216 | crypto_free_comp(compr->cc); | ||
| 217 | return; | ||
| 218 | } | ||
| 219 | |||
| 220 | /** | ||
| 221 | * ubifs_compressors_init - initialize UBIFS compressors. | ||
| 222 | * | ||
| 223 | * This function initializes the compressor which were compiled in. Returns | ||
| 224 | * zero in case of success and a negative error code in case of failure. | ||
| 225 | */ | ||
| 226 | int __init ubifs_compressors_init(void) | ||
| 227 | { | ||
| 228 | int err; | ||
| 229 | |||
| 230 | err = compr_init(&lzo_compr); | ||
| 231 | if (err) | ||
| 232 | return err; | ||
| 233 | |||
| 234 | err = compr_init(&zlib_compr); | ||
| 235 | if (err) | ||
| 236 | goto out_lzo; | ||
| 237 | |||
| 238 | ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; | ||
| 239 | return 0; | ||
| 240 | |||
| 241 | out_lzo: | ||
| 242 | compr_exit(&lzo_compr); | ||
| 243 | return err; | ||
| 244 | } | ||
| 245 | |||
| 246 | /** | ||
| 247 | * ubifs_compressors_exit - de-initialize UBIFS compressors. | ||
| 248 | */ | ||
| 249 | void __exit ubifs_compressors_exit(void) | ||
| 250 | { | ||
| 251 | compr_exit(&lzo_compr); | ||
| 252 | compr_exit(&zlib_compr); | ||
| 253 | } | ||
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c new file mode 100644 index 000000000000..4e3aaeba4eca --- /dev/null +++ b/fs/ubifs/debug.c | |||
| @@ -0,0 +1,2289 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements most of the debugging stuff which is compiled in only | ||
| 25 | * when it is enabled. But some debugging check functions are implemented in | ||
| 26 | * corresponding subsystem, just because they are closely related and utilize | ||
| 27 | * various local functions of those subsystems. | ||
| 28 | */ | ||
| 29 | |||
| 30 | #define UBIFS_DBG_PRESERVE_UBI | ||
| 31 | |||
| 32 | #include "ubifs.h" | ||
| 33 | #include <linux/module.h> | ||
| 34 | #include <linux/moduleparam.h> | ||
| 35 | |||
| 36 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 37 | |||
| 38 | DEFINE_SPINLOCK(dbg_lock); | ||
| 39 | |||
| 40 | static char dbg_key_buf0[128]; | ||
| 41 | static char dbg_key_buf1[128]; | ||
| 42 | |||
| 43 | unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; | ||
| 44 | unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; | ||
| 45 | unsigned int ubifs_tst_flags; | ||
| 46 | |||
| 47 | module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); | ||
| 48 | module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); | ||
| 49 | module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); | ||
| 50 | |||
| 51 | MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); | ||
| 52 | MODULE_PARM_DESC(debug_chks, "Debug check flags"); | ||
| 53 | MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); | ||
| 54 | |||
| 55 | static const char *get_key_fmt(int fmt) | ||
| 56 | { | ||
| 57 | switch (fmt) { | ||
| 58 | case UBIFS_SIMPLE_KEY_FMT: | ||
| 59 | return "simple"; | ||
| 60 | default: | ||
| 61 | return "unknown/invalid format"; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | static const char *get_key_hash(int hash) | ||
| 66 | { | ||
| 67 | switch (hash) { | ||
| 68 | case UBIFS_KEY_HASH_R5: | ||
| 69 | return "R5"; | ||
| 70 | case UBIFS_KEY_HASH_TEST: | ||
| 71 | return "test"; | ||
| 72 | default: | ||
| 73 | return "unknown/invalid name hash"; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | static const char *get_key_type(int type) | ||
| 78 | { | ||
| 79 | switch (type) { | ||
| 80 | case UBIFS_INO_KEY: | ||
| 81 | return "inode"; | ||
| 82 | case UBIFS_DENT_KEY: | ||
| 83 | return "direntry"; | ||
| 84 | case UBIFS_XENT_KEY: | ||
| 85 | return "xentry"; | ||
| 86 | case UBIFS_DATA_KEY: | ||
| 87 | return "data"; | ||
| 88 | case UBIFS_TRUN_KEY: | ||
| 89 | return "truncate"; | ||
| 90 | default: | ||
| 91 | return "unknown/invalid key"; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
/*
 * sprintf_key - format a key into @buffer as human-readable text.
 *
 * Note: this writes with plain sprintf(), so @buffer must be large enough
 * for the longest formatted key; the callers use the 128-byte static
 * 'dbg_key_buf0'/'dbg_key_buf1' buffers defined above.
 */
static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
			char *buffer)
{
	char *p = buffer;
	int type = key_type(c, key);

	if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
		switch (type) {
		case UBIFS_INO_KEY:
			sprintf(p, "(%lu, %s)", key_inum(c, key),
			       get_key_type(type));
			break;
		case UBIFS_DENT_KEY:
		case UBIFS_XENT_KEY:
			sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key),
				get_key_type(type), key_hash(c, key));
			break;
		case UBIFS_DATA_KEY:
			sprintf(p, "(%lu, %s, %u)", key_inum(c, key),
				get_key_type(type), key_block(c, key));
			break;
		case UBIFS_TRUN_KEY:
			sprintf(p, "(%lu, %s)",
				key_inum(c, key), get_key_type(type));
			break;
		default:
			/* Dump the raw words so a corrupted key is still visible */
			sprintf(p, "(bad key type: %#08x, %#08x)",
				key->u32[0], key->u32[1]);
		}
	} else
		sprintf(p, "bad key format %d", c->key_fmt);
}
| 127 | |||
/*
 * Format @key into the first shared static buffer and return it. The result
 * is only valid while 'dbg_lock' is held, since the buffer is shared.
 */
const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
{
	/* dbg_lock must be held */
	sprintf_key(c, key, dbg_key_buf0);
	return dbg_key_buf0;
}
| 134 | |||
/*
 * Format @key into the second shared static buffer and return it. A second
 * buffer exists so that two keys may appear in one printk. The result is
 * only valid while 'dbg_lock' is held, since the buffer is shared.
 */
const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
{
	/* dbg_lock must be held */
	sprintf_key(c, key, dbg_key_buf1);
	return dbg_key_buf1;
}
| 141 | |||
| 142 | const char *dbg_ntype(int type) | ||
| 143 | { | ||
| 144 | switch (type) { | ||
| 145 | case UBIFS_PAD_NODE: | ||
| 146 | return "padding node"; | ||
| 147 | case UBIFS_SB_NODE: | ||
| 148 | return "superblock node"; | ||
| 149 | case UBIFS_MST_NODE: | ||
| 150 | return "master node"; | ||
| 151 | case UBIFS_REF_NODE: | ||
| 152 | return "reference node"; | ||
| 153 | case UBIFS_INO_NODE: | ||
| 154 | return "inode node"; | ||
| 155 | case UBIFS_DENT_NODE: | ||
| 156 | return "direntry node"; | ||
| 157 | case UBIFS_XENT_NODE: | ||
| 158 | return "xentry node"; | ||
| 159 | case UBIFS_DATA_NODE: | ||
| 160 | return "data node"; | ||
| 161 | case UBIFS_TRUN_NODE: | ||
| 162 | return "truncate node"; | ||
| 163 | case UBIFS_IDX_NODE: | ||
| 164 | return "indexing node"; | ||
| 165 | case UBIFS_CS_NODE: | ||
| 166 | return "commit start node"; | ||
| 167 | case UBIFS_ORPH_NODE: | ||
| 168 | return "orphan node"; | ||
| 169 | default: | ||
| 170 | return "unknown node"; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | static const char *dbg_gtype(int type) | ||
| 175 | { | ||
| 176 | switch (type) { | ||
| 177 | case UBIFS_NO_NODE_GROUP: | ||
| 178 | return "no node group"; | ||
| 179 | case UBIFS_IN_NODE_GROUP: | ||
| 180 | return "in node group"; | ||
| 181 | case UBIFS_LAST_OF_NODE_GROUP: | ||
| 182 | return "last of node group"; | ||
| 183 | default: | ||
| 184 | return "unknown"; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | const char *dbg_cstate(int cmt_state) | ||
| 189 | { | ||
| 190 | switch (cmt_state) { | ||
| 191 | case COMMIT_RESTING: | ||
| 192 | return "commit resting"; | ||
| 193 | case COMMIT_BACKGROUND: | ||
| 194 | return "background commit requested"; | ||
| 195 | case COMMIT_REQUIRED: | ||
| 196 | return "commit required"; | ||
| 197 | case COMMIT_RUNNING_BACKGROUND: | ||
| 198 | return "BACKGROUND commit running"; | ||
| 199 | case COMMIT_RUNNING_REQUIRED: | ||
| 200 | return "commit running and required"; | ||
| 201 | case COMMIT_BROKEN: | ||
| 202 | return "broken commit"; | ||
| 203 | default: | ||
| 204 | return "unknown commit state"; | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
/* Dump the fields of a common node header ('struct ubifs_ch') */
static void dump_ch(const struct ubifs_ch *ch)
{
	printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
	printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc));
	printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type,
	       dbg_ntype(ch->node_type));
	printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type,
	       dbg_gtype(ch->group_type));
	printk(KERN_DEBUG "\tsqnum %llu\n",
	       (unsigned long long)le64_to_cpu(ch->sqnum));
	printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
}
| 220 | |||
/*
 * dbg_dump_inode - dump the VFS and UBIFS-specific fields of an inode.
 * @c: UBIFS file-system description object (currently unused here; kept for
 *     symmetry with the other dump helpers)
 * @inode: inode to dump
 */
void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
{
	const struct ubifs_inode *ui = ubifs_inode(inode);

	printk(KERN_DEBUG "inode      %lu\n", inode->i_ino);
	printk(KERN_DEBUG "size       %llu\n",
	       (unsigned long long)i_size_read(inode));
	printk(KERN_DEBUG "nlink      %u\n", inode->i_nlink);
	printk(KERN_DEBUG "uid        %u\n", (unsigned int)inode->i_uid);
	printk(KERN_DEBUG "gid        %u\n", (unsigned int)inode->i_gid);
	printk(KERN_DEBUG "atime      %u.%u\n",
	       (unsigned int)inode->i_atime.tv_sec,
	       (unsigned int)inode->i_atime.tv_nsec);
	printk(KERN_DEBUG "mtime      %u.%u\n",
	       (unsigned int)inode->i_mtime.tv_sec,
	       (unsigned int)inode->i_mtime.tv_nsec);
	printk(KERN_DEBUG "ctime       %u.%u\n",
	       (unsigned int)inode->i_ctime.tv_sec,
	       (unsigned int)inode->i_ctime.tv_nsec);
	printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
	printk(KERN_DEBUG "xattr_size  %u\n", ui->xattr_size);
	printk(KERN_DEBUG "xattr_cnt   %u\n", ui->xattr_cnt);
	printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
	printk(KERN_DEBUG "dirty       %u\n", ui->dirty);
	printk(KERN_DEBUG "xattr       %u\n", ui->xattr);
	printk(KERN_DEBUG "flags       %d\n", ui->flags);
	printk(KERN_DEBUG "compr_type  %d\n", ui->compr_type);
	printk(KERN_DEBUG "data_len    %d\n", ui->data_len);
}
| 250 | |||
| 251 | void dbg_dump_node(const struct ubifs_info *c, const void *node) | ||
| 252 | { | ||
| 253 | int i, n; | ||
| 254 | union ubifs_key key; | ||
| 255 | const struct ubifs_ch *ch = node; | ||
| 256 | |||
| 257 | if (dbg_failure_mode) | ||
| 258 | return; | ||
| 259 | |||
| 260 | /* If the magic is incorrect, just hexdump the first bytes */ | ||
| 261 | if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { | ||
| 262 | printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); | ||
| 263 | print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, | ||
| 264 | (void *)node, UBIFS_CH_SZ, 1); | ||
| 265 | return; | ||
| 266 | } | ||
| 267 | |||
| 268 | spin_lock(&dbg_lock); | ||
| 269 | dump_ch(node); | ||
| 270 | |||
| 271 | switch (ch->node_type) { | ||
| 272 | case UBIFS_PAD_NODE: | ||
| 273 | { | ||
| 274 | const struct ubifs_pad_node *pad = node; | ||
| 275 | |||
| 276 | printk(KERN_DEBUG "\tpad_len %u\n", | ||
| 277 | le32_to_cpu(pad->pad_len)); | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | case UBIFS_SB_NODE: | ||
| 281 | { | ||
| 282 | const struct ubifs_sb_node *sup = node; | ||
| 283 | unsigned int sup_flags = le32_to_cpu(sup->flags); | ||
| 284 | |||
| 285 | printk(KERN_DEBUG "\tkey_hash %d (%s)\n", | ||
| 286 | (int)sup->key_hash, get_key_hash(sup->key_hash)); | ||
| 287 | printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", | ||
| 288 | (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); | ||
| 289 | printk(KERN_DEBUG "\tflags %#x\n", sup_flags); | ||
| 290 | printk(KERN_DEBUG "\t big_lpt %u\n", | ||
| 291 | !!(sup_flags & UBIFS_FLG_BIGLPT)); | ||
| 292 | printk(KERN_DEBUG "\tmin_io_size %u\n", | ||
| 293 | le32_to_cpu(sup->min_io_size)); | ||
| 294 | printk(KERN_DEBUG "\tleb_size %u\n", | ||
| 295 | le32_to_cpu(sup->leb_size)); | ||
| 296 | printk(KERN_DEBUG "\tleb_cnt %u\n", | ||
| 297 | le32_to_cpu(sup->leb_cnt)); | ||
| 298 | printk(KERN_DEBUG "\tmax_leb_cnt %u\n", | ||
| 299 | le32_to_cpu(sup->max_leb_cnt)); | ||
| 300 | printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", | ||
| 301 | (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); | ||
| 302 | printk(KERN_DEBUG "\tlog_lebs %u\n", | ||
| 303 | le32_to_cpu(sup->log_lebs)); | ||
| 304 | printk(KERN_DEBUG "\tlpt_lebs %u\n", | ||
| 305 | le32_to_cpu(sup->lpt_lebs)); | ||
| 306 | printk(KERN_DEBUG "\torph_lebs %u\n", | ||
| 307 | le32_to_cpu(sup->orph_lebs)); | ||
| 308 | printk(KERN_DEBUG "\tjhead_cnt %u\n", | ||
| 309 | le32_to_cpu(sup->jhead_cnt)); | ||
| 310 | printk(KERN_DEBUG "\tfanout %u\n", | ||
| 311 | le32_to_cpu(sup->fanout)); | ||
| 312 | printk(KERN_DEBUG "\tlsave_cnt %u\n", | ||
| 313 | le32_to_cpu(sup->lsave_cnt)); | ||
| 314 | printk(KERN_DEBUG "\tdefault_compr %u\n", | ||
| 315 | (int)le16_to_cpu(sup->default_compr)); | ||
| 316 | printk(KERN_DEBUG "\trp_size %llu\n", | ||
| 317 | (unsigned long long)le64_to_cpu(sup->rp_size)); | ||
| 318 | printk(KERN_DEBUG "\trp_uid %u\n", | ||
| 319 | le32_to_cpu(sup->rp_uid)); | ||
| 320 | printk(KERN_DEBUG "\trp_gid %u\n", | ||
| 321 | le32_to_cpu(sup->rp_gid)); | ||
| 322 | printk(KERN_DEBUG "\tfmt_version %u\n", | ||
| 323 | le32_to_cpu(sup->fmt_version)); | ||
| 324 | printk(KERN_DEBUG "\ttime_gran %u\n", | ||
| 325 | le32_to_cpu(sup->time_gran)); | ||
| 326 | printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" | ||
| 327 | "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", | ||
| 328 | sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3], | ||
| 329 | sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7], | ||
| 330 | sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11], | ||
| 331 | sup->uuid[12], sup->uuid[13], sup->uuid[14], | ||
| 332 | sup->uuid[15]); | ||
| 333 | break; | ||
| 334 | } | ||
| 335 | case UBIFS_MST_NODE: | ||
| 336 | { | ||
| 337 | const struct ubifs_mst_node *mst = node; | ||
| 338 | |||
| 339 | printk(KERN_DEBUG "\thighest_inum %llu\n", | ||
| 340 | (unsigned long long)le64_to_cpu(mst->highest_inum)); | ||
| 341 | printk(KERN_DEBUG "\tcommit number %llu\n", | ||
| 342 | (unsigned long long)le64_to_cpu(mst->cmt_no)); | ||
| 343 | printk(KERN_DEBUG "\tflags %#x\n", | ||
| 344 | le32_to_cpu(mst->flags)); | ||
| 345 | printk(KERN_DEBUG "\tlog_lnum %u\n", | ||
| 346 | le32_to_cpu(mst->log_lnum)); | ||
| 347 | printk(KERN_DEBUG "\troot_lnum %u\n", | ||
| 348 | le32_to_cpu(mst->root_lnum)); | ||
| 349 | printk(KERN_DEBUG "\troot_offs %u\n", | ||
| 350 | le32_to_cpu(mst->root_offs)); | ||
| 351 | printk(KERN_DEBUG "\troot_len %u\n", | ||
| 352 | le32_to_cpu(mst->root_len)); | ||
| 353 | printk(KERN_DEBUG "\tgc_lnum %u\n", | ||
| 354 | le32_to_cpu(mst->gc_lnum)); | ||
| 355 | printk(KERN_DEBUG "\tihead_lnum %u\n", | ||
| 356 | le32_to_cpu(mst->ihead_lnum)); | ||
| 357 | printk(KERN_DEBUG "\tihead_offs %u\n", | ||
| 358 | le32_to_cpu(mst->ihead_offs)); | ||
| 359 | printk(KERN_DEBUG "\tindex_size %u\n", | ||
| 360 | le32_to_cpu(mst->index_size)); | ||
| 361 | printk(KERN_DEBUG "\tlpt_lnum %u\n", | ||
| 362 | le32_to_cpu(mst->lpt_lnum)); | ||
| 363 | printk(KERN_DEBUG "\tlpt_offs %u\n", | ||
| 364 | le32_to_cpu(mst->lpt_offs)); | ||
| 365 | printk(KERN_DEBUG "\tnhead_lnum %u\n", | ||
| 366 | le32_to_cpu(mst->nhead_lnum)); | ||
| 367 | printk(KERN_DEBUG "\tnhead_offs %u\n", | ||
| 368 | le32_to_cpu(mst->nhead_offs)); | ||
| 369 | printk(KERN_DEBUG "\tltab_lnum %u\n", | ||
| 370 | le32_to_cpu(mst->ltab_lnum)); | ||
| 371 | printk(KERN_DEBUG "\tltab_offs %u\n", | ||
| 372 | le32_to_cpu(mst->ltab_offs)); | ||
| 373 | printk(KERN_DEBUG "\tlsave_lnum %u\n", | ||
| 374 | le32_to_cpu(mst->lsave_lnum)); | ||
| 375 | printk(KERN_DEBUG "\tlsave_offs %u\n", | ||
| 376 | le32_to_cpu(mst->lsave_offs)); | ||
| 377 | printk(KERN_DEBUG "\tlscan_lnum %u\n", | ||
| 378 | le32_to_cpu(mst->lscan_lnum)); | ||
| 379 | printk(KERN_DEBUG "\tleb_cnt %u\n", | ||
| 380 | le32_to_cpu(mst->leb_cnt)); | ||
| 381 | printk(KERN_DEBUG "\tempty_lebs %u\n", | ||
| 382 | le32_to_cpu(mst->empty_lebs)); | ||
| 383 | printk(KERN_DEBUG "\tidx_lebs %u\n", | ||
| 384 | le32_to_cpu(mst->idx_lebs)); | ||
| 385 | printk(KERN_DEBUG "\ttotal_free %llu\n", | ||
| 386 | (unsigned long long)le64_to_cpu(mst->total_free)); | ||
| 387 | printk(KERN_DEBUG "\ttotal_dirty %llu\n", | ||
| 388 | (unsigned long long)le64_to_cpu(mst->total_dirty)); | ||
| 389 | printk(KERN_DEBUG "\ttotal_used %llu\n", | ||
| 390 | (unsigned long long)le64_to_cpu(mst->total_used)); | ||
| 391 | printk(KERN_DEBUG "\ttotal_dead %llu\n", | ||
| 392 | (unsigned long long)le64_to_cpu(mst->total_dead)); | ||
| 393 | printk(KERN_DEBUG "\ttotal_dark %llu\n", | ||
| 394 | (unsigned long long)le64_to_cpu(mst->total_dark)); | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | case UBIFS_REF_NODE: | ||
| 398 | { | ||
| 399 | const struct ubifs_ref_node *ref = node; | ||
| 400 | |||
| 401 | printk(KERN_DEBUG "\tlnum %u\n", | ||
| 402 | le32_to_cpu(ref->lnum)); | ||
| 403 | printk(KERN_DEBUG "\toffs %u\n", | ||
| 404 | le32_to_cpu(ref->offs)); | ||
| 405 | printk(KERN_DEBUG "\tjhead %u\n", | ||
| 406 | le32_to_cpu(ref->jhead)); | ||
| 407 | break; | ||
| 408 | } | ||
| 409 | case UBIFS_INO_NODE: | ||
| 410 | { | ||
| 411 | const struct ubifs_ino_node *ino = node; | ||
| 412 | |||
| 413 | key_read(c, &ino->key, &key); | ||
| 414 | printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); | ||
| 415 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", | ||
| 416 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); | ||
| 417 | printk(KERN_DEBUG "\tsize %llu\n", | ||
| 418 | (unsigned long long)le64_to_cpu(ino->size)); | ||
| 419 | printk(KERN_DEBUG "\tnlink %u\n", | ||
| 420 | le32_to_cpu(ino->nlink)); | ||
| 421 | printk(KERN_DEBUG "\tatime %lld.%u\n", | ||
| 422 | (long long)le64_to_cpu(ino->atime_sec), | ||
| 423 | le32_to_cpu(ino->atime_nsec)); | ||
| 424 | printk(KERN_DEBUG "\tmtime %lld.%u\n", | ||
| 425 | (long long)le64_to_cpu(ino->mtime_sec), | ||
| 426 | le32_to_cpu(ino->mtime_nsec)); | ||
| 427 | printk(KERN_DEBUG "\tctime %lld.%u\n", | ||
| 428 | (long long)le64_to_cpu(ino->ctime_sec), | ||
| 429 | le32_to_cpu(ino->ctime_nsec)); | ||
| 430 | printk(KERN_DEBUG "\tuid %u\n", | ||
| 431 | le32_to_cpu(ino->uid)); | ||
| 432 | printk(KERN_DEBUG "\tgid %u\n", | ||
| 433 | le32_to_cpu(ino->gid)); | ||
| 434 | printk(KERN_DEBUG "\tmode %u\n", | ||
| 435 | le32_to_cpu(ino->mode)); | ||
| 436 | printk(KERN_DEBUG "\tflags %#x\n", | ||
| 437 | le32_to_cpu(ino->flags)); | ||
| 438 | printk(KERN_DEBUG "\txattr_cnt %u\n", | ||
| 439 | le32_to_cpu(ino->xattr_cnt)); | ||
| 440 | printk(KERN_DEBUG "\txattr_size %u\n", | ||
| 441 | le32_to_cpu(ino->xattr_size)); | ||
| 442 | printk(KERN_DEBUG "\txattr_names %u\n", | ||
| 443 | le32_to_cpu(ino->xattr_names)); | ||
| 444 | printk(KERN_DEBUG "\tcompr_type %#x\n", | ||
| 445 | (int)le16_to_cpu(ino->compr_type)); | ||
| 446 | printk(KERN_DEBUG "\tdata len %u\n", | ||
| 447 | le32_to_cpu(ino->data_len)); | ||
| 448 | break; | ||
| 449 | } | ||
| 450 | case UBIFS_DENT_NODE: | ||
| 451 | case UBIFS_XENT_NODE: | ||
| 452 | { | ||
| 453 | const struct ubifs_dent_node *dent = node; | ||
| 454 | int nlen = le16_to_cpu(dent->nlen); | ||
| 455 | |||
| 456 | key_read(c, &dent->key, &key); | ||
| 457 | printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); | ||
| 458 | printk(KERN_DEBUG "\tinum %llu\n", | ||
| 459 | (unsigned long long)le64_to_cpu(dent->inum)); | ||
| 460 | printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); | ||
| 461 | printk(KERN_DEBUG "\tnlen %d\n", nlen); | ||
| 462 | printk(KERN_DEBUG "\tname "); | ||
| 463 | |||
| 464 | if (nlen > UBIFS_MAX_NLEN) | ||
| 465 | printk(KERN_DEBUG "(bad name length, not printing, " | ||
| 466 | "bad or corrupted node)"); | ||
| 467 | else { | ||
| 468 | for (i = 0; i < nlen && dent->name[i]; i++) | ||
| 469 | printk("%c", dent->name[i]); | ||
| 470 | } | ||
| 471 | printk("\n"); | ||
| 472 | |||
| 473 | break; | ||
| 474 | } | ||
| 475 | case UBIFS_DATA_NODE: | ||
| 476 | { | ||
| 477 | const struct ubifs_data_node *dn = node; | ||
| 478 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; | ||
| 479 | |||
| 480 | key_read(c, &dn->key, &key); | ||
| 481 | printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); | ||
| 482 | printk(KERN_DEBUG "\tsize %u\n", | ||
| 483 | le32_to_cpu(dn->size)); | ||
| 484 | printk(KERN_DEBUG "\tcompr_typ %d\n", | ||
| 485 | (int)le16_to_cpu(dn->compr_type)); | ||
| 486 | printk(KERN_DEBUG "\tdata size %d\n", | ||
| 487 | dlen); | ||
| 488 | printk(KERN_DEBUG "\tdata:\n"); | ||
| 489 | print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, | ||
| 490 | (void *)&dn->data, dlen, 0); | ||
| 491 | break; | ||
| 492 | } | ||
| 493 | case UBIFS_TRUN_NODE: | ||
| 494 | { | ||
| 495 | const struct ubifs_trun_node *trun = node; | ||
| 496 | |||
| 497 | printk(KERN_DEBUG "\tinum %u\n", | ||
| 498 | le32_to_cpu(trun->inum)); | ||
| 499 | printk(KERN_DEBUG "\told_size %llu\n", | ||
| 500 | (unsigned long long)le64_to_cpu(trun->old_size)); | ||
| 501 | printk(KERN_DEBUG "\tnew_size %llu\n", | ||
| 502 | (unsigned long long)le64_to_cpu(trun->new_size)); | ||
| 503 | break; | ||
| 504 | } | ||
| 505 | case UBIFS_IDX_NODE: | ||
| 506 | { | ||
| 507 | const struct ubifs_idx_node *idx = node; | ||
| 508 | |||
| 509 | n = le16_to_cpu(idx->child_cnt); | ||
| 510 | printk(KERN_DEBUG "\tchild_cnt %d\n", n); | ||
| 511 | printk(KERN_DEBUG "\tlevel %d\n", | ||
| 512 | (int)le16_to_cpu(idx->level)); | ||
| 513 | printk(KERN_DEBUG "\tBranches:\n"); | ||
| 514 | |||
| 515 | for (i = 0; i < n && i < c->fanout - 1; i++) { | ||
| 516 | const struct ubifs_branch *br; | ||
| 517 | |||
| 518 | br = ubifs_idx_branch(c, idx, i); | ||
| 519 | key_read(c, &br->key, &key); | ||
| 520 | printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", | ||
| 521 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), | ||
| 522 | le32_to_cpu(br->len), DBGKEY(&key)); | ||
| 523 | } | ||
| 524 | break; | ||
| 525 | } | ||
| 526 | case UBIFS_CS_NODE: | ||
| 527 | break; | ||
| 528 | case UBIFS_ORPH_NODE: | ||
| 529 | { | ||
| 530 | const struct ubifs_orph_node *orph = node; | ||
| 531 | |||
| 532 | printk(KERN_DEBUG "\tcommit number %llu\n", | ||
| 533 | (unsigned long long) | ||
| 534 | le64_to_cpu(orph->cmt_no) & LLONG_MAX); | ||
| 535 | printk(KERN_DEBUG "\tlast node flag %llu\n", | ||
| 536 | (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); | ||
| 537 | n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; | ||
| 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | ||
| 539 | for (i = 0; i < n; i++) | ||
| 540 | printk(KERN_DEBUG "\t ino %llu\n", | ||
| 541 | le64_to_cpu(orph->inos[i])); | ||
| 542 | break; | ||
| 543 | } | ||
| 544 | default: | ||
| 545 | printk(KERN_DEBUG "node type %d was not recognized\n", | ||
| 546 | (int)ch->node_type); | ||
| 547 | } | ||
| 548 | spin_unlock(&dbg_lock); | ||
| 549 | } | ||
| 550 | |||
/**
 * dbg_dump_budget_req - dump a budgeting request.
 * @req: the budgeting request to dump
 *
 * Prints every field of @req at KERN_DEBUG level. The whole multi-line dump
 * is done under @dbg_lock so it is not interleaved with other debug output.
 */
void dbg_dump_budget_req(const struct ubifs_budget_req *req)
{
	spin_lock(&dbg_lock);
	printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n",
	       req->new_ino, req->dirtied_ino);
	printk(KERN_DEBUG "\tnew_ino_d   %d, dirtied_ino_d %d\n",
	       req->new_ino_d, req->dirtied_ino_d);
	printk(KERN_DEBUG "\tnew_page    %d, dirtied_page %d\n",
	       req->new_page, req->dirtied_page);
	printk(KERN_DEBUG "\tnew_dent    %d, mod_dent     %d\n",
	       req->new_dent, req->mod_dent);
	printk(KERN_DEBUG "\tidx_growth  %d\n", req->idx_growth);
	printk(KERN_DEBUG "\tdata_growth %d dd_growth     %d\n",
	       req->data_growth, req->dd_growth);
	spin_unlock(&dbg_lock);
}
| 567 | |||
/**
 * dbg_dump_lstats - dump LEB properties statistics.
 * @lst: lprops statistics to dump
 *
 * Prints the counters and space totals from @lst under @dbg_lock so the
 * three lines come out together.
 */
void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
{
	spin_lock(&dbg_lock);
	printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs  %d\n",
	       lst->empty_lebs, lst->idx_lebs);
	printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
	       "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
	       lst->total_dirty);
	printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, "
	       "total_dead %lld\n", lst->total_used, lst->total_dark,
	       lst->total_dead);
	spin_unlock(&dbg_lock);
}
| 581 | |||
/**
 * dbg_dump_budg - dump budgeting information.
 * @c: UBIFS file-system description object
 *
 * Dumps the budgeting counters, the journal heads, the bud LEBs (current and
 * old), and the GC'ed index LEB list. Everything is printed under @dbg_lock
 * so the dump is contiguous in the log.
 *
 * NOTE(review): @dbg_lock does not protect the buds rb-tree or the bud/idx_gc
 * lists themselves — presumably callers hold the appropriate locks or this is
 * best-effort debugging output; confirm against call sites.
 */
void dbg_dump_budg(struct ubifs_info *c)
{
	int i;
	struct rb_node *rb;
	struct ubifs_bud *bud;
	struct ubifs_gced_idx_leb *idx_gc;

	spin_lock(&dbg_lock);
	printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
	       "budg_dd_growth %lld, budg_idx_growth %lld\n",
	       c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
	printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
	       "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
	       c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
	       c->freeable_cnt);
	printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
	       "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
	       c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
	printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
	       "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
	       atomic_long_read(&c->dirty_zn_cnt),
	       atomic_long_read(&c->clean_zn_cnt));
	printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
	       c->dark_wm, c->dead_wm, c->max_idx_node_sz);
	printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
	       c->gc_lnum, c->ihead_lnum);
	/* One line per journal head: which LEB its write-buffer points at */
	for (i = 0; i < c->jhead_cnt; i++)
		printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
		       c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
	/* Current buds are kept in an rb-tree keyed by LEB number */
	for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
		bud = rb_entry(rb, struct ubifs_bud, rb);
		printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
	}
	list_for_each_entry(bud, &c->old_buds, list)
		printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
	list_for_each_entry(idx_gc, &c->idx_gc, list)
		printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
		       idx_gc->lnum, idx_gc->unmap);
	printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
	spin_unlock(&dbg_lock);
}
| 623 | |||
/**
 * dbg_dump_lprop - dump the properties of one LEB.
 * @c: UBIFS file-system description object
 * @lp: LEB properties to dump
 *
 * A single printk, so no locking is needed. "used" is derived as
 * leb_size - free - dirty.
 */
void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
{
	printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
	       "flags %#x\n", lp->lnum, lp->free, lp->dirty,
	       c->leb_size - lp->free - lp->dirty, lp->flags);
}
| 630 | |||
| 631 | void dbg_dump_lprops(struct ubifs_info *c) | ||
| 632 | { | ||
| 633 | int lnum, err; | ||
| 634 | struct ubifs_lprops lp; | ||
| 635 | struct ubifs_lp_stats lst; | ||
| 636 | |||
| 637 | printk(KERN_DEBUG "Dumping LEB properties\n"); | ||
| 638 | ubifs_get_lp_stats(c, &lst); | ||
| 639 | dbg_dump_lstats(&lst); | ||
| 640 | |||
| 641 | for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { | ||
| 642 | err = ubifs_read_one_lp(c, lnum, &lp); | ||
| 643 | if (err) | ||
| 644 | ubifs_err("cannot read lprops for LEB %d", lnum); | ||
| 645 | |||
| 646 | dbg_dump_lprop(c, &lp); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 650 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) | ||
| 651 | { | ||
| 652 | struct ubifs_scan_leb *sleb; | ||
| 653 | struct ubifs_scan_node *snod; | ||
| 654 | |||
| 655 | if (dbg_failure_mode) | ||
| 656 | return; | ||
| 657 | |||
| 658 | printk(KERN_DEBUG "Dumping LEB %d\n", lnum); | ||
| 659 | |||
| 660 | sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); | ||
| 661 | if (IS_ERR(sleb)) { | ||
| 662 | ubifs_err("scan error %d", (int)PTR_ERR(sleb)); | ||
| 663 | return; | ||
| 664 | } | ||
| 665 | |||
| 666 | printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, | ||
| 667 | sleb->nodes_cnt, sleb->endpt); | ||
| 668 | |||
| 669 | list_for_each_entry(snod, &sleb->nodes, list) { | ||
| 670 | cond_resched(); | ||
| 671 | printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, | ||
| 672 | snod->offs, snod->len); | ||
| 673 | dbg_dump_node(c, snod->node); | ||
| 674 | } | ||
| 675 | |||
| 676 | ubifs_scan_destroy(sleb); | ||
| 677 | return; | ||
| 678 | } | ||
| 679 | |||
/**
 * dbg_dump_znode - dump an in-memory TNC znode and its zbranches.
 * @c: UBIFS file-system description object
 * @znode: the znode to dump
 *
 * Prints the znode header (location taken from the zbranch that points to it,
 * or from the tree root if it has no parent) and then each child zbranch.
 * If the child count is out of range the zbranches are not dumped, since the
 * node is corrupted and iterating them would be unsafe. Done under @dbg_lock
 * so the multi-line dump stays contiguous.
 */
void dbg_dump_znode(const struct ubifs_info *c,
		    const struct ubifs_znode *znode)
{
	int n;
	const struct ubifs_zbranch *zbr;

	spin_lock(&dbg_lock);
	/* Find the zbranch that refers to this znode to get its flash address */
	if (znode->parent)
		zbr = &znode->parent->zbranch[znode->iip];
	else
		zbr = &c->zroot;

	printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
	       " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
	       zbr->len, znode->parent, znode->iip, znode->level,
	       znode->child_cnt, znode->flags);

	if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
		spin_unlock(&dbg_lock);
		return;
	}

	printk(KERN_DEBUG "zbranches:\n");
	for (n = 0; n < znode->child_cnt; n++) {
		zbr = &znode->zbranch[n];
		/* Level > 0 branches point at znodes; level 0 at leaf nodes (LNC) */
		if (znode->level > 0)
			printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
			       "%s\n", n, zbr->znode, zbr->lnum,
			       zbr->offs, zbr->len,
			       DBGKEY(&zbr->key));
		else
			printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
			       "%s\n", n, zbr->znode, zbr->lnum,
			       zbr->offs, zbr->len,
			       DBGKEY(&zbr->key));
	}
	spin_unlock(&dbg_lock);
}
| 718 | |||
/**
 * dbg_dump_heap - dump an LPT category heap.
 * @c: UBIFS file-system description object
 * @heap: the LEB-properties heap to dump
 * @cat: heap category number (used only for the dump header)
 *
 * Prints one line per heap element with its LEB number, heap position,
 * free/dirty space and flags.
 */
void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
{
	int i;

	printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
	       cat, heap->cnt);
	for (i = 0; i < heap->cnt; i++) {
		struct ubifs_lprops *lprops = heap->arr[i];

		printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d "
		       "flags %d\n", i, lprops->lnum, lprops->hpos,
		       lprops->free, lprops->dirty, lprops->flags);
	}
}
| 733 | |||
/**
 * dbg_dump_pnode - dump an LPT pnode.
 * @c: UBIFS file-system description object
 * @pnode: the pnode to dump
 * @parent: parent nnode (printed as an address only)
 * @iip: index of @pnode in the parent (printed only)
 *
 * Prints the pnode header followed by the LEB properties of each of its
 * UBIFS_LPT_FANOUT slots. Pointers are cast to size_t for the %zx format.
 */
void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
		    struct ubifs_nnode *parent, int iip)
{
	int i;

	printk(KERN_DEBUG "Dumping pnode:\n");
	printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
	       (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
	printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
	       pnode->flags, iip, pnode->level, pnode->num);
	for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
		struct ubifs_lprops *lp = &pnode->lprops[i];

		printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
		       i, lp->free, lp->dirty, lp->flags, lp->lnum);
	}
}
| 751 | |||
| 752 | void dbg_dump_tnc(struct ubifs_info *c) | ||
| 753 | { | ||
| 754 | struct ubifs_znode *znode; | ||
| 755 | int level; | ||
| 756 | |||
| 757 | printk(KERN_DEBUG "\n"); | ||
| 758 | printk(KERN_DEBUG "Dumping the TNC tree\n"); | ||
| 759 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | ||
| 760 | level = znode->level; | ||
| 761 | printk(KERN_DEBUG "== Level %d ==\n", level); | ||
| 762 | while (znode) { | ||
| 763 | if (level != znode->level) { | ||
| 764 | level = znode->level; | ||
| 765 | printk(KERN_DEBUG "== Level %d ==\n", level); | ||
| 766 | } | ||
| 767 | dbg_dump_znode(c, znode); | ||
| 768 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); | ||
| 769 | } | ||
| 770 | |||
| 771 | printk(KERN_DEBUG "\n"); | ||
| 772 | } | ||
| 773 | |||
/*
 * dump_znode - dbg_walk_index() callback which dumps one znode.
 * @priv is unused; always returns 0 so the walk continues.
 */
static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
		      void *priv)
{
	dbg_dump_znode(c, znode);
	return 0;
}
| 780 | |||
/**
 * dbg_dump_index - dump the on-flash index.
 * @c: UBIFS file-system description object
 *
 * This function dumps the whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()'
 * which dumps only the in-memory znodes and does not read znodes from flash.
 */
void dbg_dump_index(struct ubifs_info *c)
{
	dbg_walk_index(c, NULL, dump_znode, NULL);
}
| 792 | |||
/**
 * dbg_check_synced_i_size - check synchronized inode size.
 * @inode: inode to check
 *
 * If inode is clean, synchronized inode size has to be equivalent to current
 * inode size. This function has to be called only for locked inodes (@i_mutex
 * has to be locked). Returns %0 if synchronized inode size is correct, and
 * %-EINVAL if not.
 */
int dbg_check_synced_i_size(struct inode *inode)
{
	int err = 0;
	struct ubifs_inode *ui = ubifs_inode(inode);

	/* Only run when general debugging checks are enabled */
	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
		return 0;
	/* The invariant only holds for regular files */
	if (!S_ISREG(inode->i_mode))
		return 0;

	mutex_lock(&ui->ui_mutex);
	spin_lock(&ui->ui_lock);
	/* A clean inode must have its size fully synchronized to the media */
	if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
		ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
			  "is clean", ui->ui_size, ui->synced_i_size);
		ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
			  inode->i_mode, i_size_read(inode));
		dbg_dump_stack();
		err = -EINVAL;
	}
	spin_unlock(&ui->ui_lock);
	mutex_unlock(&ui->ui_mutex);
	return err;
}
| 826 | |||
/**
 * dbg_check_dir_size - check directory inode size and link count.
 * @c: UBIFS file-system description object
 * @dir: the directory to calculate size for
 *
 * This function makes sure that directory size and link count are correct by
 * walking all direntries of @dir via the TNC and recomputing both values.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 *
 * Note, it is a good idea to make sure the @dir->i_mutex is locked before
 * calling this function.
 */
int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
{
	unsigned int nlink = 2;	/* every directory has "." and ".." */
	union ubifs_key key;
	struct ubifs_dent_node *dent, *pdent = NULL;
	struct qstr nm = { .name = NULL };
	loff_t size = UBIFS_INO_NODE_SZ;

	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
		return 0;

	if (!S_ISDIR(dir->i_mode))
		return 0;

	/* Iterate all direntries of @dir in key order */
	lowest_dent_key(c, &key, dir->i_ino);
	while (1) {
		int err;

		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			if (err == -ENOENT)
				break;	/* no more entries */
			return err;
		}

		nm.name = dent->name;
		nm.len = le16_to_cpu(dent->nlen);
		size += CALC_DENT_SIZE(nm.len);
		if (dent->type == UBIFS_ITYPE_DIR)
			nlink += 1;	/* sub-directory's ".." counts */
		/* Keep the previous entry alive - @nm still points into it */
		kfree(pdent);
		pdent = dent;
		key_read(c, &dent->key, &key);
	}
	kfree(pdent);

	if (i_size_read(dir) != size) {
		ubifs_err("directory inode %lu has size %llu, "
			  "but calculated size is %llu", dir->i_ino,
			  (unsigned long long)i_size_read(dir),
			  (unsigned long long)size);
		dump_stack();
		return -EINVAL;
	}
	if (dir->i_nlink != nlink) {
		ubifs_err("directory inode %lu has nlink %u, but calculated "
			  "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
		dump_stack();
		return -EINVAL;
	}

	return 0;
}
| 894 | |||
/**
 * dbg_check_key_order - make sure that colliding keys are properly ordered.
 * @c: UBIFS file-system description object
 * @zbr1: first zbranch
 * @zbr2: following zbranch
 *
 * In UBIFS indexing B-tree colliding keys have to be sorted in binary order of
 * names of the direntries/xentries which are referred by the keys. This
 * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes
 * sure the name of direntry/xentry referred by @zbr1 is less than
 * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not,
 * and a negative error code in case of failure.
 */
static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
			       struct ubifs_zbranch *zbr2)
{
	int err, nlen1, nlen2, cmp;
	struct ubifs_dent_node *dent1, *dent2;
	union ubifs_key key;

	ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
	dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
	if (!dent1)
		return -ENOMEM;
	dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
	if (!dent2) {
		err = -ENOMEM;
		goto out_free;	/* kfree(NULL) below is a no-op */
	}

	err = ubifs_tnc_read_node(c, zbr1, dent1);
	if (err)
		goto out_free;
	err = ubifs_validate_entry(c, dent1);
	if (err)
		goto out_free;

	err = ubifs_tnc_read_node(c, zbr2, dent2);
	if (err)
		goto out_free;
	err = ubifs_validate_entry(c, dent2);
	if (err)
		goto out_free;

	/* Make sure node keys are the same as in zbranch */
	err = 1;	/* from here on, falling through means "bad order" */
	key_read(c, &dent1->key, &key);
	if (keys_cmp(c, &zbr1->key, &key)) {
		dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
			zbr1->offs, DBGKEY(&key));
		dbg_err("but it should have key %s according to tnc",
			DBGKEY(&zbr1->key));
		dbg_dump_node(c, dent1);
		goto out_free;
	}

	key_read(c, &dent2->key, &key);
	if (keys_cmp(c, &zbr2->key, &key)) {
		dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
			zbr1->offs, DBGKEY(&key));
		dbg_err("but it should have key %s according to tnc",
			DBGKEY(&zbr2->key));
		dbg_dump_node(c, dent2);
		goto out_free;
	}

	nlen1 = le16_to_cpu(dent1->nlen);
	nlen2 = le16_to_cpu(dent2->nlen);

	/* Binary name comparison; shorter name wins a common-prefix tie */
	cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2));
	if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) {
		err = 0;	/* order is correct */
		goto out_free;
	}
	if (cmp == 0 && nlen1 == nlen2)
		dbg_err("2 xent/dent nodes with the same name");
	else
		dbg_err("bad order of colliding key %s",
			DBGKEY(&key));

	dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
	dbg_dump_node(c, dent1);
	dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
	dbg_dump_node(c, dent2);

out_free:
	kfree(dent2);
	kfree(dent1);
	return err;
}
| 985 | |||
/**
 * dbg_check_znode - check if znode is all right.
 * @c: UBIFS file-system description object
 * @zbr: zbranch which points to this znode
 *
 * This function makes sure that znode referred to by @zbr is all right.
 * Returns zero if it is, and %-EINVAL if it is not. On failure the specific
 * violation is identified by an internal error number (1-20) printed before
 * the znode dump.
 */
static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
{
	struct ubifs_znode *znode = zbr->znode;
	struct ubifs_znode *zp = znode->parent;
	int n, err, cmp;

	/* Basic sanity of the znode header fields */
	if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
		err = 1;
		goto out;
	}
	if (znode->level < 0) {
		err = 2;
		goto out;
	}
	if (znode->iip < 0 || znode->iip >= c->fanout) {
		err = 3;
		goto out;
	}

	if (zbr->len == 0)
		/* Only dirty zbranch may have no on-flash nodes */
		if (!ubifs_zn_dirty(znode)) {
			err = 4;
			goto out;
		}

	if (ubifs_zn_dirty(znode)) {
		/*
		 * If znode is dirty, its parent has to be dirty as well. The
		 * order of the operation is important, so we have to have
		 * memory barriers.
		 */
		smp_mb();
		if (zp && !ubifs_zn_dirty(zp)) {
			/*
			 * The dirty flag is atomic and is cleared outside the
			 * TNC mutex, so znode's dirty flag may now have
			 * been cleared. The child is always cleared before the
			 * parent, so we just need to check again.
			 */
			smp_mb();
			if (ubifs_zn_dirty(znode)) {
				err = 5;
				goto out;
			}
		}
	}

	if (zp) {
		const union ubifs_key *min, *max;

		if (znode->level != zp->level - 1) {
			err = 6;
			goto out;
		}

		/* Make sure the 'parent' pointer in our znode is correct */
		err = ubifs_search_zbranch(c, zp, &zbr->key, &n);
		if (!err) {
			/* This zbranch does not exist in the parent */
			err = 7;
			goto out;
		}

		if (znode->iip >= zp->child_cnt) {
			err = 8;
			goto out;
		}

		if (znode->iip != n) {
			/* This may happen only in case of collisions */
			if (keys_cmp(c, &zp->zbranch[n].key,
				     &zp->zbranch[znode->iip].key)) {
				err = 9;
				goto out;
			}
			n = znode->iip;
		}

		/*
		 * Make sure that the first key in our znode is greater than or
		 * equal to the key in the pointing zbranch.
		 */
		min = &zbr->key;
		cmp = keys_cmp(c, min, &znode->zbranch[0].key);
		if (cmp == 1) {
			err = 10;
			goto out;
		}

		if (n + 1 < zp->child_cnt) {
			max = &zp->zbranch[n + 1].key;

			/*
			 * Make sure the last key in our znode is less or
			 * equivalent than the key in the zbranch which goes
			 * after our pointing zbranch.
			 */
			cmp = keys_cmp(c, max,
				&znode->zbranch[znode->child_cnt - 1].key);
			if (cmp == -1) {
				err = 11;
				goto out;
			}
		}
	} else {
		/* This may only be root znode */
		if (zbr != &c->zroot) {
			err = 12;
			goto out;
		}
	}

	/*
	 * Make sure that next key is greater or equivalent then the previous
	 * one.
	 */
	for (n = 1; n < znode->child_cnt; n++) {
		cmp = keys_cmp(c, &znode->zbranch[n - 1].key,
			       &znode->zbranch[n].key);
		if (cmp > 0) {
			err = 13;
			goto out;
		}
		if (cmp == 0) {
			/* This can only be keys with colliding hash */
			if (!is_hash_key(c, &znode->zbranch[n].key)) {
				err = 14;
				goto out;
			}

			if (znode->level != 0 || c->replaying)
				continue;

			/*
			 * Colliding keys should follow binary order of
			 * corresponding xentry/dentry names.
			 */
			err = dbg_check_key_order(c, &znode->zbranch[n - 1],
						  &znode->zbranch[n]);
			if (err < 0)
				return err;
			if (err) {
				err = 15;
				goto out;
			}
		}
	}

	/* Sanity-check every zbranch's location fields */
	for (n = 0; n < znode->child_cnt; n++) {
		if (!znode->zbranch[n].znode &&
		    (znode->zbranch[n].lnum == 0 ||
		     znode->zbranch[n].len == 0)) {
			err = 16;
			goto out;
		}

		if (znode->zbranch[n].lnum != 0 &&
		    znode->zbranch[n].len == 0) {
			err = 17;
			goto out;
		}

		if (znode->zbranch[n].lnum == 0 &&
		    znode->zbranch[n].len != 0) {
			err = 18;
			goto out;
		}

		if (znode->zbranch[n].lnum == 0 &&
		    znode->zbranch[n].offs != 0) {
			err = 19;
			goto out;
		}

		if (znode->level != 0 && znode->zbranch[n].znode)
			if (znode->zbranch[n].znode->parent != znode) {
				err = 20;
				goto out;
			}
	}

	return 0;

out:
	ubifs_err("failed, error %d", err);
	ubifs_msg("dump of the znode");
	dbg_dump_znode(c, znode);
	if (zp) {
		ubifs_msg("dump of the parent znode");
		dbg_dump_znode(c, zp);
	}
	dump_stack();
	return -EINVAL;
}
| 1189 | |||
/**
 * dbg_check_tnc - check TNC tree.
 * @c: UBIFS file-system description object
 * @extra: do extra checks that are possible at start commit
 *
 * This function traverses the whole TNC tree and checks every znode. Returns
 * zero if everything is all right and %-EINVAL if something is wrong with
 * TNC. When @extra is set it also verifies that the clean/dirty znode
 * counters match the tree contents. Must be called with @c->tnc_mutex held.
 */
int dbg_check_tnc(struct ubifs_info *c, int extra)
{
	struct ubifs_znode *znode;
	long clean_cnt = 0, dirty_cnt = 0;
	int err, last;

	if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
		return 0;

	ubifs_assert(mutex_is_locked(&c->tnc_mutex));
	/* Empty in-memory TNC is trivially consistent */
	if (!c->zroot.znode)
		return 0;

	znode = ubifs_tnc_postorder_first(c->zroot.znode);
	while (1) {
		struct ubifs_znode *prev;
		struct ubifs_zbranch *zbr;

		/* The zbranch pointing at this znode (or the root zbranch) */
		if (!znode->parent)
			zbr = &c->zroot;
		else
			zbr = &znode->parent->zbranch[znode->iip];

		err = dbg_check_znode(c, zbr);
		if (err)
			return err;

		if (extra) {
			if (ubifs_zn_dirty(znode))
				dirty_cnt += 1;
			else
				clean_cnt += 1;
		}

		prev = znode;
		znode = ubifs_tnc_postorder_next(znode);
		if (!znode)
			break;

		/*
		 * If the last key of this znode is equivalent to the first key
		 * of the next znode (collision), then check order of the keys.
		 */
		last = prev->child_cnt - 1;
		if (prev->level == 0 && znode->level == 0 && !c->replaying &&
		    !keys_cmp(c, &prev->zbranch[last].key,
			      &znode->zbranch[0].key)) {
			err = dbg_check_key_order(c, &prev->zbranch[last],
						  &znode->zbranch[0]);
			if (err < 0)
				return err;
			if (err) {
				ubifs_msg("first znode");
				dbg_dump_znode(c, prev);
				ubifs_msg("second znode");
				dbg_dump_znode(c, znode);
				return -EINVAL;
			}
		}
	}

	if (extra) {
		/* Counted totals must match the global atomic counters */
		if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
			ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld",
				  atomic_long_read(&c->clean_zn_cnt),
				  clean_cnt);
			return -EINVAL;
		}
		if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
			ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld",
				  atomic_long_read(&c->dirty_zn_cnt),
				  dirty_cnt);
			return -EINVAL;
		}
	}

	return 0;
}
| 1276 | |||
| 1277 | /** | ||
| 1278 | * dbg_walk_index - walk the on-flash index. | ||
| 1279 | * @c: UBIFS file-system description object | ||
| 1280 | * @leaf_cb: called for each leaf node | ||
| 1281 | * @znode_cb: called for each indexing node | ||
| 1282 | * @priv: private date which is passed to callbacks | ||
| 1283 | * | ||
| 1284 | * This function walks the UBIFS index and calls the @leaf_cb for each leaf | ||
| 1285 | * node and @znode_cb for each indexing node. Returns zero in case of success | ||
| 1286 | * and a negative error code in case of failure. | ||
| 1287 | * | ||
| 1288 | * It would be better if this function removed every znode it pulled to into | ||
| 1289 | * the TNC, so that the behavior more closely matched the non-debugging | ||
| 1290 | * behavior. | ||
| 1291 | */ | ||
| 1292 | int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, | ||
| 1293 | dbg_znode_callback znode_cb, void *priv) | ||
| 1294 | { | ||
| 1295 | int err; | ||
| 1296 | struct ubifs_zbranch *zbr; | ||
| 1297 | struct ubifs_znode *znode, *child; | ||
| 1298 | |||
| 1299 | mutex_lock(&c->tnc_mutex); | ||
| 1300 | /* If the root indexing node is not in TNC - pull it */ | ||
| 1301 | if (!c->zroot.znode) { | ||
| 1302 | c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0); | ||
| 1303 | if (IS_ERR(c->zroot.znode)) { | ||
| 1304 | err = PTR_ERR(c->zroot.znode); | ||
| 1305 | c->zroot.znode = NULL; | ||
| 1306 | goto out_unlock; | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | /* | ||
| 1311 | * We are going to traverse the indexing tree in the postorder manner. | ||
| 1312 | * Go down and find the leftmost indexing node where we are going to | ||
| 1313 | * start from. | ||
| 1314 | */ | ||
| 1315 | znode = c->zroot.znode; | ||
| 1316 | while (znode->level > 0) { | ||
| 1317 | zbr = &znode->zbranch[0]; | ||
| 1318 | child = zbr->znode; | ||
| 1319 | if (!child) { | ||
| 1320 | child = ubifs_load_znode(c, zbr, znode, 0); | ||
| 1321 | if (IS_ERR(child)) { | ||
| 1322 | err = PTR_ERR(child); | ||
| 1323 | goto out_unlock; | ||
| 1324 | } | ||
| 1325 | zbr->znode = child; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | znode = child; | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | /* Iterate over all indexing nodes */ | ||
| 1332 | while (1) { | ||
| 1333 | int idx; | ||
| 1334 | |||
| 1335 | cond_resched(); | ||
| 1336 | |||
| 1337 | if (znode_cb) { | ||
| 1338 | err = znode_cb(c, znode, priv); | ||
| 1339 | if (err) { | ||
| 1340 | ubifs_err("znode checking function returned " | ||
| 1341 | "error %d", err); | ||
| 1342 | dbg_dump_znode(c, znode); | ||
| 1343 | goto out_dump; | ||
| 1344 | } | ||
| 1345 | } | ||
| 1346 | if (leaf_cb && znode->level == 0) { | ||
| 1347 | for (idx = 0; idx < znode->child_cnt; idx++) { | ||
| 1348 | zbr = &znode->zbranch[idx]; | ||
| 1349 | err = leaf_cb(c, zbr, priv); | ||
| 1350 | if (err) { | ||
| 1351 | ubifs_err("leaf checking function " | ||
| 1352 | "returned error %d, for leaf " | ||
| 1353 | "at LEB %d:%d", | ||
| 1354 | err, zbr->lnum, zbr->offs); | ||
| 1355 | goto out_dump; | ||
| 1356 | } | ||
| 1357 | } | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | if (!znode->parent) | ||
| 1361 | break; | ||
| 1362 | |||
| 1363 | idx = znode->iip + 1; | ||
| 1364 | znode = znode->parent; | ||
| 1365 | if (idx < znode->child_cnt) { | ||
| 1366 | /* Switch to the next index in the parent */ | ||
| 1367 | zbr = &znode->zbranch[idx]; | ||
| 1368 | child = zbr->znode; | ||
| 1369 | if (!child) { | ||
| 1370 | child = ubifs_load_znode(c, zbr, znode, idx); | ||
| 1371 | if (IS_ERR(child)) { | ||
| 1372 | err = PTR_ERR(child); | ||
| 1373 | goto out_unlock; | ||
| 1374 | } | ||
| 1375 | zbr->znode = child; | ||
| 1376 | } | ||
| 1377 | znode = child; | ||
| 1378 | } else | ||
| 1379 | /* | ||
| 1380 | * This is the last child, switch to the parent and | ||
| 1381 | * continue. | ||
| 1382 | */ | ||
| 1383 | continue; | ||
| 1384 | |||
| 1385 | /* Go to the lowest leftmost znode in the new sub-tree */ | ||
| 1386 | while (znode->level > 0) { | ||
| 1387 | zbr = &znode->zbranch[0]; | ||
| 1388 | child = zbr->znode; | ||
| 1389 | if (!child) { | ||
| 1390 | child = ubifs_load_znode(c, zbr, znode, 0); | ||
| 1391 | if (IS_ERR(child)) { | ||
| 1392 | err = PTR_ERR(child); | ||
| 1393 | goto out_unlock; | ||
| 1394 | } | ||
| 1395 | zbr->znode = child; | ||
| 1396 | } | ||
| 1397 | znode = child; | ||
| 1398 | } | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | mutex_unlock(&c->tnc_mutex); | ||
| 1402 | return 0; | ||
| 1403 | |||
| 1404 | out_dump: | ||
| 1405 | if (znode->parent) | ||
| 1406 | zbr = &znode->parent->zbranch[znode->iip]; | ||
| 1407 | else | ||
| 1408 | zbr = &c->zroot; | ||
| 1409 | ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); | ||
| 1410 | dbg_dump_znode(c, znode); | ||
| 1411 | out_unlock: | ||
| 1412 | mutex_unlock(&c->tnc_mutex); | ||
| 1413 | return err; | ||
| 1414 | } | ||
| 1415 | |||
| 1416 | /** | ||
| 1417 | * add_size - add znode size to partially calculated index size. | ||
| 1418 | * @c: UBIFS file-system description object | ||
| 1419 | * @znode: znode to add size for | ||
| 1420 | * @priv: partially calculated index size | ||
| 1421 | * | ||
| 1422 | * This is a helper function for 'dbg_check_idx_size()' which is called for | ||
| 1423 | * every indexing node and adds its size to the 'long long' variable pointed to | ||
| 1424 | * by @priv. | ||
| 1425 | */ | ||
| 1426 | static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) | ||
| 1427 | { | ||
| 1428 | long long *idx_size = priv; | ||
| 1429 | int add; | ||
| 1430 | |||
| 1431 | add = ubifs_idx_node_sz(c, znode->child_cnt); | ||
| 1432 | add = ALIGN(add, 8); | ||
| 1433 | *idx_size += add; | ||
| 1434 | return 0; | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | /** | ||
| 1438 | * dbg_check_idx_size - check index size. | ||
| 1439 | * @c: UBIFS file-system description object | ||
| 1440 | * @idx_size: size to check | ||
| 1441 | * | ||
| 1442 | * This function walks the UBIFS index, calculates its size and checks that the | ||
| 1443 | * size is equivalent to @idx_size. Returns zero in case of success and a | ||
| 1444 | * negative error code in case of failure. | ||
| 1445 | */ | ||
| 1446 | int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) | ||
| 1447 | { | ||
| 1448 | int err; | ||
| 1449 | long long calc = 0; | ||
| 1450 | |||
| 1451 | if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) | ||
| 1452 | return 0; | ||
| 1453 | |||
| 1454 | err = dbg_walk_index(c, NULL, add_size, &calc); | ||
| 1455 | if (err) { | ||
| 1456 | ubifs_err("error %d while walking the index", err); | ||
| 1457 | return err; | ||
| 1458 | } | ||
| 1459 | |||
| 1460 | if (calc != idx_size) { | ||
| 1461 | ubifs_err("index size check failed: calculated size is %lld, " | ||
| 1462 | "should be %lld", calc, idx_size); | ||
| 1463 | dump_stack(); | ||
| 1464 | return -EINVAL; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | return 0; | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | /** | ||
| 1471 | * struct fsck_inode - information about an inode used when checking the file-system. | ||
| 1472 | * @rb: link in the RB-tree of inodes | ||
| 1473 | * @inum: inode number | ||
| 1474 | * @mode: inode type, permissions, etc | ||
| 1475 | * @nlink: inode link count | ||
| 1476 | * @xattr_cnt: count of extended attributes | ||
| 1477 | * @references: how many directory/xattr entries refer this inode (calculated | ||
| 1478 | * while walking the index) | ||
| 1479 | * @calc_cnt: for directory inode count of child directories | ||
| 1480 | * @size: inode size (read from on-flash inode) | ||
| 1481 | * @xattr_sz: summary size of all extended attributes (read from on-flash | ||
| 1482 | * inode) | ||
| 1483 | * @calc_sz: for directories calculated directory size | ||
| 1484 | * @calc_xcnt: count of extended attributes | ||
| 1485 | * @calc_xsz: calculated summary size of all extended attributes | ||
| 1486 | * @xattr_nms: sum of lengths of all extended attribute names belonging to this | ||
| 1487 | * inode (read from on-flash inode) | ||
| 1488 | * @calc_xnms: calculated sum of lengths of all extended attribute names | ||
| 1489 | */ | ||
| 1490 | struct fsck_inode { | ||
| 1491 | struct rb_node rb; | ||
| 1492 | ino_t inum; | ||
| 1493 | umode_t mode; | ||
| 1494 | unsigned int nlink; | ||
| 1495 | unsigned int xattr_cnt; | ||
| 1496 | int references; | ||
| 1497 | int calc_cnt; | ||
| 1498 | long long size; | ||
| 1499 | unsigned int xattr_sz; | ||
| 1500 | long long calc_sz; | ||
| 1501 | long long calc_xcnt; | ||
| 1502 | long long calc_xsz; | ||
| 1503 | unsigned int xattr_nms; | ||
| 1504 | long long calc_xnms; | ||
| 1505 | }; | ||
| 1506 | |||
| 1507 | /** | ||
| 1508 | * struct fsck_data - private FS checking information. | ||
| 1509 | * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects) | ||
| 1510 | */ | ||
| 1511 | struct fsck_data { | ||
| 1512 | struct rb_root inodes; | ||
| 1513 | }; | ||
| 1514 | |||
| 1515 | /** | ||
| 1516 | * add_inode - add inode information to RB-tree of inodes. | ||
| 1517 | * @c: UBIFS file-system description object | ||
| 1518 | * @fsckd: FS checking information | ||
| 1519 | * @ino: raw UBIFS inode to add | ||
| 1520 | * | ||
| 1521 | * This is a helper function for 'check_leaf()' which adds information about | ||
| 1522 | * inode @ino to the RB-tree of inodes. Returns inode information pointer in | ||
| 1523 | * case of success and a negative error code in case of failure. | ||
| 1524 | */ | ||
| 1525 | static struct fsck_inode *add_inode(struct ubifs_info *c, | ||
| 1526 | struct fsck_data *fsckd, | ||
| 1527 | struct ubifs_ino_node *ino) | ||
| 1528 | { | ||
| 1529 | struct rb_node **p, *parent = NULL; | ||
| 1530 | struct fsck_inode *fscki; | ||
| 1531 | ino_t inum = key_inum_flash(c, &ino->key); | ||
| 1532 | |||
| 1533 | p = &fsckd->inodes.rb_node; | ||
| 1534 | while (*p) { | ||
| 1535 | parent = *p; | ||
| 1536 | fscki = rb_entry(parent, struct fsck_inode, rb); | ||
| 1537 | if (inum < fscki->inum) | ||
| 1538 | p = &(*p)->rb_left; | ||
| 1539 | else if (inum > fscki->inum) | ||
| 1540 | p = &(*p)->rb_right; | ||
| 1541 | else | ||
| 1542 | return fscki; | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | if (inum > c->highest_inum) { | ||
| 1546 | ubifs_err("too high inode number, max. is %lu", | ||
| 1547 | c->highest_inum); | ||
| 1548 | return ERR_PTR(-EINVAL); | ||
| 1549 | } | ||
| 1550 | |||
| 1551 | fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS); | ||
| 1552 | if (!fscki) | ||
| 1553 | return ERR_PTR(-ENOMEM); | ||
| 1554 | |||
| 1555 | fscki->inum = inum; | ||
| 1556 | fscki->nlink = le32_to_cpu(ino->nlink); | ||
| 1557 | fscki->size = le64_to_cpu(ino->size); | ||
| 1558 | fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); | ||
| 1559 | fscki->xattr_sz = le32_to_cpu(ino->xattr_size); | ||
| 1560 | fscki->xattr_nms = le32_to_cpu(ino->xattr_names); | ||
| 1561 | fscki->mode = le32_to_cpu(ino->mode); | ||
| 1562 | if (S_ISDIR(fscki->mode)) { | ||
| 1563 | fscki->calc_sz = UBIFS_INO_NODE_SZ; | ||
| 1564 | fscki->calc_cnt = 2; | ||
| 1565 | } | ||
| 1566 | rb_link_node(&fscki->rb, parent, p); | ||
| 1567 | rb_insert_color(&fscki->rb, &fsckd->inodes); | ||
| 1568 | return fscki; | ||
| 1569 | } | ||
| 1570 | |||
| 1571 | /** | ||
| 1572 | * search_inode - search inode in the RB-tree of inodes. | ||
| 1573 | * @fsckd: FS checking information | ||
| 1574 | * @inum: inode number to search | ||
| 1575 | * | ||
| 1576 | * This is a helper function for 'check_leaf()' which searches inode @inum in | ||
| 1577 | * the RB-tree of inodes and returns an inode information pointer or %NULL if | ||
| 1578 | * the inode was not found. | ||
| 1579 | */ | ||
| 1580 | static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum) | ||
| 1581 | { | ||
| 1582 | struct rb_node *p; | ||
| 1583 | struct fsck_inode *fscki; | ||
| 1584 | |||
| 1585 | p = fsckd->inodes.rb_node; | ||
| 1586 | while (p) { | ||
| 1587 | fscki = rb_entry(p, struct fsck_inode, rb); | ||
| 1588 | if (inum < fscki->inum) | ||
| 1589 | p = p->rb_left; | ||
| 1590 | else if (inum > fscki->inum) | ||
| 1591 | p = p->rb_right; | ||
| 1592 | else | ||
| 1593 | return fscki; | ||
| 1594 | } | ||
| 1595 | return NULL; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | /** | ||
| 1599 | * read_add_inode - read inode node and add it to RB-tree of inodes. | ||
| 1600 | * @c: UBIFS file-system description object | ||
| 1601 | * @fsckd: FS checking information | ||
| 1602 | * @inum: inode number to read | ||
| 1603 | * | ||
| 1604 | * This is a helper function for 'check_leaf()' which finds inode node @inum in | ||
| 1605 | * the index, reads it, and adds it to the RB-tree of inodes. Returns inode | ||
| 1606 | * information pointer in case of success and a negative error code in case of | ||
| 1607 | * failure. | ||
| 1608 | */ | ||
| 1609 | static struct fsck_inode *read_add_inode(struct ubifs_info *c, | ||
| 1610 | struct fsck_data *fsckd, ino_t inum) | ||
| 1611 | { | ||
| 1612 | int n, err; | ||
| 1613 | union ubifs_key key; | ||
| 1614 | struct ubifs_znode *znode; | ||
| 1615 | struct ubifs_zbranch *zbr; | ||
| 1616 | struct ubifs_ino_node *ino; | ||
| 1617 | struct fsck_inode *fscki; | ||
| 1618 | |||
| 1619 | fscki = search_inode(fsckd, inum); | ||
| 1620 | if (fscki) | ||
| 1621 | return fscki; | ||
| 1622 | |||
| 1623 | ino_key_init(c, &key, inum); | ||
| 1624 | err = ubifs_lookup_level0(c, &key, &znode, &n); | ||
| 1625 | if (!err) { | ||
| 1626 | ubifs_err("inode %lu not found in index", inum); | ||
| 1627 | return ERR_PTR(-ENOENT); | ||
| 1628 | } else if (err < 0) { | ||
| 1629 | ubifs_err("error %d while looking up inode %lu", err, inum); | ||
| 1630 | return ERR_PTR(err); | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | zbr = &znode->zbranch[n]; | ||
| 1634 | if (zbr->len < UBIFS_INO_NODE_SZ) { | ||
| 1635 | ubifs_err("bad node %lu node length %d", inum, zbr->len); | ||
| 1636 | return ERR_PTR(-EINVAL); | ||
| 1637 | } | ||
| 1638 | |||
| 1639 | ino = kmalloc(zbr->len, GFP_NOFS); | ||
| 1640 | if (!ino) | ||
| 1641 | return ERR_PTR(-ENOMEM); | ||
| 1642 | |||
| 1643 | err = ubifs_tnc_read_node(c, zbr, ino); | ||
| 1644 | if (err) { | ||
| 1645 | ubifs_err("cannot read inode node at LEB %d:%d, error %d", | ||
| 1646 | zbr->lnum, zbr->offs, err); | ||
| 1647 | kfree(ino); | ||
| 1648 | return ERR_PTR(err); | ||
| 1649 | } | ||
| 1650 | |||
| 1651 | fscki = add_inode(c, fsckd, ino); | ||
| 1652 | kfree(ino); | ||
| 1653 | if (IS_ERR(fscki)) { | ||
| 1654 | ubifs_err("error %ld while adding inode %lu node", | ||
| 1655 | PTR_ERR(fscki), inum); | ||
| 1656 | return fscki; | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | return fscki; | ||
| 1660 | } | ||
| 1661 | |||
| 1662 | /** | ||
| 1663 | * check_leaf - check leaf node. | ||
| 1664 | * @c: UBIFS file-system description object | ||
| 1665 | * @zbr: zbranch of the leaf node to check | ||
| 1666 | * @priv: FS checking information | ||
| 1667 | * | ||
| 1668 | * This is a helper function for 'dbg_check_filesystem()' which is called for | ||
| 1669 | * every single leaf node while walking the indexing tree. It checks that the | ||
| 1670 | * leaf node referred from the indexing tree exists, has correct CRC, and does | ||
| 1671 | * some other basic validation. This function is also responsible for building | ||
| 1672 | * an RB-tree of inodes - it adds all inodes into the RB-tree. It also | ||
| 1673 | * calculates reference count, size, etc for each inode in order to later | ||
| 1674 | * compare them to the information stored inside the inodes and detect possible | ||
| 1675 | * inconsistencies. Returns zero in case of success and a negative error code | ||
| 1676 | * in case of failure. | ||
| 1677 | */ | ||
| 1678 | static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | ||
| 1679 | void *priv) | ||
| 1680 | { | ||
| 1681 | ino_t inum; | ||
| 1682 | void *node; | ||
| 1683 | struct ubifs_ch *ch; | ||
| 1684 | int err, type = key_type(c, &zbr->key); | ||
| 1685 | struct fsck_inode *fscki; | ||
| 1686 | |||
| 1687 | if (zbr->len < UBIFS_CH_SZ) { | ||
| 1688 | ubifs_err("bad leaf length %d (LEB %d:%d)", | ||
| 1689 | zbr->len, zbr->lnum, zbr->offs); | ||
| 1690 | return -EINVAL; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | node = kmalloc(zbr->len, GFP_NOFS); | ||
| 1694 | if (!node) | ||
| 1695 | return -ENOMEM; | ||
| 1696 | |||
| 1697 | err = ubifs_tnc_read_node(c, zbr, node); | ||
| 1698 | if (err) { | ||
| 1699 | ubifs_err("cannot read leaf node at LEB %d:%d, error %d", | ||
| 1700 | zbr->lnum, zbr->offs, err); | ||
| 1701 | goto out_free; | ||
| 1702 | } | ||
| 1703 | |||
| 1704 | /* If this is an inode node, add it to RB-tree of inodes */ | ||
| 1705 | if (type == UBIFS_INO_KEY) { | ||
| 1706 | fscki = add_inode(c, priv, node); | ||
| 1707 | if (IS_ERR(fscki)) { | ||
| 1708 | err = PTR_ERR(fscki); | ||
| 1709 | ubifs_err("error %d while adding inode node", err); | ||
| 1710 | goto out_dump; | ||
| 1711 | } | ||
| 1712 | goto out; | ||
| 1713 | } | ||
| 1714 | |||
| 1715 | if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && | ||
| 1716 | type != UBIFS_DATA_KEY) { | ||
| 1717 | ubifs_err("unexpected node type %d at LEB %d:%d", | ||
| 1718 | type, zbr->lnum, zbr->offs); | ||
| 1719 | err = -EINVAL; | ||
| 1720 | goto out_free; | ||
| 1721 | } | ||
| 1722 | |||
| 1723 | ch = node; | ||
| 1724 | if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { | ||
| 1725 | ubifs_err("too high sequence number, max. is %llu", | ||
| 1726 | c->max_sqnum); | ||
| 1727 | err = -EINVAL; | ||
| 1728 | goto out_dump; | ||
| 1729 | } | ||
| 1730 | |||
| 1731 | if (type == UBIFS_DATA_KEY) { | ||
| 1732 | long long blk_offs; | ||
| 1733 | struct ubifs_data_node *dn = node; | ||
| 1734 | |||
| 1735 | /* | ||
| 1736 | * Search the inode node this data node belongs to and insert | ||
| 1737 | * it to the RB-tree of inodes. | ||
| 1738 | */ | ||
| 1739 | inum = key_inum_flash(c, &dn->key); | ||
| 1740 | fscki = read_add_inode(c, priv, inum); | ||
| 1741 | if (IS_ERR(fscki)) { | ||
| 1742 | err = PTR_ERR(fscki); | ||
| 1743 | ubifs_err("error %d while processing data node and " | ||
| 1744 | "trying to find inode node %lu", err, inum); | ||
| 1745 | goto out_dump; | ||
| 1746 | } | ||
| 1747 | |||
| 1748 | /* Make sure the data node is within inode size */ | ||
| 1749 | blk_offs = key_block_flash(c, &dn->key); | ||
| 1750 | blk_offs <<= UBIFS_BLOCK_SHIFT; | ||
| 1751 | blk_offs += le32_to_cpu(dn->size); | ||
| 1752 | if (blk_offs > fscki->size) { | ||
| 1753 | ubifs_err("data node at LEB %d:%d is not within inode " | ||
| 1754 | "size %lld", zbr->lnum, zbr->offs, | ||
| 1755 | fscki->size); | ||
| 1756 | err = -EINVAL; | ||
| 1757 | goto out_dump; | ||
| 1758 | } | ||
| 1759 | } else { | ||
| 1760 | int nlen; | ||
| 1761 | struct ubifs_dent_node *dent = node; | ||
| 1762 | struct fsck_inode *fscki1; | ||
| 1763 | |||
| 1764 | err = ubifs_validate_entry(c, dent); | ||
| 1765 | if (err) | ||
| 1766 | goto out_dump; | ||
| 1767 | |||
| 1768 | /* | ||
| 1769 | * Search the inode node this entry refers to and the parent | ||
| 1770 | * inode node and insert them to the RB-tree of inodes. | ||
| 1771 | */ | ||
| 1772 | inum = le64_to_cpu(dent->inum); | ||
| 1773 | fscki = read_add_inode(c, priv, inum); | ||
| 1774 | if (IS_ERR(fscki)) { | ||
| 1775 | err = PTR_ERR(fscki); | ||
| 1776 | ubifs_err("error %d while processing entry node and " | ||
| 1777 | "trying to find inode node %lu", err, inum); | ||
| 1778 | goto out_dump; | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | /* Count how many direntries or xentries refers this inode */ | ||
| 1782 | fscki->references += 1; | ||
| 1783 | |||
| 1784 | inum = key_inum_flash(c, &dent->key); | ||
| 1785 | fscki1 = read_add_inode(c, priv, inum); | ||
| 1786 | if (IS_ERR(fscki1)) { | ||
| 1787 | err = PTR_ERR(fscki); | ||
| 1788 | ubifs_err("error %d while processing entry node and " | ||
| 1789 | "trying to find parent inode node %lu", | ||
| 1790 | err, inum); | ||
| 1791 | goto out_dump; | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | nlen = le16_to_cpu(dent->nlen); | ||
| 1795 | if (type == UBIFS_XENT_KEY) { | ||
| 1796 | fscki1->calc_xcnt += 1; | ||
| 1797 | fscki1->calc_xsz += CALC_DENT_SIZE(nlen); | ||
| 1798 | fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size); | ||
| 1799 | fscki1->calc_xnms += nlen; | ||
| 1800 | } else { | ||
| 1801 | fscki1->calc_sz += CALC_DENT_SIZE(nlen); | ||
| 1802 | if (dent->type == UBIFS_ITYPE_DIR) | ||
| 1803 | fscki1->calc_cnt += 1; | ||
| 1804 | } | ||
| 1805 | } | ||
| 1806 | |||
| 1807 | out: | ||
| 1808 | kfree(node); | ||
| 1809 | return 0; | ||
| 1810 | |||
| 1811 | out_dump: | ||
| 1812 | ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); | ||
| 1813 | dbg_dump_node(c, node); | ||
| 1814 | out_free: | ||
| 1815 | kfree(node); | ||
| 1816 | return err; | ||
| 1817 | } | ||
| 1818 | |||
| 1819 | /** | ||
| 1820 | * free_inodes - free RB-tree of inodes. | ||
| 1821 | * @fsckd: FS checking information | ||
| 1822 | */ | ||
| 1823 | static void free_inodes(struct fsck_data *fsckd) | ||
| 1824 | { | ||
| 1825 | struct rb_node *this = fsckd->inodes.rb_node; | ||
| 1826 | struct fsck_inode *fscki; | ||
| 1827 | |||
| 1828 | while (this) { | ||
| 1829 | if (this->rb_left) | ||
| 1830 | this = this->rb_left; | ||
| 1831 | else if (this->rb_right) | ||
| 1832 | this = this->rb_right; | ||
| 1833 | else { | ||
| 1834 | fscki = rb_entry(this, struct fsck_inode, rb); | ||
| 1835 | this = rb_parent(this); | ||
| 1836 | if (this) { | ||
| 1837 | if (this->rb_left == &fscki->rb) | ||
| 1838 | this->rb_left = NULL; | ||
| 1839 | else | ||
| 1840 | this->rb_right = NULL; | ||
| 1841 | } | ||
| 1842 | kfree(fscki); | ||
| 1843 | } | ||
| 1844 | } | ||
| 1845 | } | ||
| 1846 | |||
| 1847 | /** | ||
| 1848 | * check_inodes - checks all inodes. | ||
| 1849 | * @c: UBIFS file-system description object | ||
| 1850 | * @fsckd: FS checking information | ||
| 1851 | * | ||
| 1852 | * This is a helper function for 'dbg_check_filesystem()' which walks the | ||
| 1853 | * RB-tree of inodes after the index scan has been finished, and checks that | ||
| 1854 | * inode nlink, size, etc are correct. Returns zero if inodes are fine, | ||
| 1855 | * %-EINVAL if not, and a negative error code in case of failure. | ||
| 1856 | */ | ||
| 1857 | static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) | ||
| 1858 | { | ||
| 1859 | int n, err; | ||
| 1860 | union ubifs_key key; | ||
| 1861 | struct ubifs_znode *znode; | ||
| 1862 | struct ubifs_zbranch *zbr; | ||
| 1863 | struct ubifs_ino_node *ino; | ||
| 1864 | struct fsck_inode *fscki; | ||
| 1865 | struct rb_node *this = rb_first(&fsckd->inodes); | ||
| 1866 | |||
| 1867 | while (this) { | ||
| 1868 | fscki = rb_entry(this, struct fsck_inode, rb); | ||
| 1869 | this = rb_next(this); | ||
| 1870 | |||
| 1871 | if (S_ISDIR(fscki->mode)) { | ||
| 1872 | /* | ||
| 1873 | * Directories have to have exactly one reference (they | ||
| 1874 | * cannot have hardlinks), although root inode is an | ||
| 1875 | * exception. | ||
| 1876 | */ | ||
| 1877 | if (fscki->inum != UBIFS_ROOT_INO && | ||
| 1878 | fscki->references != 1) { | ||
| 1879 | ubifs_err("directory inode %lu has %d " | ||
| 1880 | "direntries which refer it, but " | ||
| 1881 | "should be 1", fscki->inum, | ||
| 1882 | fscki->references); | ||
| 1883 | goto out_dump; | ||
| 1884 | } | ||
| 1885 | if (fscki->inum == UBIFS_ROOT_INO && | ||
| 1886 | fscki->references != 0) { | ||
| 1887 | ubifs_err("root inode %lu has non-zero (%d) " | ||
| 1888 | "direntries which refer it", | ||
| 1889 | fscki->inum, fscki->references); | ||
| 1890 | goto out_dump; | ||
| 1891 | } | ||
| 1892 | if (fscki->calc_sz != fscki->size) { | ||
| 1893 | ubifs_err("directory inode %lu size is %lld, " | ||
| 1894 | "but calculated size is %lld", | ||
| 1895 | fscki->inum, fscki->size, | ||
| 1896 | fscki->calc_sz); | ||
| 1897 | goto out_dump; | ||
| 1898 | } | ||
| 1899 | if (fscki->calc_cnt != fscki->nlink) { | ||
| 1900 | ubifs_err("directory inode %lu nlink is %d, " | ||
| 1901 | "but calculated nlink is %d", | ||
| 1902 | fscki->inum, fscki->nlink, | ||
| 1903 | fscki->calc_cnt); | ||
| 1904 | goto out_dump; | ||
| 1905 | } | ||
| 1906 | } else { | ||
| 1907 | if (fscki->references != fscki->nlink) { | ||
| 1908 | ubifs_err("inode %lu nlink is %d, but " | ||
| 1909 | "calculated nlink is %d", fscki->inum, | ||
| 1910 | fscki->nlink, fscki->references); | ||
| 1911 | goto out_dump; | ||
| 1912 | } | ||
| 1913 | } | ||
| 1914 | if (fscki->xattr_sz != fscki->calc_xsz) { | ||
| 1915 | ubifs_err("inode %lu has xattr size %u, but " | ||
| 1916 | "calculated size is %lld", | ||
| 1917 | fscki->inum, fscki->xattr_sz, | ||
| 1918 | fscki->calc_xsz); | ||
| 1919 | goto out_dump; | ||
| 1920 | } | ||
| 1921 | if (fscki->xattr_cnt != fscki->calc_xcnt) { | ||
| 1922 | ubifs_err("inode %lu has %u xattrs, but " | ||
| 1923 | "calculated count is %lld", fscki->inum, | ||
| 1924 | fscki->xattr_cnt, fscki->calc_xcnt); | ||
| 1925 | goto out_dump; | ||
| 1926 | } | ||
| 1927 | if (fscki->xattr_nms != fscki->calc_xnms) { | ||
| 1928 | ubifs_err("inode %lu has xattr names' size %u, but " | ||
| 1929 | "calculated names' size is %lld", | ||
| 1930 | fscki->inum, fscki->xattr_nms, | ||
| 1931 | fscki->calc_xnms); | ||
| 1932 | goto out_dump; | ||
| 1933 | } | ||
| 1934 | } | ||
| 1935 | |||
| 1936 | return 0; | ||
| 1937 | |||
| 1938 | out_dump: | ||
| 1939 | /* Read the bad inode and dump it */ | ||
| 1940 | ino_key_init(c, &key, fscki->inum); | ||
| 1941 | err = ubifs_lookup_level0(c, &key, &znode, &n); | ||
| 1942 | if (!err) { | ||
| 1943 | ubifs_err("inode %lu not found in index", fscki->inum); | ||
| 1944 | return -ENOENT; | ||
| 1945 | } else if (err < 0) { | ||
| 1946 | ubifs_err("error %d while looking up inode %lu", | ||
| 1947 | err, fscki->inum); | ||
| 1948 | return err; | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | zbr = &znode->zbranch[n]; | ||
| 1952 | ino = kmalloc(zbr->len, GFP_NOFS); | ||
| 1953 | if (!ino) | ||
| 1954 | return -ENOMEM; | ||
| 1955 | |||
| 1956 | err = ubifs_tnc_read_node(c, zbr, ino); | ||
| 1957 | if (err) { | ||
| 1958 | ubifs_err("cannot read inode node at LEB %d:%d, error %d", | ||
| 1959 | zbr->lnum, zbr->offs, err); | ||
| 1960 | kfree(ino); | ||
| 1961 | return err; | ||
| 1962 | } | ||
| 1963 | |||
| 1964 | ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", | ||
| 1965 | fscki->inum, zbr->lnum, zbr->offs); | ||
| 1966 | dbg_dump_node(c, ino); | ||
| 1967 | kfree(ino); | ||
| 1968 | return -EINVAL; | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | /** | ||
| 1972 | * dbg_check_filesystem - check the file-system. | ||
| 1973 | * @c: UBIFS file-system description object | ||
| 1974 | * | ||
| 1975 | * This function checks the file system, namely: | ||
| 1976 | * o makes sure that all leaf nodes exist and their CRCs are correct; | ||
| 1977 | * o makes sure inode nlink, size, xattr size/count are correct (for all | ||
| 1978 | * inodes). | ||
| 1979 | * | ||
| 1980 | * The function reads whole indexing tree and all nodes, so it is pretty | ||
| 1981 | * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if | ||
| 1982 | * not, and a negative error code in case of failure. | ||
| 1983 | */ | ||
| 1984 | int dbg_check_filesystem(struct ubifs_info *c) | ||
| 1985 | { | ||
| 1986 | int err; | ||
| 1987 | struct fsck_data fsckd; | ||
| 1988 | |||
| 1989 | if (!(ubifs_chk_flags & UBIFS_CHK_FS)) | ||
| 1990 | return 0; | ||
| 1991 | |||
| 1992 | fsckd.inodes = RB_ROOT; | ||
| 1993 | err = dbg_walk_index(c, check_leaf, NULL, &fsckd); | ||
| 1994 | if (err) | ||
| 1995 | goto out_free; | ||
| 1996 | |||
| 1997 | err = check_inodes(c, &fsckd); | ||
| 1998 | if (err) | ||
| 1999 | goto out_free; | ||
| 2000 | |||
| 2001 | free_inodes(&fsckd); | ||
| 2002 | return 0; | ||
| 2003 | |||
| 2004 | out_free: | ||
| 2005 | ubifs_err("file-system check failed with error %d", err); | ||
| 2006 | dump_stack(); | ||
| 2007 | free_inodes(&fsckd); | ||
| 2008 | return err; | ||
| 2009 | } | ||
| 2010 | |||
| 2011 | static int invocation_cnt; | ||
| 2012 | |||
| 2013 | int dbg_force_in_the_gaps(void) | ||
| 2014 | { | ||
| 2015 | if (!dbg_force_in_the_gaps_enabled) | ||
| 2016 | return 0; | ||
| 2017 | /* Force in-the-gaps every 8th commit */ | ||
| 2018 | return !((invocation_cnt++) & 0x7); | ||
| 2019 | } | ||
| 2020 | |||
/* Failure mode for recovery testing */

/* Evaluate to true with probability n/d, using simple_rand() below */
#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))

/*
 * One entry per file-system registered for failure testing; maps a UBI
 * volume descriptor back to its 'struct ubifs_info'.
 */
struct failure_mode_info {
	struct list_head list;
	struct ubifs_info *c;
};

/* List of registered file-systems, protected by 'fmi_lock' */
static LIST_HEAD(fmi_list);
static DEFINE_SPINLOCK(fmi_lock);

/* State of the pseudo-random number generator used by simple_rand() */
static unsigned int next;
| 2035 | static int simple_rand(void) | ||
| 2036 | { | ||
| 2037 | if (next == 0) | ||
| 2038 | next = current->pid; | ||
| 2039 | next = next * 1103515245 + 12345; | ||
| 2040 | return (next >> 16) & 32767; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | void dbg_failure_mode_registration(struct ubifs_info *c) | ||
| 2044 | { | ||
| 2045 | struct failure_mode_info *fmi; | ||
| 2046 | |||
| 2047 | fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); | ||
| 2048 | if (!fmi) { | ||
| 2049 | dbg_err("Failed to register failure mode - no memory"); | ||
| 2050 | return; | ||
| 2051 | } | ||
| 2052 | fmi->c = c; | ||
| 2053 | spin_lock(&fmi_lock); | ||
| 2054 | list_add_tail(&fmi->list, &fmi_list); | ||
| 2055 | spin_unlock(&fmi_lock); | ||
| 2056 | } | ||
| 2057 | |||
| 2058 | void dbg_failure_mode_deregistration(struct ubifs_info *c) | ||
| 2059 | { | ||
| 2060 | struct failure_mode_info *fmi, *tmp; | ||
| 2061 | |||
| 2062 | spin_lock(&fmi_lock); | ||
| 2063 | list_for_each_entry_safe(fmi, tmp, &fmi_list, list) | ||
| 2064 | if (fmi->c == c) { | ||
| 2065 | list_del(&fmi->list); | ||
| 2066 | kfree(fmi); | ||
| 2067 | } | ||
| 2068 | spin_unlock(&fmi_lock); | ||
| 2069 | } | ||
| 2070 | |||
| 2071 | static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) | ||
| 2072 | { | ||
| 2073 | struct failure_mode_info *fmi; | ||
| 2074 | |||
| 2075 | spin_lock(&fmi_lock); | ||
| 2076 | list_for_each_entry(fmi, &fmi_list, list) | ||
| 2077 | if (fmi->c->ubi == desc) { | ||
| 2078 | struct ubifs_info *c = fmi->c; | ||
| 2079 | |||
| 2080 | spin_unlock(&fmi_lock); | ||
| 2081 | return c; | ||
| 2082 | } | ||
| 2083 | spin_unlock(&fmi_lock); | ||
| 2084 | return NULL; | ||
| 2085 | } | ||
| 2086 | |||
| 2087 | static int in_failure_mode(struct ubi_volume_desc *desc) | ||
| 2088 | { | ||
| 2089 | struct ubifs_info *c = dbg_find_info(desc); | ||
| 2090 | |||
| 2091 | if (c && dbg_failure_mode) | ||
| 2092 | return c->failure_mode; | ||
| 2093 | return 0; | ||
| 2094 | } | ||
| 2095 | |||
| 2096 | static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) | ||
| 2097 | { | ||
| 2098 | struct ubifs_info *c = dbg_find_info(desc); | ||
| 2099 | |||
| 2100 | if (!c || !dbg_failure_mode) | ||
| 2101 | return 0; | ||
| 2102 | if (c->failure_mode) | ||
| 2103 | return 1; | ||
| 2104 | if (!c->fail_cnt) { | ||
| 2105 | /* First call - decide delay to failure */ | ||
| 2106 | if (chance(1, 2)) { | ||
| 2107 | unsigned int delay = 1 << (simple_rand() >> 11); | ||
| 2108 | |||
| 2109 | if (chance(1, 2)) { | ||
| 2110 | c->fail_delay = 1; | ||
| 2111 | c->fail_timeout = jiffies + | ||
| 2112 | msecs_to_jiffies(delay); | ||
| 2113 | dbg_rcvry("failing after %ums", delay); | ||
| 2114 | } else { | ||
| 2115 | c->fail_delay = 2; | ||
| 2116 | c->fail_cnt_max = delay; | ||
| 2117 | dbg_rcvry("failing after %u calls", delay); | ||
| 2118 | } | ||
| 2119 | } | ||
| 2120 | c->fail_cnt += 1; | ||
| 2121 | } | ||
| 2122 | /* Determine if failure delay has expired */ | ||
| 2123 | if (c->fail_delay == 1) { | ||
| 2124 | if (time_before(jiffies, c->fail_timeout)) | ||
| 2125 | return 0; | ||
| 2126 | } else if (c->fail_delay == 2) | ||
| 2127 | if (c->fail_cnt++ < c->fail_cnt_max) | ||
| 2128 | return 0; | ||
| 2129 | if (lnum == UBIFS_SB_LNUM) { | ||
| 2130 | if (write) { | ||
| 2131 | if (chance(1, 2)) | ||
| 2132 | return 0; | ||
| 2133 | } else if (chance(19, 20)) | ||
| 2134 | return 0; | ||
| 2135 | dbg_rcvry("failing in super block LEB %d", lnum); | ||
| 2136 | } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { | ||
| 2137 | if (chance(19, 20)) | ||
| 2138 | return 0; | ||
| 2139 | dbg_rcvry("failing in master LEB %d", lnum); | ||
| 2140 | } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { | ||
| 2141 | if (write) { | ||
| 2142 | if (chance(99, 100)) | ||
| 2143 | return 0; | ||
| 2144 | } else if (chance(399, 400)) | ||
| 2145 | return 0; | ||
| 2146 | dbg_rcvry("failing in log LEB %d", lnum); | ||
| 2147 | } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { | ||
| 2148 | if (write) { | ||
| 2149 | if (chance(7, 8)) | ||
| 2150 | return 0; | ||
| 2151 | } else if (chance(19, 20)) | ||
| 2152 | return 0; | ||
| 2153 | dbg_rcvry("failing in LPT LEB %d", lnum); | ||
| 2154 | } else if (lnum >= c->orph_first && lnum <= c->orph_last) { | ||
| 2155 | if (write) { | ||
| 2156 | if (chance(1, 2)) | ||
| 2157 | return 0; | ||
| 2158 | } else if (chance(9, 10)) | ||
| 2159 | return 0; | ||
| 2160 | dbg_rcvry("failing in orphan LEB %d", lnum); | ||
| 2161 | } else if (lnum == c->ihead_lnum) { | ||
| 2162 | if (chance(99, 100)) | ||
| 2163 | return 0; | ||
| 2164 | dbg_rcvry("failing in index head LEB %d", lnum); | ||
| 2165 | } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { | ||
| 2166 | if (chance(9, 10)) | ||
| 2167 | return 0; | ||
| 2168 | dbg_rcvry("failing in GC head LEB %d", lnum); | ||
| 2169 | } else if (write && !RB_EMPTY_ROOT(&c->buds) && | ||
| 2170 | !ubifs_search_bud(c, lnum)) { | ||
| 2171 | if (chance(19, 20)) | ||
| 2172 | return 0; | ||
| 2173 | dbg_rcvry("failing in non-bud LEB %d", lnum); | ||
| 2174 | } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || | ||
| 2175 | c->cmt_state == COMMIT_RUNNING_REQUIRED) { | ||
| 2176 | if (chance(999, 1000)) | ||
| 2177 | return 0; | ||
| 2178 | dbg_rcvry("failing in bud LEB %d commit running", lnum); | ||
| 2179 | } else { | ||
| 2180 | if (chance(9999, 10000)) | ||
| 2181 | return 0; | ||
| 2182 | dbg_rcvry("failing in bud LEB %d commit not running", lnum); | ||
| 2183 | } | ||
| 2184 | ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); | ||
| 2185 | c->failure_mode = 1; | ||
| 2186 | dump_stack(); | ||
| 2187 | return 1; | ||
| 2188 | } | ||
| 2189 | |||
| 2190 | static void cut_data(const void *buf, int len) | ||
| 2191 | { | ||
| 2192 | int flen, i; | ||
| 2193 | unsigned char *p = (void *)buf; | ||
| 2194 | |||
| 2195 | flen = (len * (long long)simple_rand()) >> 15; | ||
| 2196 | for (i = flen; i < len; i++) | ||
| 2197 | p[i] = 0xff; | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | ||
| 2201 | int len, int check) | ||
| 2202 | { | ||
| 2203 | if (in_failure_mode(desc)) | ||
| 2204 | return -EIO; | ||
| 2205 | return ubi_leb_read(desc, lnum, buf, offset, len, check); | ||
| 2206 | } | ||
| 2207 | |||
| 2208 | int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | ||
| 2209 | int offset, int len, int dtype) | ||
| 2210 | { | ||
| 2211 | int err; | ||
| 2212 | |||
| 2213 | if (in_failure_mode(desc)) | ||
| 2214 | return -EIO; | ||
| 2215 | if (do_fail(desc, lnum, 1)) | ||
| 2216 | cut_data(buf, len); | ||
| 2217 | err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); | ||
| 2218 | if (err) | ||
| 2219 | return err; | ||
| 2220 | if (in_failure_mode(desc)) | ||
| 2221 | return -EIO; | ||
| 2222 | return 0; | ||
| 2223 | } | ||
| 2224 | |||
| 2225 | int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, | ||
| 2226 | int len, int dtype) | ||
| 2227 | { | ||
| 2228 | int err; | ||
| 2229 | |||
| 2230 | if (do_fail(desc, lnum, 1)) | ||
| 2231 | return -EIO; | ||
| 2232 | err = ubi_leb_change(desc, lnum, buf, len, dtype); | ||
| 2233 | if (err) | ||
| 2234 | return err; | ||
| 2235 | if (do_fail(desc, lnum, 1)) | ||
| 2236 | return -EIO; | ||
| 2237 | return 0; | ||
| 2238 | } | ||
| 2239 | |||
| 2240 | int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) | ||
| 2241 | { | ||
| 2242 | int err; | ||
| 2243 | |||
| 2244 | if (do_fail(desc, lnum, 0)) | ||
| 2245 | return -EIO; | ||
| 2246 | err = ubi_leb_erase(desc, lnum); | ||
| 2247 | if (err) | ||
| 2248 | return err; | ||
| 2249 | if (do_fail(desc, lnum, 0)) | ||
| 2250 | return -EIO; | ||
| 2251 | return 0; | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) | ||
| 2255 | { | ||
| 2256 | int err; | ||
| 2257 | |||
| 2258 | if (do_fail(desc, lnum, 0)) | ||
| 2259 | return -EIO; | ||
| 2260 | err = ubi_leb_unmap(desc, lnum); | ||
| 2261 | if (err) | ||
| 2262 | return err; | ||
| 2263 | if (do_fail(desc, lnum, 0)) | ||
| 2264 | return -EIO; | ||
| 2265 | return 0; | ||
| 2266 | } | ||
| 2267 | |||
| 2268 | int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) | ||
| 2269 | { | ||
| 2270 | if (in_failure_mode(desc)) | ||
| 2271 | return -EIO; | ||
| 2272 | return ubi_is_mapped(desc, lnum); | ||
| 2273 | } | ||
| 2274 | |||
| 2275 | int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) | ||
| 2276 | { | ||
| 2277 | int err; | ||
| 2278 | |||
| 2279 | if (do_fail(desc, lnum, 0)) | ||
| 2280 | return -EIO; | ||
| 2281 | err = ubi_leb_map(desc, lnum, dtype); | ||
| 2282 | if (err) | ||
| 2283 | return err; | ||
| 2284 | if (do_fail(desc, lnum, 0)) | ||
| 2285 | return -EIO; | ||
| 2286 | return 0; | ||
| 2287 | } | ||
| 2288 | |||
| 2289 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h new file mode 100644 index 000000000000..3c4f1e93c9e0 --- /dev/null +++ b/fs/ubifs/debug.h | |||
| @@ -0,0 +1,403 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | #ifndef __UBIFS_DEBUG_H__ | ||
| 24 | #define __UBIFS_DEBUG_H__ | ||
| 25 | |||
| 26 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 27 | |||
| 28 | #define UBIFS_DBG(op) op | ||
| 29 | |||
| 30 | #define ubifs_assert(expr) do { \ | ||
| 31 | if (unlikely(!(expr))) { \ | ||
| 32 | printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ | ||
| 33 | __func__, __LINE__, current->pid); \ | ||
| 34 | dbg_dump_stack(); \ | ||
| 35 | } \ | ||
| 36 | } while (0) | ||
| 37 | |||
| 38 | #define ubifs_assert_cmt_locked(c) do { \ | ||
| 39 | if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ | ||
| 40 | up_write(&(c)->commit_sem); \ | ||
| 41 | printk(KERN_CRIT "commit lock is not locked!\n"); \ | ||
| 42 | ubifs_assert(0); \ | ||
| 43 | } \ | ||
| 44 | } while (0) | ||
| 45 | |||
| 46 | #define dbg_dump_stack() do { \ | ||
| 47 | if (!dbg_failure_mode) \ | ||
| 48 | dump_stack(); \ | ||
| 49 | } while (0) | ||
| 50 | |||
| 51 | /* Generic debugging messages */ | ||
| 52 | #define dbg_msg(fmt, ...) do { \ | ||
| 53 | spin_lock(&dbg_lock); \ | ||
| 54 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ | ||
| 55 | __func__, ##__VA_ARGS__); \ | ||
| 56 | spin_unlock(&dbg_lock); \ | ||
| 57 | } while (0) | ||
| 58 | |||
| 59 | #define dbg_do_msg(typ, fmt, ...) do { \ | ||
| 60 | if (ubifs_msg_flags & typ) \ | ||
| 61 | dbg_msg(fmt, ##__VA_ARGS__); \ | ||
| 62 | } while (0) | ||
| 63 | |||
| 64 | #define dbg_err(fmt, ...) do { \ | ||
| 65 | spin_lock(&dbg_lock); \ | ||
| 66 | ubifs_err(fmt, ##__VA_ARGS__); \ | ||
| 67 | spin_unlock(&dbg_lock); \ | ||
| 68 | } while (0) | ||
| 69 | |||
| 70 | const char *dbg_key_str0(const struct ubifs_info *c, | ||
| 71 | const union ubifs_key *key); | ||
| 72 | const char *dbg_key_str1(const struct ubifs_info *c, | ||
| 73 | const union ubifs_key *key); | ||
| 74 | |||
| 75 | /* | ||
| 76 | * DBGKEY macros require dbg_lock to be held, which it is in the dbg message | ||
| 77 | * macros. | ||
| 78 | */ | ||
| 79 | #define DBGKEY(key) dbg_key_str0(c, (key)) | ||
| 80 | #define DBGKEY1(key) dbg_key_str1(c, (key)) | ||
| 81 | |||
| 82 | /* General messages */ | ||
| 83 | #define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) | ||
| 84 | |||
| 85 | /* Additional journal messages */ | ||
| 86 | #define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) | ||
| 87 | |||
| 88 | /* Additional TNC messages */ | ||
| 89 | #define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) | ||
| 90 | |||
| 91 | /* Additional lprops messages */ | ||
| 92 | #define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) | ||
| 93 | |||
| 94 | /* Additional LEB find messages */ | ||
| 95 | #define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) | ||
| 96 | |||
| 97 | /* Additional mount messages */ | ||
| 98 | #define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) | ||
| 99 | |||
| 100 | /* Additional I/O messages */ | ||
| 101 | #define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) | ||
| 102 | |||
| 103 | /* Additional commit messages */ | ||
| 104 | #define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) | ||
| 105 | |||
| 106 | /* Additional budgeting messages */ | ||
| 107 | #define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) | ||
| 108 | |||
| 109 | /* Additional log messages */ | ||
| 110 | #define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) | ||
| 111 | |||
| 112 | /* Additional gc messages */ | ||
| 113 | #define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) | ||
| 114 | |||
| 115 | /* Additional scan messages */ | ||
| 116 | #define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) | ||
| 117 | |||
| 118 | /* Additional recovery messages */ | ||
| 119 | #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) | ||
| 120 | |||
| 121 | /* | ||
| 122 | * Debugging message type flags (must match msg_type_names in debug.c). | ||
| 123 | * | ||
| 124 | * UBIFS_MSG_GEN: general messages | ||
| 125 | * UBIFS_MSG_JNL: journal messages | ||
| 126 | * UBIFS_MSG_MNT: mount messages | ||
| 127 | * UBIFS_MSG_CMT: commit messages | ||
| 128 | * UBIFS_MSG_FIND: LEB find messages | ||
| 129 | * UBIFS_MSG_BUDG: budgeting messages | ||
| 130 | * UBIFS_MSG_GC: garbage collection messages | ||
| 131 | * UBIFS_MSG_TNC: TNC messages | ||
| 132 | * UBIFS_MSG_LP: lprops messages | ||
| 133 | * UBIFS_MSG_IO: I/O messages | ||
| 134 | * UBIFS_MSG_LOG: log messages | ||
| 135 | * UBIFS_MSG_SCAN: scan messages | ||
| 136 | * UBIFS_MSG_RCVRY: recovery messages | ||
| 137 | */ | ||
| 138 | enum { | ||
| 139 | UBIFS_MSG_GEN = 0x1, | ||
| 140 | UBIFS_MSG_JNL = 0x2, | ||
| 141 | UBIFS_MSG_MNT = 0x4, | ||
| 142 | UBIFS_MSG_CMT = 0x8, | ||
| 143 | UBIFS_MSG_FIND = 0x10, | ||
| 144 | UBIFS_MSG_BUDG = 0x20, | ||
| 145 | UBIFS_MSG_GC = 0x40, | ||
| 146 | UBIFS_MSG_TNC = 0x80, | ||
| 147 | UBIFS_MSG_LP = 0x100, | ||
| 148 | UBIFS_MSG_IO = 0x200, | ||
| 149 | UBIFS_MSG_LOG = 0x400, | ||
| 150 | UBIFS_MSG_SCAN = 0x800, | ||
| 151 | UBIFS_MSG_RCVRY = 0x1000, | ||
| 152 | }; | ||
| 153 | |||
| 154 | /* Debugging message type flags for each default debug message level */ | ||
| 155 | #define UBIFS_MSG_LVL_0 0 | ||
| 156 | #define UBIFS_MSG_LVL_1 0x1 | ||
| 157 | #define UBIFS_MSG_LVL_2 0x7f | ||
| 158 | #define UBIFS_MSG_LVL_3 0xffff | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Debugging check flags (must match chk_names in debug.c). | ||
| 162 | * | ||
| 163 | * UBIFS_CHK_GEN: general checks | ||
| 164 | * UBIFS_CHK_TNC: check TNC | ||
| 165 | * UBIFS_CHK_IDX_SZ: check index size | ||
| 166 | * UBIFS_CHK_ORPH: check orphans | ||
| 167 | * UBIFS_CHK_OLD_IDX: check the old index | ||
| 168 | * UBIFS_CHK_LPROPS: check lprops | ||
| 169 | * UBIFS_CHK_FS: check the file-system | ||
| 170 | */ | ||
| 171 | enum { | ||
| 172 | UBIFS_CHK_GEN = 0x1, | ||
| 173 | UBIFS_CHK_TNC = 0x2, | ||
| 174 | UBIFS_CHK_IDX_SZ = 0x4, | ||
| 175 | UBIFS_CHK_ORPH = 0x8, | ||
| 176 | UBIFS_CHK_OLD_IDX = 0x10, | ||
| 177 | UBIFS_CHK_LPROPS = 0x20, | ||
| 178 | UBIFS_CHK_FS = 0x40, | ||
| 179 | }; | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Special testing flags (must match tst_names in debug.c). | ||
| 183 | * | ||
| 184 | * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method | ||
| 185 | * UBIFS_TST_RCVRY: failure mode for recovery testing | ||
| 186 | */ | ||
| 187 | enum { | ||
| 188 | UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, | ||
| 189 | UBIFS_TST_RCVRY = 0x4, | ||
| 190 | }; | ||
| 191 | |||
| 192 | #if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 | ||
| 193 | #define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 | ||
| 194 | #elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 | ||
| 195 | #define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 | ||
| 196 | #elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 | ||
| 197 | #define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 | ||
| 198 | #else | ||
| 199 | #define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 | ||
| 200 | #endif | ||
| 201 | |||
| 202 | #ifdef CONFIG_UBIFS_FS_DEBUG_CHKS | ||
| 203 | #define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff | ||
| 204 | #else | ||
| 205 | #define UBIFS_CHK_FLAGS_DEFAULT 0 | ||
| 206 | #endif | ||
| 207 | |||
| 208 | extern spinlock_t dbg_lock; | ||
| 209 | |||
| 210 | extern unsigned int ubifs_msg_flags; | ||
| 211 | extern unsigned int ubifs_chk_flags; | ||
| 212 | extern unsigned int ubifs_tst_flags; | ||
| 213 | |||
| 214 | /* Dump functions */ | ||
| 215 | |||
| 216 | const char *dbg_ntype(int type); | ||
| 217 | const char *dbg_cstate(int cmt_state); | ||
| 218 | const char *dbg_get_key_dump(const struct ubifs_info *c, | ||
| 219 | const union ubifs_key *key); | ||
| 220 | void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); | ||
| 221 | void dbg_dump_node(const struct ubifs_info *c, const void *node); | ||
| 222 | void dbg_dump_budget_req(const struct ubifs_budget_req *req); | ||
| 223 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst); | ||
| 224 | void dbg_dump_budg(struct ubifs_info *c); | ||
| 225 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); | ||
| 226 | void dbg_dump_lprops(struct ubifs_info *c); | ||
| 227 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); | ||
| 228 | void dbg_dump_znode(const struct ubifs_info *c, | ||
| 229 | const struct ubifs_znode *znode); | ||
| 230 | void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); | ||
| 231 | void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | ||
| 232 | struct ubifs_nnode *parent, int iip); | ||
| 233 | void dbg_dump_tnc(struct ubifs_info *c); | ||
| 234 | void dbg_dump_index(struct ubifs_info *c); | ||
| 235 | |||
| 236 | /* Checking helper functions */ | ||
| 237 | |||
| 238 | typedef int (*dbg_leaf_callback)(struct ubifs_info *c, | ||
| 239 | struct ubifs_zbranch *zbr, void *priv); | ||
| 240 | typedef int (*dbg_znode_callback)(struct ubifs_info *c, | ||
| 241 | struct ubifs_znode *znode, void *priv); | ||
| 242 | |||
| 243 | int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, | ||
| 244 | dbg_znode_callback znode_cb, void *priv); | ||
| 245 | |||
| 246 | /* Checking functions */ | ||
| 247 | |||
| 248 | int dbg_check_lprops(struct ubifs_info *c); | ||
| 249 | |||
| 250 | int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); | ||
| 251 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); | ||
| 252 | |||
| 253 | int dbg_check_cats(struct ubifs_info *c); | ||
| 254 | |||
| 255 | int dbg_check_ltab(struct ubifs_info *c); | ||
| 256 | |||
| 257 | int dbg_check_synced_i_size(struct inode *inode); | ||
| 258 | |||
| 259 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); | ||
| 260 | |||
| 261 | int dbg_check_tnc(struct ubifs_info *c, int extra); | ||
| 262 | |||
| 263 | int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); | ||
| 264 | |||
| 265 | int dbg_check_filesystem(struct ubifs_info *c); | ||
| 266 | |||
| 267 | void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, | ||
| 268 | int add_pos); | ||
| 269 | |||
| 270 | int dbg_check_lprops(struct ubifs_info *c); | ||
| 271 | int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, | ||
| 272 | int row, int col); | ||
| 273 | |||
| 274 | /* Force the use of in-the-gaps method for testing */ | ||
| 275 | |||
| 276 | #define dbg_force_in_the_gaps_enabled \ | ||
| 277 | (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) | ||
| 278 | |||
| 279 | int dbg_force_in_the_gaps(void); | ||
| 280 | |||
| 281 | /* Failure mode for recovery testing */ | ||
| 282 | |||
| 283 | #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) | ||
| 284 | |||
| 285 | void dbg_failure_mode_registration(struct ubifs_info *c); | ||
| 286 | void dbg_failure_mode_deregistration(struct ubifs_info *c); | ||
| 287 | |||
| 288 | #ifndef UBIFS_DBG_PRESERVE_UBI | ||
| 289 | |||
| 290 | #define ubi_leb_read dbg_leb_read | ||
| 291 | #define ubi_leb_write dbg_leb_write | ||
| 292 | #define ubi_leb_change dbg_leb_change | ||
| 293 | #define ubi_leb_erase dbg_leb_erase | ||
| 294 | #define ubi_leb_unmap dbg_leb_unmap | ||
| 295 | #define ubi_is_mapped dbg_is_mapped | ||
| 296 | #define ubi_leb_map dbg_leb_map | ||
| 297 | |||
| 298 | #endif | ||
| 299 | |||
| 300 | int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | ||
| 301 | int len, int check); | ||
| 302 | int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | ||
| 303 | int offset, int len, int dtype); | ||
| 304 | int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, | ||
| 305 | int len, int dtype); | ||
| 306 | int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); | ||
| 307 | int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); | ||
| 308 | int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); | ||
| 309 | int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); | ||
| 310 | |||
| 311 | static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, | ||
| 312 | int offset, int len) | ||
| 313 | { | ||
| 314 | return dbg_leb_read(desc, lnum, buf, offset, len, 0); | ||
| 315 | } | ||
| 316 | |||
| 317 | static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, | ||
| 318 | const void *buf, int offset, int len) | ||
| 319 | { | ||
| 320 | return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); | ||
| 321 | } | ||
| 322 | |||
| 323 | static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, | ||
| 324 | const void *buf, int len) | ||
| 325 | { | ||
| 326 | return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); | ||
| 327 | } | ||
| 328 | |||
| 329 | #else /* !CONFIG_UBIFS_FS_DEBUG */ | ||
| 330 | |||
| 331 | #define UBIFS_DBG(op) | ||
| 332 | #define ubifs_assert(expr) ({}) | ||
| 333 | #define ubifs_assert_cmt_locked(c) | ||
| 334 | #define dbg_dump_stack() | ||
| 335 | #define dbg_err(fmt, ...) ({}) | ||
| 336 | #define dbg_msg(fmt, ...) ({}) | ||
| 337 | #define dbg_key(c, key, fmt, ...) ({}) | ||
| 338 | |||
| 339 | #define dbg_gen(fmt, ...) ({}) | ||
| 340 | #define dbg_jnl(fmt, ...) ({}) | ||
| 341 | #define dbg_tnc(fmt, ...) ({}) | ||
| 342 | #define dbg_lp(fmt, ...) ({}) | ||
| 343 | #define dbg_find(fmt, ...) ({}) | ||
| 344 | #define dbg_mnt(fmt, ...) ({}) | ||
| 345 | #define dbg_io(fmt, ...) ({}) | ||
| 346 | #define dbg_cmt(fmt, ...) ({}) | ||
| 347 | #define dbg_budg(fmt, ...) ({}) | ||
| 348 | #define dbg_log(fmt, ...) ({}) | ||
| 349 | #define dbg_gc(fmt, ...) ({}) | ||
| 350 | #define dbg_scan(fmt, ...) ({}) | ||
| 351 | #define dbg_rcvry(fmt, ...) ({}) | ||
| 352 | |||
| 353 | #define dbg_ntype(type) "" | ||
| 354 | #define dbg_cstate(cmt_state) "" | ||
| 355 | #define dbg_get_key_dump(c, key) ({}) | ||
| 356 | #define dbg_dump_inode(c, inode) ({}) | ||
| 357 | #define dbg_dump_node(c, node) ({}) | ||
| 358 | #define dbg_dump_budget_req(req) ({}) | ||
| 359 | #define dbg_dump_lstats(lst) ({}) | ||
| 360 | #define dbg_dump_budg(c) ({}) | ||
| 361 | #define dbg_dump_lprop(c, lp) ({}) | ||
| 362 | #define dbg_dump_lprops(c) ({}) | ||
| 363 | #define dbg_dump_leb(c, lnum) ({}) | ||
| 364 | #define dbg_dump_znode(c, znode) ({}) | ||
| 365 | #define dbg_dump_heap(c, heap, cat) ({}) | ||
| 366 | #define dbg_dump_pnode(c, pnode, parent, iip) ({}) | ||
| 367 | #define dbg_dump_tnc(c) ({}) | ||
| 368 | #define dbg_dump_index(c) ({}) | ||
| 369 | |||
| 370 | #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 | ||
| 371 | |||
| 372 | #define dbg_old_index_check_init(c, zroot) 0 | ||
| 373 | #define dbg_check_old_index(c, zroot) 0 | ||
| 374 | |||
| 375 | #define dbg_check_cats(c) 0 | ||
| 376 | |||
| 377 | #define dbg_check_ltab(c) 0 | ||
| 378 | |||
| 379 | #define dbg_check_synced_i_size(inode) 0 | ||
| 380 | |||
| 381 | #define dbg_check_dir_size(c, dir) 0 | ||
| 382 | |||
| 383 | #define dbg_check_tnc(c, x) 0 | ||
| 384 | |||
| 385 | #define dbg_check_idx_size(c, idx_size) 0 | ||
| 386 | |||
| 387 | #define dbg_check_filesystem(c) 0 | ||
| 388 | |||
| 389 | #define dbg_check_heap(c, heap, cat, add_pos) ({}) | ||
| 390 | |||
| 391 | #define dbg_check_lprops(c) 0 | ||
| 392 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 | ||
| 393 | |||
| 394 | #define dbg_force_in_the_gaps_enabled 0 | ||
| 395 | #define dbg_force_in_the_gaps() 0 | ||
| 396 | |||
| 397 | #define dbg_failure_mode 0 | ||
| 398 | #define dbg_failure_mode_registration(c) ({}) | ||
| 399 | #define dbg_failure_mode_deregistration(c) ({}) | ||
| 400 | |||
| 401 | #endif /* !CONFIG_UBIFS_FS_DEBUG */ | ||
| 402 | |||
| 403 | #endif /* !__UBIFS_DEBUG_H__ */ | ||
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c new file mode 100644 index 000000000000..e90374be7d3b --- /dev/null +++ b/fs/ubifs/dir.c | |||
| @@ -0,0 +1,1240 @@ | |||
| 1 | /* * This file is part of UBIFS. | ||
| 2 | * | ||
| 3 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 4 | * Copyright (C) 2006, 2007 University of Szeged, Hungary | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | * Zoltan Sogor | ||
| 22 | */ | ||
| 23 | |||
| 24 | /* | ||
| 25 | * This file implements directory operations. | ||
| 26 | * | ||
| 27 | * All FS operations in this file allocate budget before writing anything to the | ||
| 28 | * media. If they fail to allocate it, the error is returned. The only | ||
| 29 | * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even | ||
| 30 | * if they unable to allocate the budget, because deletion %-ENOSPC failure is | ||
| 31 | * not what users are usually ready to get. UBIFS budgeting subsystem has some | ||
| 32 | * space reserved for these purposes. | ||
| 33 | * | ||
| 34 | * All operations in this file write all inodes which they change straight | ||
| 35 | * away, instead of marking them dirty. For example, 'ubifs_link()' changes | ||
| 36 | * @i_size of the parent inode and writes the parent inode together with the | ||
| 37 | * target inode. This was done to simplify file-system recovery which would | ||
| 38 | * otherwise be very difficult to do. The only exception is rename which marks | ||
| 39 | * the re-named inode dirty (because its @i_ctime is updated) but does not | ||
| 40 | * write it, but just marks it as dirty. | ||
| 41 | */ | ||
| 42 | |||
| 43 | #include "ubifs.h" | ||
| 44 | |||
| 45 | /** | ||
| 46 | * inherit_flags - inherit flags of the parent inode. | ||
| 47 | * @dir: parent inode | ||
| 48 | * @mode: new inode mode flags | ||
| 49 | * | ||
| 50 | * This is a helper function for 'ubifs_new_inode()' which inherits flag of the | ||
| 51 | * parent directory inode @dir. UBIFS inodes inherit the following flags: | ||
| 52 | * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on | ||
| 53 | * sub-directory basis; | ||
| 54 | * o %UBIFS_SYNC_FL - useful for the same reasons; | ||
| 55 | * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories. | ||
| 56 | * | ||
| 57 | * This function returns the inherited flags. | ||
| 58 | */ | ||
| 59 | static int inherit_flags(const struct inode *dir, int mode) | ||
| 60 | { | ||
| 61 | int flags; | ||
| 62 | const struct ubifs_inode *ui = ubifs_inode(dir); | ||
| 63 | |||
| 64 | if (!S_ISDIR(dir->i_mode)) | ||
| 65 | /* | ||
| 66 | * The parent is not a directory, which means that an extended | ||
| 67 | * attribute inode is being created. No flags. | ||
| 68 | */ | ||
| 69 | return 0; | ||
| 70 | |||
| 71 | flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL); | ||
| 72 | if (!S_ISDIR(mode)) | ||
| 73 | /* The "DIRSYNC" flag only applies to directories */ | ||
| 74 | flags &= ~UBIFS_DIRSYNC_FL; | ||
| 75 | return flags; | ||
| 76 | } | ||
| 77 | |||
| 78 | /** | ||
| 79 | * ubifs_new_inode - allocate new UBIFS inode object. | ||
| 80 | * @c: UBIFS file-system description object | ||
| 81 | * @dir: parent directory inode | ||
| 82 | * @mode: inode mode flags | ||
| 83 | * | ||
| 84 | * This function finds an unused inode number, allocates new inode and | ||
| 85 | * initializes it. Returns new inode in case of success and an error code in | ||
| 86 | * case of failure. | ||
| 87 | */ | ||
| 88 | struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, | ||
| 89 | int mode) | ||
| 90 | { | ||
| 91 | struct inode *inode; | ||
| 92 | struct ubifs_inode *ui; | ||
| 93 | |||
| 94 | inode = new_inode(c->vfs_sb); | ||
| 95 | ui = ubifs_inode(inode); | ||
| 96 | if (!inode) | ||
| 97 | return ERR_PTR(-ENOMEM); | ||
| 98 | |||
| 99 | /* | ||
| 100 | * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and | ||
| 101 | * marking them dirty in file write path (see 'file_update_time()'). | ||
| 102 | * UBIFS has to fully control "clean <-> dirty" transitions of inodes | ||
| 103 | * to make budgeting work. | ||
| 104 | */ | ||
| 105 | inode->i_flags |= (S_NOCMTIME); | ||
| 106 | |||
| 107 | inode->i_uid = current->fsuid; | ||
| 108 | if (dir->i_mode & S_ISGID) { | ||
| 109 | inode->i_gid = dir->i_gid; | ||
| 110 | if (S_ISDIR(mode)) | ||
| 111 | mode |= S_ISGID; | ||
| 112 | } else | ||
| 113 | inode->i_gid = current->fsgid; | ||
| 114 | inode->i_mode = mode; | ||
| 115 | inode->i_mtime = inode->i_atime = inode->i_ctime = | ||
| 116 | ubifs_current_time(inode); | ||
| 117 | inode->i_mapping->nrpages = 0; | ||
| 118 | /* Disable readahead */ | ||
| 119 | inode->i_mapping->backing_dev_info = &c->bdi; | ||
| 120 | |||
| 121 | switch (mode & S_IFMT) { | ||
| 122 | case S_IFREG: | ||
| 123 | inode->i_mapping->a_ops = &ubifs_file_address_operations; | ||
| 124 | inode->i_op = &ubifs_file_inode_operations; | ||
| 125 | inode->i_fop = &ubifs_file_operations; | ||
| 126 | break; | ||
| 127 | case S_IFDIR: | ||
| 128 | inode->i_op = &ubifs_dir_inode_operations; | ||
| 129 | inode->i_fop = &ubifs_dir_operations; | ||
| 130 | inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ; | ||
| 131 | break; | ||
| 132 | case S_IFLNK: | ||
| 133 | inode->i_op = &ubifs_symlink_inode_operations; | ||
| 134 | break; | ||
| 135 | case S_IFSOCK: | ||
| 136 | case S_IFIFO: | ||
| 137 | case S_IFBLK: | ||
| 138 | case S_IFCHR: | ||
| 139 | inode->i_op = &ubifs_file_inode_operations; | ||
| 140 | break; | ||
| 141 | default: | ||
| 142 | BUG(); | ||
| 143 | } | ||
| 144 | |||
| 145 | ui->flags = inherit_flags(dir, mode); | ||
| 146 | ubifs_set_inode_flags(inode); | ||
| 147 | if (S_ISREG(mode)) | ||
| 148 | ui->compr_type = c->default_compr; | ||
| 149 | else | ||
| 150 | ui->compr_type = UBIFS_COMPR_NONE; | ||
| 151 | ui->synced_i_size = 0; | ||
| 152 | |||
| 153 | spin_lock(&c->cnt_lock); | ||
| 154 | /* Inode number overflow is currently not supported */ | ||
| 155 | if (c->highest_inum >= INUM_WARN_WATERMARK) { | ||
| 156 | if (c->highest_inum >= INUM_WATERMARK) { | ||
| 157 | spin_unlock(&c->cnt_lock); | ||
| 158 | ubifs_err("out of inode numbers"); | ||
| 159 | make_bad_inode(inode); | ||
| 160 | iput(inode); | ||
| 161 | return ERR_PTR(-EINVAL); | ||
| 162 | } | ||
| 163 | ubifs_warn("running out of inode numbers (current %lu, max %d)", | ||
| 164 | c->highest_inum, INUM_WATERMARK); | ||
| 165 | } | ||
| 166 | |||
| 167 | inode->i_ino = ++c->highest_inum; | ||
| 168 | inode->i_generation = ++c->vfs_gen; | ||
| 169 | /* | ||
| 170 | * The creation sequence number remains with this inode for its | ||
| 171 | * lifetime. All nodes for this inode have a greater sequence number, | ||
| 172 | * and so it is possible to distinguish obsolete nodes belonging to a | ||
| 173 | * previous incarnation of the same inode number - for example, for the | ||
| 174 | * purpose of rebuilding the index. | ||
| 175 | */ | ||
| 176 | ui->creat_sqnum = ++c->max_sqnum; | ||
| 177 | spin_unlock(&c->cnt_lock); | ||
| 178 | return inode; | ||
| 179 | } | ||
| 180 | |||
| 181 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 182 | |||
| 183 | static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) | ||
| 184 | { | ||
| 185 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
| 186 | return 0; | ||
| 187 | if (le16_to_cpu(dent->nlen) != nm->len) | ||
| 188 | return -EINVAL; | ||
| 189 | if (memcmp(dent->name, nm->name, nm->len)) | ||
| 190 | return -EINVAL; | ||
| 191 | return 0; | ||
| 192 | } | ||
| 193 | |||
| 194 | #else | ||
| 195 | |||
| 196 | #define dbg_check_name(dent, nm) 0 | ||
| 197 | |||
| 198 | #endif | ||
| 199 | |||
/*
 * ubifs_lookup - look up a directory entry ('->lookup()' VFS callback).
 * @dir: parent directory inode
 * @dentry: dentry of the name being looked up
 * @nd: VFS name-resolution data (not used here)
 *
 * Looks @dentry->d_name up in the TNC and attaches the resolved inode (or a
 * negative entry when the name does not exist) to @dentry via 'd_add()'.
 * Returns %NULL on success and an error pointer on failure.
 */
static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
				   struct nameidata *nd)
{
	int err;
	union ubifs_key key;
	struct inode *inode = NULL;
	struct ubifs_dent_node *dent;
	struct ubifs_info *c = dir->i_sb->s_fs_info;

	dbg_gen("'%.*s' in dir ino %lu",
		dentry->d_name.len, dentry->d_name.name, dir->i_ino);

	if (dentry->d_name.len > UBIFS_MAX_NLEN)
		return ERR_PTR(-ENAMETOOLONG);

	/* Buffer for the on-flash directory entry node */
	dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
	if (!dent)
		return ERR_PTR(-ENOMEM);

	dent_key_init(c, &key, dir->i_ino, &dentry->d_name);

	/* Name-aware lookup - resolves possible key (hash) collisions */
	err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
	if (err) {
		/*
		 * Do not hash the direntry if parent 'i_nlink' is zero, because
		 * this has side-effects - '->delete_inode()' call will not be
		 * called for the parent orphan inode, because 'd_count' of its
		 * direntry will stay 1 (it'll be negative direntry I guess)
		 * and prevent 'iput_final()' until the dentry is destroyed due
		 * to unmount or memory pressure.
		 */
		if (err == -ENOENT && dir->i_nlink != 0) {
			dbg_gen("not found");
			/* @inode is still %NULL - this adds a negative entry */
			goto done;
		}
		goto out;
	}

	if (dbg_check_name(dent, &dentry->d_name)) {
		err = -EINVAL;
		goto out;
	}

	inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
	if (IS_ERR(inode)) {
		/*
		 * This should not happen. Probably the file-system needs
		 * checking.
		 */
		err = PTR_ERR(inode);
		ubifs_err("dead directory entry '%.*s', error %d",
			  dentry->d_name.len, dentry->d_name.name, err);
		ubifs_ro_mode(c, err);
		goto out;
	}

done:
	kfree(dent);
	/*
	 * Note, d_splice_alias() would be required instead if we supported
	 * NFS.
	 */
	d_add(dentry, inode);
	return NULL;

out:
	kfree(dent);
	return ERR_PTR(err);
}
| 269 | |||
/*
 * ubifs_create - create a regular file ('->create()' VFS callback).
 * @dir: parent directory inode
 * @dentry: dentry of the file to create
 * @mode: mode (permission bits) of the new file
 * @nd: VFS name-resolution data (not used here)
 *
 * Returns zero on success and a negative error code on failure.
 */
static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
			struct nameidata *nd)
{
	struct inode *inode;
	struct ubifs_info *c = dir->i_sb->s_fs_info;
	int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
					.dirtied_ino = 1 };
	struct ubifs_inode *dir_ui = ubifs_inode(dir);

	/*
	 * Budget request settings: new inode, new direntry, changing the
	 * parent directory inode.
	 */

	dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	inode = ubifs_new_inode(c, dir, mode);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_budg;
	}

	/* Update the parent directory under its @ui_mutex */
	mutex_lock(&dir_ui->ui_mutex);
	dir->i_size += sz_change;
	dir_ui->ui_size = dir->i_size;
	dir->i_mtime = dir->i_ctime = inode->i_ctime;
	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
	if (err)
		goto out_cancel;
	mutex_unlock(&dir_ui->ui_mutex);

	ubifs_release_budget(c, &req);
	insert_inode_hash(inode);
	d_instantiate(dentry, inode);
	return 0;

out_cancel:
	/* Roll back the in-memory changes made to the parent directory */
	dir->i_size -= sz_change;
	dir_ui->ui_size = dir->i_size;
	mutex_unlock(&dir_ui->ui_mutex);
	make_bad_inode(inode);
	iput(inode);
out_budg:
	ubifs_release_budget(c, &req);
	ubifs_err("cannot create regular file, error %d", err);
	return err;
}
| 323 | |||
| 324 | /** | ||
| 325 | * vfs_dent_type - get VFS directory entry type. | ||
| 326 | * @type: UBIFS directory entry type | ||
| 327 | * | ||
| 328 | * This function converts UBIFS directory entry type into VFS directory entry | ||
| 329 | * type. | ||
| 330 | */ | ||
| 331 | static unsigned int vfs_dent_type(uint8_t type) | ||
| 332 | { | ||
| 333 | switch (type) { | ||
| 334 | case UBIFS_ITYPE_REG: | ||
| 335 | return DT_REG; | ||
| 336 | case UBIFS_ITYPE_DIR: | ||
| 337 | return DT_DIR; | ||
| 338 | case UBIFS_ITYPE_LNK: | ||
| 339 | return DT_LNK; | ||
| 340 | case UBIFS_ITYPE_BLK: | ||
| 341 | return DT_BLK; | ||
| 342 | case UBIFS_ITYPE_CHR: | ||
| 343 | return DT_CHR; | ||
| 344 | case UBIFS_ITYPE_FIFO: | ||
| 345 | return DT_FIFO; | ||
| 346 | case UBIFS_ITYPE_SOCK: | ||
| 347 | return DT_SOCK; | ||
| 348 | default: | ||
| 349 | BUG(); | ||
| 350 | } | ||
| 351 | return 0; | ||
| 352 | } | ||
| 353 | |||
| 354 | /* | ||
| 355 | * The classical Unix view for directory is that it is a linear array of | ||
| 356 | * (name, inode number) entries. Linux/VFS assumes this model as well. | ||
| 357 | * Particularly, 'readdir()' call wants us to return a directory entry offset | ||
| 358 | * which later may be used to continue 'readdir()'ing the directory or to | ||
| 359 | * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this | ||
| 360 | * model because directory entries are identified by keys, which may collide. | ||
| 361 | * | ||
| 362 | * UBIFS uses directory entry hash value for directory offsets, so | ||
| 363 | * 'seekdir()'/'telldir()' may not always work because of possible key | ||
| 364 | * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work | ||
| 365 | * properly by means of saving full directory entry name in the private field | ||
| 366 | * of the file description object. | ||
| 367 | * | ||
| 368 | * This means that UBIFS cannot support NFS which requires full | ||
| 369 | * 'seekdir()'/'telldir()' support. | ||
| 370 | */ | ||
/*
 * ubifs_readdir - iterate over a directory ('->readdir()' VFS callback).
 *
 * Feeds directory entries to @filldir. The directory entry hash is used as
 * @file->f_pos, and the last returned entry is cached in
 * @file->private_data so that consecutive calls can continue reliably even
 * though hashes may collide (see the comment above this function). Returns
 * zero on success or when the listing is complete, and a negative error
 * code on failure.
 */
static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
{
	int err, over = 0;
	struct qstr nm;
	union ubifs_key key;
	struct ubifs_dent_node *dent;
	struct inode *dir = file->f_path.dentry->d_inode;
	struct ubifs_info *c = dir->i_sb->s_fs_info;

	dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);

	if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
		/*
		 * The directory was seek'ed to a senseless position or there
		 * are no more entries.
		 */
		return 0;

	/* File positions 0 and 1 correspond to "." and ".." */
	if (file->f_pos == 0) {
		ubifs_assert(!file->private_data);
		over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
		if (over)
			return 0;
		file->f_pos = 1;
	}

	if (file->f_pos == 1) {
		ubifs_assert(!file->private_data);
		over = filldir(dirent, "..", 2, 1,
			       parent_ino(file->f_path.dentry), DT_DIR);
		if (over)
			return 0;

		/* Find the first entry in TNC and save it */
		lowest_dent_key(c, &key, dir->i_ino);
		nm.name = NULL;
		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			goto out;
		}

		file->f_pos = key_hash_flash(c, &dent->key);
		file->private_data = dent;
	}

	dent = file->private_data;
	if (!dent) {
		/*
		 * The directory was seek'ed to and is now readdir'ed.
		 * Find the entry corresponding to @file->f_pos or the
		 * closest one.
		 */
		dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
		nm.name = NULL;
		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			goto out;
		}
		file->f_pos = key_hash_flash(c, &dent->key);
		file->private_data = dent;
	}

	while (1) {
		dbg_gen("feed '%s', ino %llu, new f_pos %#x",
			dent->name, le64_to_cpu(dent->inum),
			key_hash_flash(c, &dent->key));
		ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);

		nm.len = le16_to_cpu(dent->nlen);
		over = filldir(dirent, dent->name, nm.len, file->f_pos,
			       le64_to_cpu(dent->inum),
			       vfs_dent_type(dent->type));
		if (over)
			return 0;

		/* Switch to the next entry */
		key_read(c, &dent->key, &key);
		nm.name = dent->name;
		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			goto out;
		}

		/* Replace the cached entry with the new one */
		kfree(file->private_data);
		file->f_pos = key_hash_flash(c, &dent->key);
		file->private_data = dent;
		cond_resched();
	}

out:
	if (err != -ENOENT) {
		ubifs_err("cannot find next direntry, error %d", err);
		return err;
	}

	/* -ENOENT means the listing is complete - drop the saved state */
	kfree(file->private_data);
	file->private_data = NULL;
	/* Position 2 marks "no more entries" (see the check at the top) */
	file->f_pos = 2;
	return 0;
}
| 475 | |||
| 476 | /* If a directory is seeked, we have to free saved readdir() state */ | ||
| 477 | static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) | ||
| 478 | { | ||
| 479 | kfree(file->private_data); | ||
| 480 | file->private_data = NULL; | ||
| 481 | return generic_file_llseek(file, offset, origin); | ||
| 482 | } | ||
| 483 | |||
| 484 | /* Free saved readdir() state when the directory is closed */ | ||
| 485 | static int ubifs_dir_release(struct inode *dir, struct file *file) | ||
| 486 | { | ||
| 487 | kfree(file->private_data); | ||
| 488 | file->private_data = NULL; | ||
| 489 | return 0; | ||
| 490 | } | ||
| 491 | |||
| 492 | /** | ||
| 493 | * lock_2_inodes - lock two UBIFS inodes. | ||
| 494 | * @inode1: first inode | ||
| 495 | * @inode2: second inode | ||
| 496 | */ | ||
| 497 | static void lock_2_inodes(struct inode *inode1, struct inode *inode2) | ||
| 498 | { | ||
| 499 | if (inode1->i_ino < inode2->i_ino) { | ||
| 500 | mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); | ||
| 501 | mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3); | ||
| 502 | } else { | ||
| 503 | mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); | ||
| 504 | mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3); | ||
| 505 | } | ||
| 506 | } | ||
| 507 | |||
/**
 * unlock_2_inodes - unlock two UBIFS inodes.
 * @inode1: first inode
 * @inode2: second inode
 */
static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
{
	mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
	mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
}
| 518 | |||
/*
 * ubifs_link - create a hard-link ('->link()' VFS callback).
 * @old_dentry: dentry of the existing link target
 * @dir: parent directory of the new link
 * @dentry: dentry of the new link
 *
 * Returns zero on success and a negative error code on failure.
 */
static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct ubifs_info *c = dir->i_sb->s_fs_info;
	struct inode *inode = old_dentry->d_inode;
	struct ubifs_inode *ui = ubifs_inode(inode);
	struct ubifs_inode *dir_ui = ubifs_inode(dir);
	int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
	struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
					.dirtied_ino_d = ui->data_len };

	/*
	 * Budget request settings: new direntry, changing the target inode,
	 * changing the parent inode.
	 */

	dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
		dentry->d_name.len, dentry->d_name.name, inode->i_ino,
		inode->i_nlink, dir->i_ino);
	err = dbg_check_synced_i_size(inode);
	if (err)
		return err;

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	lock_2_inodes(dir, inode);
	inc_nlink(inode);
	/* Take a reference for the new dentry; dropped via iput() on error */
	atomic_inc(&inode->i_count);
	inode->i_ctime = ubifs_current_time(inode);
	dir->i_size += sz_change;
	dir_ui->ui_size = dir->i_size;
	dir->i_mtime = dir->i_ctime = inode->i_ctime;
	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
	if (err)
		goto out_cancel;
	unlock_2_inodes(dir, inode);

	ubifs_release_budget(c, &req);
	d_instantiate(dentry, inode);
	return 0;

out_cancel:
	/* Roll back the in-memory changes made above */
	dir->i_size -= sz_change;
	dir_ui->ui_size = dir->i_size;
	drop_nlink(inode);
	unlock_2_inodes(dir, inode);
	ubifs_release_budget(c, &req);
	iput(inode);
	return err;
}
| 571 | |||
/*
 * ubifs_unlink - remove a directory entry ('->unlink()' VFS callback).
 * @dir: parent directory inode
 * @dentry: dentry of the file to unlink
 *
 * Returns zero on success and a negative error code on failure.
 */
static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct ubifs_info *c = dir->i_sb->s_fs_info;
	struct inode *inode = dentry->d_inode;
	struct ubifs_inode *dir_ui = ubifs_inode(dir);
	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
	int err, budgeted = 1;
	struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };

	/*
	 * Budget request settings: deletion direntry, deletion inode (+1 for
	 * @dirtied_ino), changing the parent directory inode. If budgeting
	 * fails, go ahead anyway because we have extra space reserved for
	 * deletions.
	 */

	dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
		dentry->d_name.len, dentry->d_name.name, inode->i_ino,
		inode->i_nlink, dir->i_ino);
	err = dbg_check_synced_i_size(inode);
	if (err)
		return err;

	err = ubifs_budget_space(c, &req);
	if (err) {
		/* Only -ENOSPC may be ignored - see the comment above */
		if (err != -ENOSPC)
			return err;
		err = 0;
		budgeted = 0;
	}

	lock_2_inodes(dir, inode);
	inode->i_ctime = ubifs_current_time(dir);
	drop_nlink(inode);
	dir->i_size -= sz_change;
	dir_ui->ui_size = dir->i_size;
	dir->i_mtime = dir->i_ctime = inode->i_ctime;
	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
	if (err)
		goto out_cancel;
	unlock_2_inodes(dir, inode);

	if (budgeted)
		ubifs_release_budget(c, &req);
	else {
		/* We've deleted something - clean the "no space" flags */
		c->nospace = c->nospace_rp = 0;
		smp_wmb();
	}
	return 0;

out_cancel:
	/* Roll back the in-memory changes made above */
	dir->i_size += sz_change;
	dir_ui->ui_size = dir->i_size;
	inc_nlink(inode);
	unlock_2_inodes(dir, inode);
	if (budgeted)
		ubifs_release_budget(c, &req);
	return err;
}
| 632 | |||
| 633 | /** | ||
| 634 | * check_dir_empty - check if a directory is empty or not. | ||
| 635 | * @c: UBIFS file-system description object | ||
| 636 | * @dir: VFS inode object of the directory to check | ||
| 637 | * | ||
| 638 | * This function checks if directory @dir is empty. Returns zero if the | ||
| 639 | * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes | ||
| 640 | * in case of of errors. | ||
| 641 | */ | ||
| 642 | static int check_dir_empty(struct ubifs_info *c, struct inode *dir) | ||
| 643 | { | ||
| 644 | struct qstr nm = { .name = NULL }; | ||
| 645 | struct ubifs_dent_node *dent; | ||
| 646 | union ubifs_key key; | ||
| 647 | int err; | ||
| 648 | |||
| 649 | lowest_dent_key(c, &key, dir->i_ino); | ||
| 650 | dent = ubifs_tnc_next_ent(c, &key, &nm); | ||
| 651 | if (IS_ERR(dent)) { | ||
| 652 | err = PTR_ERR(dent); | ||
| 653 | if (err == -ENOENT) | ||
| 654 | err = 0; | ||
| 655 | } else { | ||
| 656 | kfree(dent); | ||
| 657 | err = -ENOTEMPTY; | ||
| 658 | } | ||
| 659 | return err; | ||
| 660 | } | ||
| 661 | |||
/*
 * ubifs_rmdir - remove a directory ('->rmdir()' VFS callback).
 * @dir: parent directory inode
 * @dentry: dentry of the directory to remove
 *
 * Returns zero on success, %-ENOTEMPTY if the directory is not empty, and
 * other negative error codes on failure.
 */
static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct ubifs_info *c = dir->i_sb->s_fs_info;
	struct inode *inode = dentry->d_inode;
	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
	int err, budgeted = 1;
	struct ubifs_inode *dir_ui = ubifs_inode(dir);
	struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };

	/*
	 * Budget request settings: deletion direntry, deletion inode and
	 * changing the parent inode. If budgeting fails, go ahead anyway
	 * because we have extra space reserved for deletions.
	 */

	dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
		dentry->d_name.name, inode->i_ino, dir->i_ino);

	err = check_dir_empty(c, dentry->d_inode);
	if (err)
		return err;

	err = ubifs_budget_space(c, &req);
	if (err) {
		/*
		 * Only -ENOSPC may be ignored (see above); @err is
		 * overwritten by the 'ubifs_jnl_update()' call below.
		 */
		if (err != -ENOSPC)
			return err;
		budgeted = 0;
	}

	lock_2_inodes(dir, inode);
	inode->i_ctime = ubifs_current_time(dir);
	/* An empty directory has nlink 2 ("." and the parent's entry) */
	clear_nlink(inode);
	drop_nlink(dir);
	dir->i_size -= sz_change;
	dir_ui->ui_size = dir->i_size;
	dir->i_mtime = dir->i_ctime = inode->i_ctime;
	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
	if (err)
		goto out_cancel;
	unlock_2_inodes(dir, inode);

	if (budgeted)
		ubifs_release_budget(c, &req);
	else {
		/* We've deleted something - clean the "no space" flags */
		c->nospace = c->nospace_rp = 0;
		smp_wmb();
	}
	return 0;

out_cancel:
	/* Roll back the in-memory changes made above */
	dir->i_size += sz_change;
	dir_ui->ui_size = dir->i_size;
	inc_nlink(dir);
	/* Restore nlink 2 which was dropped by 'clear_nlink()' */
	inc_nlink(inode);
	inc_nlink(inode);
	unlock_2_inodes(dir, inode);
	if (budgeted)
		ubifs_release_budget(c, &req);
	return err;
}
| 723 | |||
/*
 * ubifs_mkdir - create a directory ('->mkdir()' VFS callback).
 * @dir: parent directory inode
 * @dentry: dentry of the directory to create
 * @mode: mode (permission bits) of the new directory
 *
 * Returns zero on success and a negative error code on failure.
 */
static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct inode *inode;
	struct ubifs_inode *dir_ui = ubifs_inode(dir);
	struct ubifs_info *c = dir->i_sb->s_fs_info;
	int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
					.dirtied_ino_d = 1 };

	/*
	 * Budget request settings: new inode, new direntry and changing parent
	 * directory inode.
	 */

	dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_budg;
	}

	mutex_lock(&dir_ui->ui_mutex);
	insert_inode_hash(inode);
	/* Directory link counts: "." for @inode, the new entry for @dir */
	inc_nlink(inode);
	inc_nlink(dir);
	dir->i_size += sz_change;
	dir_ui->ui_size = dir->i_size;
	dir->i_mtime = dir->i_ctime = inode->i_ctime;
	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
	if (err) {
		ubifs_err("cannot create directory, error %d", err);
		goto out_cancel;
	}
	mutex_unlock(&dir_ui->ui_mutex);

	ubifs_release_budget(c, &req);
	d_instantiate(dentry, inode);
	return 0;

out_cancel:
	/* Roll back the in-memory changes made to the parent directory */
	dir->i_size -= sz_change;
	dir_ui->ui_size = dir->i_size;
	drop_nlink(dir);
	mutex_unlock(&dir_ui->ui_mutex);
	make_bad_inode(inode);
	iput(inode);
out_budg:
	ubifs_release_budget(c, &req);
	return err;
}
| 780 | |||
| 781 | static int ubifs_mknod(struct inode *dir, struct dentry *dentry, | ||
| 782 | int mode, dev_t rdev) | ||
| 783 | { | ||
| 784 | struct inode *inode; | ||
| 785 | struct ubifs_inode *ui; | ||
| 786 | struct ubifs_inode *dir_ui = ubifs_inode(dir); | ||
| 787 | struct ubifs_info *c = dir->i_sb->s_fs_info; | ||
| 788 | union ubifs_dev_desc *dev = NULL; | ||
| 789 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); | ||
| 790 | int err, devlen = 0; | ||
| 791 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | ||
| 792 | .new_ino_d = devlen, .dirtied_ino = 1 }; | ||
| 793 | |||
| 794 | /* | ||
| 795 | * Budget request settings: new inode, new direntry and changing parent | ||
| 796 | * directory inode. | ||
| 797 | */ | ||
| 798 | |||
| 799 | dbg_gen("dent '%.*s' in dir ino %lu", | ||
| 800 | dentry->d_name.len, dentry->d_name.name, dir->i_ino); | ||
| 801 | |||
| 802 | if (!new_valid_dev(rdev)) | ||
| 803 | return -EINVAL; | ||
| 804 | |||
| 805 | if (S_ISBLK(mode) || S_ISCHR(mode)) { | ||
| 806 | dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); | ||
| 807 | if (!dev) | ||
| 808 | return -ENOMEM; | ||
| 809 | devlen = ubifs_encode_dev(dev, rdev); | ||
| 810 | } | ||
| 811 | |||
| 812 | err = ubifs_budget_space(c, &req); | ||
| 813 | if (err) { | ||
| 814 | kfree(dev); | ||
| 815 | return err; | ||
| 816 | } | ||
| 817 | |||
| 818 | inode = ubifs_new_inode(c, dir, mode); | ||
| 819 | if (IS_ERR(inode)) { | ||
| 820 | kfree(dev); | ||
| 821 | err = PTR_ERR(inode); | ||
| 822 | goto out_budg; | ||
| 823 | } | ||
| 824 | |||
| 825 | init_special_inode(inode, inode->i_mode, rdev); | ||
| 826 | inode->i_size = ubifs_inode(inode)->ui_size = devlen; | ||
| 827 | ui = ubifs_inode(inode); | ||
| 828 | ui->data = dev; | ||
| 829 | ui->data_len = devlen; | ||
| 830 | |||
| 831 | mutex_lock(&dir_ui->ui_mutex); | ||
| 832 | dir->i_size += sz_change; | ||
| 833 | dir_ui->ui_size = dir->i_size; | ||
| 834 | dir->i_mtime = dir->i_ctime = inode->i_ctime; | ||
| 835 | err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); | ||
| 836 | if (err) | ||
| 837 | goto out_cancel; | ||
| 838 | mutex_unlock(&dir_ui->ui_mutex); | ||
| 839 | |||
| 840 | ubifs_release_budget(c, &req); | ||
| 841 | insert_inode_hash(inode); | ||
| 842 | d_instantiate(dentry, inode); | ||
| 843 | return 0; | ||
| 844 | |||
| 845 | out_cancel: | ||
| 846 | dir->i_size -= sz_change; | ||
| 847 | dir_ui->ui_size = dir->i_size; | ||
| 848 | mutex_unlock(&dir_ui->ui_mutex); | ||
| 849 | make_bad_inode(inode); | ||
| 850 | iput(inode); | ||
| 851 | out_budg: | ||
| 852 | ubifs_release_budget(c, &req); | ||
| 853 | return err; | ||
| 854 | } | ||
| 855 | |||
/*
 * ubifs_symlink - create a symbolic link ('->symlink()' VFS callback).
 * @dir: parent directory inode
 * @dentry: dentry of the symlink to create
 * @symname: the link target path
 *
 * Returns zero on success, %-ENAMETOOLONG if @symname does not fit into the
 * inode data area, and other negative error codes on failure.
 */
static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct inode *inode;
	struct ubifs_inode *ui;
	struct ubifs_inode *dir_ui = ubifs_inode(dir);
	struct ubifs_info *c = dir->i_sb->s_fs_info;
	int err, len = strlen(symname);
	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
					.new_ino_d = len, .dirtied_ino = 1 };

	/*
	 * Budget request settings: new inode, new direntry and changing parent
	 * directory inode.
	 */

	dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
		dentry->d_name.name, symname, dir->i_ino);

	if (len > UBIFS_MAX_INO_DATA)
		return -ENAMETOOLONG;

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_budg;
	}

	ui = ubifs_inode(inode);
	ui->data = kmalloc(len + 1, GFP_NOFS);
	if (!ui->data) {
		err = -ENOMEM;
		goto out_inode;
	}

	memcpy(ui->data, symname, len);
	((char *)ui->data)[len] = '\0';
	/*
	 * The terminating zero byte is not written to the flash media and it
	 * is put just to make later in-memory string processing simpler. Thus,
	 * data length is @len, not @len + %1.
	 */
	ui->data_len = len;
	inode->i_size = ubifs_inode(inode)->ui_size = len;

	mutex_lock(&dir_ui->ui_mutex);
	dir->i_size += sz_change;
	dir_ui->ui_size = dir->i_size;
	dir->i_mtime = dir->i_ctime = inode->i_ctime;
	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
	if (err)
		goto out_cancel;
	mutex_unlock(&dir_ui->ui_mutex);

	ubifs_release_budget(c, &req);
	insert_inode_hash(inode);
	d_instantiate(dentry, inode);
	return 0;

out_cancel:
	/* Roll back the in-memory changes made to the parent directory */
	dir->i_size -= sz_change;
	dir_ui->ui_size = dir->i_size;
	mutex_unlock(&dir_ui->ui_mutex);
out_inode:
	make_bad_inode(inode);
	iput(inode);
out_budg:
	ubifs_release_budget(c, &req);
	return err;
}
| 931 | |||
/**
 * lock_3_inodes - lock three UBIFS inodes for rename.
 * @inode1: first inode
 * @inode2: second inode
 * @inode3: third inode
 *
 * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may
 * be null.
 *
 * NOTE(review): the 3-inode case below orders by inode *pointer* value,
 * whereas 'lock_2_inodes()' orders by @i_ino - presumably consistent thanks
 * to the fixed WB_MUTEX_* lockdep class assignment, but worth confirming.
 */
static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
			  struct inode *inode3)
{
	struct inode *i1, *i2, *i3;

	if (!inode3) {
		/* Only two distinct inodes (or even one) to lock */
		if (inode1 != inode2) {
			lock_2_inodes(inode1, inode2);
			return;
		}
		mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
		return;
	}

	if (inode1 == inode2) {
		lock_2_inodes(inode1, inode3);
		return;
	}

	/* 3 different inodes */
	if (inode1 < inode2) {
		i3 = inode2;
		if (inode1 < inode3) {
			i1 = inode1;
			i2 = inode3;
		} else {
			i1 = inode3;
			i2 = inode1;
		}
	} else {
		i3 = inode1;
		if (inode2 < inode3) {
			i1 = inode2;
			i2 = inode3;
		} else {
			i1 = inode3;
			i2 = inode2;
		}
	}
	/* Lock the lowest-address inode first, then the remaining two */
	mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1);
	lock_2_inodes(i2, i3);
}
| 983 | |||
/**
 * unlock_3_inodes - unlock three UBIFS inodes for rename.
 * @inode1: first inode
 * @inode2: second inode (may equal @inode1, in which case it is skipped)
 * @inode3: third inode (may be %NULL, in which case it is skipped)
 */
static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
			    struct inode *inode3)
{
	mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
	if (inode1 != inode2)
		mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
	if (inode3)
		mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
}
| 999 | |||
| 1000 | static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
| 1001 | struct inode *new_dir, struct dentry *new_dentry) | ||
| 1002 | { | ||
| 1003 | struct ubifs_info *c = old_dir->i_sb->s_fs_info; | ||
| 1004 | struct inode *old_inode = old_dentry->d_inode; | ||
| 1005 | struct inode *new_inode = new_dentry->d_inode; | ||
| 1006 | struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode); | ||
| 1007 | int err, release, sync = 0, move = (new_dir != old_dir); | ||
| 1008 | int is_dir = S_ISDIR(old_inode->i_mode); | ||
| 1009 | int unlink = !!new_inode; | ||
| 1010 | int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len); | ||
| 1011 | int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len); | ||
| 1012 | struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, | ||
| 1013 | .dirtied_ino = 3 }; | ||
| 1014 | struct ubifs_budget_req ino_req = { .dirtied_ino = 1, | ||
| 1015 | .dirtied_ino_d = old_inode_ui->data_len }; | ||
| 1016 | struct timespec time; | ||
| 1017 | |||
| 1018 | /* | ||
| 1019 | * Budget request settings: deletion direntry, new direntry, removing | ||
| 1020 | * the old inode, and changing old and new parent directory inodes. | ||
| 1021 | * | ||
| 1022 | * However, this operation also marks the target inode as dirty and | ||
| 1023 | * does not write it, so we allocate budget for the target inode | ||
| 1024 | * separately. | ||
| 1025 | */ | ||
| 1026 | |||
| 1027 | dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " | ||
| 1028 | "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, | ||
| 1029 | old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, | ||
| 1030 | new_dentry->d_name.name, new_dir->i_ino); | ||
| 1031 | |||
| 1032 | if (unlink && is_dir) { | ||
| 1033 | err = check_dir_empty(c, new_inode); | ||
| 1034 | if (err) | ||
| 1035 | return err; | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | err = ubifs_budget_space(c, &req); | ||
| 1039 | if (err) | ||
| 1040 | return err; | ||
| 1041 | err = ubifs_budget_space(c, &ino_req); | ||
| 1042 | if (err) { | ||
| 1043 | ubifs_release_budget(c, &req); | ||
| 1044 | return err; | ||
| 1045 | } | ||
| 1046 | |||
| 1047 | lock_3_inodes(old_dir, new_dir, new_inode); | ||
| 1048 | |||
| 1049 | /* | ||
| 1050 | * Like most other Unix systems, set the @i_ctime for inodes on a | ||
| 1051 | * rename. | ||
| 1052 | */ | ||
| 1053 | time = ubifs_current_time(old_dir); | ||
| 1054 | old_inode->i_ctime = time; | ||
| 1055 | |||
| 1056 | /* We must adjust parent link count when renaming directories */ | ||
| 1057 | if (is_dir) { | ||
| 1058 | if (move) { | ||
| 1059 | /* | ||
| 1060 | * @old_dir loses a link because we are moving | ||
| 1061 | * @old_inode to a different directory. | ||
| 1062 | */ | ||
| 1063 | drop_nlink(old_dir); | ||
| 1064 | /* | ||
| 1065 | * @new_dir only gains a link if we are not also | ||
| 1066 | * overwriting an existing directory. | ||
| 1067 | */ | ||
| 1068 | if (!unlink) | ||
| 1069 | inc_nlink(new_dir); | ||
| 1070 | } else { | ||
| 1071 | /* | ||
| 1072 | * @old_inode is not moving to a different directory, | ||
| 1073 | * but @old_dir still loses a link if we are | ||
| 1074 | * overwriting an existing directory. | ||
| 1075 | */ | ||
| 1076 | if (unlink) | ||
| 1077 | drop_nlink(old_dir); | ||
| 1078 | } | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | old_dir->i_size -= old_sz; | ||
| 1082 | ubifs_inode(old_dir)->ui_size = old_dir->i_size; | ||
| 1083 | old_dir->i_mtime = old_dir->i_ctime = time; | ||
| 1084 | new_dir->i_mtime = new_dir->i_ctime = time; | ||
| 1085 | |||
| 1086 | /* | ||
| 1087 | * And finally, if we unlinked a direntry which happened to have the | ||
| 1088 | * same name as the moved direntry, we have to decrement @i_nlink of | ||
| 1089 | * the unlinked inode and change its ctime. | ||
| 1090 | */ | ||
| 1091 | if (unlink) { | ||
| 1092 | /* | ||
| 1093 | * Directories cannot have hard-links, so if this is a | ||
| 1094 | * directory, decrement its @i_nlink twice because an empty | ||
| 1095 | * directory has @i_nlink 2. | ||
| 1096 | */ | ||
| 1097 | if (is_dir) | ||
| 1098 | drop_nlink(new_inode); | ||
| 1099 | new_inode->i_ctime = time; | ||
| 1100 | drop_nlink(new_inode); | ||
| 1101 | } else { | ||
| 1102 | new_dir->i_size += new_sz; | ||
| 1103 | ubifs_inode(new_dir)->ui_size = new_dir->i_size; | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | /* | ||
| 1107 | * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode | ||
| 1108 | * is dirty, because this will be done later on at the end of | ||
| 1109 | * 'ubifs_rename()'. | ||
| 1110 | */ | ||
| 1111 | if (IS_SYNC(old_inode)) { | ||
| 1112 | sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); | ||
| 1113 | if (unlink && IS_SYNC(new_inode)) | ||
| 1114 | sync = 1; | ||
| 1115 | } | ||
| 1116 | err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry, | ||
| 1117 | sync); | ||
| 1118 | if (err) | ||
| 1119 | goto out_cancel; | ||
| 1120 | |||
| 1121 | unlock_3_inodes(old_dir, new_dir, new_inode); | ||
| 1122 | ubifs_release_budget(c, &req); | ||
| 1123 | |||
| 1124 | mutex_lock(&old_inode_ui->ui_mutex); | ||
| 1125 | release = old_inode_ui->dirty; | ||
| 1126 | mark_inode_dirty_sync(old_inode); | ||
| 1127 | mutex_unlock(&old_inode_ui->ui_mutex); | ||
| 1128 | |||
| 1129 | if (release) | ||
| 1130 | ubifs_release_budget(c, &ino_req); | ||
| 1131 | if (IS_SYNC(old_inode)) | ||
| 1132 | err = old_inode->i_sb->s_op->write_inode(old_inode, 1); | ||
| 1133 | return err; | ||
| 1134 | |||
| 1135 | out_cancel: | ||
| 1136 | if (unlink) { | ||
| 1137 | if (is_dir) | ||
| 1138 | inc_nlink(new_inode); | ||
| 1139 | inc_nlink(new_inode); | ||
| 1140 | } else { | ||
| 1141 | new_dir->i_size -= new_sz; | ||
| 1142 | ubifs_inode(new_dir)->ui_size = new_dir->i_size; | ||
| 1143 | } | ||
| 1144 | old_dir->i_size += old_sz; | ||
| 1145 | ubifs_inode(old_dir)->ui_size = old_dir->i_size; | ||
| 1146 | if (is_dir) { | ||
| 1147 | if (move) { | ||
| 1148 | inc_nlink(old_dir); | ||
| 1149 | if (!unlink) | ||
| 1150 | drop_nlink(new_dir); | ||
| 1151 | } else { | ||
| 1152 | if (unlink) | ||
| 1153 | inc_nlink(old_dir); | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | unlock_3_inodes(old_dir, new_dir, new_inode); | ||
| 1157 | ubifs_release_budget(c, &ino_req); | ||
| 1158 | ubifs_release_budget(c, &req); | ||
| 1159 | return err; | ||
| 1160 | } | ||
| 1161 | |||
| 1162 | int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 1163 | struct kstat *stat) | ||
| 1164 | { | ||
| 1165 | loff_t size; | ||
| 1166 | struct inode *inode = dentry->d_inode; | ||
| 1167 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 1168 | |||
| 1169 | mutex_lock(&ui->ui_mutex); | ||
| 1170 | stat->dev = inode->i_sb->s_dev; | ||
| 1171 | stat->ino = inode->i_ino; | ||
| 1172 | stat->mode = inode->i_mode; | ||
| 1173 | stat->nlink = inode->i_nlink; | ||
| 1174 | stat->uid = inode->i_uid; | ||
| 1175 | stat->gid = inode->i_gid; | ||
| 1176 | stat->rdev = inode->i_rdev; | ||
| 1177 | stat->atime = inode->i_atime; | ||
| 1178 | stat->mtime = inode->i_mtime; | ||
| 1179 | stat->ctime = inode->i_ctime; | ||
| 1180 | stat->blksize = UBIFS_BLOCK_SIZE; | ||
| 1181 | stat->size = ui->ui_size; | ||
| 1182 | |||
| 1183 | /* | ||
| 1184 | * Unfortunately, the 'stat()' system call was designed for block | ||
| 1185 | * device based file systems, and it is not appropriate for UBIFS, | ||
| 1186 | * because UBIFS does not have notion of "block". For example, it is | ||
| 1187 | * difficult to tell how many block a directory takes - it actually | ||
| 1188 | * takes less than 300 bytes, but we have to round it to block size, | ||
| 1189 | * which introduces large mistake. This makes utilities like 'du' to | ||
| 1190 | * report completely senseless numbers. This is the reason why UBIFS | ||
| 1191 | * goes the same way as JFFS2 - it reports zero blocks for everything | ||
| 1192 | * but regular files, which makes more sense than reporting completely | ||
| 1193 | * wrong sizes. | ||
| 1194 | */ | ||
| 1195 | if (S_ISREG(inode->i_mode)) { | ||
| 1196 | size = ui->xattr_size; | ||
| 1197 | size += stat->size; | ||
| 1198 | size = ALIGN(size, UBIFS_BLOCK_SIZE); | ||
| 1199 | /* | ||
| 1200 | * Note, user-space expects 512-byte blocks count irrespectively | ||
| 1201 | * of what was reported in @stat->size. | ||
| 1202 | */ | ||
| 1203 | stat->blocks = size >> 9; | ||
| 1204 | } else | ||
| 1205 | stat->blocks = 0; | ||
| 1206 | mutex_unlock(&ui->ui_mutex); | ||
| 1207 | return 0; | ||
| 1208 | } | ||
| 1209 | |||
| 1210 | struct inode_operations ubifs_dir_inode_operations = { | ||
| 1211 | .lookup = ubifs_lookup, | ||
| 1212 | .create = ubifs_create, | ||
| 1213 | .link = ubifs_link, | ||
| 1214 | .symlink = ubifs_symlink, | ||
| 1215 | .unlink = ubifs_unlink, | ||
| 1216 | .mkdir = ubifs_mkdir, | ||
| 1217 | .rmdir = ubifs_rmdir, | ||
| 1218 | .mknod = ubifs_mknod, | ||
| 1219 | .rename = ubifs_rename, | ||
| 1220 | .setattr = ubifs_setattr, | ||
| 1221 | .getattr = ubifs_getattr, | ||
| 1222 | #ifdef CONFIG_UBIFS_FS_XATTR | ||
| 1223 | .setxattr = ubifs_setxattr, | ||
| 1224 | .getxattr = ubifs_getxattr, | ||
| 1225 | .listxattr = ubifs_listxattr, | ||
| 1226 | .removexattr = ubifs_removexattr, | ||
| 1227 | #endif | ||
| 1228 | }; | ||
| 1229 | |||
| 1230 | struct file_operations ubifs_dir_operations = { | ||
| 1231 | .llseek = ubifs_dir_llseek, | ||
| 1232 | .release = ubifs_dir_release, | ||
| 1233 | .read = generic_read_dir, | ||
| 1234 | .readdir = ubifs_readdir, | ||
| 1235 | .fsync = ubifs_fsync, | ||
| 1236 | .unlocked_ioctl = ubifs_ioctl, | ||
| 1237 | #ifdef CONFIG_COMPAT | ||
| 1238 | .compat_ioctl = ubifs_compat_ioctl, | ||
| 1239 | #endif | ||
| 1240 | }; | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c new file mode 100644 index 000000000000..005a3b854d96 --- /dev/null +++ b/fs/ubifs/file.c | |||
| @@ -0,0 +1,1275 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements VFS file and inode operations of regular files, device | ||
| 25 | * nodes and symlinks as well as address space operations. | ||
| 26 | * | ||
| 27 | * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the | ||
| 28 | * page is dirty and is used for budgeting purposes - dirty pages should not be | ||
| 29 | * budgeted. The PG_checked flag is set if full budgeting is required for the | ||
| 30 | * page e.g., when it corresponds to a file hole or it is just beyond the file | ||
| 31 | * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to | ||
| 32 | * fail in this function, and the budget is released in 'ubifs_write_end()'. So | ||
| 33 | * the PG_private and PG_checked flags carry the information about how the page | ||
| 34 | * was budgeted, to make it possible to release the budget properly. | ||
| 35 | * | ||
| 36 | * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations | ||
| 37 | * we implement. However, this is not true for '->writepage()', which might be | ||
| 38 | * called with 'i_mutex' unlocked. For example, when pdflush is performing | ||
| 39 | * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the | ||
| 40 | * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is | ||
| 41 | * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim | ||
| 42 | * path'. So, in '->writepage()' we are only guaranteed that the page is | ||
| 43 | * locked. | ||
| 44 | * | ||
| 45 | * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., | ||
| 46 | * readahead path does not have it locked ("sys_read -> generic_file_aio_read | ||
| 47 | * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is | ||
| 48 | * not set as well. However, UBIFS disables readahead. | ||
| 49 | * | ||
| 50 | * This, for example means that there might be 2 concurrent '->writepage()' | ||
| 51 | * calls for the same inode, but different inode dirty pages. | ||
| 52 | */ | ||
| 53 | |||
| 54 | #include "ubifs.h" | ||
| 55 | #include <linux/mount.h> | ||
| 56 | |||
| 57 | static int read_block(struct inode *inode, void *addr, unsigned int block, | ||
| 58 | struct ubifs_data_node *dn) | ||
| 59 | { | ||
| 60 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 61 | int err, len, out_len; | ||
| 62 | union ubifs_key key; | ||
| 63 | unsigned int dlen; | ||
| 64 | |||
| 65 | data_key_init(c, &key, inode->i_ino, block); | ||
| 66 | err = ubifs_tnc_lookup(c, &key, dn); | ||
| 67 | if (err) { | ||
| 68 | if (err == -ENOENT) | ||
| 69 | /* Not found, so it must be a hole */ | ||
| 70 | memset(addr, 0, UBIFS_BLOCK_SIZE); | ||
| 71 | return err; | ||
| 72 | } | ||
| 73 | |||
| 74 | ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum); | ||
| 75 | |||
| 76 | len = le32_to_cpu(dn->size); | ||
| 77 | if (len <= 0 || len > UBIFS_BLOCK_SIZE) | ||
| 78 | goto dump; | ||
| 79 | |||
| 80 | dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; | ||
| 81 | out_len = UBIFS_BLOCK_SIZE; | ||
| 82 | err = ubifs_decompress(&dn->data, dlen, addr, &out_len, | ||
| 83 | le16_to_cpu(dn->compr_type)); | ||
| 84 | if (err || len != out_len) | ||
| 85 | goto dump; | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Data length can be less than a full block, even for blocks that are | ||
| 89 | * not the last in the file (e.g., as a result of making a hole and | ||
| 90 | * appending data). Ensure that the remainder is zeroed out. | ||
| 91 | */ | ||
| 92 | if (len < UBIFS_BLOCK_SIZE) | ||
| 93 | memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); | ||
| 94 | |||
| 95 | return 0; | ||
| 96 | |||
| 97 | dump: | ||
| 98 | ubifs_err("bad data node (block %u, inode %lu)", | ||
| 99 | block, inode->i_ino); | ||
| 100 | dbg_dump_node(c, dn); | ||
| 101 | return -EINVAL; | ||
| 102 | } | ||
| 103 | |||
| 104 | static int do_readpage(struct page *page) | ||
| 105 | { | ||
| 106 | void *addr; | ||
| 107 | int err = 0, i; | ||
| 108 | unsigned int block, beyond; | ||
| 109 | struct ubifs_data_node *dn; | ||
| 110 | struct inode *inode = page->mapping->host; | ||
| 111 | loff_t i_size = i_size_read(inode); | ||
| 112 | |||
| 113 | dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", | ||
| 114 | inode->i_ino, page->index, i_size, page->flags); | ||
| 115 | ubifs_assert(!PageChecked(page)); | ||
| 116 | ubifs_assert(!PagePrivate(page)); | ||
| 117 | |||
| 118 | addr = kmap(page); | ||
| 119 | |||
| 120 | block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
| 121 | beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; | ||
| 122 | if (block >= beyond) { | ||
| 123 | /* Reading beyond inode */ | ||
| 124 | SetPageChecked(page); | ||
| 125 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
| 126 | goto out; | ||
| 127 | } | ||
| 128 | |||
| 129 | dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); | ||
| 130 | if (!dn) { | ||
| 131 | err = -ENOMEM; | ||
| 132 | goto error; | ||
| 133 | } | ||
| 134 | |||
| 135 | i = 0; | ||
| 136 | while (1) { | ||
| 137 | int ret; | ||
| 138 | |||
| 139 | if (block >= beyond) { | ||
| 140 | /* Reading beyond inode */ | ||
| 141 | err = -ENOENT; | ||
| 142 | memset(addr, 0, UBIFS_BLOCK_SIZE); | ||
| 143 | } else { | ||
| 144 | ret = read_block(inode, addr, block, dn); | ||
| 145 | if (ret) { | ||
| 146 | err = ret; | ||
| 147 | if (err != -ENOENT) | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | if (++i >= UBIFS_BLOCKS_PER_PAGE) | ||
| 152 | break; | ||
| 153 | block += 1; | ||
| 154 | addr += UBIFS_BLOCK_SIZE; | ||
| 155 | } | ||
| 156 | if (err) { | ||
| 157 | if (err == -ENOENT) { | ||
| 158 | /* Not found, so it must be a hole */ | ||
| 159 | SetPageChecked(page); | ||
| 160 | dbg_gen("hole"); | ||
| 161 | goto out_free; | ||
| 162 | } | ||
| 163 | ubifs_err("cannot read page %lu of inode %lu, error %d", | ||
| 164 | page->index, inode->i_ino, err); | ||
| 165 | goto error; | ||
| 166 | } | ||
| 167 | |||
| 168 | out_free: | ||
| 169 | kfree(dn); | ||
| 170 | out: | ||
| 171 | SetPageUptodate(page); | ||
| 172 | ClearPageError(page); | ||
| 173 | flush_dcache_page(page); | ||
| 174 | kunmap(page); | ||
| 175 | return 0; | ||
| 176 | |||
| 177 | error: | ||
| 178 | kfree(dn); | ||
| 179 | ClearPageUptodate(page); | ||
| 180 | SetPageError(page); | ||
| 181 | flush_dcache_page(page); | ||
| 182 | kunmap(page); | ||
| 183 | return err; | ||
| 184 | } | ||
| 185 | |||
| 186 | /** | ||
| 187 | * release_new_page_budget - release budget of a new page. | ||
| 188 | * @c: UBIFS file-system description object | ||
| 189 | * | ||
| 190 | * This is a helper function which releases budget corresponding to the budget | ||
| 191 | * of one new page of data. | ||
| 192 | */ | ||
| 193 | static void release_new_page_budget(struct ubifs_info *c) | ||
| 194 | { | ||
| 195 | struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 }; | ||
| 196 | |||
| 197 | ubifs_release_budget(c, &req); | ||
| 198 | } | ||
| 199 | |||
| 200 | /** | ||
| 201 | * release_existing_page_budget - release budget of an existing page. | ||
| 202 | * @c: UBIFS file-system description object | ||
| 203 | * | ||
| 204 | * This is a helper function which releases budget corresponding to the budget | ||
| 205 | * of changing one one page of data which already exists on the flash media. | ||
| 206 | */ | ||
| 207 | static void release_existing_page_budget(struct ubifs_info *c) | ||
| 208 | { | ||
| 209 | struct ubifs_budget_req req = { .dd_growth = c->page_budget}; | ||
| 210 | |||
| 211 | ubifs_release_budget(c, &req); | ||
| 212 | } | ||
| 213 | |||
| 214 | static int write_begin_slow(struct address_space *mapping, | ||
| 215 | loff_t pos, unsigned len, struct page **pagep) | ||
| 216 | { | ||
| 217 | struct inode *inode = mapping->host; | ||
| 218 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 219 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
| 220 | struct ubifs_budget_req req = { .new_page = 1 }; | ||
| 221 | int uninitialized_var(err), appending = !!(pos + len > inode->i_size); | ||
| 222 | struct page *page; | ||
| 223 | |||
| 224 | dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", | ||
| 225 | inode->i_ino, pos, len, inode->i_size); | ||
| 226 | |||
| 227 | /* | ||
| 228 | * At the slow path we have to budget before locking the page, because | ||
| 229 | * budgeting may force write-back, which would wait on locked pages and | ||
| 230 | * deadlock if we had the page locked. At this point we do not know | ||
| 231 | * anything about the page, so assume that this is a new page which is | ||
| 232 | * written to a hole. This corresponds to largest budget. Later the | ||
| 233 | * budget will be amended if this is not true. | ||
| 234 | */ | ||
| 235 | if (appending) | ||
| 236 | /* We are appending data, budget for inode change */ | ||
| 237 | req.dirtied_ino = 1; | ||
| 238 | |||
| 239 | err = ubifs_budget_space(c, &req); | ||
| 240 | if (unlikely(err)) | ||
| 241 | return err; | ||
| 242 | |||
| 243 | page = __grab_cache_page(mapping, index); | ||
| 244 | if (unlikely(!page)) { | ||
| 245 | ubifs_release_budget(c, &req); | ||
| 246 | return -ENOMEM; | ||
| 247 | } | ||
| 248 | |||
| 249 | if (!PageUptodate(page)) { | ||
| 250 | if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) | ||
| 251 | SetPageChecked(page); | ||
| 252 | else { | ||
| 253 | err = do_readpage(page); | ||
| 254 | if (err) { | ||
| 255 | unlock_page(page); | ||
| 256 | page_cache_release(page); | ||
| 257 | return err; | ||
| 258 | } | ||
| 259 | } | ||
| 260 | |||
| 261 | SetPageUptodate(page); | ||
| 262 | ClearPageError(page); | ||
| 263 | } | ||
| 264 | |||
| 265 | if (PagePrivate(page)) | ||
| 266 | /* | ||
| 267 | * The page is dirty, which means it was budgeted twice: | ||
| 268 | * o first time the budget was allocated by the task which | ||
| 269 | * made the page dirty and set the PG_private flag; | ||
| 270 | * o and then we budgeted for it for the second time at the | ||
| 271 | * very beginning of this function. | ||
| 272 | * | ||
| 273 | * So what we have to do is to release the page budget we | ||
| 274 | * allocated. | ||
| 275 | */ | ||
| 276 | release_new_page_budget(c); | ||
| 277 | else if (!PageChecked(page)) | ||
| 278 | /* | ||
| 279 | * We are changing a page which already exists on the media. | ||
| 280 | * This means that changing the page does not make the amount | ||
| 281 | * of indexing information larger, and this part of the budget | ||
| 282 | * which we have already acquired may be released. | ||
| 283 | */ | ||
| 284 | ubifs_convert_page_budget(c); | ||
| 285 | |||
| 286 | if (appending) { | ||
| 287 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 288 | |||
| 289 | /* | ||
| 290 | * 'ubifs_write_end()' is optimized from the fast-path part of | ||
| 291 | * 'ubifs_write_begin()' and expects the @ui_mutex to be locked | ||
| 292 | * if data is appended. | ||
| 293 | */ | ||
| 294 | mutex_lock(&ui->ui_mutex); | ||
| 295 | if (ui->dirty) | ||
| 296 | /* | ||
| 297 | * The inode is dirty already, so we may free the | ||
| 298 | * budget we allocated. | ||
| 299 | */ | ||
| 300 | ubifs_release_dirty_inode_budget(c, ui); | ||
| 301 | } | ||
| 302 | |||
| 303 | *pagep = page; | ||
| 304 | return 0; | ||
| 305 | } | ||
| 306 | |||
| 307 | /** | ||
| 308 | * allocate_budget - allocate budget for 'ubifs_write_begin()'. | ||
| 309 | * @c: UBIFS file-system description object | ||
| 310 | * @page: page to allocate budget for | ||
| 311 | * @ui: UBIFS inode object the page belongs to | ||
| 312 | * @appending: non-zero if the page is appended | ||
| 313 | * | ||
| 314 | * This is a helper function for 'ubifs_write_begin()' which allocates budget | ||
| 315 | * for the operation. The budget is allocated differently depending on whether | ||
| 316 | * this is appending, whether the page is dirty or not, and so on. This | ||
| 317 | * function leaves the @ui->ui_mutex locked in case of appending. Returns zero | ||
| 318 | * in case of success and %-ENOSPC in case of failure. | ||
| 319 | */ | ||
| 320 | static int allocate_budget(struct ubifs_info *c, struct page *page, | ||
| 321 | struct ubifs_inode *ui, int appending) | ||
| 322 | { | ||
| 323 | struct ubifs_budget_req req = { .fast = 1 }; | ||
| 324 | |||
| 325 | if (PagePrivate(page)) { | ||
| 326 | if (!appending) | ||
| 327 | /* | ||
| 328 | * The page is dirty and we are not appending, which | ||
| 329 | * means no budget is needed at all. | ||
| 330 | */ | ||
| 331 | return 0; | ||
| 332 | |||
| 333 | mutex_lock(&ui->ui_mutex); | ||
| 334 | if (ui->dirty) | ||
| 335 | /* | ||
| 336 | * The page is dirty and we are appending, so the inode | ||
| 337 | * has to be marked as dirty. However, it is already | ||
| 338 | * dirty, so we do not need any budget. We may return, | ||
| 339 | * but @ui->ui_mutex hast to be left locked because we | ||
| 340 | * should prevent write-back from flushing the inode | ||
| 341 | * and freeing the budget. The lock will be released in | ||
| 342 | * 'ubifs_write_end()'. | ||
| 343 | */ | ||
| 344 | return 0; | ||
| 345 | |||
| 346 | /* | ||
| 347 | * The page is dirty, we are appending, the inode is clean, so | ||
| 348 | * we need to budget the inode change. | ||
| 349 | */ | ||
| 350 | req.dirtied_ino = 1; | ||
| 351 | } else { | ||
| 352 | if (PageChecked(page)) | ||
| 353 | /* | ||
| 354 | * The page corresponds to a hole and does not | ||
| 355 | * exist on the media. So changing it makes | ||
| 356 | * make the amount of indexing information | ||
| 357 | * larger, and we have to budget for a new | ||
| 358 | * page. | ||
| 359 | */ | ||
| 360 | req.new_page = 1; | ||
| 361 | else | ||
| 362 | /* | ||
| 363 | * Not a hole, the change will not add any new | ||
| 364 | * indexing information, budget for page | ||
| 365 | * change. | ||
| 366 | */ | ||
| 367 | req.dirtied_page = 1; | ||
| 368 | |||
| 369 | if (appending) { | ||
| 370 | mutex_lock(&ui->ui_mutex); | ||
| 371 | if (!ui->dirty) | ||
| 372 | /* | ||
| 373 | * The inode is clean but we will have to mark | ||
| 374 | * it as dirty because we are appending. This | ||
| 375 | * needs a budget. | ||
| 376 | */ | ||
| 377 | req.dirtied_ino = 1; | ||
| 378 | } | ||
| 379 | } | ||
| 380 | |||
| 381 | return ubifs_budget_space(c, &req); | ||
| 382 | } | ||
| 383 | |||
| 384 | /* | ||
| 385 | * This function is called when a page of data is going to be written. Since | ||
| 386 | * the page of data will not necessarily go to the flash straight away, UBIFS | ||
| 387 | * has to reserve space on the media for it, which is done by means of | ||
| 388 | * budgeting. | ||
| 389 | * | ||
| 390 | * This is the hot-path of the file-system and we are trying to optimize it as | ||
| 391 | * much as possible. For this reasons it is split on 2 parts - slow and fast. | ||
| 392 | * | ||
| 393 | * There many budgeting cases: | ||
| 394 | * o a new page is appended - we have to budget for a new page and for | ||
| 395 | * changing the inode; however, if the inode is already dirty, there is | ||
| 396 | * no need to budget for it; | ||
| 397 | * o an existing clean page is changed - we have budget for it; if the page | ||
| 398 | * does not exist on the media (a hole), we have to budget for a new | ||
| 399 | * page; otherwise, we may budget for changing an existing page; the | ||
| 400 | * difference between these cases is that changing an existing page does | ||
| 401 | * not introduce anything new to the FS indexing information, so it does | ||
| 402 | * not grow, and smaller budget is acquired in this case; | ||
| 403 | * o an existing dirty page is changed - no need to budget at all, because | ||
| 404 | * the page budget has been acquired by earlier, when the page has been | ||
| 405 | * marked dirty. | ||
| 406 | * | ||
| 407 | * UBIFS budgeting sub-system may force write-back if it thinks there is no | ||
| 408 | * space to reserve. This imposes some locking restrictions and makes it | ||
| 409 | * impossible to take into account the above cases, and makes it impossible to | ||
| 410 | * optimize budgeting. | ||
| 411 | * | ||
| 412 | * The solution for this is that the fast path of 'ubifs_write_begin()' assumes | ||
| 413 | * there is a plenty of flash space and the budget will be acquired quickly, | ||
| 414 | * without forcing write-back. The slow path does not make this assumption. | ||
| 415 | */ | ||
| 416 | static int ubifs_write_begin(struct file *file, struct address_space *mapping, | ||
| 417 | loff_t pos, unsigned len, unsigned flags, | ||
| 418 | struct page **pagep, void **fsdata) | ||
| 419 | { | ||
| 420 | struct inode *inode = mapping->host; | ||
| 421 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 422 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 423 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
| 424 | int uninitialized_var(err), appending = !!(pos + len > inode->i_size); | ||
| 425 | struct page *page; | ||
| 426 | |||
| 427 | |||
| 428 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); | ||
| 429 | |||
| 430 | if (unlikely(c->ro_media)) | ||
| 431 | return -EROFS; | ||
| 432 | |||
| 433 | /* Try out the fast-path part first */ | ||
| 434 | page = __grab_cache_page(mapping, index); | ||
| 435 | if (unlikely(!page)) | ||
| 436 | return -ENOMEM; | ||
| 437 | |||
| 438 | if (!PageUptodate(page)) { | ||
| 439 | /* The page is not loaded from the flash */ | ||
| 440 | if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) | ||
| 441 | /* | ||
| 442 | * We change whole page so no need to load it. But we | ||
| 443 | * have to set the @PG_checked flag to make the further | ||
| 444 | * code the page is new. This might be not true, but it | ||
| 445 | * is better to budget more that to read the page from | ||
| 446 | * the media. | ||
| 447 | */ | ||
| 448 | SetPageChecked(page); | ||
| 449 | else { | ||
| 450 | err = do_readpage(page); | ||
| 451 | if (err) { | ||
| 452 | unlock_page(page); | ||
| 453 | page_cache_release(page); | ||
| 454 | return err; | ||
| 455 | } | ||
| 456 | } | ||
| 457 | |||
| 458 | SetPageUptodate(page); | ||
| 459 | ClearPageError(page); | ||
| 460 | } | ||
| 461 | |||
| 462 | err = allocate_budget(c, page, ui, appending); | ||
| 463 | if (unlikely(err)) { | ||
| 464 | ubifs_assert(err == -ENOSPC); | ||
| 465 | /* | ||
| 466 | * Budgeting failed which means it would have to force | ||
| 467 | * write-back but didn't, because we set the @fast flag in the | ||
| 468 | * request. Write-back cannot be done now, while we have the | ||
| 469 | * page locked, because it would deadlock. Unlock and free | ||
| 470 | * everything and fall-back to slow-path. | ||
| 471 | */ | ||
| 472 | if (appending) { | ||
| 473 | ubifs_assert(mutex_is_locked(&ui->ui_mutex)); | ||
| 474 | mutex_unlock(&ui->ui_mutex); | ||
| 475 | } | ||
| 476 | unlock_page(page); | ||
| 477 | page_cache_release(page); | ||
| 478 | |||
| 479 | return write_begin_slow(mapping, pos, len, pagep); | ||
| 480 | } | ||
| 481 | |||
| 482 | /* | ||
| 483 | * Whee, we aquired budgeting quickly - without involving | ||
| 484 | * garbage-collection, committing or forceing write-back. We return | ||
| 485 | * with @ui->ui_mutex locked if we are appending pages, and unlocked | ||
| 486 | * otherwise. This is an optimization (slightly hacky though). | ||
| 487 | */ | ||
| 488 | *pagep = page; | ||
| 489 | return 0; | ||
| 490 | |||
| 491 | } | ||
| 492 | |||
| 493 | /** | ||
| 494 | * cancel_budget - cancel budget. | ||
| 495 | * @c: UBIFS file-system description object | ||
| 496 | * @page: page to cancel budget for | ||
| 497 | * @ui: UBIFS inode object the page belongs to | ||
| 498 | * @appending: non-zero if the page is appended | ||
| 499 | * | ||
| 500 | * This is a helper function for a page write operation. It unlocks the | ||
| 501 | * @ui->ui_mutex in case of appending. | ||
| 502 | */ | ||
| 503 | static void cancel_budget(struct ubifs_info *c, struct page *page, | ||
| 504 | struct ubifs_inode *ui, int appending) | ||
| 505 | { | ||
| 506 | if (appending) { | ||
| 507 | if (!ui->dirty) | ||
| 508 | ubifs_release_dirty_inode_budget(c, ui); | ||
| 509 | mutex_unlock(&ui->ui_mutex); | ||
| 510 | } | ||
| 511 | if (!PagePrivate(page)) { | ||
| 512 | if (PageChecked(page)) | ||
| 513 | release_new_page_budget(c); | ||
| 514 | else | ||
| 515 | release_existing_page_budget(c); | ||
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 519 | static int ubifs_write_end(struct file *file, struct address_space *mapping, | ||
| 520 | loff_t pos, unsigned len, unsigned copied, | ||
| 521 | struct page *page, void *fsdata) | ||
| 522 | { | ||
| 523 | struct inode *inode = mapping->host; | ||
| 524 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 525 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 526 | loff_t end_pos = pos + len; | ||
| 527 | int appending = !!(end_pos > inode->i_size); | ||
| 528 | |||
| 529 | dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", | ||
| 530 | inode->i_ino, pos, page->index, len, copied, inode->i_size); | ||
| 531 | |||
| 532 | if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) { | ||
| 533 | /* | ||
| 534 | * VFS copied less data to the page that it intended and | ||
| 535 | * declared in its '->write_begin()' call via the @len | ||
| 536 | * argument. If the page was not up-to-date, and @len was | ||
| 537 | * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did | ||
| 538 | * not load it from the media (for optimization reasons). This | ||
| 539 | * means that part of the page contains garbage. So read the | ||
| 540 | * page now. | ||
| 541 | */ | ||
| 542 | dbg_gen("copied %d instead of %d, read page and repeat", | ||
| 543 | copied, len); | ||
| 544 | cancel_budget(c, page, ui, appending); | ||
| 545 | |||
| 546 | /* | ||
| 547 | * Return 0 to force VFS to repeat the whole operation, or the | ||
| 548 | * error code if 'do_readpage()' failes. | ||
| 549 | */ | ||
| 550 | copied = do_readpage(page); | ||
| 551 | goto out; | ||
| 552 | } | ||
| 553 | |||
| 554 | if (!PagePrivate(page)) { | ||
| 555 | SetPagePrivate(page); | ||
| 556 | atomic_long_inc(&c->dirty_pg_cnt); | ||
| 557 | __set_page_dirty_nobuffers(page); | ||
| 558 | } | ||
| 559 | |||
| 560 | if (appending) { | ||
| 561 | i_size_write(inode, end_pos); | ||
| 562 | ui->ui_size = end_pos; | ||
| 563 | /* | ||
| 564 | * Note, we do not set @I_DIRTY_PAGES (which means that the | ||
| 565 | * inode has dirty pages), this has been done in | ||
| 566 | * '__set_page_dirty_nobuffers()'. | ||
| 567 | */ | ||
| 568 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | ||
| 569 | ubifs_assert(mutex_is_locked(&ui->ui_mutex)); | ||
| 570 | mutex_unlock(&ui->ui_mutex); | ||
| 571 | } | ||
| 572 | |||
| 573 | out: | ||
| 574 | unlock_page(page); | ||
| 575 | page_cache_release(page); | ||
| 576 | return copied; | ||
| 577 | } | ||
| 578 | |||
| 579 | static int ubifs_readpage(struct file *file, struct page *page) | ||
| 580 | { | ||
| 581 | do_readpage(page); | ||
| 582 | unlock_page(page); | ||
| 583 | return 0; | ||
| 584 | } | ||
| 585 | |||
| 586 | static int do_writepage(struct page *page, int len) | ||
| 587 | { | ||
| 588 | int err = 0, i, blen; | ||
| 589 | unsigned int block; | ||
| 590 | void *addr; | ||
| 591 | union ubifs_key key; | ||
| 592 | struct inode *inode = page->mapping->host; | ||
| 593 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 594 | |||
| 595 | #ifdef UBIFS_DEBUG | ||
| 596 | spin_lock(&ui->ui_lock); | ||
| 597 | ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); | ||
| 598 | spin_unlock(&ui->ui_lock); | ||
| 599 | #endif | ||
| 600 | |||
| 601 | /* Update radix tree tags */ | ||
| 602 | set_page_writeback(page); | ||
| 603 | |||
| 604 | addr = kmap(page); | ||
| 605 | block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
| 606 | i = 0; | ||
| 607 | while (len) { | ||
| 608 | blen = min_t(int, len, UBIFS_BLOCK_SIZE); | ||
| 609 | data_key_init(c, &key, inode->i_ino, block); | ||
| 610 | err = ubifs_jnl_write_data(c, inode, &key, addr, blen); | ||
| 611 | if (err) | ||
| 612 | break; | ||
| 613 | if (++i >= UBIFS_BLOCKS_PER_PAGE) | ||
| 614 | break; | ||
| 615 | block += 1; | ||
| 616 | addr += blen; | ||
| 617 | len -= blen; | ||
| 618 | } | ||
| 619 | if (err) { | ||
| 620 | SetPageError(page); | ||
| 621 | ubifs_err("cannot write page %lu of inode %lu, error %d", | ||
| 622 | page->index, inode->i_ino, err); | ||
| 623 | ubifs_ro_mode(c, err); | ||
| 624 | } | ||
| 625 | |||
| 626 | ubifs_assert(PagePrivate(page)); | ||
| 627 | if (PageChecked(page)) | ||
| 628 | release_new_page_budget(c); | ||
| 629 | else | ||
| 630 | release_existing_page_budget(c); | ||
| 631 | |||
| 632 | atomic_long_dec(&c->dirty_pg_cnt); | ||
| 633 | ClearPagePrivate(page); | ||
| 634 | ClearPageChecked(page); | ||
| 635 | |||
| 636 | kunmap(page); | ||
| 637 | unlock_page(page); | ||
| 638 | end_page_writeback(page); | ||
| 639 | return err; | ||
| 640 | } | ||
| 641 | |||
/*
 * When writing-back dirty inodes, VFS first writes-back pages belonging to the
 * inode, then the inode itself. For UBIFS this may cause a problem. Consider a
 * situation when we have an inode with size 0, then a megabyte of data is
 * appended to the inode, then write-back starts and flushes some amount of the
 * dirty pages, the journal becomes full, commit happens and finishes, and then
 * an unclean reboot happens. When the file system is mounted next time, the
 * inode size would still be 0, but there would be many pages which are beyond
 * the inode size, they would be indexed and consume flash space. Because the
 * journal has been committed, the replay would not be able to detect this
 * situation and correct the inode size. This means UBIFS would have to scan
 * the whole index and correct all inode sizes, which is long and unacceptable.
 *
 * To prevent situations like this, UBIFS writes pages back only if they are
 * within the last synchronized inode size, i.e. the size which has been
 * written to the flash media last time. Otherwise, UBIFS forces inode
 * write-back, thus making sure the on-flash inode contains current inode size,
 * and then keeps writing pages back.
 *
 * Some locking issues explanation. 'ubifs_writepage()' first is called with
 * the page locked, and it locks @ui_mutex. However, write-back does take inode
 * @i_mutex, which means other VFS operations may be run on this inode at the
 * same time. And the problematic one is truncation to smaller size, from where
 * we have to call 'vmtruncate()', which first changes @inode->i_size, then
 * drops the truncated pages. And while dropping the pages, it takes the page
 * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
 * means that @inode->i_size is changed while @ui_mutex is unlocked.
 *
 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
 * inode size. How do we do this if @inode->i_size may become smaller while we
 * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
 * @ui->ui_size "shadow" field which UBIFS uses instead of @inode->i_size
 * internally and updates it under @ui_mutex.
 *
 * Q: why we do not worry that if we race with truncation, we may end up with a
 * situation when the inode is truncated while we are in the middle of
 * 'do_writepage()', so we do write beyond inode size?
 * A: If we are in the middle of 'do_writepage()', truncation would be locked
 * on the page lock and it would not write the truncated inode node to the
 * journal before we have finished.
 */
static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct ubifs_inode *ui = ubifs_inode(inode);
	loff_t i_size = i_size_read(inode), synced_i_size;
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	/* @len is the number of valid bytes in the (possibly last) page */
	int err, len = i_size & (PAGE_CACHE_SIZE - 1);
	void *kaddr;

	dbg_gen("ino %lu, pg %lu, pg flags %#lx",
		inode->i_ino, page->index, page->flags);
	ubifs_assert(PagePrivate(page));

	/* Is the page fully outside @i_size? (truncate in progress) */
	if (page->index > end_index || (page->index == end_index && !len)) {
		err = 0;
		goto out_unlock;
	}

	spin_lock(&ui->ui_lock);
	synced_i_size = ui->synced_i_size;
	spin_unlock(&ui->ui_lock);

	/* Is the page fully inside @i_size? */
	if (page->index < end_index) {
		if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
			/*
			 * The page is beyond the on-flash inode size - force
			 * the inode to the media first (see the comment above
			 * this function).
			 */
			err = inode->i_sb->s_op->write_inode(inode, 1);
			if (err)
				goto out_unlock;
			/*
			 * The inode has been written, but the write-buffer has
			 * not been synchronized, so in case of an unclean
			 * reboot we may end up with some pages beyond inode
			 * size, but they would be in the journal (because
			 * commit flushes write buffers) and recovery would deal
			 * with this.
			 */
		}
		return do_writepage(page, PAGE_CACHE_SIZE);
	}

	/*
	 * The page straddles @i_size. It must be zeroed out on each and every
	 * writepage invocation because it may be mmapped. "A file is mapped
	 * in multiples of the page size. For a file that is not a multiple of
	 * the page size, the remaining memory is zeroed when mapped, and
	 * writes to that region are not written out to the file."
	 */
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	if (i_size > synced_i_size) {
		/* Make sure the on-flash inode size is up to date first */
		err = inode->i_sb->s_op->write_inode(inode, 1);
		if (err)
			goto out_unlock;
	}

	return do_writepage(page, len);

out_unlock:
	unlock_page(page);
	return err;
}
| 749 | |||
| 750 | /** | ||
| 751 | * do_attr_changes - change inode attributes. | ||
| 752 | * @inode: inode to change attributes for | ||
| 753 | * @attr: describes attributes to change | ||
| 754 | */ | ||
| 755 | static void do_attr_changes(struct inode *inode, const struct iattr *attr) | ||
| 756 | { | ||
| 757 | if (attr->ia_valid & ATTR_UID) | ||
| 758 | inode->i_uid = attr->ia_uid; | ||
| 759 | if (attr->ia_valid & ATTR_GID) | ||
| 760 | inode->i_gid = attr->ia_gid; | ||
| 761 | if (attr->ia_valid & ATTR_ATIME) | ||
| 762 | inode->i_atime = timespec_trunc(attr->ia_atime, | ||
| 763 | inode->i_sb->s_time_gran); | ||
| 764 | if (attr->ia_valid & ATTR_MTIME) | ||
| 765 | inode->i_mtime = timespec_trunc(attr->ia_mtime, | ||
| 766 | inode->i_sb->s_time_gran); | ||
| 767 | if (attr->ia_valid & ATTR_CTIME) | ||
| 768 | inode->i_ctime = timespec_trunc(attr->ia_ctime, | ||
| 769 | inode->i_sb->s_time_gran); | ||
| 770 | if (attr->ia_valid & ATTR_MODE) { | ||
| 771 | umode_t mode = attr->ia_mode; | ||
| 772 | |||
| 773 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | ||
| 774 | mode &= ~S_ISGID; | ||
| 775 | inode->i_mode = mode; | ||
| 776 | } | ||
| 777 | } | ||
| 778 | |||
/**
 * do_truncation - truncate an inode.
 * @c: UBIFS file-system description object
 * @inode: inode to truncate
 * @attr: inode attribute changes description
 *
 * This function implements VFS '->setattr()' call when the inode is truncated
 * to a smaller size. Returns zero in case of success and a negative error code
 * in case of failure.
 */
static int do_truncation(struct ubifs_info *c, struct inode *inode,
			 const struct iattr *attr)
{
	int err;
	struct ubifs_budget_req req;
	loff_t old_size = inode->i_size, new_size = attr->ia_size;
	/* Offset of the new size inside its (partially used) last block */
	int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
	struct ubifs_inode *ui = ubifs_inode(inode);

	dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
	memset(&req, 0, sizeof(struct ubifs_budget_req));

	/*
	 * If this is truncation to a smaller size, and we do not truncate on a
	 * block boundary, budget for changing one data block, because the last
	 * block will be re-written.
	 */
	if (new_size & (UBIFS_BLOCK_SIZE - 1))
		req.dirtied_page = 1;

	req.dirtied_ino = 1;
	/* A funny way to budget for truncation node */
	req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	/*
	 * Note, @ui_mutex must not be held here: 'vmtruncate()' locks the
	 * truncated pages, and 'ubifs_writepage()' takes @ui_mutex with the
	 * page lock held, so holding @ui_mutex here would deadlock (see the
	 * comment before 'ubifs_writepage()').
	 */
	err = vmtruncate(inode, new_size);
	if (err)
		goto out_budg;

	if (offset) {
		pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
		struct page *page;

		page = find_lock_page(inode->i_mapping, index);
		if (page) {
			if (PageDirty(page)) {
				/*
				 * 'ubifs_jnl_truncate()' will try to truncate
				 * the last data node, but it contains
				 * out-of-date data because the page is dirty.
				 * Write the page now, so that
				 * 'ubifs_jnl_truncate()' will see an already
				 * truncated (and up to date) data node.
				 */
				ubifs_assert(PagePrivate(page));

				clear_page_dirty_for_io(page);
				if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
					offset = new_size &
						 (PAGE_CACHE_SIZE - 1);
				err = do_writepage(page, offset);
				page_cache_release(page);
				if (err)
					goto out_budg;
				/*
				 * We could now tell 'ubifs_jnl_truncate()' not
				 * to read the last block.
				 */
			} else {
				/*
				 * We could 'kmap()' the page and pass the data
				 * to 'ubifs_jnl_truncate()' to save it from
				 * having to read it.
				 */
				unlock_page(page);
				page_cache_release(page);
			}
		}
	}

	mutex_lock(&ui->ui_mutex);
	/* 'vmtruncate()' changed @i_size, update the @ui_size shadow field */
	ui->ui_size = inode->i_size;
	/* Truncation changes inode [mc]time */
	inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
	/* The other attributes may be changed at the same time as well */
	do_attr_changes(inode, attr);

	err = ubifs_jnl_truncate(c, inode, old_size, new_size);
	mutex_unlock(&ui->ui_mutex);
out_budg:
	ubifs_release_budget(c, &req);
	return err;
}
| 874 | |||
/**
 * do_setattr - change inode attributes.
 * @c: UBIFS file-system description object
 * @inode: inode to change attributes for
 * @attr: inode attribute changes description
 *
 * This function implements VFS '->setattr()' call for all cases except
 * truncations to smaller size. Returns zero in case of success and a negative
 * error code in case of failure.
 */
static int do_setattr(struct ubifs_info *c, struct inode *inode,
		      const struct iattr *attr)
{
	int err, release;
	loff_t new_size = attr->ia_size;
	struct ubifs_inode *ui = ubifs_inode(inode);
	/* Budget for dirtying the inode (plus its data area) */
	struct ubifs_budget_req req = { .dirtied_ino = 1,
					.dirtied_ino_d = ui->data_len };

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	if (attr->ia_valid & ATTR_SIZE) {
		/* Size change to the same or larger size (extends the file) */
		dbg_gen("size %lld -> %lld", inode->i_size, new_size);
		err = vmtruncate(inode, new_size);
		if (err)
			goto out;
	}

	mutex_lock(&ui->ui_mutex);
	if (attr->ia_valid & ATTR_SIZE) {
		/* Truncation changes inode [mc]time */
		inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
		/* 'vmtruncate()' changed @i_size, update @ui_size */
		ui->ui_size = inode->i_size;
	}

	do_attr_changes(inode, attr);

	/*
	 * If the inode was dirty already, it has already been budgeted for,
	 * so the budget taken above is redundant and released below.
	 */
	release = ui->dirty;
	if (attr->ia_valid & ATTR_SIZE)
		/*
		 * Inode length changed, so we have to make sure
		 * @I_DIRTY_DATASYNC is set.
		 */
		__mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	else
		mark_inode_dirty_sync(inode);
	mutex_unlock(&ui->ui_mutex);

	if (release)
		ubifs_release_budget(c, &req);
	if (IS_SYNC(inode))
		/* @err is zero at this point - return the write-out result */
		err = inode->i_sb->s_op->write_inode(inode, 1);
	return err;

out:
	ubifs_release_budget(c, &req);
	return err;
}
| 936 | |||
| 937 | int ubifs_setattr(struct dentry *dentry, struct iattr *attr) | ||
| 938 | { | ||
| 939 | int err; | ||
| 940 | struct inode *inode = dentry->d_inode; | ||
| 941 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 942 | |||
| 943 | dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); | ||
| 944 | err = inode_change_ok(inode, attr); | ||
| 945 | if (err) | ||
| 946 | return err; | ||
| 947 | |||
| 948 | err = dbg_check_synced_i_size(inode); | ||
| 949 | if (err) | ||
| 950 | return err; | ||
| 951 | |||
| 952 | if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size) | ||
| 953 | /* Truncation to a smaller size */ | ||
| 954 | err = do_truncation(c, inode, attr); | ||
| 955 | else | ||
| 956 | err = do_setattr(c, inode, attr); | ||
| 957 | |||
| 958 | return err; | ||
| 959 | } | ||
| 960 | |||
| 961 | static void ubifs_invalidatepage(struct page *page, unsigned long offset) | ||
| 962 | { | ||
| 963 | struct inode *inode = page->mapping->host; | ||
| 964 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 965 | |||
| 966 | ubifs_assert(PagePrivate(page)); | ||
| 967 | if (offset) | ||
| 968 | /* Partial page remains dirty */ | ||
| 969 | return; | ||
| 970 | |||
| 971 | if (PageChecked(page)) | ||
| 972 | release_new_page_budget(c); | ||
| 973 | else | ||
| 974 | release_existing_page_budget(c); | ||
| 975 | |||
| 976 | atomic_long_dec(&c->dirty_pg_cnt); | ||
| 977 | ClearPagePrivate(page); | ||
| 978 | ClearPageChecked(page); | ||
| 979 | } | ||
| 980 | |||
| 981 | static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
| 982 | { | ||
| 983 | struct ubifs_inode *ui = ubifs_inode(dentry->d_inode); | ||
| 984 | |||
| 985 | nd_set_link(nd, ui->data); | ||
| 986 | return NULL; | ||
| 987 | } | ||
| 988 | |||
| 989 | int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
| 990 | { | ||
| 991 | struct inode *inode = dentry->d_inode; | ||
| 992 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 993 | int err; | ||
| 994 | |||
| 995 | dbg_gen("syncing inode %lu", inode->i_ino); | ||
| 996 | |||
| 997 | /* | ||
| 998 | * VFS has already synchronized dirty pages for this inode. Synchronize | ||
| 999 | * the inode unless this is a 'datasync()' call. | ||
| 1000 | */ | ||
| 1001 | if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { | ||
| 1002 | err = inode->i_sb->s_op->write_inode(inode, 1); | ||
| 1003 | if (err) | ||
| 1004 | return err; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | /* | ||
| 1008 | * Nodes related to this inode may still sit in a write-buffer. Flush | ||
| 1009 | * them. | ||
| 1010 | */ | ||
| 1011 | err = ubifs_sync_wbufs_by_inode(c, inode); | ||
| 1012 | if (err) | ||
| 1013 | return err; | ||
| 1014 | |||
| 1015 | return 0; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | /** | ||
| 1019 | * mctime_update_needed - check if mtime or ctime update is needed. | ||
| 1020 | * @inode: the inode to do the check for | ||
| 1021 | * @now: current time | ||
| 1022 | * | ||
| 1023 | * This helper function checks if the inode mtime/ctime should be updated or | ||
| 1024 | * not. If current values of the time-stamps are within the UBIFS inode time | ||
| 1025 | * granularity, they are not updated. This is an optimization. | ||
| 1026 | */ | ||
| 1027 | static inline int mctime_update_needed(const struct inode *inode, | ||
| 1028 | const struct timespec *now) | ||
| 1029 | { | ||
| 1030 | if (!timespec_equal(&inode->i_mtime, now) || | ||
| 1031 | !timespec_equal(&inode->i_ctime, now)) | ||
| 1032 | return 1; | ||
| 1033 | return 0; | ||
| 1034 | } | ||
| 1035 | |||
/**
 * update_mctime - update mtime and ctime of an inode.
 * @c: UBIFS file-system description object
 * @inode: inode to update
 *
 * This function updates mtime and ctime of the inode if they are not
 * equivalent to current time. Returns zero in case of success and a negative
 * error code in case of failure.
 *
 * (Note, the function was previously documented under the wrong name
 * 'update_ctime'.)
 */
static int update_mctime(struct ubifs_info *c, struct inode *inode)
{
	struct timespec now = ubifs_current_time(inode);
	struct ubifs_inode *ui = ubifs_inode(inode);

	if (mctime_update_needed(inode, &now)) {
		int err, release;
		/* Budget for dirtying the inode (plus its data area) */
		struct ubifs_budget_req req = { .dirtied_ino = 1,
						.dirtied_ino_d = ui->data_len };

		err = ubifs_budget_space(c, &req);
		if (err)
			return err;

		mutex_lock(&ui->ui_mutex);
		inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
		/* If the inode was dirty already, give the budget back */
		release = ui->dirty;
		mark_inode_dirty_sync(inode);
		mutex_unlock(&ui->ui_mutex);
		if (release)
			ubifs_release_budget(c, &req);
	}

	return 0;
}
| 1070 | |||
| 1071 | static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, | ||
| 1072 | unsigned long nr_segs, loff_t pos) | ||
| 1073 | { | ||
| 1074 | int err; | ||
| 1075 | ssize_t ret; | ||
| 1076 | struct inode *inode = iocb->ki_filp->f_mapping->host; | ||
| 1077 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 1078 | |||
| 1079 | err = update_mctime(c, inode); | ||
| 1080 | if (err) | ||
| 1081 | return err; | ||
| 1082 | |||
| 1083 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
| 1084 | if (ret < 0) | ||
| 1085 | return ret; | ||
| 1086 | |||
| 1087 | if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { | ||
| 1088 | err = ubifs_sync_wbufs_by_inode(c, inode); | ||
| 1089 | if (err) | ||
| 1090 | return err; | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | return ret; | ||
| 1094 | } | ||
| 1095 | |||
/*
 * VFS '->set_page_dirty()' callback. In UBIFS pages are only ever dirtied via
 * paths that budget for them first, so this must be a no-op in practice.
 */
static int ubifs_set_page_dirty(struct page *page)
{
	int rc = __set_page_dirty_nobuffers(page);

	/*
	 * An attempt to dirty a page without budgeting for it - should not
	 * happen.
	 */
	ubifs_assert(rc == 0);
	return rc;
}
| 1108 | |||
/*
 * VFS '->releasepage()' callback. UBIFS uses @PG_private to mark budgeted
 * dirty pages, so being asked to release such a page indicates a bug; the
 * assertion complains in debug builds, and the page state is cleaned up so
 * the release can proceed anyway.
 */
static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
	/*
	 * An attempt to release a dirty page without budgeting for it - should
	 * not happen.
	 */
	if (PageWriteback(page))
		return 0;
	ubifs_assert(PagePrivate(page));
	ubifs_assert(0);
	ClearPagePrivate(page);
	ClearPageChecked(page);
	return 1;
}
| 1123 | |||
/*
 * mmap()d file has taken write protection fault and is being made
 * writable. UBIFS must ensure page is budgeted for. Returns zero on success
 * and a negative error code on failure.
 */
static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	struct ubifs_info *c = inode->i_sb->s_fs_info;
	struct timespec now = ubifs_current_time(inode);
	/* Pessimistically budget for dirtying a brand-new page */
	struct ubifs_budget_req req = { .new_page = 1 };
	int err, update_time;

	dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
		i_size_read(inode));
	ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));

	if (unlikely(c->ro_media))
		/* The media went read-only - refuse the write fault */
		return -EROFS;

	/*
	 * We have not locked @page so far so we may budget for changing the
	 * page. Note, we cannot do this after we locked the page, because
	 * budgeting may cause write-back which would cause deadlock.
	 *
	 * At the moment we do not know whether the page is dirty or not, so we
	 * assume that it is not and budget for a new page. We could look at
	 * the @PG_private flag and figure this out, but we may race with write
	 * back and the page state may change by the time we lock it, so this
	 * would need additional care. We do not bother with this at the
	 * moment, although it might be good idea to do. Instead, we allocate
	 * budget for a new page and amend it later on if the page was in fact
	 * dirty.
	 *
	 * The budgeting-related logic of this function is similar to what we
	 * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there
	 * for more comments.
	 */
	update_time = mctime_update_needed(inode, &now);
	if (update_time)
		/*
		 * We have to change inode time stamp which requires extra
		 * budgeting.
		 */
		req.dirtied_ino = 1;

	err = ubifs_budget_space(c, &req);
	if (unlikely(err)) {
		if (err == -ENOSPC)
			ubifs_warn("out of space for mmapped file "
				   "(inode number %lu)", inode->i_ino);
		return err;
	}

	lock_page(page);
	if (unlikely(page->mapping != inode->i_mapping ||
		     page_offset(page) > i_size_read(inode))) {
		/* Page got truncated out from underneath us */
		err = -EINVAL;
		goto out_unlock;
	}

	if (PagePrivate(page))
		/* The page is already dirty and budgeted - undo our budget */
		release_new_page_budget(c);
	else {
		/* First time this page is dirtied - keep the budget */
		if (!PageChecked(page))
			ubifs_convert_page_budget(c);
		SetPagePrivate(page);
		atomic_long_inc(&c->dirty_pg_cnt);
		__set_page_dirty_nobuffers(page);
	}

	if (update_time) {
		int release;
		struct ubifs_inode *ui = ubifs_inode(inode);

		mutex_lock(&ui->ui_mutex);
		inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
		/* If the inode was dirty already, give that budget back */
		release = ui->dirty;
		mark_inode_dirty_sync(inode);
		mutex_unlock(&ui->ui_mutex);
		if (release)
			ubifs_release_dirty_inode_budget(c, ui);
	}

	unlock_page(page);
	return 0;

out_unlock:
	unlock_page(page);
	ubifs_release_budget(c, &req);
	return err;
}
| 1216 | |||
/* VM operations for mmapped UBIFS files */
static struct vm_operations_struct ubifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = ubifs_vm_page_mkwrite,
};
| 1221 | |||
| 1222 | static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 1223 | { | ||
| 1224 | int err; | ||
| 1225 | |||
| 1226 | /* 'generic_file_mmap()' takes care of NOMMU case */ | ||
| 1227 | err = generic_file_mmap(file, vma); | ||
| 1228 | if (err) | ||
| 1229 | return err; | ||
| 1230 | vma->vm_ops = &ubifs_file_vm_ops; | ||
| 1231 | return 0; | ||
| 1232 | } | ||
| 1233 | |||
/* Address-space operations for regular UBIFS files */
struct address_space_operations ubifs_file_address_operations = {
	.readpage = ubifs_readpage,
	.writepage = ubifs_writepage,
	.write_begin = ubifs_write_begin,
	.write_end = ubifs_write_end,
	.invalidatepage = ubifs_invalidatepage,
	.set_page_dirty = ubifs_set_page_dirty,
	.releasepage = ubifs_releasepage,
};

/* Inode operations for regular UBIFS files */
struct inode_operations ubifs_file_inode_operations = {
	.setattr = ubifs_setattr,
	.getattr = ubifs_getattr,
#ifdef CONFIG_UBIFS_FS_XATTR
	.setxattr = ubifs_setxattr,
	.getxattr = ubifs_getxattr,
	.listxattr = ubifs_listxattr,
	.removexattr = ubifs_removexattr,
#endif
};

/* Inode operations for UBIFS symbolic links */
struct inode_operations ubifs_symlink_inode_operations = {
	.readlink = generic_readlink,
	.follow_link = ubifs_follow_link,
	.setattr = ubifs_setattr,
	.getattr = ubifs_getattr,
};

/* File operations for regular UBIFS files */
struct file_operations ubifs_file_operations = {
	.llseek = generic_file_llseek,
	.read = do_sync_read,
	.write = do_sync_write,
	.aio_read = generic_file_aio_read,
	.aio_write = ubifs_aio_write,
	.mmap = ubifs_file_mmap,
	.fsync = ubifs_fsync,
	.unlocked_ioctl = ubifs_ioctl,
	.splice_read = generic_file_splice_read,
#ifdef CONFIG_COMPAT
	.compat_ioctl = ubifs_compat_ioctl,
#endif
};
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c new file mode 100644 index 000000000000..10394c548367 --- /dev/null +++ b/fs/ubifs/find.c | |||
| @@ -0,0 +1,975 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file contains functions for finding LEBs for various purposes e.g. | ||
| 25 | * garbage collection. In general, lprops category heaps and lists are used | ||
| 26 | * for fast access, falling back on scanning the LPT as a last resort. | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <linux/sort.h> | ||
| 30 | #include "ubifs.h" | ||
| 31 | |||
/**
 * struct scan_data - data provided to scan callback functions
 * @min_space: minimum number of bytes for which to scan
 * @pick_free: whether it is OK to scan for empty LEBs
 * @lnum: LEB number found is returned here (output field, filled in by the
 *        scan callback when a suitable LEB is found)
 * @exclude_index: whether to exclude index LEBs
 */
struct scan_data {
	int min_space;
	int pick_free;
	int lnum;
	int exclude_index;
};
| 45 | |||
| 46 | /** | ||
| 47 | * valuable - determine whether LEB properties are valuable. | ||
| 48 | * @c: the UBIFS file-system description object | ||
| 49 | * @lprops: LEB properties | ||
| 50 | * | ||
| 51 | * This function return %1 if the LEB properties should be added to the LEB | ||
| 52 | * properties tree in memory. Otherwise %0 is returned. | ||
| 53 | */ | ||
| 54 | static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) | ||
| 55 | { | ||
| 56 | int n, cat = lprops->flags & LPROPS_CAT_MASK; | ||
| 57 | struct ubifs_lpt_heap *heap; | ||
| 58 | |||
| 59 | switch (cat) { | ||
| 60 | case LPROPS_DIRTY: | ||
| 61 | case LPROPS_DIRTY_IDX: | ||
| 62 | case LPROPS_FREE: | ||
| 63 | heap = &c->lpt_heap[cat - 1]; | ||
| 64 | if (heap->cnt < heap->max_cnt) | ||
| 65 | return 1; | ||
| 66 | if (lprops->free + lprops->dirty >= c->dark_wm) | ||
| 67 | return 1; | ||
| 68 | return 0; | ||
| 69 | case LPROPS_EMPTY: | ||
| 70 | n = c->lst.empty_lebs + c->freeable_cnt - | ||
| 71 | c->lst.taken_empty_lebs; | ||
| 72 | if (n < c->lsave_cnt) | ||
| 73 | return 1; | ||
| 74 | return 0; | ||
| 75 | case LPROPS_FREEABLE: | ||
| 76 | return 1; | ||
| 77 | case LPROPS_FRDI_IDX: | ||
| 78 | return 1; | ||
| 79 | } | ||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
/**
 * scan_for_dirty_cb - dirty space scan callback.
 * @c: the UBIFS file-system description object
 * @lprops: LEB properties to scan
 * @in_tree: whether the LEB properties are in main memory
 * @data: information passed to and from the caller of the scan; on success
 *        @data->lnum is set to the LEB number that was found
 *
 * This function returns a code that indicates whether the scan should continue
 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
 * (%LPT_SCAN_STOP). Note, %LPT_SCAN_ADD may be OR-ed into the result even
 * when the scan continues, so that valuable LEB properties get cached.
 */
static int scan_for_dirty_cb(struct ubifs_info *c,
			     const struct ubifs_lprops *lprops, int in_tree,
			     struct scan_data *data)
{
	int ret = LPT_SCAN_CONTINUE;

	/* Exclude LEBs that are currently in use */
	if (lprops->flags & LPROPS_TAKEN)
		return LPT_SCAN_CONTINUE;
	/* Determine whether to add these LEB properties to the tree */
	if (!in_tree && valuable(c, lprops))
		ret |= LPT_SCAN_ADD;
	/* Exclude LEBs with too little space */
	if (lprops->free + lprops->dirty < data->min_space)
		return ret;
	/* If specified, exclude index LEBs */
	if (data->exclude_index && lprops->flags & LPROPS_INDEX)
		return ret;
	if (lprops->free + lprops->dirty == c->leb_size) {
		/* If specified, exclude empty or freeable LEBs */
		if (!data->pick_free)
			return ret;
	} else if (lprops->dirty < c->dead_wm)
		/* Exclude LEBs with too little dirty space (unless empty) */
		return ret;
	/* Finally we found space */
	data->lnum = lprops->lnum;
	return LPT_SCAN_ADD | LPT_SCAN_STOP;
}
| 124 | |||
| 125 | /** | ||
| 126 | * scan_for_dirty - find a data LEB with free space. | ||
| 127 | * @c: the UBIFS file-system description object | ||
| 128 | * @min_space: minimum amount free plus dirty space the returned LEB has to | ||
| 129 | * have | ||
| 130 | * @pick_free: if it is OK to return a free or freeable LEB | ||
| 131 | * @exclude_index: whether to exclude index LEBs | ||
| 132 | * | ||
| 133 | * This function returns a pointer to the LEB properties found or a negative | ||
| 134 | * error code. | ||
| 135 | */ | ||
| 136 | static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | ||
| 137 | int min_space, int pick_free, | ||
| 138 | int exclude_index) | ||
| 139 | { | ||
| 140 | const struct ubifs_lprops *lprops; | ||
| 141 | struct ubifs_lpt_heap *heap; | ||
| 142 | struct scan_data data; | ||
| 143 | int err, i; | ||
| 144 | |||
| 145 | /* There may be an LEB with enough dirty space on the free heap */ | ||
| 146 | heap = &c->lpt_heap[LPROPS_FREE - 1]; | ||
| 147 | for (i = 0; i < heap->cnt; i++) { | ||
| 148 | lprops = heap->arr[i]; | ||
| 149 | if (lprops->free + lprops->dirty < min_space) | ||
| 150 | continue; | ||
| 151 | if (lprops->dirty < c->dead_wm) | ||
| 152 | continue; | ||
| 153 | return lprops; | ||
| 154 | } | ||
| 155 | /* | ||
| 156 | * A LEB may have fallen off of the bottom of the dirty heap, and ended | ||
| 157 | * up as uncategorized even though it has enough dirty space for us now, | ||
| 158 | * so check the uncategorized list. N.B. neither empty nor freeable LEBs | ||
| 159 | * can end up as uncategorized because they are kept on lists not | ||
| 160 | * finite-sized heaps. | ||
| 161 | */ | ||
| 162 | list_for_each_entry(lprops, &c->uncat_list, list) { | ||
| 163 | if (lprops->flags & LPROPS_TAKEN) | ||
| 164 | continue; | ||
| 165 | if (lprops->free + lprops->dirty < min_space) | ||
| 166 | continue; | ||
| 167 | if (exclude_index && (lprops->flags & LPROPS_INDEX)) | ||
| 168 | continue; | ||
| 169 | if (lprops->dirty < c->dead_wm) | ||
| 170 | continue; | ||
| 171 | return lprops; | ||
| 172 | } | ||
| 173 | /* We have looked everywhere in main memory, now scan the flash */ | ||
| 174 | if (c->pnodes_have >= c->pnode_cnt) | ||
| 175 | /* All pnodes are in memory, so skip scan */ | ||
| 176 | return ERR_PTR(-ENOSPC); | ||
| 177 | data.min_space = min_space; | ||
| 178 | data.pick_free = pick_free; | ||
| 179 | data.lnum = -1; | ||
| 180 | data.exclude_index = exclude_index; | ||
| 181 | err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, | ||
| 182 | (ubifs_lpt_scan_callback)scan_for_dirty_cb, | ||
| 183 | &data); | ||
| 184 | if (err) | ||
| 185 | return ERR_PTR(err); | ||
| 186 | ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); | ||
| 187 | c->lscan_lnum = data.lnum; | ||
| 188 | lprops = ubifs_lpt_lookup_dirty(c, data.lnum); | ||
| 189 | if (IS_ERR(lprops)) | ||
| 190 | return lprops; | ||
| 191 | ubifs_assert(lprops->lnum == data.lnum); | ||
| 192 | ubifs_assert(lprops->free + lprops->dirty >= min_space); | ||
| 193 | ubifs_assert(lprops->dirty >= c->dead_wm || | ||
| 194 | (pick_free && | ||
| 195 | lprops->free + lprops->dirty == c->leb_size)); | ||
| 196 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 197 | ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX)); | ||
| 198 | return lprops; | ||
| 199 | } | ||
| 200 | |||
| 201 | /** | ||
| 202 | * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector. | ||
| 203 | * @c: the UBIFS file-system description object | ||
| 204 | * @ret_lp: LEB properties are returned here on exit | ||
| 205 | * @min_space: minimum amount free plus dirty space the returned LEB has to | ||
| 206 | * have | ||
| 207 | * @pick_free: controls whether it is OK to pick empty or index LEBs | ||
| 208 | * | ||
| 209 | * This function tries to find a dirty logical eraseblock which has at least | ||
| 210 | * @min_space free and dirty space. It prefers to take an LEB from the dirty or | ||
| 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | ||
| 212 | * or do not have an LEB which satisfies the @min_space criteria. | ||
| 213 | * | ||
| 214 | * Note: | ||
| 215 | * o LEBs which have less than dead watermark of dirty space are never picked | ||
| 216 | * by this function; | ||
| 217 | * | ||
| 218 | * Returns zero and the LEB properties of | ||
| 219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
| 220 | * negative error code in case of other failures. The returned LEB is marked as | ||
| 221 | * "taken". | ||
| 222 | * | ||
| 223 | * The additional @pick_free argument controls if this function has to return a | ||
| 224 | * free or freeable LEB if one is present. For example, GC must to set it to %1, | ||
| 225 | * when called from the journal space reservation function, because the | ||
| 226 | * appearance of free space may coincide with the loss of enough dirty space | ||
| 227 | * for GC to succeed anyway. | ||
| 228 | * | ||
| 229 | * In contrast, if the Garbage Collector is called from budgeting, it should | ||
| 230 | * just make free space, not return LEBs which are already free or freeable. | ||
| 231 | * | ||
| 232 | * In addition @pick_free is set to %2 by the recovery process in order to | ||
| 233 | * recover gc_lnum in which case an index LEB must not be returned. | ||
| 234 | */ | ||
| 235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | ||
| 236 | int min_space, int pick_free) | ||
| 237 | { | ||
| 238 | int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0; | ||
| 239 | const struct ubifs_lprops *lp = NULL, *idx_lp = NULL; | ||
| 240 | struct ubifs_lpt_heap *heap, *idx_heap; | ||
| 241 | |||
| 242 | ubifs_get_lprops(c); | ||
| 243 | |||
| 244 | if (pick_free) { | ||
| 245 | int lebs, rsvd_idx_lebs = 0; | ||
| 246 | |||
| 247 | spin_lock(&c->space_lock); | ||
| 248 | lebs = c->lst.empty_lebs; | ||
| 249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Note, the index may consume more LEBs than have been reserved | ||
| 253 | * for it. It is OK because it might be consolidated by GC. | ||
| 254 | * But if the index takes fewer LEBs than it is reserved for it, | ||
| 255 | * this function must avoid picking those reserved LEBs. | ||
| 256 | */ | ||
| 257 | if (c->min_idx_lebs >= c->lst.idx_lebs) { | ||
| 258 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | ||
| 259 | exclude_index = 1; | ||
| 260 | } | ||
| 261 | spin_unlock(&c->space_lock); | ||
| 262 | |||
| 263 | /* Check if there are enough free LEBs for the index */ | ||
| 264 | if (rsvd_idx_lebs < lebs) { | ||
| 265 | /* OK, try to find an empty LEB */ | ||
| 266 | lp = ubifs_fast_find_empty(c); | ||
| 267 | if (lp) | ||
| 268 | goto found; | ||
| 269 | |||
| 270 | /* Or a freeable LEB */ | ||
| 271 | lp = ubifs_fast_find_freeable(c); | ||
| 272 | if (lp) | ||
| 273 | goto found; | ||
| 274 | } else | ||
| 275 | /* | ||
| 276 | * We cannot pick free/freeable LEBs in the below code. | ||
| 277 | */ | ||
| 278 | pick_free = 0; | ||
| 279 | } else { | ||
| 280 | spin_lock(&c->space_lock); | ||
| 281 | exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); | ||
| 282 | spin_unlock(&c->space_lock); | ||
| 283 | } | ||
| 284 | |||
| 285 | /* Look on the dirty and dirty index heaps */ | ||
| 286 | heap = &c->lpt_heap[LPROPS_DIRTY - 1]; | ||
| 287 | idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; | ||
| 288 | |||
| 289 | if (idx_heap->cnt && !exclude_index) { | ||
| 290 | idx_lp = idx_heap->arr[0]; | ||
| 291 | sum = idx_lp->free + idx_lp->dirty; | ||
| 292 | /* | ||
| 293 | * Since we reserve twice as more space for the index than it | ||
| 294 | * actually takes, it does not make sense to pick indexing LEBs | ||
| 295 | * with less than half LEB of dirty space. | ||
| 296 | */ | ||
| 297 | if (sum < min_space || sum < c->half_leb_size) | ||
| 298 | idx_lp = NULL; | ||
| 299 | } | ||
| 300 | |||
| 301 | if (heap->cnt) { | ||
| 302 | lp = heap->arr[0]; | ||
| 303 | if (lp->dirty + lp->free < min_space) | ||
| 304 | lp = NULL; | ||
| 305 | } | ||
| 306 | |||
| 307 | /* Pick the LEB with most space */ | ||
| 308 | if (idx_lp && lp) { | ||
| 309 | if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty) | ||
| 310 | lp = idx_lp; | ||
| 311 | } else if (idx_lp && !lp) | ||
| 312 | lp = idx_lp; | ||
| 313 | |||
| 314 | if (lp) { | ||
| 315 | ubifs_assert(lp->dirty >= c->dead_wm); | ||
| 316 | goto found; | ||
| 317 | } | ||
| 318 | |||
| 319 | /* Did not find a dirty LEB on the dirty heaps, have to scan */ | ||
| 320 | dbg_find("scanning LPT for a dirty LEB"); | ||
| 321 | lp = scan_for_dirty(c, min_space, pick_free, exclude_index); | ||
| 322 | if (IS_ERR(lp)) { | ||
| 323 | err = PTR_ERR(lp); | ||
| 324 | goto out; | ||
| 325 | } | ||
| 326 | ubifs_assert(lp->dirty >= c->dead_wm || | ||
| 327 | (pick_free && lp->free + lp->dirty == c->leb_size)); | ||
| 328 | |||
| 329 | found: | ||
| 330 | dbg_find("found LEB %d, free %d, dirty %d, flags %#x", | ||
| 331 | lp->lnum, lp->free, lp->dirty, lp->flags); | ||
| 332 | |||
| 333 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | ||
| 334 | lp->flags | LPROPS_TAKEN, 0); | ||
| 335 | if (IS_ERR(lp)) { | ||
| 336 | err = PTR_ERR(lp); | ||
| 337 | goto out; | ||
| 338 | } | ||
| 339 | |||
| 340 | memcpy(ret_lp, lp, sizeof(struct ubifs_lprops)); | ||
| 341 | |||
| 342 | out: | ||
| 343 | ubifs_release_lprops(c); | ||
| 344 | return err; | ||
| 345 | } | ||
| 346 | |||
| 347 | /** | ||
| 348 | * scan_for_free_cb - free space scan callback. | ||
| 349 | * @c: the UBIFS file-system description object | ||
| 350 | * @lprops: LEB properties to scan | ||
| 351 | * @in_tree: whether the LEB properties are in main memory | ||
| 352 | * @data: information passed to and from the caller of the scan | ||
| 353 | * | ||
| 354 | * This function returns a code that indicates whether the scan should continue | ||
| 355 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree | ||
| 356 | * in main memory (%LPT_SCAN_ADD), or whether the scan should stop | ||
| 357 | * (%LPT_SCAN_STOP). | ||
| 358 | */ | ||
| 359 | static int scan_for_free_cb(struct ubifs_info *c, | ||
| 360 | const struct ubifs_lprops *lprops, int in_tree, | ||
| 361 | struct scan_data *data) | ||
| 362 | { | ||
| 363 | int ret = LPT_SCAN_CONTINUE; | ||
| 364 | |||
| 365 | /* Exclude LEBs that are currently in use */ | ||
| 366 | if (lprops->flags & LPROPS_TAKEN) | ||
| 367 | return LPT_SCAN_CONTINUE; | ||
| 368 | /* Determine whether to add these LEB properties to the tree */ | ||
| 369 | if (!in_tree && valuable(c, lprops)) | ||
| 370 | ret |= LPT_SCAN_ADD; | ||
| 371 | /* Exclude index LEBs */ | ||
| 372 | if (lprops->flags & LPROPS_INDEX) | ||
| 373 | return ret; | ||
| 374 | /* Exclude LEBs with too little space */ | ||
| 375 | if (lprops->free < data->min_space) | ||
| 376 | return ret; | ||
| 377 | /* If specified, exclude empty LEBs */ | ||
| 378 | if (!data->pick_free && lprops->free == c->leb_size) | ||
| 379 | return ret; | ||
| 380 | /* | ||
| 381 | * LEBs that have only free and dirty space must not be allocated | ||
| 382 | * because they may have been unmapped already or they may have data | ||
| 383 | * that is obsolete only because of nodes that are still sitting in a | ||
| 384 | * wbuf. | ||
| 385 | */ | ||
| 386 | if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0) | ||
| 387 | return ret; | ||
| 388 | /* Finally we found space */ | ||
| 389 | data->lnum = lprops->lnum; | ||
| 390 | return LPT_SCAN_ADD | LPT_SCAN_STOP; | ||
| 391 | } | ||
| 392 | |||
| 393 | /** | ||
| 394 | * do_find_free_space - find a data LEB with free space. | ||
| 395 | * @c: the UBIFS file-system description object | ||
| 396 | * @min_space: minimum amount of free space required | ||
| 397 | * @pick_free: whether it is OK to scan for empty LEBs | ||
| 398 | * @squeeze: whether to try to find space in a non-empty LEB first | ||
| 399 | * | ||
| 400 | * This function returns a pointer to the LEB properties found or a negative | ||
| 401 | * error code. | ||
| 402 | */ | ||
| 403 | static | ||
| 404 | const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, | ||
| 405 | int min_space, int pick_free, | ||
| 406 | int squeeze) | ||
| 407 | { | ||
| 408 | const struct ubifs_lprops *lprops; | ||
| 409 | struct ubifs_lpt_heap *heap; | ||
| 410 | struct scan_data data; | ||
| 411 | int err, i; | ||
| 412 | |||
| 413 | if (squeeze) { | ||
| 414 | lprops = ubifs_fast_find_free(c); | ||
| 415 | if (lprops && lprops->free >= min_space) | ||
| 416 | return lprops; | ||
| 417 | } | ||
| 418 | if (pick_free) { | ||
| 419 | lprops = ubifs_fast_find_empty(c); | ||
| 420 | if (lprops) | ||
| 421 | return lprops; | ||
| 422 | } | ||
| 423 | if (!squeeze) { | ||
| 424 | lprops = ubifs_fast_find_free(c); | ||
| 425 | if (lprops && lprops->free >= min_space) | ||
| 426 | return lprops; | ||
| 427 | } | ||
| 428 | /* There may be an LEB with enough free space on the dirty heap */ | ||
| 429 | heap = &c->lpt_heap[LPROPS_DIRTY - 1]; | ||
| 430 | for (i = 0; i < heap->cnt; i++) { | ||
| 431 | lprops = heap->arr[i]; | ||
| 432 | if (lprops->free >= min_space) | ||
| 433 | return lprops; | ||
| 434 | } | ||
| 435 | /* | ||
| 436 | * A LEB may have fallen off of the bottom of the free heap, and ended | ||
| 437 | * up as uncategorized even though it has enough free space for us now, | ||
| 438 | * so check the uncategorized list. N.B. neither empty nor freeable LEBs | ||
| 439 | * can end up as uncategorized because they are kept on lists not | ||
| 440 | * finite-sized heaps. | ||
| 441 | */ | ||
| 442 | list_for_each_entry(lprops, &c->uncat_list, list) { | ||
| 443 | if (lprops->flags & LPROPS_TAKEN) | ||
| 444 | continue; | ||
| 445 | if (lprops->flags & LPROPS_INDEX) | ||
| 446 | continue; | ||
| 447 | if (lprops->free >= min_space) | ||
| 448 | return lprops; | ||
| 449 | } | ||
| 450 | /* We have looked everywhere in main memory, now scan the flash */ | ||
| 451 | if (c->pnodes_have >= c->pnode_cnt) | ||
| 452 | /* All pnodes are in memory, so skip scan */ | ||
| 453 | return ERR_PTR(-ENOSPC); | ||
| 454 | data.min_space = min_space; | ||
| 455 | data.pick_free = pick_free; | ||
| 456 | data.lnum = -1; | ||
| 457 | err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, | ||
| 458 | (ubifs_lpt_scan_callback)scan_for_free_cb, | ||
| 459 | &data); | ||
| 460 | if (err) | ||
| 461 | return ERR_PTR(err); | ||
| 462 | ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); | ||
| 463 | c->lscan_lnum = data.lnum; | ||
| 464 | lprops = ubifs_lpt_lookup_dirty(c, data.lnum); | ||
| 465 | if (IS_ERR(lprops)) | ||
| 466 | return lprops; | ||
| 467 | ubifs_assert(lprops->lnum == data.lnum); | ||
| 468 | ubifs_assert(lprops->free >= min_space); | ||
| 469 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 470 | ubifs_assert(!(lprops->flags & LPROPS_INDEX)); | ||
| 471 | return lprops; | ||
| 472 | } | ||
| 473 | |||
| 474 | /** | ||
| 475 | * ubifs_find_free_space - find a data LEB with free space. | ||
| 476 | * @c: the UBIFS file-system description object | ||
| 477 | * @min_space: minimum amount of required free space | ||
| 478 | * @free: contains amount of free space in the LEB on exit | ||
| 479 | * @squeeze: whether to try to find space in a non-empty LEB first | ||
| 480 | * | ||
| 481 | * This function looks for an LEB with at least @min_space bytes of free space. | ||
| 482 | * It tries to find an empty LEB if possible. If no empty LEBs are available, | ||
| 483 | * this function searches for a non-empty data LEB. The returned LEB is marked | ||
| 484 | * as "taken". | ||
| 485 | * | ||
| 486 | * This function returns found LEB number in case of success, %-ENOSPC if it | ||
| 487 | * failed to find a LEB with @min_space bytes of free space and other a negative | ||
| 488 | * error codes in case of failure. | ||
| 489 | */ | ||
| 490 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | ||
| 491 | int squeeze) | ||
| 492 | { | ||
| 493 | const struct ubifs_lprops *lprops; | ||
| 494 | int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags; | ||
| 495 | |||
| 496 | dbg_find("min_space %d", min_space); | ||
| 497 | ubifs_get_lprops(c); | ||
| 498 | |||
| 499 | /* Check if there are enough empty LEBs for commit */ | ||
| 500 | spin_lock(&c->space_lock); | ||
| 501 | if (c->min_idx_lebs > c->lst.idx_lebs) | ||
| 502 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | ||
| 503 | else | ||
| 504 | rsvd_idx_lebs = 0; | ||
| 505 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
| 506 | c->lst.taken_empty_lebs; | ||
| 507 | ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); | ||
| 508 | if (rsvd_idx_lebs < lebs) | ||
| 509 | /* | ||
| 510 | * OK to allocate an empty LEB, but we still don't want to go | ||
| 511 | * looking for one if there aren't any. | ||
| 512 | */ | ||
| 513 | if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { | ||
| 514 | pick_free = 1; | ||
| 515 | /* | ||
| 516 | * Because we release the space lock, we must account | ||
| 517 | * for this allocation here. After the LEB properties | ||
| 518 | * flags have been updated, we subtract one. Note, the | ||
| 519 | * result of this is that lprops also decreases | ||
| 520 | * @taken_empty_lebs in 'ubifs_change_lp()', so it is | ||
| 521 | * off by one for a short period of time which may | ||
| 522 | * introduce a small disturbance to budgeting | ||
| 523 | * calculations, but this is harmless because at the | ||
| 524 | * worst case this would make the budgeting subsystem | ||
| 525 | * be more pessimistic than needed. | ||
| 526 | * | ||
| 527 | * Fundamentally, this is about serialization of the | ||
| 528 | * budgeting and lprops subsystems. We could make the | ||
| 529 | * @space_lock a mutex and avoid dropping it before | ||
| 530 | * calling 'ubifs_change_lp()', but mutex is more | ||
| 531 | * heavy-weight, and we want budgeting to be as fast as | ||
| 532 | * possible. | ||
| 533 | */ | ||
| 534 | c->lst.taken_empty_lebs += 1; | ||
| 535 | } | ||
| 536 | spin_unlock(&c->space_lock); | ||
| 537 | |||
| 538 | lprops = do_find_free_space(c, min_space, pick_free, squeeze); | ||
| 539 | if (IS_ERR(lprops)) { | ||
| 540 | err = PTR_ERR(lprops); | ||
| 541 | goto out; | ||
| 542 | } | ||
| 543 | |||
| 544 | lnum = lprops->lnum; | ||
| 545 | flags = lprops->flags | LPROPS_TAKEN; | ||
| 546 | |||
| 547 | lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0); | ||
| 548 | if (IS_ERR(lprops)) { | ||
| 549 | err = PTR_ERR(lprops); | ||
| 550 | goto out; | ||
| 551 | } | ||
| 552 | |||
| 553 | if (pick_free) { | ||
| 554 | spin_lock(&c->space_lock); | ||
| 555 | c->lst.taken_empty_lebs -= 1; | ||
| 556 | spin_unlock(&c->space_lock); | ||
| 557 | } | ||
| 558 | |||
| 559 | *free = lprops->free; | ||
| 560 | ubifs_release_lprops(c); | ||
| 561 | |||
| 562 | if (*free == c->leb_size) { | ||
| 563 | /* | ||
| 564 | * Ensure that empty LEBs have been unmapped. They may not have | ||
| 565 | * been, for example, because of an unclean unmount. Also | ||
| 566 | * LEBs that were freeable LEBs (free + dirty == leb_size) will | ||
| 567 | * not have been unmapped. | ||
| 568 | */ | ||
| 569 | err = ubifs_leb_unmap(c, lnum); | ||
| 570 | if (err) | ||
| 571 | return err; | ||
| 572 | } | ||
| 573 | |||
| 574 | dbg_find("found LEB %d, free %d", lnum, *free); | ||
| 575 | ubifs_assert(*free >= min_space); | ||
| 576 | return lnum; | ||
| 577 | |||
| 578 | out: | ||
| 579 | if (pick_free) { | ||
| 580 | spin_lock(&c->space_lock); | ||
| 581 | c->lst.taken_empty_lebs -= 1; | ||
| 582 | spin_unlock(&c->space_lock); | ||
| 583 | } | ||
| 584 | ubifs_release_lprops(c); | ||
| 585 | return err; | ||
| 586 | } | ||
| 587 | |||
| 588 | /** | ||
| 589 | * scan_for_idx_cb - callback used by the scan for a free LEB for the index. | ||
| 590 | * @c: the UBIFS file-system description object | ||
| 591 | * @lprops: LEB properties to scan | ||
| 592 | * @in_tree: whether the LEB properties are in main memory | ||
| 593 | * @data: information passed to and from the caller of the scan | ||
| 594 | * | ||
| 595 | * This function returns a code that indicates whether the scan should continue | ||
| 596 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree | ||
| 597 | * in main memory (%LPT_SCAN_ADD), or whether the scan should stop | ||
| 598 | * (%LPT_SCAN_STOP). | ||
| 599 | */ | ||
| 600 | static int scan_for_idx_cb(struct ubifs_info *c, | ||
| 601 | const struct ubifs_lprops *lprops, int in_tree, | ||
| 602 | struct scan_data *data) | ||
| 603 | { | ||
| 604 | int ret = LPT_SCAN_CONTINUE; | ||
| 605 | |||
| 606 | /* Exclude LEBs that are currently in use */ | ||
| 607 | if (lprops->flags & LPROPS_TAKEN) | ||
| 608 | return LPT_SCAN_CONTINUE; | ||
| 609 | /* Determine whether to add these LEB properties to the tree */ | ||
| 610 | if (!in_tree && valuable(c, lprops)) | ||
| 611 | ret |= LPT_SCAN_ADD; | ||
| 612 | /* Exclude index LEBS */ | ||
| 613 | if (lprops->flags & LPROPS_INDEX) | ||
| 614 | return ret; | ||
| 615 | /* Exclude LEBs that cannot be made empty */ | ||
| 616 | if (lprops->free + lprops->dirty != c->leb_size) | ||
| 617 | return ret; | ||
| 618 | /* | ||
| 619 | * We are allocating for the index so it is safe to allocate LEBs with | ||
| 620 | * only free and dirty space, because write buffers are sync'd at commit | ||
| 621 | * start. | ||
| 622 | */ | ||
| 623 | data->lnum = lprops->lnum; | ||
| 624 | return LPT_SCAN_ADD | LPT_SCAN_STOP; | ||
| 625 | } | ||
| 626 | |||
| 627 | /** | ||
| 628 | * scan_for_leb_for_idx - scan for a free LEB for the index. | ||
| 629 | * @c: the UBIFS file-system description object | ||
| 630 | */ | ||
| 631 | static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) | ||
| 632 | { | ||
| 633 | struct ubifs_lprops *lprops; | ||
| 634 | struct scan_data data; | ||
| 635 | int err; | ||
| 636 | |||
| 637 | data.lnum = -1; | ||
| 638 | err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, | ||
| 639 | (ubifs_lpt_scan_callback)scan_for_idx_cb, | ||
| 640 | &data); | ||
| 641 | if (err) | ||
| 642 | return ERR_PTR(err); | ||
| 643 | ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); | ||
| 644 | c->lscan_lnum = data.lnum; | ||
| 645 | lprops = ubifs_lpt_lookup_dirty(c, data.lnum); | ||
| 646 | if (IS_ERR(lprops)) | ||
| 647 | return lprops; | ||
| 648 | ubifs_assert(lprops->lnum == data.lnum); | ||
| 649 | ubifs_assert(lprops->free + lprops->dirty == c->leb_size); | ||
| 650 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 651 | ubifs_assert(!(lprops->flags & LPROPS_INDEX)); | ||
| 652 | return lprops; | ||
| 653 | } | ||
| 654 | |||
| 655 | /** | ||
| 656 | * ubifs_find_free_leb_for_idx - find a free LEB for the index. | ||
| 657 | * @c: the UBIFS file-system description object | ||
| 658 | * | ||
| 659 | * This function looks for a free LEB and returns that LEB number. The returned | ||
| 660 | * LEB is marked as "taken", "index". | ||
| 661 | * | ||
| 662 | * Only empty LEBs are allocated. This is for two reasons. First, the commit | ||
| 663 | * calculates the number of LEBs to allocate based on the assumption that they | ||
| 664 | * will be empty. Secondly, free space at the end of an index LEB is not | ||
| 665 | * guaranteed to be empty because it may have been used by the in-the-gaps | ||
| 666 | * method prior to an unclean unmount. | ||
| 667 | * | ||
| 668 | * If no LEB is found %-ENOSPC is returned. For other failures another negative | ||
| 669 | * error code is returned. | ||
| 670 | */ | ||
| 671 | int ubifs_find_free_leb_for_idx(struct ubifs_info *c) | ||
| 672 | { | ||
| 673 | const struct ubifs_lprops *lprops; | ||
| 674 | int lnum = -1, err, flags; | ||
| 675 | |||
| 676 | ubifs_get_lprops(c); | ||
| 677 | |||
| 678 | lprops = ubifs_fast_find_empty(c); | ||
| 679 | if (!lprops) { | ||
| 680 | lprops = ubifs_fast_find_freeable(c); | ||
| 681 | if (!lprops) { | ||
| 682 | ubifs_assert(c->freeable_cnt == 0); | ||
| 683 | if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { | ||
| 684 | lprops = scan_for_leb_for_idx(c); | ||
| 685 | if (IS_ERR(lprops)) { | ||
| 686 | err = PTR_ERR(lprops); | ||
| 687 | goto out; | ||
| 688 | } | ||
| 689 | } | ||
| 690 | } | ||
| 691 | } | ||
| 692 | |||
| 693 | if (!lprops) { | ||
| 694 | err = -ENOSPC; | ||
| 695 | goto out; | ||
| 696 | } | ||
| 697 | |||
| 698 | lnum = lprops->lnum; | ||
| 699 | |||
| 700 | dbg_find("found LEB %d, free %d, dirty %d, flags %#x", | ||
| 701 | lnum, lprops->free, lprops->dirty, lprops->flags); | ||
| 702 | |||
| 703 | flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX; | ||
| 704 | lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0); | ||
| 705 | if (IS_ERR(lprops)) { | ||
| 706 | err = PTR_ERR(lprops); | ||
| 707 | goto out; | ||
| 708 | } | ||
| 709 | |||
| 710 | ubifs_release_lprops(c); | ||
| 711 | |||
| 712 | /* | ||
| 713 | * Ensure that empty LEBs have been unmapped. They may not have been, | ||
| 714 | * for example, because of an unclean unmount. Also LEBs that were | ||
| 715 | * freeable LEBs (free + dirty == leb_size) will not have been unmapped. | ||
| 716 | */ | ||
| 717 | err = ubifs_leb_unmap(c, lnum); | ||
| 718 | if (err) { | ||
| 719 | ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 720 | LPROPS_TAKEN | LPROPS_INDEX, 0); | ||
| 721 | return err; | ||
| 722 | } | ||
| 723 | |||
| 724 | return lnum; | ||
| 725 | |||
| 726 | out: | ||
| 727 | ubifs_release_lprops(c); | ||
| 728 | return err; | ||
| 729 | } | ||
| 730 | |||
| 731 | static int cmp_dirty_idx(const struct ubifs_lprops **a, | ||
| 732 | const struct ubifs_lprops **b) | ||
| 733 | { | ||
| 734 | const struct ubifs_lprops *lpa = *a; | ||
| 735 | const struct ubifs_lprops *lpb = *b; | ||
| 736 | |||
| 737 | return lpa->dirty + lpa->free - lpb->dirty - lpb->free; | ||
| 738 | } | ||
| 739 | |||
/* Exchange two LEB property pointers; the @size argument is unused */
static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b,
			   int size)
{
	struct ubifs_lprops *tmp;

	tmp = *a;
	*a = *b;
	*b = tmp;
}
| 748 | |||
| 749 | /** | ||
| 750 | * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos. | ||
| 751 | * @c: the UBIFS file-system description object | ||
| 752 | * | ||
| 753 | * This function is called each commit to create an array of LEB numbers of | ||
| 754 | * dirty index LEBs sorted in order of dirty and free space. This is used by | ||
| 755 | * the in-the-gaps method of TNC commit. | ||
| 756 | */ | ||
| 757 | int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) | ||
| 758 | { | ||
| 759 | int i; | ||
| 760 | |||
| 761 | ubifs_get_lprops(c); | ||
| 762 | /* Copy the LPROPS_DIRTY_IDX heap */ | ||
| 763 | c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt; | ||
| 764 | memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr, | ||
| 765 | sizeof(void *) * c->dirty_idx.cnt); | ||
| 766 | /* Sort it so that the dirtiest is now at the end */ | ||
| 767 | sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), | ||
| 768 | (int (*)(const void *, const void *))cmp_dirty_idx, | ||
| 769 | (void (*)(void *, void *, int))swap_dirty_idx); | ||
| 770 | dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); | ||
| 771 | if (c->dirty_idx.cnt) | ||
| 772 | dbg_find("dirtiest index LEB is %d with dirty %d and free %d", | ||
| 773 | c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum, | ||
| 774 | c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty, | ||
| 775 | c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free); | ||
| 776 | /* Replace the lprops pointers with LEB numbers */ | ||
| 777 | for (i = 0; i < c->dirty_idx.cnt; i++) | ||
| 778 | c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum; | ||
| 779 | ubifs_release_lprops(c); | ||
| 780 | return 0; | ||
| 781 | } | ||
| 782 | |||
| 783 | /** | ||
| 784 | * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB. | ||
| 785 | * @c: the UBIFS file-system description object | ||
| 786 | * @lprops: LEB properties to scan | ||
| 787 | * @in_tree: whether the LEB properties are in main memory | ||
| 788 | * @data: information passed to and from the caller of the scan | ||
| 789 | * | ||
| 790 | * This function returns a code that indicates whether the scan should continue | ||
| 791 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree | ||
| 792 | * in main memory (%LPT_SCAN_ADD), or whether the scan should stop | ||
| 793 | * (%LPT_SCAN_STOP). | ||
| 794 | */ | ||
| 795 | static int scan_dirty_idx_cb(struct ubifs_info *c, | ||
| 796 | const struct ubifs_lprops *lprops, int in_tree, | ||
| 797 | struct scan_data *data) | ||
| 798 | { | ||
| 799 | int ret = LPT_SCAN_CONTINUE; | ||
| 800 | |||
| 801 | /* Exclude LEBs that are currently in use */ | ||
| 802 | if (lprops->flags & LPROPS_TAKEN) | ||
| 803 | return LPT_SCAN_CONTINUE; | ||
| 804 | /* Determine whether to add these LEB properties to the tree */ | ||
| 805 | if (!in_tree && valuable(c, lprops)) | ||
| 806 | ret |= LPT_SCAN_ADD; | ||
| 807 | /* Exclude non-index LEBs */ | ||
| 808 | if (!(lprops->flags & LPROPS_INDEX)) | ||
| 809 | return ret; | ||
| 810 | /* Exclude LEBs with too little space */ | ||
| 811 | if (lprops->free + lprops->dirty < c->min_idx_node_sz) | ||
| 812 | return ret; | ||
| 813 | /* Finally we found space */ | ||
| 814 | data->lnum = lprops->lnum; | ||
| 815 | return LPT_SCAN_ADD | LPT_SCAN_STOP; | ||
| 816 | } | ||
| 817 | |||
| 818 | /** | ||
| 819 | * find_dirty_idx_leb - find a dirty index LEB. | ||
| 820 | * @c: the UBIFS file-system description object | ||
| 821 | * | ||
| 822 | * This function returns LEB number upon success and a negative error code upon | ||
| 823 | * failure. In particular, -ENOSPC is returned if a dirty index LEB is not | ||
| 824 | * found. | ||
| 825 | * | ||
| 826 | * Note that this function scans the entire LPT but it is called very rarely. | ||
| 827 | */ | ||
| 828 | static int find_dirty_idx_leb(struct ubifs_info *c) | ||
| 829 | { | ||
| 830 | const struct ubifs_lprops *lprops; | ||
| 831 | struct ubifs_lpt_heap *heap; | ||
| 832 | struct scan_data data; | ||
| 833 | int err, i, ret; | ||
| 834 | |||
| 835 | /* Check all structures in memory first */ | ||
| 836 | data.lnum = -1; | ||
| 837 | heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; | ||
| 838 | for (i = 0; i < heap->cnt; i++) { | ||
| 839 | lprops = heap->arr[i]; | ||
| 840 | ret = scan_dirty_idx_cb(c, lprops, 1, &data); | ||
| 841 | if (ret & LPT_SCAN_STOP) | ||
| 842 | goto found; | ||
| 843 | } | ||
| 844 | list_for_each_entry(lprops, &c->frdi_idx_list, list) { | ||
| 845 | ret = scan_dirty_idx_cb(c, lprops, 1, &data); | ||
| 846 | if (ret & LPT_SCAN_STOP) | ||
| 847 | goto found; | ||
| 848 | } | ||
| 849 | list_for_each_entry(lprops, &c->uncat_list, list) { | ||
| 850 | ret = scan_dirty_idx_cb(c, lprops, 1, &data); | ||
| 851 | if (ret & LPT_SCAN_STOP) | ||
| 852 | goto found; | ||
| 853 | } | ||
| 854 | if (c->pnodes_have >= c->pnode_cnt) | ||
| 855 | /* All pnodes are in memory, so skip scan */ | ||
| 856 | return -ENOSPC; | ||
| 857 | err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, | ||
| 858 | (ubifs_lpt_scan_callback)scan_dirty_idx_cb, | ||
| 859 | &data); | ||
| 860 | if (err) | ||
| 861 | return err; | ||
| 862 | found: | ||
| 863 | ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); | ||
| 864 | c->lscan_lnum = data.lnum; | ||
| 865 | lprops = ubifs_lpt_lookup_dirty(c, data.lnum); | ||
| 866 | if (IS_ERR(lprops)) | ||
| 867 | return PTR_ERR(lprops); | ||
| 868 | ubifs_assert(lprops->lnum == data.lnum); | ||
| 869 | ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz); | ||
| 870 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 871 | ubifs_assert((lprops->flags & LPROPS_INDEX)); | ||
| 872 | |||
| 873 | dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x", | ||
| 874 | lprops->lnum, lprops->free, lprops->dirty, lprops->flags); | ||
| 875 | |||
| 876 | lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, | ||
| 877 | lprops->flags | LPROPS_TAKEN, 0); | ||
| 878 | if (IS_ERR(lprops)) | ||
| 879 | return PTR_ERR(lprops); | ||
| 880 | |||
| 881 | return lprops->lnum; | ||
| 882 | } | ||
| 883 | |||
| 884 | /** | ||
| 885 | * get_idx_gc_leb - try to get a LEB number from trivial GC. | ||
| 886 | * @c: the UBIFS file-system description object | ||
| 887 | */ | ||
| 888 | static int get_idx_gc_leb(struct ubifs_info *c) | ||
| 889 | { | ||
| 890 | const struct ubifs_lprops *lp; | ||
| 891 | int err, lnum; | ||
| 892 | |||
| 893 | err = ubifs_get_idx_gc_leb(c); | ||
| 894 | if (err < 0) | ||
| 895 | return err; | ||
| 896 | lnum = err; | ||
| 897 | /* | ||
| 898 | * The LEB was due to be unmapped after the commit but | ||
| 899 | * it is needed now for this commit. | ||
| 900 | */ | ||
| 901 | lp = ubifs_lpt_lookup_dirty(c, lnum); | ||
| 902 | if (unlikely(IS_ERR(lp))) | ||
| 903 | return PTR_ERR(lp); | ||
| 904 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | ||
| 905 | lp->flags | LPROPS_INDEX, -1); | ||
| 906 | if (unlikely(IS_ERR(lp))) | ||
| 907 | return PTR_ERR(lp); | ||
| 908 | dbg_find("LEB %d, dirty %d and free %d flags %#x", | ||
| 909 | lp->lnum, lp->dirty, lp->free, lp->flags); | ||
| 910 | return lnum; | ||
| 911 | } | ||
| 912 | |||
| 913 | /** | ||
| 914 | * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array. | ||
| 915 | * @c: the UBIFS file-system description object | ||
| 916 | */ | ||
| 917 | static int find_dirtiest_idx_leb(struct ubifs_info *c) | ||
| 918 | { | ||
| 919 | const struct ubifs_lprops *lp; | ||
| 920 | int lnum; | ||
| 921 | |||
| 922 | while (1) { | ||
| 923 | if (!c->dirty_idx.cnt) | ||
| 924 | return -ENOSPC; | ||
| 925 | /* The lprops pointers were replaced by LEB numbers */ | ||
| 926 | lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt]; | ||
| 927 | lp = ubifs_lpt_lookup(c, lnum); | ||
| 928 | if (IS_ERR(lp)) | ||
| 929 | return PTR_ERR(lp); | ||
| 930 | if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX)) | ||
| 931 | continue; | ||
| 932 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | ||
| 933 | lp->flags | LPROPS_TAKEN, 0); | ||
| 934 | if (IS_ERR(lp)) | ||
| 935 | return PTR_ERR(lp); | ||
| 936 | break; | ||
| 937 | } | ||
| 938 | dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, | ||
| 939 | lp->free, lp->flags); | ||
| 940 | ubifs_assert(lp->flags | LPROPS_TAKEN); | ||
| 941 | ubifs_assert(lp->flags | LPROPS_INDEX); | ||
| 942 | return lnum; | ||
| 943 | } | ||
| 944 | |||
| 945 | /** | ||
| 946 | * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit. | ||
| 947 | * @c: the UBIFS file-system description object | ||
| 948 | * | ||
| 949 | * This function attempts to find an untaken index LEB with the most free and | ||
| 950 | * dirty space that can be used without overwriting index nodes that were in the | ||
| 951 | * last index committed. | ||
| 952 | */ | ||
| 953 | int ubifs_find_dirty_idx_leb(struct ubifs_info *c) | ||
| 954 | { | ||
| 955 | int err; | ||
| 956 | |||
| 957 | ubifs_get_lprops(c); | ||
| 958 | |||
| 959 | /* | ||
| 960 | * We made an array of the dirtiest index LEB numbers as at the start of | ||
| 961 | * last commit. Try that array first. | ||
| 962 | */ | ||
| 963 | err = find_dirtiest_idx_leb(c); | ||
| 964 | |||
| 965 | /* Next try scanning the entire LPT */ | ||
| 966 | if (err == -ENOSPC) | ||
| 967 | err = find_dirty_idx_leb(c); | ||
| 968 | |||
| 969 | /* Finally take any index LEBs awaiting trivial GC */ | ||
| 970 | if (err == -ENOSPC) | ||
| 971 | err = get_idx_gc_leb(c); | ||
| 972 | |||
| 973 | ubifs_release_lprops(c); | ||
| 974 | return err; | ||
| 975 | } | ||
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c new file mode 100644 index 000000000000..d0f3dac29081 --- /dev/null +++ b/fs/ubifs/gc.c | |||
| @@ -0,0 +1,773 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements garbage collection. The procedure for garbage collection | ||
 * is different depending on whether a LEB is an index LEB (contains index
| 26 | * nodes) or not. For non-index LEBs, garbage collection finds a LEB which | ||
| 27 | * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete | ||
| 28 | * nodes to the journal, at which point the garbage-collected LEB is free to be | ||
| 29 | * reused. For index LEBs, garbage collection marks the non-obsolete index nodes | ||
| 30 | * dirty in the TNC, and after the next commit, the garbage-collected LEB is | ||
| 31 | * to be reused. Garbage collection will cause the number of dirty index nodes | ||
| 32 | * to grow, however sufficient space is reserved for the index to ensure the | ||
| 33 | * commit will never run out of space. | ||
| 34 | */ | ||
| 35 | |||
| 36 | #include <linux/pagemap.h> | ||
| 37 | #include "ubifs.h" | ||
| 38 | |||
/*
 * GC tries to optimize the way it fits nodes to available space, and it sorts
 * nodes a little. The below constants are watermarks which define "large",
 * "medium", and "small" nodes.
 */
#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ

/*
 * GC may need to move more than one LEB to make progress. The below constants
 * define "soft" and "hard" limits on the number of LEBs the garbage collector
 * may move.
 */
#define SOFT_LEBS_LIMIT 4
#define HARD_LEBS_LIMIT 32
| 54 | |||
/**
 * switch_gc_head - switch the garbage collection journal head.
 * @c: UBIFS file-system description object
 *
 * This function switches the GC head to the next LEB which is reserved in
 * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
 * and other negative error code in case of failures.
 */
static int switch_gc_head(struct ubifs_info *c)
{
	int err, gc_lnum = c->gc_lnum;
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;

	ubifs_assert(gc_lnum != -1);
	dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
	       wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
	       c->leb_size - wbuf->offs - wbuf->used);

	/* Flush buffered data of the old GC head to the flash first */
	err = ubifs_wbuf_sync_nolock(wbuf);
	if (err)
		return err;

	/*
	 * The GC write-buffer was synchronized, we may safely unmap
	 * 'c->gc_lnum'.
	 */
	err = ubifs_leb_unmap(c, gc_lnum);
	if (err)
		return err;

	/* Record the reserved LEB in the log as the new GC head bud */
	err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
	if (err)
		return err;

	/* The reserved LEB is consumed; a new one must be reserved later */
	c->gc_lnum = -1;
	err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM);
	return err;
}
| 97 | |||
/**
 * move_nodes - move nodes.
 * @c: UBIFS file-system description object
 * @sleb: describes nodes to move
 *
 * This function moves valid nodes from data LEB described by @sleb to the GC
 * journal head. The obsolete nodes are dropped.
 *
 * When moving nodes we have to deal with classical bin-packing problem: the
 * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
 * where the nodes in the @sleb->nodes list are the elements which should be
 * fit optimally to the bins. This function uses the "first fit decreasing"
 * strategy, although it does not really sort the nodes but just split them on
 * 3 classes - large, medium, and small, so they are roughly sorted.
 *
 * This function returns zero in case of success, %-EAGAIN if commit is
 * required, and other negative error codes in case of other failures.
 */
static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
{
	struct ubifs_scan_node *snod, *tmp;
	struct list_head large, medium, small;
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
	int avail, err, min = INT_MAX;

	INIT_LIST_HEAD(&large);
	INIT_LIST_HEAD(&medium);
	INIT_LIST_HEAD(&small);

	list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
		struct list_head *lst;

		/* Index, reference and commit-start nodes never live in
		 * data LEBs */
		ubifs_assert(snod->type != UBIFS_IDX_NODE);
		ubifs_assert(snod->type != UBIFS_REF_NODE);
		ubifs_assert(snod->type != UBIFS_CS_NODE);

		/* Returns 0 if the TNC no longer references this node,
		 * i.e. it is obsolete */
		err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
					 snod->offs, 0);
		if (err < 0)
			goto out;

		lst = &snod->list;
		list_del(lst);
		if (!err) {
			/* The node is obsolete, remove it from the list */
			kfree(snod);
			continue;
		}

		/*
		 * Sort the list of nodes so that large nodes go first, and
		 * small nodes go last.
		 */
		if (snod->len > MEDIUM_NODE_WM)
			list_add(lst, &large);
		else if (snod->len > SMALL_NODE_WM)
			list_add(lst, &medium);
		else
			list_add(lst, &small);

		/* And find the smallest node */
		if (snod->len < min)
			min = snod->len;
	}

	/*
	 * Join the three lists so that we'd have one roughly sorted list
	 * ('large' will be the head of the joined list).
	 */
	list_splice(&medium, large.prev);
	list_splice(&small, large.prev);

	if (wbuf->lnum == -1) {
		/*
		 * The GC journal head is not set, because it is the first GC
		 * invocation since mount.
		 */
		err = switch_gc_head(c);
		if (err)
			goto out;
	}

	/* Write nodes to their new location. Use the first-fit strategy */
	while (1) {
		avail = c->leb_size - wbuf->offs - wbuf->used;
		list_for_each_entry_safe(snod, tmp, &large, list) {
			int new_lnum, new_offs;

			/* Even the smallest remaining node cannot fit, so
			 * stop scanning this LEB */
			if (avail < min)
				break;

			if (snod->len > avail)
				/* This node does not fit */
				continue;

			cond_resched();

			new_lnum = wbuf->lnum;
			new_offs = wbuf->offs + wbuf->used;
			err = ubifs_wbuf_write_nolock(wbuf, snod->node,
						      snod->len);
			if (err)
				goto out;
			/* Re-point the TNC at the node's new location */
			err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
						snod->offs, new_lnum, new_offs,
						snod->len);
			if (err)
				goto out;

			avail = c->leb_size - wbuf->offs - wbuf->used;
			list_del(&snod->list);
			kfree(snod);
		}

		if (list_empty(&large))
			break;

		/*
		 * Waste the rest of the space in the LEB and switch to the
		 * next LEB.
		 */
		err = switch_gc_head(c);
		if (err)
			goto out;
	}

	return 0;

out:
	/* Free the nodes which were removed from @sleb but not yet moved */
	list_for_each_entry_safe(snod, tmp, &large, list) {
		list_del(&snod->list);
		kfree(snod);
	}
	return err;
}
| 233 | |||
| 234 | /** | ||
| 235 | * gc_sync_wbufs - sync write-buffers for GC. | ||
| 236 | * @c: UBIFS file-system description object | ||
| 237 | * | ||
| 238 | * We must guarantee that obsoleting nodes are on flash. Unfortunately they may | ||
| 239 | * be in a write-buffer instead. That is, a node could be written to a | ||
| 240 | * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is | ||
| 241 | * erased before the write-buffer is sync'd and then there is an unclean | ||
| 242 | * unmount, then an existing node is lost. To avoid this, we sync all | ||
| 243 | * write-buffers. | ||
| 244 | * | ||
| 245 | * This function returns %0 on success or a negative error code on failure. | ||
| 246 | */ | ||
| 247 | static int gc_sync_wbufs(struct ubifs_info *c) | ||
| 248 | { | ||
| 249 | int err, i; | ||
| 250 | |||
| 251 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 252 | if (i == GCHD) | ||
| 253 | continue; | ||
| 254 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); | ||
| 255 | if (err) | ||
| 256 | return err; | ||
| 257 | } | ||
| 258 | return 0; | ||
| 259 | } | ||
| 260 | |||
/**
 * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
 * @c: UBIFS file-system description object
 * @lp: describes the LEB to garbage collect
 *
 * This function garbage-collects an LEB and returns one of the @LEB_FREED,
 * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
 * required, and other negative error codes in case of failures.
 */
int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
	int err = 0, lnum = lp->lnum;

	ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
		     c->need_recovery);
	/* GC must never be pointed at its own reserved LEB or its head */
	ubifs_assert(c->gc_lnum != lnum);
	ubifs_assert(wbuf->lnum != lnum);

	/*
	 * We scan the entire LEB even though we only really need to scan up to
	 * (c->leb_size - lp->free).
	 */
	sleb = ubifs_scan(c, lnum, 0, c->sbuf);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);

	ubifs_assert(!list_empty(&sleb->nodes));
	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);

	/* A LEB's first node determines whether it is an index LEB */
	if (snod->type == UBIFS_IDX_NODE) {
		struct ubifs_gced_idx_leb *idx_gc;

		dbg_gc("indexing LEB %d (free %d, dirty %d)",
		       lnum, lp->free, lp->dirty);
		/* Index LEBs are GC'ed by dirtying their live index nodes in
		 * the TNC; the next commit will write them elsewhere */
		list_for_each_entry(snod, &sleb->nodes, list) {
			struct ubifs_idx_node *idx = snod->node;
			int level = le16_to_cpu(idx->level);

			ubifs_assert(snod->type == UBIFS_IDX_NODE);
			key_read(c, ubifs_idx_key(c, idx), &snod->key);
			err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
						   snod->offs);
			if (err)
				goto out;
		}

		idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
		if (!idx_gc) {
			err = -ENOMEM;
			goto out;
		}

		idx_gc->lnum = lnum;
		idx_gc->unmap = 0;
		list_add(&idx_gc->list, &c->idx_gc);

		/*
		 * Don't release the LEB until after the next commit, because
		 * it may contain data which is needed for recovery. So
		 * although we freed this LEB, it will become usable only after
		 * the commit.
		 */
		err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
					  LPROPS_INDEX, 1);
		if (err)
			goto out;
		err = LEB_FREED_IDX;
	} else {
		dbg_gc("data LEB %d (free %d, dirty %d)",
		       lnum, lp->free, lp->dirty);

		/* Copy live nodes to the GC journal head */
		err = move_nodes(c, sleb);
		if (err)
			goto out;

		/* Obsoleting nodes must hit the flash before the LEB is
		 * erased — see gc_sync_wbufs() */
		err = gc_sync_wbufs(c);
		if (err)
			goto out;

		err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
		if (err)
			goto out;

		if (c->gc_lnum == -1) {
			/* Keep this LEB as the next reserved GC LEB */
			c->gc_lnum = lnum;
			err = LEB_RETAINED;
		} else {
			err = ubifs_wbuf_sync_nolock(wbuf);
			if (err)
				goto out;

			err = ubifs_leb_unmap(c, lnum);
			if (err)
				goto out;

			err = LEB_FREED;
		}
	}

out:
	ubifs_scan_destroy(sleb);
	return err;
}
| 367 | |||
/**
 * ubifs_garbage_collect - UBIFS garbage collector.
 * @c: UBIFS file-system description object
 * @anyway: do GC even if there are free LEBs
 *
 * This function does out-of-place garbage collection. The return codes are:
 *   o positive LEB number if the LEB has been freed and may be used;
 *   o %-EAGAIN if the caller has to run commit;
 *   o %-ENOSPC if GC failed to make any progress;
 *   o other negative error codes in case of other errors.
 *
 * Garbage collector writes data to the journal when GC'ing data LEBs, and just
 * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
 * commit may be required. But commit cannot be run from inside GC, because the
 * caller might be holding the commit lock, so %-EAGAIN is returned instead;
 * And this error code means that the caller has to run commit, and re-run GC
 * if there is still no free space.
 *
 * There are many reasons why this function may return %-EAGAIN:
 * o the log is full and there is no space to write an LEB reference for
 *   @c->gc_lnum;
 * o the journal is too large and exceeds size limitations;
 * o GC moved indexing LEBs, but they can be used only after the commit;
 * o the shrinker fails to find clean znodes to free and requests the commit;
 * o etc.
 *
 * Note, if the file-system is close to be full, this function may return
 * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
 * the function. E.g., this happens if the limits on the journal size are too
 * tough and GC writes too much to the journal before an LEB is freed. This
 * might also mean that the journal is too large, and the TNC becomes too big,
 * so that the shrinker is constantly called, finds no clean znodes to free,
 * and requests commit. Well, this may also happen if the journal is all right,
 * but another kernel process consumes too much memory. Anyway, infinite
 * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
 */
int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
{
	int i, err, ret, min_space = c->dead_wm;
	struct ubifs_lprops lp;
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;

	ubifs_assert_cmt_locked(c);

	if (ubifs_gc_should_commit(c))
		return -EAGAIN;

	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);

	if (c->ro_media) {
		ret = -EROFS;
		goto out_unlock;
	}

	/* We expect the write-buffer to be empty on entry */
	ubifs_assert(!wbuf->used);

	for (i = 0; ; i++) {
		/* NOTE(review): this initializer is dead — space_before is
		 * recomputed below before its only use */
		int space_before = c->leb_size - wbuf->offs - wbuf->used;
		int space_after;

		cond_resched();

		/* Give the commit an opportunity to run */
		if (ubifs_gc_should_commit(c)) {
			ret = -EAGAIN;
			break;
		}

		if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
			/*
			 * We've done enough iterations. Indexing LEBs were
			 * moved and will be available after the commit.
			 */
			dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
			ubifs_commit_required(c);
			ret = -EAGAIN;
			break;
		}

		if (i > HARD_LEBS_LIMIT) {
			/*
			 * We've moved too many LEBs and have not made
			 * progress, give up.
			 */
			dbg_gc("hard limit, -ENOSPC");
			ret = -ENOSPC;
			break;
		}

		/*
		 * Empty and freeable LEBs can turn up while we waited for
		 * the wbuf lock, or while we have been running GC. In that
		 * case, we should just return one of those instead of
		 * continuing to GC dirty LEBs. Hence we request
		 * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
		 */
		ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
		if (ret) {
			if (ret == -ENOSPC)
				dbg_gc("no more dirty LEBs");
			break;
		}

		dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
		       "(min. space %d)", lp.lnum, lp.free, lp.dirty,
		       lp.free + lp.dirty, min_space);

		if (lp.free + lp.dirty == c->leb_size) {
			/* An empty LEB was returned */
			dbg_gc("LEB %d is free, return it", lp.lnum);
			/*
			 * ubifs_find_dirty_leb() doesn't return freeable index
			 * LEBs.
			 */
			ubifs_assert(!(lp.flags & LPROPS_INDEX));
			if (lp.free != c->leb_size) {
				/*
				 * Write buffers must be sync'd before
				 * unmapping freeable LEBs, because one of them
				 * may contain data which obsoletes something
				 * in 'lp.pnum'.
				 */
				ret = gc_sync_wbufs(c);
				if (ret)
					goto out;
				ret = ubifs_change_one_lp(c, lp.lnum,
							  c->leb_size, 0, 0, 0,
							  0);
				if (ret)
					goto out;
			}
			ret = ubifs_leb_unmap(c, lp.lnum);
			if (ret)
				goto out;
			ret = lp.lnum;
			break;
		}

		/* Measure GC head space before/after to detect progress */
		space_before = c->leb_size - wbuf->offs - wbuf->used;
		if (wbuf->lnum == -1)
			space_before = 0;

		ret = ubifs_garbage_collect_leb(c, &lp);
		if (ret < 0) {
			if (ret == -EAGAIN || ret == -ENOSPC) {
				/*
				 * These codes are not errors, so we have to
				 * return the LEB to lprops. But if the
				 * 'ubifs_return_leb()' function fails, its
				 * failure code is propagated to the caller
				 * instead of the original '-EAGAIN' or
				 * '-ENOSPC'.
				 */
				err = ubifs_return_leb(c, lp.lnum);
				if (err)
					ret = err;
				break;
			}
			goto out;
		}

		if (ret == LEB_FREED) {
			/* An LEB has been freed and is ready for use */
			dbg_gc("LEB %d freed, return", lp.lnum);
			ret = lp.lnum;
			break;
		}

		if (ret == LEB_FREED_IDX) {
			/*
			 * This was an indexing LEB and it cannot be
			 * immediately used. And instead of requesting the
			 * commit straight away, we try to garbage collect some
			 * more.
			 */
			dbg_gc("indexing LEB %d freed, continue", lp.lnum);
			continue;
		}

		ubifs_assert(ret == LEB_RETAINED);
		space_after = c->leb_size - wbuf->offs - wbuf->used;
		dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
		       space_after - space_before);

		if (space_after > space_before) {
			/* GC makes progress, keep working */
			min_space >>= 1;
			if (min_space < c->dead_wm)
				min_space = c->dead_wm;
			continue;
		}

		dbg_gc("did not make progress");

		/*
		 * GC moved an LEB but has not made any progress. This means
		 * that the previous GC head LEB contained too little free
		 * space and the LEB which was GC'ed contained only large
		 * nodes which did not fit that space.
		 *
		 * We can do 2 things:
		 * 1. pick another LEB in a hope it'll contain a small node
		 *    which will fit the space we have at the end of current GC
		 *    head LEB, but there is no guarantee, so we try this out
		 *    unless we have already been working for too long;
		 * 2. request an LEB with more dirty space, which will force
		 *    'ubifs_find_dirty_leb()' to start scanning the lprops
		 *    table, instead of just picking one from the heap
		 *    (previously it already picked the dirtiest LEB).
		 */
		if (i < SOFT_LEBS_LIMIT) {
			dbg_gc("try again");
			continue;
		}

		min_space <<= 1;
		if (min_space > c->dark_wm)
			min_space = c->dark_wm;
		dbg_gc("set min. space to %d", min_space);
	}

	if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
		dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
		ubifs_commit_required(c);
		ret = -EAGAIN;
	}

	/* NOTE(review): this assumes 'c->gc_lnum' is a valid LEB here —
	 * TODO confirm it cannot be -1 on any path that reaches this point */
	err = ubifs_wbuf_sync_nolock(wbuf);
	if (!err)
		err = ubifs_leb_unmap(c, c->gc_lnum);
	if (err) {
		ret = err;
		goto out;
	}
out_unlock:
	mutex_unlock(&wbuf->io_mutex);
	return ret;

out:
	/* A real error: switch to read-only mode to prevent corruption */
	ubifs_assert(ret < 0);
	ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
	ubifs_ro_mode(c, ret);
	ubifs_wbuf_sync_nolock(wbuf);
	mutex_unlock(&wbuf->io_mutex);
	ubifs_return_leb(c, lp.lnum);
	return ret;
}
| 616 | |||
/**
 * ubifs_gc_start_commit - garbage collection at start of commit.
 * @c: UBIFS file-system description object
 *
 * If a LEB has only dirty and free space, then we may safely unmap it and make
 * it free. Note, we cannot do this with indexing LEBs because dirty space may
 * correspond to index nodes that are required for recovery. In that case, the
 * LEB cannot be unmapped until after the next commit.
 *
 * This function returns %0 upon success and a negative error code upon failure.
 */
int ubifs_gc_start_commit(struct ubifs_info *c)
{
	struct ubifs_gced_idx_leb *idx_gc;
	const struct ubifs_lprops *lp;
	int err = 0, flags;

	ubifs_get_lprops(c);

	/*
	 * Unmap (non-index) freeable LEBs. Note that recovery requires that all
	 * wbufs are sync'd before this, which is done in 'do_commit()'.
	 */
	while (1) {
		lp = ubifs_fast_find_freeable(c);
		if (unlikely(IS_ERR(lp))) {
			err = PTR_ERR(lp);
			goto out;
		}
		if (!lp)
			break;
		ubifs_assert(!(lp->flags & LPROPS_TAKEN));
		ubifs_assert(!(lp->flags & LPROPS_INDEX));
		err = ubifs_leb_unmap(c, lp->lnum);
		if (err)
			goto out;
		/* The whole LEB is free now; update lprops accordingly */
		lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
		if (unlikely(IS_ERR(lp))) {
			err = PTR_ERR(lp);
			goto out;
		}
		ubifs_assert(!(lp->flags & LPROPS_TAKEN));
		ubifs_assert(!(lp->flags & LPROPS_INDEX));
	}

	/* Mark GC'd index LEBs OK to unmap after this commit finishes */
	list_for_each_entry(idx_gc, &c->idx_gc, list)
		idx_gc->unmap = 1;

	/* Record index freeable LEBs for unmapping after commit */
	while (1) {
		lp = ubifs_fast_find_frdi_idx(c);
		if (unlikely(IS_ERR(lp))) {
			err = PTR_ERR(lp);
			goto out;
		}
		if (!lp)
			break;
		idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
		if (!idx_gc) {
			err = -ENOMEM;
			goto out;
		}
		ubifs_assert(!(lp->flags & LPROPS_TAKEN));
		ubifs_assert(lp->flags & LPROPS_INDEX);
		/* Don't release the LEB until after the next commit */
		/* Set LPROPS_TAKEN and clear LPROPS_INDEX (asserted set above,
		 * so the XOR clears it) in one expression */
		flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
		lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
		if (unlikely(IS_ERR(lp))) {
			err = PTR_ERR(lp);
			kfree(idx_gc);
			goto out;
		}
		ubifs_assert(lp->flags & LPROPS_TAKEN);
		ubifs_assert(!(lp->flags & LPROPS_INDEX));
		idx_gc->lnum = lp->lnum;
		idx_gc->unmap = 1;
		list_add(&idx_gc->list, &c->idx_gc);
	}
out:
	ubifs_release_lprops(c);
	return err;
}
| 700 | |||
/**
 * ubifs_gc_end_commit - garbage collection at end of commit.
 * @c: UBIFS file-system description object
 *
 * This function completes out-of-place garbage collection of index LEBs.
 * Every entry on @c->idx_gc that was marked for unmapping is unmapped, its
 * %LPROPS_TAKEN flag is cleared, and the entry is removed and freed.
 *
 * Returns %0 on success or a negative error code on failure.
 */
int ubifs_gc_end_commit(struct ubifs_info *c)
{
	struct ubifs_gced_idx_leb *idx_gc, *tmp;
	struct ubifs_wbuf *wbuf;
	int err = 0;

	/* Take the GC head I/O mutex to serialize with the garbage collector */
	wbuf = &c->jheads[GCHD].wbuf;
	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
	list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list)
		if (idx_gc->unmap) {
			dbg_gc("LEB %d", idx_gc->lnum);
			err = ubifs_leb_unmap(c, idx_gc->lnum);
			if (err)
				goto out;
			/* Clear LPROPS_TAKEN so the LEB can be used again */
			err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC,
					  LPROPS_NC, 0, LPROPS_TAKEN, -1);
			if (err)
				goto out;
			list_del(&idx_gc->list);
			kfree(idx_gc);
		}
out:
	mutex_unlock(&wbuf->io_mutex);
	return err;
}
| 732 | |||
| 733 | /** | ||
| 734 | * ubifs_destroy_idx_gc - destroy idx_gc list. | ||
| 735 | * @c: UBIFS file-system description object | ||
| 736 | * | ||
| 737 | * This function destroys the idx_gc list. It is called when unmounting or | ||
| 738 | * remounting read-only so locks are not needed. | ||
| 739 | */ | ||
| 740 | void ubifs_destroy_idx_gc(struct ubifs_info *c) | ||
| 741 | { | ||
| 742 | while (!list_empty(&c->idx_gc)) { | ||
| 743 | struct ubifs_gced_idx_leb *idx_gc; | ||
| 744 | |||
| 745 | idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, | ||
| 746 | list); | ||
| 747 | c->idx_gc_cnt -= 1; | ||
| 748 | list_del(&idx_gc->list); | ||
| 749 | kfree(idx_gc); | ||
| 750 | } | ||
| 751 | |||
| 752 | } | ||
| 753 | |||
| 754 | /** | ||
| 755 | * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list. | ||
| 756 | * @c: UBIFS file-system description object | ||
| 757 | * | ||
| 758 | * Called during start commit so locks are not needed. | ||
| 759 | */ | ||
| 760 | int ubifs_get_idx_gc_leb(struct ubifs_info *c) | ||
| 761 | { | ||
| 762 | struct ubifs_gced_idx_leb *idx_gc; | ||
| 763 | int lnum; | ||
| 764 | |||
| 765 | if (list_empty(&c->idx_gc)) | ||
| 766 | return -ENOSPC; | ||
| 767 | idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); | ||
| 768 | lnum = idx_gc->lnum; | ||
| 769 | /* c->idx_gc_cnt is updated by the caller when lprops are updated */ | ||
| 770 | list_del(&idx_gc->list); | ||
| 771 | kfree(idx_gc); | ||
| 772 | return lnum; | ||
| 773 | } | ||
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c new file mode 100644 index 000000000000..3374f91b6709 --- /dev/null +++ b/fs/ubifs/io.c | |||
| @@ -0,0 +1,914 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * Copyright (C) 2006, 2007 University of Szeged, Hungary | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify it | ||
| 8 | * under the terms of the GNU General Public License version 2 as published by | ||
| 9 | * the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 14 | * more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License along with | ||
| 17 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 18 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | * | ||
| 20 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | * Adrian Hunter | ||
| 22 | * Zoltan Sogor | ||
| 23 | */ | ||
| 24 | |||
| 25 | /* | ||
| 26 | * This file implements UBIFS I/O subsystem which provides various I/O-related | ||
| 27 | * helper functions (reading/writing/checking/validating nodes) and implements | ||
| 28 | * write-buffering support. Write buffers help to save space which otherwise | ||
| 29 | * would have been wasted for padding to the nearest minimal I/O unit boundary. | ||
| 30 | * Instead, data first goes to the write-buffer and is flushed when the | ||
| 31 | * buffer is full or when it is not used for some time (by timer). This is | ||
 * similar to the mechanism used by JFFS2.
| 33 | * | ||
| 34 | * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by | ||
| 35 | * mutexes defined inside these objects. Since sometimes upper-level code | ||
| 36 | * has to lock the write-buffer (e.g. journal space reservation code), many | ||
| 37 | * functions related to write-buffers have "nolock" suffix which means that the | ||
| 38 | * caller has to lock the write-buffer before calling this function. | ||
| 39 | * | ||
| 40 | * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not | ||
| 41 | * aligned, UBIFS starts the next node from the aligned address, and the padded | ||
| 42 | * bytes may contain any rubbish. In other words, UBIFS does not put padding | ||
| 43 | * bytes in those small gaps. Common headers of nodes store real node lengths, | ||
| 44 | * not aligned lengths. Indexing nodes also store real lengths in branches. | ||
| 45 | * | ||
| 46 | * UBIFS uses padding when it pads to the next min. I/O unit. In this case it | ||
| 47 | * uses padding nodes or padding bytes, if the padding node does not fit. | ||
| 48 | * | ||
| 49 | * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes | ||
| 50 | * every time they are read from the flash media. | ||
| 51 | */ | ||
| 52 | |||
| 53 | #include <linux/crc32.h> | ||
| 54 | #include "ubifs.h" | ||
| 55 | |||
/**
 * ubifs_check_node - check node.
 * @c: UBIFS file-system description object
 * @buf: node to check
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @quiet: print no messages
 *
 * This function checks node magic number and CRC checksum. This function also
 * validates node length to prevent UBIFS from becoming crazy when an attacker
 * feeds it a file-system image with incorrect nodes. For example, too large
 * node length in the common header could cause UBIFS to read memory outside of
 * allocated buffer when checking the CRC checksum.
 *
 * This function returns zero in case of success, %-EUCLEAN in case of bad CRC
 * or magic, and %-EINVAL in case of a bad node type or length.
 */
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
		     int offs, int quiet)
{
	int err = -EINVAL, type, node_len;
	uint32_t crc, node_crc, magic;
	const struct ubifs_ch *ch = buf;

	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);

	magic = le32_to_cpu(ch->magic);
	if (magic != UBIFS_NODE_MAGIC) {
		if (!quiet)
			ubifs_err("bad magic %#08x, expected %#08x",
				  magic, UBIFS_NODE_MAGIC);
		err = -EUCLEAN;
		goto out;
	}

	type = ch->node_type;
	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
		if (!quiet)
			ubifs_err("bad node type %d", type);
		goto out;
	}

	/* The node must not extend past the end of the eraseblock */
	node_len = le32_to_cpu(ch->len);
	if (node_len + offs > c->leb_size)
		goto out_len;

	/*
	 * Validate the length against the per-type range table; fixed-size
	 * node types have max_len == 0 and must match exactly.
	 */
	if (c->ranges[type].max_len == 0) {
		if (node_len != c->ranges[type].len)
			goto out_len;
	} else if (node_len < c->ranges[type].min_len ||
		   node_len > c->ranges[type].max_len)
		goto out_len;

	/* The CRC covers everything after the 8-byte magic/CRC prefix */
	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
	node_crc = le32_to_cpu(ch->crc);
	if (crc != node_crc) {
		if (!quiet)
			ubifs_err("bad CRC: calculated %#08x, read %#08x",
				  crc, node_crc);
		err = -EUCLEAN;
		goto out;
	}

	return 0;

out_len:
	if (!quiet)
		ubifs_err("bad node length %d", node_len);
out:
	if (!quiet) {
		ubifs_err("bad node at LEB %d:%d", lnum, offs);
		dbg_dump_node(c, buf);
		dbg_dump_stack();
	}
	return err;
}
| 133 | |||
| 134 | /** | ||
| 135 | * ubifs_pad - pad flash space. | ||
| 136 | * @c: UBIFS file-system description object | ||
| 137 | * @buf: buffer to put padding to | ||
| 138 | * @pad: how many bytes to pad | ||
| 139 | * | ||
| 140 | * The flash media obliges us to write only in chunks of %c->min_io_size and | ||
| 141 | * when we have to write less data we add padding node to the write-buffer and | ||
| 142 | * pad it to the next minimal I/O unit's boundary. Padding nodes help when the | ||
| 143 | * media is being scanned. If the amount of wasted space is not enough to fit a | ||
| 144 | * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes | ||
| 145 | * pattern (%UBIFS_PADDING_BYTE). | ||
| 146 | * | ||
| 147 | * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is | ||
| 148 | * used. | ||
| 149 | */ | ||
| 150 | void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) | ||
| 151 | { | ||
| 152 | uint32_t crc; | ||
| 153 | |||
| 154 | ubifs_assert(pad >= 0 && !(pad & 7)); | ||
| 155 | |||
| 156 | if (pad >= UBIFS_PAD_NODE_SZ) { | ||
| 157 | struct ubifs_ch *ch = buf; | ||
| 158 | struct ubifs_pad_node *pad_node = buf; | ||
| 159 | |||
| 160 | ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); | ||
| 161 | ch->node_type = UBIFS_PAD_NODE; | ||
| 162 | ch->group_type = UBIFS_NO_NODE_GROUP; | ||
| 163 | ch->padding[0] = ch->padding[1] = 0; | ||
| 164 | ch->sqnum = 0; | ||
| 165 | ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); | ||
| 166 | pad -= UBIFS_PAD_NODE_SZ; | ||
| 167 | pad_node->pad_len = cpu_to_le32(pad); | ||
| 168 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); | ||
| 169 | ch->crc = cpu_to_le32(crc); | ||
| 170 | memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); | ||
| 171 | } else if (pad > 0) | ||
| 172 | /* Too little space, padding node won't fit */ | ||
| 173 | memset(buf, UBIFS_PADDING_BYTE, pad); | ||
| 174 | } | ||
| 175 | |||
| 176 | /** | ||
| 177 | * next_sqnum - get next sequence number. | ||
| 178 | * @c: UBIFS file-system description object | ||
| 179 | */ | ||
| 180 | static unsigned long long next_sqnum(struct ubifs_info *c) | ||
| 181 | { | ||
| 182 | unsigned long long sqnum; | ||
| 183 | |||
| 184 | spin_lock(&c->cnt_lock); | ||
| 185 | sqnum = ++c->max_sqnum; | ||
| 186 | spin_unlock(&c->cnt_lock); | ||
| 187 | |||
| 188 | if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { | ||
| 189 | if (sqnum >= SQNUM_WATERMARK) { | ||
| 190 | ubifs_err("sequence number overflow %llu, end of life", | ||
| 191 | sqnum); | ||
| 192 | ubifs_ro_mode(c, -EINVAL); | ||
| 193 | } | ||
| 194 | ubifs_warn("running out of sequence numbers, end of life soon"); | ||
| 195 | } | ||
| 196 | |||
| 197 | return sqnum; | ||
| 198 | } | ||
| 199 | |||
| 200 | /** | ||
| 201 | * ubifs_prepare_node - prepare node to be written to flash. | ||
| 202 | * @c: UBIFS file-system description object | ||
| 203 | * @node: the node to pad | ||
| 204 | * @len: node length | ||
| 205 | * @pad: if the buffer has to be padded | ||
| 206 | * | ||
| 207 | * This function prepares node at @node to be written to the media - it | ||
| 208 | * calculates node CRC, fills the common header, and adds proper padding up to | ||
| 209 | * the next minimum I/O unit if @pad is not zero. | ||
| 210 | */ | ||
| 211 | void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) | ||
| 212 | { | ||
| 213 | uint32_t crc; | ||
| 214 | struct ubifs_ch *ch = node; | ||
| 215 | unsigned long long sqnum = next_sqnum(c); | ||
| 216 | |||
| 217 | ubifs_assert(len >= UBIFS_CH_SZ); | ||
| 218 | |||
| 219 | ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); | ||
| 220 | ch->len = cpu_to_le32(len); | ||
| 221 | ch->group_type = UBIFS_NO_NODE_GROUP; | ||
| 222 | ch->sqnum = cpu_to_le64(sqnum); | ||
| 223 | ch->padding[0] = ch->padding[1] = 0; | ||
| 224 | crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); | ||
| 225 | ch->crc = cpu_to_le32(crc); | ||
| 226 | |||
| 227 | if (pad) { | ||
| 228 | len = ALIGN(len, 8); | ||
| 229 | pad = ALIGN(len, c->min_io_size) - len; | ||
| 230 | ubifs_pad(c, node + len, pad); | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | /** | ||
| 235 | * ubifs_prep_grp_node - prepare node of a group to be written to flash. | ||
| 236 | * @c: UBIFS file-system description object | ||
| 237 | * @node: the node to pad | ||
| 238 | * @len: node length | ||
| 239 | * @last: indicates the last node of the group | ||
| 240 | * | ||
| 241 | * This function prepares node at @node to be written to the media - it | ||
| 242 | * calculates node CRC and fills the common header. | ||
| 243 | */ | ||
| 244 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) | ||
| 245 | { | ||
| 246 | uint32_t crc; | ||
| 247 | struct ubifs_ch *ch = node; | ||
| 248 | unsigned long long sqnum = next_sqnum(c); | ||
| 249 | |||
| 250 | ubifs_assert(len >= UBIFS_CH_SZ); | ||
| 251 | |||
| 252 | ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); | ||
| 253 | ch->len = cpu_to_le32(len); | ||
| 254 | if (last) | ||
| 255 | ch->group_type = UBIFS_LAST_OF_NODE_GROUP; | ||
| 256 | else | ||
| 257 | ch->group_type = UBIFS_IN_NODE_GROUP; | ||
| 258 | ch->sqnum = cpu_to_le64(sqnum); | ||
| 259 | ch->padding[0] = ch->padding[1] = 0; | ||
| 260 | crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); | ||
| 261 | ch->crc = cpu_to_le32(crc); | ||
| 262 | } | ||
| 263 | |||
/**
 * wbuf_timer_callback_nolock - write-buffer timer callback function.
 * @data: timer data (write-buffer descriptor)
 *
 * This function is called when the write-buffer timer expires. It marks the
 * write-buffer as needing synchronization and wakes up the background thread,
 * which performs the actual syncing (see 'ubifs_bg_wbufs_sync()').
 */
static void wbuf_timer_callback_nolock(unsigned long data)
{
	struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data;

	wbuf->need_sync = 1;
	wbuf->c->need_wbuf_sync = 1;
	ubifs_wake_up_bgt(wbuf->c);
}
| 278 | |||
| 279 | /** | ||
| 280 | * new_wbuf_timer - start new write-buffer timer. | ||
| 281 | * @wbuf: write-buffer descriptor | ||
| 282 | */ | ||
| 283 | static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) | ||
| 284 | { | ||
| 285 | ubifs_assert(!timer_pending(&wbuf->timer)); | ||
| 286 | |||
| 287 | if (!wbuf->timeout) | ||
| 288 | return; | ||
| 289 | |||
| 290 | wbuf->timer.expires = jiffies + wbuf->timeout; | ||
| 291 | add_timer(&wbuf->timer); | ||
| 292 | } | ||
| 293 | |||
/**
 * cancel_wbuf_timer_nolock - cancel write-buffer timer.
 * @wbuf: write-buffer descriptor
 *
 * This function disarms the write-buffer synchronization timer and clears the
 * 'need_sync' flag so that a pending background sync is abandoned.
 */
static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
	/*
	 * If the syncer is waiting for the lock (from the background thread's
	 * context) and another task is changing write-buffer then the syncing
	 * should be canceled.
	 */
	wbuf->need_sync = 0;
	del_timer(&wbuf->timer);
}
| 308 | |||
/**
 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
 * @wbuf: write-buffer to synchronize
 *
 * This function synchronizes write-buffer @wbuf: it pads the buffered data up
 * to one minimal I/O unit and writes it out, then resets the buffer to the
 * next min. I/O unit boundary. The caller must hold @wbuf->io_mutex (hence
 * the "nolock" suffix). Returns zero in case of success or a negative error
 * code in case of failure.
 */
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
{
	struct ubifs_info *c = wbuf->c;
	int err, dirt;

	cancel_wbuf_timer_nolock(wbuf);
	if (!wbuf->used || wbuf->lnum == -1)
		/* Write-buffer is empty or not seeked */
		return 0;

	dbg_io("LEB %d:%d, %d bytes",
	       wbuf->lnum, wbuf->offs, wbuf->used);
	ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
	ubifs_assert(!(wbuf->avail & 7));
	ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);

	if (c->ro_media)
		return -EROFS;

	/* Pad the unused tail of the buffer and flush one min. I/O unit */
	ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
	err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
			    c->min_io_size, wbuf->dtype);
	if (err) {
		ubifs_err("cannot write %d bytes to LEB %d:%d",
			  c->min_io_size, wbuf->lnum, wbuf->offs);
		dbg_dump_stack();
		return err;
	}

	/* The padding bytes just written are accounted as dirty space */
	dirt = wbuf->avail;

	/* Advance to the next min. I/O unit under the buffer state lock */
	spin_lock(&wbuf->lock);
	wbuf->offs += c->min_io_size;
	wbuf->avail = c->min_io_size;
	wbuf->used = 0;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

	if (wbuf->sync_callback)
		err = wbuf->sync_callback(c, wbuf->lnum,
					  c->leb_size - wbuf->offs, dirt);
	return err;
}
| 359 | |||
/**
 * ubifs_wbuf_seek_nolock - seek write-buffer.
 * @wbuf: write-buffer
 * @lnum: logical eraseblock number to seek to
 * @offs: logical eraseblock offset to seek to
 * @dtype: data type
 *
 * This function targets the write buffer to logical eraseblock @lnum:@offs.
 * The write-buffer is synchronized if it is not empty. The caller must hold
 * @wbuf->io_mutex. Returns zero in case of success and a negative error code
 * in case of failure.
 */
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
			   int dtype)
{
	const struct ubifs_info *c = wbuf->c;

	dbg_io("LEB %d:%d", lnum, offs);
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
	ubifs_assert(offs >= 0 && offs <= c->leb_size);
	ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
	ubifs_assert(lnum != wbuf->lnum);

	/* Flush any buffered data to the old position first */
	if (wbuf->used > 0) {
		int err = ubifs_wbuf_sync_nolock(wbuf);

		if (err)
			return err;
	}

	spin_lock(&wbuf->lock);
	wbuf->lnum = lnum;
	wbuf->offs = offs;
	wbuf->avail = c->min_io_size;
	wbuf->used = 0;
	spin_unlock(&wbuf->lock);
	/*
	 * NOTE(review): ->dtype is written outside ->lock — presumably it is
	 * only accessed under io_mutex; verify against readers.
	 */
	wbuf->dtype = dtype;

	return 0;
}
| 399 | |||
/**
 * ubifs_bg_wbufs_sync - synchronize write-buffers.
 * @c: UBIFS file-system description object
 *
 * This function is called by background thread to synchronize write-buffers.
 * Each journal head's write-buffer is synchronized if its timer marked it as
 * needing synchronization. Returns zero in case of success and a negative
 * error code in case of failure.
 */
int ubifs_bg_wbufs_sync(struct ubifs_info *c)
{
	int err, i;

	if (!c->need_wbuf_sync)
		return 0;
	c->need_wbuf_sync = 0;

	if (c->ro_media) {
		err = -EROFS;
		goto out_timers;
	}

	dbg_io("synchronize");
	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		cond_resched();

		/*
		 * If the mutex is locked then wbuf is being changed, so
		 * synchronization is not necessary.
		 */
		if (mutex_is_locked(&wbuf->io_mutex))
			continue;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		/* Re-check under the mutex - the owner may have synced */
		if (!wbuf->need_sync) {
			mutex_unlock(&wbuf->io_mutex);
			continue;
		}

		err = ubifs_wbuf_sync_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);
		if (err) {
			ubifs_err("cannot sync write-buffer, error %d", err);
			ubifs_ro_mode(c, err);
			goto out_timers;
		}
	}

	return 0;

out_timers:
	/* Cancel all timers to prevent repeated errors */
	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		cancel_wbuf_timer_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);
	}
	return err;
}
| 462 | |||
/**
 * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
 * @wbuf: write-buffer
 * @buf: node to write
 * @len: node length
 *
 * This function writes data to flash via write-buffer @wbuf. This means that
 * the last piece of the node won't reach the flash media immediately if it
 * does not take whole minimal I/O unit. Instead, the node will sit in RAM
 * until the write-buffer is synchronized (e.g., by timer). The caller must
 * hold @wbuf->io_mutex (asserted below).
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. If the node cannot be written because there is no more
 * space in this logical eraseblock, %-ENOSPC is returned.
 */
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
{
	struct ubifs_info *c = wbuf->c;
	int err, written, n, aligned_len = ALIGN(len, 8), offs;

	dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
	       dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
	       wbuf->offs + wbuf->used);
	ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
	ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
	ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
	ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
	ubifs_assert(mutex_is_locked(&wbuf->io_mutex));

	/* The node must fit in what remains of this eraseblock */
	if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
		err = -ENOSPC;
		goto out;
	}

	cancel_wbuf_timer_nolock(wbuf);

	if (c->ro_media)
		return -EROFS;

	if (aligned_len <= wbuf->avail) {
		/*
		 * The node is not very large and fits entirely within
		 * write-buffer.
		 */
		memcpy(wbuf->buf + wbuf->used, buf, len);

		if (aligned_len == wbuf->avail) {
			/* Buffer is exactly full - flush one min. I/O unit */
			dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
			       wbuf->offs);
			err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
					    wbuf->offs, c->min_io_size,
					    wbuf->dtype);
			if (err)
				goto out;

			spin_lock(&wbuf->lock);
			wbuf->offs += c->min_io_size;
			wbuf->avail = c->min_io_size;
			wbuf->used = 0;
			wbuf->next_ino = 0;
			spin_unlock(&wbuf->lock);
		} else {
			spin_lock(&wbuf->lock);
			wbuf->avail -= aligned_len;
			wbuf->used += aligned_len;
			spin_unlock(&wbuf->lock);
		}

		goto exit;
	}

	/*
	 * The node is large enough and does not fit entirely within current
	 * minimal I/O unit. We have to fill and flush write-buffer and switch
	 * to the next min. I/O unit.
	 */
	dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
	memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
	err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
			    c->min_io_size, wbuf->dtype);
	if (err)
		goto out;

	/* Track how much of @buf has been consumed so far */
	offs = wbuf->offs + c->min_io_size;
	len -= wbuf->avail;
	aligned_len -= wbuf->avail;
	written = wbuf->avail;

	/*
	 * The remaining data may take more whole min. I/O units, so write the
	 * remains multiple to min. I/O unit size directly to the flash media.
	 * We align node length to 8-byte boundary because we anyway flash wbuf
	 * if the remaining space is less than 8 bytes.
	 */
	n = aligned_len >> c->min_io_shift;
	if (n) {
		n <<= c->min_io_shift;
		dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
		err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
				    wbuf->dtype);
		if (err)
			goto out;
		offs += n;
		aligned_len -= n;
		len -= n;
		written += n;
	}

	spin_lock(&wbuf->lock);
	if (aligned_len)
		/*
		 * And now we have what's left and what does not take whole
		 * min. I/O unit, so write it to the write-buffer and we are
		 * done.
		 */
		memcpy(wbuf->buf, buf + written, len);

	wbuf->offs = offs;
	wbuf->used = aligned_len;
	wbuf->avail = c->min_io_size - aligned_len;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

exit:
	if (wbuf->sync_callback) {
		int free = c->leb_size - wbuf->offs - wbuf->used;

		err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
		if (err)
			goto out;
	}

	/* Re-arm the timer so the buffered tail gets flushed eventually */
	if (wbuf->used)
		new_wbuf_timer_nolock(wbuf);

	return 0;

out:
	ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
		  len, wbuf->lnum, wbuf->offs, err);
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	dbg_dump_leb(c, wbuf->lnum);
	return err;
}
| 608 | |||
| 609 | /** | ||
| 610 | * ubifs_write_node - write node to the media. | ||
| 611 | * @c: UBIFS file-system description object | ||
| 612 | * @buf: the node to write | ||
| 613 | * @len: node length | ||
| 614 | * @lnum: logical eraseblock number | ||
| 615 | * @offs: offset within the logical eraseblock | ||
| 616 | * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) | ||
| 617 | * | ||
| 618 | * This function automatically fills node magic number, assigns sequence | ||
| 619 | * number, and calculates node CRC checksum. The length of the @buf buffer has | ||
| 620 | * to be aligned to the minimal I/O unit size. This function automatically | ||
| 621 | * appends padding node and padding bytes if needed. Returns zero in case of | ||
| 622 | * success and a negative error code in case of failure. | ||
| 623 | */ | ||
| 624 | int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, | ||
| 625 | int offs, int dtype) | ||
| 626 | { | ||
| 627 | int err, buf_len = ALIGN(len, c->min_io_size); | ||
| 628 | |||
| 629 | dbg_io("LEB %d:%d, %s, length %d (aligned %d)", | ||
| 630 | lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, | ||
| 631 | buf_len); | ||
| 632 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | ||
| 633 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); | ||
| 634 | |||
| 635 | if (c->ro_media) | ||
| 636 | return -EROFS; | ||
| 637 | |||
| 638 | ubifs_prepare_node(c, buf, len, 1); | ||
| 639 | err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); | ||
| 640 | if (err) { | ||
| 641 | ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", | ||
| 642 | buf_len, lnum, offs, err); | ||
| 643 | dbg_dump_node(c, buf); | ||
| 644 | dbg_dump_stack(); | ||
| 645 | } | ||
| 646 | |||
| 647 | return err; | ||
| 648 | } | ||
| 649 | |||
/**
 * ubifs_read_node_wbuf - read node from the media or write-buffer.
 * @wbuf: wbuf to check for un-written data
 * @buf: buffer to read to
 * @type: node type
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 *
 * This function reads a node of known type and length, checks it and stores
 * in @buf. If the node partially or fully sits in the write-buffer, this
 * function takes data from the buffer, otherwise it reads the flash media.
 * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative
 * error code in case of failure.
 */
int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
			 int lnum, int offs)
{
	const struct ubifs_info *c = wbuf->c;
	int err, rlen, overlap;
	struct ubifs_ch *ch = buf;

	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);
	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);

	spin_lock(&wbuf->lock);
	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
	if (!overlap) {
		/* We may safely unlock the write-buffer and read the data */
		spin_unlock(&wbuf->lock);
		return ubifs_read_node(c, buf, type, len, lnum, offs);
	}

	/* Don't read under wbuf */
	rlen = wbuf->offs - offs;
	if (rlen < 0)
		rlen = 0;

	/*
	 * Copy the rest from the write-buffer. This must happen while the
	 * spinlock is still held so the buffer contents cannot change.
	 */
	memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
	spin_unlock(&wbuf->lock);

	if (rlen > 0) {
		/* Read everything that goes before write-buffer */
		err = ubi_read(c->ubi, lnum, buf, offs, rlen);
		if (err && err != -EBADMSG) {
			/* -EBADMSG means ECC trouble; checked by CRC below */
			ubifs_err("failed to read node %d from LEB %d:%d, "
				  "error %d", type, lnum, offs, err);
			dbg_dump_stack();
			return err;
		}
	}

	if (type != ch->node_type) {
		ubifs_err("bad node type (%d but expected %d)",
			  ch->node_type, type);
		goto out;
	}

	err = ubifs_check_node(c, buf, lnum, offs, 0);
	if (err) {
		ubifs_err("expected node type %d", type);
		return err;
	}

	rlen = le32_to_cpu(ch->len);
	if (rlen != len) {
		ubifs_err("bad node length %d, expected %d", rlen, len);
		goto out;
	}

	return 0;

out:
	ubifs_err("bad node at LEB %d:%d", lnum, offs);
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	return -EINVAL;
}
| 731 | |||
| 732 | /** | ||
| 733 | * ubifs_read_node - read node. | ||
| 734 | * @c: UBIFS file-system description object | ||
| 735 | * @buf: buffer to read to | ||
| 736 | * @type: node type | ||
| 737 | * @len: node length (not aligned) | ||
| 738 | * @lnum: logical eraseblock number | ||
| 739 | * @offs: offset within the logical eraseblock | ||
| 740 | * | ||
| 741 | * This function reads a node of known type and and length, checks it and | ||
| 742 | * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched | ||
| 743 | * and a negative error code in case of failure. | ||
| 744 | */ | ||
| 745 | int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, | ||
| 746 | int lnum, int offs) | ||
| 747 | { | ||
| 748 | int err, l; | ||
| 749 | struct ubifs_ch *ch = buf; | ||
| 750 | |||
| 751 | dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); | ||
| 752 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | ||
| 753 | ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); | ||
| 754 | ubifs_assert(!(offs & 7) && offs < c->leb_size); | ||
| 755 | ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); | ||
| 756 | |||
| 757 | err = ubi_read(c->ubi, lnum, buf, offs, len); | ||
| 758 | if (err && err != -EBADMSG) { | ||
| 759 | ubifs_err("cannot read node %d from LEB %d:%d, error %d", | ||
| 760 | type, lnum, offs, err); | ||
| 761 | return err; | ||
| 762 | } | ||
| 763 | |||
| 764 | if (type != ch->node_type) { | ||
| 765 | ubifs_err("bad node type (%d but expected %d)", | ||
| 766 | ch->node_type, type); | ||
| 767 | goto out; | ||
| 768 | } | ||
| 769 | |||
| 770 | err = ubifs_check_node(c, buf, lnum, offs, 0); | ||
| 771 | if (err) { | ||
| 772 | ubifs_err("expected node type %d", type); | ||
| 773 | return err; | ||
| 774 | } | ||
| 775 | |||
| 776 | l = le32_to_cpu(ch->len); | ||
| 777 | if (l != len) { | ||
| 778 | ubifs_err("bad node length %d, expected %d", l, len); | ||
| 779 | goto out; | ||
| 780 | } | ||
| 781 | |||
| 782 | return 0; | ||
| 783 | |||
| 784 | out: | ||
| 785 | ubifs_err("bad node at LEB %d:%d", lnum, offs); | ||
| 786 | dbg_dump_node(c, buf); | ||
| 787 | dbg_dump_stack(); | ||
| 788 | return -EINVAL; | ||
| 789 | } | ||
| 790 | |||
| 791 | /** | ||
| 792 | * ubifs_wbuf_init - initialize write-buffer. | ||
| 793 | * @c: UBIFS file-system description object | ||
| 794 | * @wbuf: write-buffer to initialize | ||
| 795 | * | ||
| 796 | * This function initializes write buffer. Returns zero in case of success | ||
| 797 | * %-ENOMEM in case of failure. | ||
| 798 | */ | ||
| 799 | int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) | ||
| 800 | { | ||
| 801 | size_t size; | ||
| 802 | |||
| 803 | wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); | ||
| 804 | if (!wbuf->buf) | ||
| 805 | return -ENOMEM; | ||
| 806 | |||
| 807 | size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); | ||
| 808 | wbuf->inodes = kmalloc(size, GFP_KERNEL); | ||
| 809 | if (!wbuf->inodes) { | ||
| 810 | kfree(wbuf->buf); | ||
| 811 | wbuf->buf = NULL; | ||
| 812 | return -ENOMEM; | ||
| 813 | } | ||
| 814 | |||
| 815 | wbuf->used = 0; | ||
| 816 | wbuf->lnum = wbuf->offs = -1; | ||
| 817 | wbuf->avail = c->min_io_size; | ||
| 818 | wbuf->dtype = UBI_UNKNOWN; | ||
| 819 | wbuf->sync_callback = NULL; | ||
| 820 | mutex_init(&wbuf->io_mutex); | ||
| 821 | spin_lock_init(&wbuf->lock); | ||
| 822 | |||
| 823 | wbuf->c = c; | ||
| 824 | init_timer(&wbuf->timer); | ||
| 825 | wbuf->timer.function = wbuf_timer_callback_nolock; | ||
| 826 | wbuf->timer.data = (unsigned long)wbuf; | ||
| 827 | wbuf->timeout = DEFAULT_WBUF_TIMEOUT; | ||
| 828 | wbuf->next_ino = 0; | ||
| 829 | |||
| 830 | return 0; | ||
| 831 | } | ||
| 832 | |||
| 833 | /** | ||
| 834 | * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. | ||
| 835 | * @wbuf: the write-buffer whereto add | ||
| 836 | * @inum: the inode number | ||
| 837 | * | ||
| 838 | * This function adds an inode number to the inode array of the write-buffer. | ||
| 839 | */ | ||
| 840 | void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) | ||
| 841 | { | ||
| 842 | if (!wbuf->buf) | ||
| 843 | /* NOR flash or something similar */ | ||
| 844 | return; | ||
| 845 | |||
| 846 | spin_lock(&wbuf->lock); | ||
| 847 | if (wbuf->used) | ||
| 848 | wbuf->inodes[wbuf->next_ino++] = inum; | ||
| 849 | spin_unlock(&wbuf->lock); | ||
| 850 | } | ||
| 851 | |||
| 852 | /** | ||
| 853 | * wbuf_has_ino - returns if the wbuf contains data from the inode. | ||
| 854 | * @wbuf: the write-buffer | ||
| 855 | * @inum: the inode number | ||
| 856 | * | ||
| 857 | * This function returns with %1 if the write-buffer contains some data from the | ||
| 858 | * given inode otherwise it returns with %0. | ||
| 859 | */ | ||
| 860 | static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) | ||
| 861 | { | ||
| 862 | int i, ret = 0; | ||
| 863 | |||
| 864 | spin_lock(&wbuf->lock); | ||
| 865 | for (i = 0; i < wbuf->next_ino; i++) | ||
| 866 | if (inum == wbuf->inodes[i]) { | ||
| 867 | ret = 1; | ||
| 868 | break; | ||
| 869 | } | ||
| 870 | spin_unlock(&wbuf->lock); | ||
| 871 | |||
| 872 | return ret; | ||
| 873 | } | ||
| 874 | |||
| 875 | /** | ||
| 876 | * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. | ||
| 877 | * @c: UBIFS file-system description object | ||
| 878 | * @inode: inode to synchronize | ||
| 879 | * | ||
| 880 | * This function synchronizes write-buffers which contain nodes belonging to | ||
| 881 | * @inode. Returns zero in case of success and a negative error code in case of | ||
| 882 | * failure. | ||
| 883 | */ | ||
| 884 | int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) | ||
| 885 | { | ||
| 886 | int i, err = 0; | ||
| 887 | |||
| 888 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 889 | struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; | ||
| 890 | |||
| 891 | if (i == GCHD) | ||
| 892 | /* | ||
| 893 | * GC head is special, do not look at it. Even if the | ||
| 894 | * head contains something related to this inode, it is | ||
| 895 | * a _copy_ of corresponding on-flash node which sits | ||
| 896 | * somewhere else. | ||
| 897 | */ | ||
| 898 | continue; | ||
| 899 | |||
| 900 | if (!wbuf_has_ino(wbuf, inode->i_ino)) | ||
| 901 | continue; | ||
| 902 | |||
| 903 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); | ||
| 904 | if (wbuf_has_ino(wbuf, inode->i_ino)) | ||
| 905 | err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 906 | mutex_unlock(&wbuf->io_mutex); | ||
| 907 | |||
| 908 | if (err) { | ||
| 909 | ubifs_ro_mode(c, err); | ||
| 910 | return err; | ||
| 911 | } | ||
| 912 | } | ||
| 913 | return 0; | ||
| 914 | } | ||
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c new file mode 100644 index 000000000000..5e82cffe9695 --- /dev/null +++ b/fs/ubifs/ioctl.c | |||
| @@ -0,0 +1,204 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * Copyright (C) 2006, 2007 University of Szeged, Hungary | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify it | ||
| 8 | * under the terms of the GNU General Public License version 2 as published by | ||
| 9 | * the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 14 | * more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License along with | ||
| 17 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 18 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | * | ||
| 20 | * Authors: Zoltan Sogor | ||
| 21 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 22 | * Adrian Hunter | ||
| 23 | */ | ||
| 24 | |||
/* This file implements EXT2-compatible inode flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) ioctl() calls */
| 26 | |||
| 27 | #include <linux/compat.h> | ||
| 28 | #include <linux/smp_lock.h> | ||
| 29 | #include <linux/mount.h> | ||
| 30 | #include "ubifs.h" | ||
| 31 | |||
| 32 | /** | ||
| 33 | * ubifs_set_inode_flags - set VFS inode flags. | ||
| 34 | * @inode: VFS inode to set flags for | ||
| 35 | * | ||
| 36 | * This function propagates flags from UBIFS inode object to VFS inode object. | ||
| 37 | */ | ||
| 38 | void ubifs_set_inode_flags(struct inode *inode) | ||
| 39 | { | ||
| 40 | unsigned int flags = ubifs_inode(inode)->flags; | ||
| 41 | |||
| 42 | inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC); | ||
| 43 | if (flags & UBIFS_SYNC_FL) | ||
| 44 | inode->i_flags |= S_SYNC; | ||
| 45 | if (flags & UBIFS_APPEND_FL) | ||
| 46 | inode->i_flags |= S_APPEND; | ||
| 47 | if (flags & UBIFS_IMMUTABLE_FL) | ||
| 48 | inode->i_flags |= S_IMMUTABLE; | ||
| 49 | if (flags & UBIFS_DIRSYNC_FL) | ||
| 50 | inode->i_flags |= S_DIRSYNC; | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags. | ||
| 55 | * @ioctl_flags: flags to convert | ||
| 56 | * | ||
| 57 | * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags | ||
| 58 | * (@UBIFS_COMPR_FL, etc). | ||
| 59 | */ | ||
| 60 | static int ioctl2ubifs(int ioctl_flags) | ||
| 61 | { | ||
| 62 | int ubifs_flags = 0; | ||
| 63 | |||
| 64 | if (ioctl_flags & FS_COMPR_FL) | ||
| 65 | ubifs_flags |= UBIFS_COMPR_FL; | ||
| 66 | if (ioctl_flags & FS_SYNC_FL) | ||
| 67 | ubifs_flags |= UBIFS_SYNC_FL; | ||
| 68 | if (ioctl_flags & FS_APPEND_FL) | ||
| 69 | ubifs_flags |= UBIFS_APPEND_FL; | ||
| 70 | if (ioctl_flags & FS_IMMUTABLE_FL) | ||
| 71 | ubifs_flags |= UBIFS_IMMUTABLE_FL; | ||
| 72 | if (ioctl_flags & FS_DIRSYNC_FL) | ||
| 73 | ubifs_flags |= UBIFS_DIRSYNC_FL; | ||
| 74 | |||
| 75 | return ubifs_flags; | ||
| 76 | } | ||
| 77 | |||
| 78 | /* | ||
| 79 | * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags. | ||
| 80 | * @ubifs_flags: flags to convert | ||
| 81 | * | ||
| 82 | * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags | ||
| 83 | * (@FS_COMPR_FL, etc). | ||
| 84 | */ | ||
| 85 | static int ubifs2ioctl(int ubifs_flags) | ||
| 86 | { | ||
| 87 | int ioctl_flags = 0; | ||
| 88 | |||
| 89 | if (ubifs_flags & UBIFS_COMPR_FL) | ||
| 90 | ioctl_flags |= FS_COMPR_FL; | ||
| 91 | if (ubifs_flags & UBIFS_SYNC_FL) | ||
| 92 | ioctl_flags |= FS_SYNC_FL; | ||
| 93 | if (ubifs_flags & UBIFS_APPEND_FL) | ||
| 94 | ioctl_flags |= FS_APPEND_FL; | ||
| 95 | if (ubifs_flags & UBIFS_IMMUTABLE_FL) | ||
| 96 | ioctl_flags |= FS_IMMUTABLE_FL; | ||
| 97 | if (ubifs_flags & UBIFS_DIRSYNC_FL) | ||
| 98 | ioctl_flags |= FS_DIRSYNC_FL; | ||
| 99 | |||
| 100 | return ioctl_flags; | ||
| 101 | } | ||
| 102 | |||
/*
 * setflags - apply new ioctl inode flags to an inode.
 * @inode: VFS inode to change
 * @flags: new flags in FS_IOC_SETFLAGS (FS_*_FL) format
 *
 * Budgets for re-writing the inode, validates the change under @ui_mutex,
 * stores the flags in both the UBIFS and VFS inode objects, and marks the
 * inode dirty. Returns zero in case of success and a negative error code in
 * case of failure.
 */
static int setflags(struct inode *inode, int flags)
{
	int oldflags, err, release;
	struct ubifs_inode *ui = ubifs_inode(inode);
	struct ubifs_info *c = inode->i_sb->s_fs_info;
	/* Changing flags dirties the inode node (plus its inlined data) */
	struct ubifs_budget_req req = { .dirtied_ino = 1,
					.dirtied_ino_d = ui->data_len };

	err = ubifs_budget_space(c, &req);
	if (err)
		return err;

	/*
	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
	 * the relevant capability.
	 */
	mutex_lock(&ui->ui_mutex);
	oldflags = ubifs2ioctl(ui->flags);
	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			err = -EPERM;
			goto out_unlock;
		}
	}

	ui->flags = ioctl2ubifs(flags);
	ubifs_set_inode_flags(inode);
	inode->i_ctime = ubifs_current_time(inode);
	/*
	 * If the inode was already dirty, its budget was taken earlier, so
	 * this request's budget must be released (marking dirty again does
	 * not consume it).
	 */
	release = ui->dirty;
	mark_inode_dirty_sync(inode);
	mutex_unlock(&ui->ui_mutex);

	if (release)
		ubifs_release_budget(c, &req);
	/* For synchronous inodes, push the change to the media right away */
	if (IS_SYNC(inode))
		err = write_inode_now(inode, 1);
	return err;

out_unlock:
	ubifs_err("can't modify inode %lu attributes", inode->i_ino);
	mutex_unlock(&ui->ui_mutex);
	ubifs_release_budget(c, &req);
	return err;
}
| 147 | |||
| 148 | long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 149 | { | ||
| 150 | int flags, err; | ||
| 151 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 152 | |||
| 153 | switch (cmd) { | ||
| 154 | case FS_IOC_GETFLAGS: | ||
| 155 | flags = ubifs2ioctl(ubifs_inode(inode)->flags); | ||
| 156 | |||
| 157 | return put_user(flags, (int __user *) arg); | ||
| 158 | |||
| 159 | case FS_IOC_SETFLAGS: { | ||
| 160 | if (IS_RDONLY(inode)) | ||
| 161 | return -EROFS; | ||
| 162 | |||
| 163 | if (!is_owner_or_cap(inode)) | ||
| 164 | return -EACCES; | ||
| 165 | |||
| 166 | if (get_user(flags, (int __user *) arg)) | ||
| 167 | return -EFAULT; | ||
| 168 | |||
| 169 | if (!S_ISDIR(inode->i_mode)) | ||
| 170 | flags &= ~FS_DIRSYNC_FL; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Make sure the file-system is read-write and make sure it | ||
| 174 | * will not become read-only while we are changing the flags. | ||
| 175 | */ | ||
| 176 | err = mnt_want_write(file->f_path.mnt); | ||
| 177 | if (err) | ||
| 178 | return err; | ||
| 179 | err = setflags(inode, flags); | ||
| 180 | mnt_drop_write(file->f_path.mnt); | ||
| 181 | return err; | ||
| 182 | } | ||
| 183 | |||
| 184 | default: | ||
| 185 | return -ENOTTY; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
#ifdef CONFIG_COMPAT
/*
 * ubifs_compat_ioctl - 32-bit compatibility ioctl() handler.
 *
 * Maps the 32-bit flag ioctls onto their native counterparts and forwards to
 * 'ubifs_ioctl()'. Returns %-ENOIOCTLCMD for unsupported commands.
 */
long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	unsigned int native_cmd;

	if (cmd == FS_IOC32_GETFLAGS)
		native_cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		native_cmd = FS_IOC_SETFLAGS;
	else
		return -ENOIOCTLCMD;

	return ubifs_ioctl(file, native_cmd, (unsigned long)compat_ptr(arg));
}
#endif
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c new file mode 100644 index 000000000000..283155abe5f5 --- /dev/null +++ b/fs/ubifs/journal.c | |||
| @@ -0,0 +1,1387 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements UBIFS journal. | ||
| 25 | * | ||
| 26 | * The journal consists of 2 parts - the log and bud LEBs. The log has fixed | ||
| 27 | * length and position, while a bud logical eraseblock is any LEB in the main | ||
| 28 | * area. Buds contain file system data - data nodes, inode nodes, etc. The log | ||
| 29 | * contains only references to buds and some other stuff like commit | ||
| 30 | * start node. The idea is that when we commit the journal, we do | ||
| 31 | * not copy the data, the buds just become indexed. Since after the commit the | ||
| 32 | * nodes in bud eraseblocks become leaf nodes of the file system index tree, we | ||
| 33 | * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will | ||
 * become leaves in the future.
| 35 | * | ||
| 36 | * The journal is multi-headed because we want to write data to the journal as | ||
| 37 | * optimally as possible. It is nice to have nodes belonging to the same inode | ||
| 38 | * in one LEB, so we may write data owned by different inodes to different | ||
| 39 | * journal heads, although at present only one data head is used. | ||
| 40 | * | ||
| 41 | * For recovery reasons, the base head contains all inode nodes, all directory | ||
| 42 | * entry nodes and all truncate nodes. This means that the other heads contain | ||
| 43 | * only data nodes. | ||
| 44 | * | ||
| 45 | * Bud LEBs may be half-indexed. For example, if the bud was not full at the | ||
| 46 | * time of commit, the bud is retained to continue to be used in the journal, | ||
| 47 | * even though the "front" of the LEB is now indexed. In that case, the log | ||
| 48 | * reference contains the offset where the bud starts for the purposes of the | ||
| 49 | * journal. | ||
| 50 | * | ||
| 51 | * The journal size has to be limited, because the larger is the journal, the | ||
| 52 | * longer it takes to mount UBIFS (scanning the journal) and the more memory it | ||
| 53 | * takes (indexing in the TNC). | ||
| 54 | * | ||
| 55 | * All the journal write operations like 'ubifs_jnl_update()' here, which write | ||
| 56 | * multiple UBIFS nodes to the journal at one go, are atomic with respect to | ||
| 57 | * unclean reboots. Should the unclean reboot happen, the recovery code drops | ||
| 58 | * all the nodes. | ||
| 59 | */ | ||
| 60 | |||
| 61 | #include "ubifs.h" | ||
| 62 | |||
| 63 | /** | ||
| 64 | * zero_ino_node_unused - zero out unused fields of an on-flash inode node. | ||
| 65 | * @ino: the inode to zero out | ||
| 66 | */ | ||
| 67 | static inline void zero_ino_node_unused(struct ubifs_ino_node *ino) | ||
| 68 | { | ||
| 69 | memset(ino->padding1, 0, 4); | ||
| 70 | memset(ino->padding2, 0, 26); | ||
| 71 | } | ||
| 72 | |||
| 73 | /** | ||
| 74 | * zero_dent_node_unused - zero out unused fields of an on-flash directory | ||
| 75 | * entry node. | ||
| 76 | * @dent: the directory entry to zero out | ||
| 77 | */ | ||
| 78 | static inline void zero_dent_node_unused(struct ubifs_dent_node *dent) | ||
| 79 | { | ||
| 80 | dent->padding1 = 0; | ||
| 81 | memset(dent->padding2, 0, 4); | ||
| 82 | } | ||
| 83 | |||
| 84 | /** | ||
| 85 | * zero_data_node_unused - zero out unused fields of an on-flash data node. | ||
| 86 | * @data: the data node to zero out | ||
| 87 | */ | ||
| 88 | static inline void zero_data_node_unused(struct ubifs_data_node *data) | ||
| 89 | { | ||
| 90 | memset(data->padding, 0, 2); | ||
| 91 | } | ||
| 92 | |||
| 93 | /** | ||
| 94 | * zero_trun_node_unused - zero out unused fields of an on-flash truncation | ||
| 95 | * node. | ||
| 96 | * @trun: the truncation node to zero out | ||
| 97 | */ | ||
| 98 | static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) | ||
| 99 | { | ||
| 100 | memset(trun->padding, 0, 12); | ||
| 101 | } | ||
| 102 | |||
/**
 * reserve_space - reserve space in the journal.
 * @c: UBIFS file-system description object
 * @jhead: journal head number
 * @len: node length
 *
 * This function reserves space in journal head @jhead. If the reservation
 * succeeded, the journal head stays locked and later has to be unlocked using
 * 'release_head()'. 'write_node()' and 'write_head()' functions also unlock
 * it. Returns zero in case of success, %-EAGAIN if commit has to be done, and
 * other negative error codes in case of other failures.
 */
static int reserve_space(struct ubifs_info *c, int jhead, int len)
{
	int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
	struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;

	/*
	 * Typically, the base head has smaller nodes written to it, so it is
	 * better to try to allocate space at the ends of eraseblocks. This is
	 * what the squeeze parameter does.
	 */
	squeeze = (jhead == BASEHD);
again:
	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);

	if (c->ro_media) {
		err = -EROFS;
		goto out_unlock;
	}

	avail = c->leb_size - wbuf->offs - wbuf->used;
	/*
	 * Fast path: the head already points at an LEB with enough room.
	 * Note, on success @wbuf->io_mutex stays locked for the caller.
	 */
	if (wbuf->lnum != -1 && avail >= len)
		return 0;

	/*
	 * Write-buffer wasn't seek'ed or there is not enough space - look for
	 * an LEB with some empty space.
	 */
	lnum = ubifs_find_free_space(c, len, &free, squeeze);
	if (lnum >= 0) {
		/* Found an LEB, add it to the journal head */
		offs = c->leb_size - free;
		err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
		if (err)
			goto out_return;
		/* A new bud was successfully allocated and added to the log */
		goto out;
	}

	err = lnum;
	if (err != -ENOSPC)
		goto out_unlock;

	/*
	 * No free space, we have to run garbage collector to make
	 * some. But the write-buffer mutex has to be unlocked because
	 * GC also takes it.
	 */
	dbg_jnl("no free space jhead %d, run GC", jhead);
	mutex_unlock(&wbuf->io_mutex);

	lnum = ubifs_garbage_collect(c, 0);
	if (lnum < 0) {
		err = lnum;
		if (err != -ENOSPC)
			return err;

		/*
		 * GC could not make a free LEB. But someone else may
		 * have allocated new bud for this journal head,
		 * because we dropped @wbuf->io_mutex, so try once
		 * again.
		 */
		dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
		if (retries++ < 2) {
			dbg_jnl("retry (%d)", retries);
			goto again;
		}

		dbg_jnl("return -ENOSPC");
		return err;
	}

	/* GC gave us an LEB; re-take the head lock before using it */
	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
	dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
	avail = c->leb_size - wbuf->offs - wbuf->used;

	if (wbuf->lnum != -1 && avail >= len) {
		/*
		 * Someone else has switched the journal head and we have
		 * enough space now. This happens when more than one process
		 * is trying to write to the same journal head at the same
		 * time.
		 */
		dbg_jnl("return LEB %d back, already have LEB %d:%d",
			lnum, wbuf->lnum, wbuf->offs + wbuf->used);
		err = ubifs_return_leb(c, lnum);
		if (err)
			goto out_unlock;
		return 0;
	}

	err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
	if (err)
		goto out_return;
	offs = 0;

out:
	/* Point the write-buffer at the reserved position of the new bud */
	err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
	if (err)
		goto out_unlock;

	return 0;

out_unlock:
	mutex_unlock(&wbuf->io_mutex);
	return err;

out_return:
	/* An error occurred and the LEB has to be returned to lprops */
	ubifs_assert(err < 0);
	err1 = ubifs_return_leb(c, lnum);
	if (err1 && err == -EAGAIN)
		/*
		 * Return original error code only if it is not %-EAGAIN,
		 * which is not really an error. Otherwise, return the error
		 * code of 'ubifs_return_leb()'.
		 */
		err = err1;
	mutex_unlock(&wbuf->io_mutex);
	return err;
}
| 235 | |||
| 236 | /** | ||
| 237 | * write_node - write node to a journal head. | ||
| 238 | * @c: UBIFS file-system description object | ||
| 239 | * @jhead: journal head | ||
| 240 | * @node: node to write | ||
| 241 | * @len: node length | ||
| 242 | * @lnum: LEB number written is returned here | ||
| 243 | * @offs: offset written is returned here | ||
| 244 | * | ||
| 245 | * This function writes a node to reserved space of journal head @jhead. | ||
| 246 | * Returns zero in case of success and a negative error code in case of | ||
| 247 | * failure. | ||
| 248 | */ | ||
| 249 | static int write_node(struct ubifs_info *c, int jhead, void *node, int len, | ||
| 250 | int *lnum, int *offs) | ||
| 251 | { | ||
| 252 | struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; | ||
| 253 | |||
| 254 | ubifs_assert(jhead != GCHD); | ||
| 255 | |||
| 256 | *lnum = c->jheads[jhead].wbuf.lnum; | ||
| 257 | *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; | ||
| 258 | |||
| 259 | dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); | ||
| 260 | ubifs_prepare_node(c, node, len, 0); | ||
| 261 | |||
| 262 | return ubifs_wbuf_write_nolock(wbuf, node, len); | ||
| 263 | } | ||
| 264 | |||
| 265 | /** | ||
| 266 | * write_head - write data to a journal head. | ||
| 267 | * @c: UBIFS file-system description object | ||
| 268 | * @jhead: journal head | ||
| 269 | * @buf: buffer to write | ||
| 270 | * @len: length to write | ||
| 271 | * @lnum: LEB number written is returned here | ||
| 272 | * @offs: offset written is returned here | ||
| 273 | * @sync: non-zero if the write-buffer has to by synchronized | ||
| 274 | * | ||
| 275 | * This function is the same as 'write_node()' but it does not assume the | ||
| 276 | * buffer it is writing is a node, so it does not prepare it (which means | ||
| 277 | * initializing common header and calculating CRC). | ||
| 278 | */ | ||
| 279 | static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, | ||
| 280 | int *lnum, int *offs, int sync) | ||
| 281 | { | ||
| 282 | int err; | ||
| 283 | struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; | ||
| 284 | |||
| 285 | ubifs_assert(jhead != GCHD); | ||
| 286 | |||
| 287 | *lnum = c->jheads[jhead].wbuf.lnum; | ||
| 288 | *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; | ||
| 289 | dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); | ||
| 290 | |||
| 291 | err = ubifs_wbuf_write_nolock(wbuf, buf, len); | ||
| 292 | if (err) | ||
| 293 | return err; | ||
| 294 | if (sync) | ||
| 295 | err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 296 | return err; | ||
| 297 | } | ||
| 298 | |||
/**
 * make_reservation - reserve journal space.
 * @c: UBIFS file-system description object
 * @jhead: journal head
 * @len: how many bytes to reserve
 *
 * This function makes space reservation in journal head @jhead. The function
 * takes the commit lock and locks the journal head, and the caller has to
 * unlock the head and finish the reservation with 'finish_reservation()'.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 *
 * Note, the journal head may be unlocked as soon as the data is written, while
 * the commit lock has to be released after the data has been added to the
 * TNC.
 */
static int make_reservation(struct ubifs_info *c, int jhead, int len)
{
	int err, cmt_retries = 0, nospc_retries = 0;

again:
	down_read(&c->commit_sem);
	err = reserve_space(c, jhead, len);
	if (!err)
		/* Success - @commit_sem and the head lock stay held */
		return 0;
	up_read(&c->commit_sem);

	if (err == -ENOSPC) {
		/*
		 * GC could not make any progress. We should try to commit
		 * once because it could make some dirty space and GC would
		 * make progress, so make the error -EAGAIN so that the below
		 * will commit and re-try.
		 */
		if (nospc_retries++ < 2) {
			dbg_jnl("no space, retry");
			err = -EAGAIN;
		}

		/*
		 * This means that the budgeting is incorrect. We always have
		 * to be able to write to the media, because all operations are
		 * budgeted. Deletions are not budgeted, though, but we reserve
		 * an extra LEB for them.
		 */
	}

	if (err != -EAGAIN)
		goto out;

	/*
	 * -EAGAIN means that the journal is full or too large, or the above
	 * code wants to do one commit. Do this and re-try.
	 */
	if (cmt_retries > 128) {
		/*
		 * This should not happen unless the journal size limitations
		 * are too tough.
		 */
		ubifs_err("stuck in space allocation");
		err = -ENOSPC;
		goto out;
	} else if (cmt_retries > 32)
		ubifs_warn("too many space allocation re-tries (%d)",
			   cmt_retries);

	dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
		cmt_retries);
	cmt_retries += 1;

	err = ubifs_run_commit(c);
	if (err)
		return err;
	goto again;

out:
	ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
		  len, jhead, err);
	if (err == -ENOSPC) {
		/* These are some budgeting problems, print useful information */
		down_write(&c->commit_sem);
		spin_lock(&c->space_lock);
		dbg_dump_stack();
		dbg_dump_budg(c);
		spin_unlock(&c->space_lock);
		dbg_dump_lprops(c);
		/* @cmt_retries is re-used as a scratch variable here */
		cmt_retries = dbg_check_lprops(c);
		up_write(&c->commit_sem);
	}
	return err;
}
| 390 | |||
| 391 | /** | ||
| 392 | * release_head - release a journal head. | ||
| 393 | * @c: UBIFS file-system description object | ||
| 394 | * @jhead: journal head | ||
| 395 | * | ||
| 396 | * This function releases journal head @jhead which was locked by | ||
| 397 | * the 'make_reservation()' function. It has to be called after each successful | ||
| 398 | * 'make_reservation()' invocation. | ||
| 399 | */ | ||
| 400 | static inline void release_head(struct ubifs_info *c, int jhead) | ||
| 401 | { | ||
| 402 | mutex_unlock(&c->jheads[jhead].wbuf.io_mutex); | ||
| 403 | } | ||
| 404 | |||
/**
 * finish_reservation - finish a reservation.
 * @c: UBIFS file-system description object
 *
 * This function finishes journal space reservation. It must be called after
 * 'make_reservation()'.
 */
static void finish_reservation(struct ubifs_info *c)
{
	/* Drop the read-side commit lock taken by 'make_reservation()' */
	up_read(&c->commit_sem);
}
| 416 | |||
| 417 | /** | ||
| 418 | * get_dent_type - translate VFS inode mode to UBIFS directory entry type. | ||
| 419 | * @mode: inode mode | ||
| 420 | */ | ||
| 421 | static int get_dent_type(int mode) | ||
| 422 | { | ||
| 423 | switch (mode & S_IFMT) { | ||
| 424 | case S_IFREG: | ||
| 425 | return UBIFS_ITYPE_REG; | ||
| 426 | case S_IFDIR: | ||
| 427 | return UBIFS_ITYPE_DIR; | ||
| 428 | case S_IFLNK: | ||
| 429 | return UBIFS_ITYPE_LNK; | ||
| 430 | case S_IFBLK: | ||
| 431 | return UBIFS_ITYPE_BLK; | ||
| 432 | case S_IFCHR: | ||
| 433 | return UBIFS_ITYPE_CHR; | ||
| 434 | case S_IFIFO: | ||
| 435 | return UBIFS_ITYPE_FIFO; | ||
| 436 | case S_IFSOCK: | ||
| 437 | return UBIFS_ITYPE_SOCK; | ||
| 438 | default: | ||
| 439 | BUG(); | ||
| 440 | } | ||
| 441 | return 0; | ||
| 442 | } | ||
| 443 | |||
/**
 * pack_inode - pack an inode node.
 * @c: UBIFS file-system description object
 * @ino: buffer in which to pack inode node
 * @inode: inode to pack
 * @last: indicates the last node of the group
 * @last_reference: non-zero if this is a deletion inode
 *
 * Serializes @inode into the on-flash inode node format at @ino: all
 * multi-byte fields are converted to little-endian, unused space is zeroed,
 * and the node is prepared as a group member via 'ubifs_prep_grp_node()'.
 */
static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
		       const struct inode *inode, int last,
		       int last_reference)
{
	int data_len = 0;
	struct ubifs_inode *ui = ubifs_inode(inode);

	ino->ch.node_type = UBIFS_INO_NODE;
	ino_key_init_flash(c, &ino->key, inode->i_ino);
	ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
	ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec);
	ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
	ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec);
	ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
	ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
	ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
	ino->uid = cpu_to_le32(inode->i_uid);
	ino->gid = cpu_to_le32(inode->i_gid);
	ino->mode = cpu_to_le32(inode->i_mode);
	ino->flags = cpu_to_le32(ui->flags);
	ino->size = cpu_to_le64(ui->ui_size);
	ino->nlink = cpu_to_le32(inode->i_nlink);
	ino->compr_type = cpu_to_le16(ui->compr_type);
	ino->data_len = cpu_to_le32(ui->data_len);
	ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt);
	ino->xattr_size = cpu_to_le32(ui->xattr_size);
	ino->xattr_names = cpu_to_le32(ui->xattr_names);
	zero_ino_node_unused(ino);

	/*
	 * Drop the attached data if this is a deletion inode, the data is not
	 * needed anymore.
	 */
	if (!last_reference) {
		memcpy(ino->data, ui->data, ui->data_len);
		data_len = ui->data_len;
	}

	ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last);
}
| 492 | |||
/**
 * mark_inode_clean - mark UBIFS inode as clean.
 * @c: UBIFS file-system description object
 * @ui: UBIFS inode to mark as clean
 *
 * This helper function marks UBIFS inode @ui as clean by cleaning the
 * @ui->dirty flag and releasing its budget. Note, VFS may still treat the
 * inode as dirty and try to write it back, but 'ubifs_write_inode()' would
 * just do nothing.
 */
static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
{
	/* Only a dirty inode holds budget; release it before clearing */
	if (ui->dirty)
		ubifs_release_dirty_inode_budget(c, ui);
	ui->dirty = 0;
}
| 509 | |||
/**
 * ubifs_jnl_update - update inode.
 * @c: UBIFS file-system description object
 * @dir: parent inode or host inode in case of extended attributes
 * @nm: directory entry name
 * @inode: inode to update
 * @deletion: indicates a directory entry deletion i.e unlink or rmdir
 * @xent: non-zero if the directory entry is an extended attribute entry
 *
 * This function updates an inode by writing a directory entry (or extended
 * attribute entry), the inode itself, and the parent directory inode (or the
 * host inode) to the journal.
 *
 * The function writes the host inode @dir last, which is important in case of
 * extended attributes. Indeed, then we guarantee that if the host inode gets
 * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed,
 * the extended attribute inode gets flushed too. And this is exactly what the
 * user expects - synchronizing the host inode synchronizes its extended
 * attributes. Similarly, this guarantees that if @dir is synchronized, its
 * directory entry corresponding to @nm gets synchronized too.
 *
 * If the inode (@inode) or the parent directory (@dir) are synchronous, this
 * function synchronizes the write-buffer.
 *
 * This function marks the @dir and @inode inodes as clean and returns zero on
 * success. In case of failure, a negative error code is returned.
 */
int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
		     const struct qstr *nm, const struct inode *inode,
		     int deletion, int xent)
{
	int err, dlen, ilen, len, lnum, ino_offs, dent_offs;
	int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir);
	int last_reference = !!(deletion && inode->i_nlink == 0);
	struct ubifs_inode *ui = ubifs_inode(inode);
	struct ubifs_inode *dir_ui = ubifs_inode(dir);
	struct ubifs_dent_node *dent;
	struct ubifs_ino_node *ino;
	union ubifs_key dent_key, ino_key;

	dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu",
		inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino);
	ubifs_assert(dir_ui->data_len == 0);
	ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex));

	/* +1 for the terminating '\0' of the entry name */
	dlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
	ilen = UBIFS_INO_NODE_SZ;

	/*
	 * If the last reference to the inode is being deleted, then there is
	 * no need to attach and write inode data, it is being deleted anyway.
	 * And if the inode is being deleted, no need to synchronize
	 * write-buffer even if the inode is synchronous.
	 */
	if (!last_reference) {
		ilen += ui->data_len;
		sync |= IS_SYNC(inode);
	}

	/* One buffer holds the group: dent, inode, and parent inode nodes */
	aligned_dlen = ALIGN(dlen, 8);
	aligned_ilen = ALIGN(ilen, 8);
	len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ;
	dent = kmalloc(len, GFP_NOFS);
	if (!dent)
		return -ENOMEM;

	/* Make reservation before allocating sequence numbers */
	err = make_reservation(c, BASEHD, len);
	if (err)
		goto out_free;

	if (!xent) {
		dent->ch.node_type = UBIFS_DENT_NODE;
		dent_key_init(c, &dent_key, dir->i_ino, nm);
	} else {
		dent->ch.node_type = UBIFS_XENT_NODE;
		xent_key_init(c, &dent_key, dir->i_ino, nm);
	}

	key_write(c, &dent_key, dent->key);
	/* A deletion entry is encoded with inode number 0 */
	dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino);
	dent->type = get_dent_type(inode->i_mode);
	dent->nlen = cpu_to_le16(nm->len);
	memcpy(dent->name, nm->name, nm->len);
	dent->name[nm->len] = '\0';
	zero_dent_node_unused(dent);
	ubifs_prep_grp_node(c, dent, dlen, 0);

	/* Pack @inode, then @dir last (see the function comment above) */
	ino = (void *)dent + aligned_dlen;
	pack_inode(c, ino, inode, 0, last_reference);
	ino = (void *)ino + aligned_ilen;
	pack_inode(c, ino, dir, 1, 0);

	if (last_reference) {
		/* Record the orphan before the journal write becomes visible */
		err = ubifs_add_orphan(c, inode->i_ino);
		if (err) {
			release_head(c, BASEHD);
			goto out_finish;
		}
	}

	err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
	if (err)
		goto out_release;
	if (!sync) {
		struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;

		ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
		ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino);
	}
	release_head(c, BASEHD);
	kfree(dent);

	if (deletion) {
		err = ubifs_tnc_remove_nm(c, &dent_key, nm);
		if (err)
			goto out_ro;
		/* The just-written deletion dent is immediately dirty space */
		err = ubifs_add_dirt(c, lnum, dlen);
	} else
		err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm);
	if (err)
		goto out_ro;

	/*
	 * Note, we do not remove the inode from TNC even if the last reference
	 * to it has just been deleted, because the inode may still be opened.
	 * Instead, the inode has been added to orphan lists and the orphan
	 * subsystem will take further care about it.
	 */
	ino_key_init(c, &ino_key, inode->i_ino);
	ino_offs = dent_offs + aligned_dlen;
	err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen);
	if (err)
		goto out_ro;

	ino_key_init(c, &ino_key, dir->i_ino);
	ino_offs += aligned_ilen;
	err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ);
	if (err)
		goto out_ro;

	finish_reservation(c);
	spin_lock(&ui->ui_lock);
	ui->synced_i_size = ui->ui_size;
	spin_unlock(&ui->ui_lock);
	mark_inode_clean(c, ui);
	mark_inode_clean(c, dir_ui);
	return 0;

out_finish:
	finish_reservation(c);
out_free:
	kfree(dent);
	return err;

out_release:
	release_head(c, BASEHD);
out_ro:
	/* Journal and TNC may now disagree - force read-only mode */
	ubifs_ro_mode(c, err);
	if (last_reference)
		ubifs_delete_orphan(c, inode->i_ino);
	finish_reservation(c);
	return err;
}
| 674 | |||
| 675 | /** | ||
| 676 | * ubifs_jnl_write_data - write a data node to the journal. | ||
| 677 | * @c: UBIFS file-system description object | ||
| 678 | * @inode: inode the data node belongs to | ||
| 679 | * @key: node key | ||
| 680 | * @buf: buffer to write | ||
| 681 | * @len: data length (must not exceed %UBIFS_BLOCK_SIZE) | ||
| 682 | * | ||
| 683 | * This function writes a data node to the journal. Returns %0 if the data node | ||
| 684 | * was successfully written, and a negative error code in case of failure. | ||
| 685 | */ | ||
| 686 | int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | ||
| 687 | const union ubifs_key *key, const void *buf, int len) | ||
| 688 | { | ||
| 689 | struct ubifs_data_node *data; | ||
| 690 | int err, lnum, offs, compr_type, out_len; | ||
| 691 | int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; | ||
| 692 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 693 | |||
| 694 | dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key), | ||
| 695 | key_block(c, key), len, DBGKEY(key)); | ||
| 696 | ubifs_assert(len <= UBIFS_BLOCK_SIZE); | ||
| 697 | |||
| 698 | data = kmalloc(dlen, GFP_NOFS); | ||
| 699 | if (!data) | ||
| 700 | return -ENOMEM; | ||
| 701 | |||
| 702 | data->ch.node_type = UBIFS_DATA_NODE; | ||
| 703 | key_write(c, key, &data->key); | ||
| 704 | data->size = cpu_to_le32(len); | ||
| 705 | zero_data_node_unused(data); | ||
| 706 | |||
| 707 | if (!(ui->flags && UBIFS_COMPR_FL)) | ||
| 708 | /* Compression is disabled for this inode */ | ||
| 709 | compr_type = UBIFS_COMPR_NONE; | ||
| 710 | else | ||
| 711 | compr_type = ui->compr_type; | ||
| 712 | |||
| 713 | out_len = dlen - UBIFS_DATA_NODE_SZ; | ||
| 714 | ubifs_compress(buf, len, &data->data, &out_len, &compr_type); | ||
| 715 | ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); | ||
| 716 | |||
| 717 | dlen = UBIFS_DATA_NODE_SZ + out_len; | ||
| 718 | data->compr_type = cpu_to_le16(compr_type); | ||
| 719 | |||
| 720 | /* Make reservation before allocating sequence numbers */ | ||
| 721 | err = make_reservation(c, DATAHD, dlen); | ||
| 722 | if (err) | ||
| 723 | goto out_free; | ||
| 724 | |||
| 725 | err = write_node(c, DATAHD, data, dlen, &lnum, &offs); | ||
| 726 | if (err) | ||
| 727 | goto out_release; | ||
| 728 | ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key)); | ||
| 729 | release_head(c, DATAHD); | ||
| 730 | |||
| 731 | err = ubifs_tnc_add(c, key, lnum, offs, dlen); | ||
| 732 | if (err) | ||
| 733 | goto out_ro; | ||
| 734 | |||
| 735 | finish_reservation(c); | ||
| 736 | kfree(data); | ||
| 737 | return 0; | ||
| 738 | |||
| 739 | out_release: | ||
| 740 | release_head(c, DATAHD); | ||
| 741 | out_ro: | ||
| 742 | ubifs_ro_mode(c, err); | ||
| 743 | finish_reservation(c); | ||
| 744 | out_free: | ||
| 745 | kfree(data); | ||
| 746 | return err; | ||
| 747 | } | ||
| 748 | |||
/**
 * ubifs_jnl_write_inode - flush inode to the journal.
 * @c: UBIFS file-system description object
 * @inode: inode to flush
 * @deletion: inode has been deleted
 *
 * This function writes inode @inode to the journal. If the inode is
 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
 * success and a negative error code in case of failure.
 */
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
			  int deletion)
{
	int err, len, lnum, offs, sync = 0;
	struct ubifs_ino_node *ino;
	struct ubifs_inode *ui = ubifs_inode(inode);

	dbg_jnl("ino %lu%s", inode->i_ino,
		deletion ? " (last reference)" : "");
	if (deletion)
		ubifs_assert(inode->i_nlink == 0);

	len = UBIFS_INO_NODE_SZ;
	/*
	 * If the inode is being deleted, do not write the attached data. No
	 * need to synchronize the write-buffer either.
	 */
	if (!deletion) {
		len += ui->data_len;
		sync = IS_SYNC(inode);
	}
	ino = kmalloc(len, GFP_NOFS);
	if (!ino)
		return -ENOMEM;

	/* Make reservation before allocating sequence numbers */
	err = make_reservation(c, BASEHD, len);
	if (err)
		goto out_free;

	/* @deletion doubles as the 'last_reference' flag for pack_inode() */
	pack_inode(c, ino, inode, 1, deletion);
	err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
	if (err)
		goto out_release;
	if (!sync)
		ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
					  inode->i_ino);
	release_head(c, BASEHD);

	if (deletion) {
		/* Purge the inode from TNC and retire its orphan record */
		err = ubifs_tnc_remove_ino(c, inode->i_ino);
		if (err)
			goto out_ro;
		ubifs_delete_orphan(c, inode->i_ino);
		err = ubifs_add_dirt(c, lnum, len);
	} else {
		union ubifs_key key;

		ino_key_init(c, &key, inode->i_ino);
		err = ubifs_tnc_add(c, &key, lnum, offs, len);
	}
	if (err)
		goto out_ro;

	finish_reservation(c);
	spin_lock(&ui->ui_lock);
	ui->synced_i_size = ui->ui_size;
	spin_unlock(&ui->ui_lock);
	kfree(ino);
	return 0;

out_release:
	release_head(c, BASEHD);
out_ro:
	ubifs_ro_mode(c, err);
	finish_reservation(c);
out_free:
	kfree(ino);
	return err;
}
| 829 | |||
/**
 * ubifs_jnl_rename - rename a directory entry.
 * @c: UBIFS file-system description object
 * @old_dir: parent inode of directory entry to rename
 * @old_dentry: directory entry to rename
 * @new_dir: parent inode of directory entry to rename
 * @new_dentry: new directory entry (or directory entry to replace)
 * @sync: non-zero if the write-buffer has to be synchronized
 *
 * This function implements the re-name operation which may involve writing up
 * to 3 inodes and 2 directory entries. It marks the written inodes as clean
 * and returns zero on success. In case of failure, a negative error code is
 * returned.
 *
 * The group is laid out as: new dent, deletion dent for the old name,
 * optionally the replaced inode (@new_dentry->d_inode), then @old_dir and -
 * if this is a cross-directory move - @new_dir. The TNC-update offsets below
 * must mirror this packing order exactly.
 */
int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
		     const struct dentry *old_dentry,
		     const struct inode *new_dir,
		     const struct dentry *new_dentry, int sync)
{
	void *p;
	union ubifs_key key;
	struct ubifs_dent_node *dent, *dent2;
	int err, dlen1, dlen2, ilen, lnum, offs, len;
	const struct inode *old_inode = old_dentry->d_inode;
	const struct inode *new_inode = new_dentry->d_inode;
	int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
	int last_reference = !!(new_inode && new_inode->i_nlink == 0);
	int move = (old_dir != new_dir);
	struct ubifs_inode *uninitialized_var(new_ui);

	dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
		old_dentry->d_name.len, old_dentry->d_name.name,
		old_dir->i_ino, new_dentry->d_name.len,
		new_dentry->d_name.name, new_dir->i_ino);
	ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
	ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
	ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
	ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));

	/* +1 for the terminating '\0' of each entry name */
	dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1;
	dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1;
	if (new_inode) {
		new_ui = ubifs_inode(new_inode);
		ubifs_assert(mutex_is_locked(&new_ui->ui_mutex));
		ilen = UBIFS_INO_NODE_SZ;
		if (!last_reference)
			ilen += new_ui->data_len;
	} else
		ilen = 0;

	aligned_dlen1 = ALIGN(dlen1, 8);
	aligned_dlen2 = ALIGN(dlen2, 8);
	len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
	if (old_dir != new_dir)
		len += plen;
	dent = kmalloc(len, GFP_NOFS);
	if (!dent)
		return -ENOMEM;

	/* Make reservation before allocating sequence numbers */
	err = make_reservation(c, BASEHD, len);
	if (err)
		goto out_free;

	/* Make new dent */
	dent->ch.node_type = UBIFS_DENT_NODE;
	dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name);
	dent->inum = cpu_to_le64(old_inode->i_ino);
	dent->type = get_dent_type(old_inode->i_mode);
	dent->nlen = cpu_to_le16(new_dentry->d_name.len);
	memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len);
	dent->name[new_dentry->d_name.len] = '\0';
	zero_dent_node_unused(dent);
	ubifs_prep_grp_node(c, dent, dlen1, 0);

	/* Make deletion dent (inum 0 marks a deletion entry) */
	dent2 = (void *)dent + aligned_dlen1;
	dent2->ch.node_type = UBIFS_DENT_NODE;
	dent_key_init_flash(c, &dent2->key, old_dir->i_ino,
			    &old_dentry->d_name);
	dent2->inum = 0;
	dent2->type = DT_UNKNOWN;
	dent2->nlen = cpu_to_le16(old_dentry->d_name.len);
	memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len);
	dent2->name[old_dentry->d_name.len] = '\0';
	zero_dent_node_unused(dent2);
	ubifs_prep_grp_node(c, dent2, dlen2, 0);

	p = (void *)dent2 + aligned_dlen2;
	if (new_inode) {
		pack_inode(c, p, new_inode, 0, last_reference);
		p += ALIGN(ilen, 8);
	}

	if (!move)
		pack_inode(c, p, old_dir, 1, 0);
	else {
		pack_inode(c, p, old_dir, 0, 0);
		p += ALIGN(plen, 8);
		pack_inode(c, p, new_dir, 1, 0);
	}

	if (last_reference) {
		/* The replaced inode loses its last link - record orphan */
		err = ubifs_add_orphan(c, new_inode->i_ino);
		if (err) {
			release_head(c, BASEHD);
			goto out_finish;
		}
	}

	err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
	if (err)
		goto out_release;
	if (!sync) {
		struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;

		ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino);
		ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino);
		if (new_inode)
			ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
						  new_inode->i_ino);
	}
	release_head(c, BASEHD);

	dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name);
	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name);
	if (err)
		goto out_ro;

	/* The deletion dent becomes dirty space as soon as it is written */
	err = ubifs_add_dirt(c, lnum, dlen2);
	if (err)
		goto out_ro;

	dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name);
	err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name);
	if (err)
		goto out_ro;

	/* Offsets below track the packing order used above */
	offs += aligned_dlen1 + aligned_dlen2;
	if (new_inode) {
		ino_key_init(c, &key, new_inode->i_ino);
		err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
		if (err)
			goto out_ro;
		offs += ALIGN(ilen, 8);
	}

	ino_key_init(c, &key, old_dir->i_ino);
	err = ubifs_tnc_add(c, &key, lnum, offs, plen);
	if (err)
		goto out_ro;

	if (old_dir != new_dir) {
		offs += ALIGN(plen, 8);
		ino_key_init(c, &key, new_dir->i_ino);
		err = ubifs_tnc_add(c, &key, lnum, offs, plen);
		if (err)
			goto out_ro;
	}

	finish_reservation(c);
	if (new_inode) {
		mark_inode_clean(c, new_ui);
		spin_lock(&new_ui->ui_lock);
		new_ui->synced_i_size = new_ui->ui_size;
		spin_unlock(&new_ui->ui_lock);
	}
	mark_inode_clean(c, ubifs_inode(old_dir));
	if (move)
		mark_inode_clean(c, ubifs_inode(new_dir));
	kfree(dent);
	return 0;

out_release:
	release_head(c, BASEHD);
out_ro:
	/* Journal and TNC may now disagree - force read-only mode */
	ubifs_ro_mode(c, err);
	if (last_reference)
		ubifs_delete_orphan(c, new_inode->i_ino);
out_finish:
	finish_reservation(c);
out_free:
	kfree(dent);
	return err;
}
| 1015 | |||
| 1016 | /** | ||
| 1017 | * recomp_data_node - re-compress a truncated data node. | ||
| 1018 | * @dn: data node to re-compress | ||
| 1019 | * @new_len: new length | ||
| 1020 | * | ||
| 1021 | * This function is used when an inode is truncated and the last data node of | ||
| 1022 | * the inode has to be re-compressed and re-written. | ||
| 1023 | */ | ||
| 1024 | static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) | ||
| 1025 | { | ||
| 1026 | void *buf; | ||
| 1027 | int err, len, compr_type, out_len; | ||
| 1028 | |||
| 1029 | out_len = le32_to_cpu(dn->size); | ||
| 1030 | buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS); | ||
| 1031 | if (!buf) | ||
| 1032 | return -ENOMEM; | ||
| 1033 | |||
| 1034 | len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; | ||
| 1035 | compr_type = le16_to_cpu(dn->compr_type); | ||
| 1036 | err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type); | ||
| 1037 | if (err) | ||
| 1038 | goto out; | ||
| 1039 | |||
| 1040 | ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type); | ||
| 1041 | ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); | ||
| 1042 | dn->compr_type = cpu_to_le16(compr_type); | ||
| 1043 | dn->size = cpu_to_le32(*new_len); | ||
| 1044 | *new_len = UBIFS_DATA_NODE_SZ + out_len; | ||
| 1045 | out: | ||
| 1046 | kfree(buf); | ||
| 1047 | return err; | ||
| 1048 | } | ||
| 1049 | |||
/**
 * ubifs_jnl_truncate - update the journal for a truncation.
 * @c: UBIFS file-system description object
 * @inode: inode to truncate
 * @old_size: old size
 * @new_size: new size
 *
 * When the size of a file decreases due to truncation, a truncation node is
 * written, the journal tree is updated, and the last data block is re-written
 * if it has been affected. The inode is also updated in order to synchronize
 * the new inode size.
 *
 * This function marks the inode as clean and returns zero on success. In case
 * of failure, a negative error code is returned.
 */
int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
		       loff_t old_size, loff_t new_size)
{
	union ubifs_key key, to_key;
	struct ubifs_ino_node *ino;
	struct ubifs_trun_node *trun;
	struct ubifs_data_node *uninitialized_var(dn);
	int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
	struct ubifs_inode *ui = ubifs_inode(inode);
	ino_t inum = inode->i_ino;
	unsigned int blk;

	dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
	ubifs_assert(!ui->data_len);
	ubifs_assert(S_ISREG(inode->i_mode));
	ubifs_assert(mutex_is_locked(&ui->ui_mutex));

	/*
	 * One buffer holds the whole group: inode node, truncation node, and
	 * possibly the re-written last data block (worst-case compressed).
	 */
	sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
	     UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
	ino = kmalloc(sz, GFP_NOFS);
	if (!ino)
		return -ENOMEM;

	trun = (void *)ino + UBIFS_INO_NODE_SZ;
	trun->ch.node_type = UBIFS_TRUN_NODE;
	trun->inum = cpu_to_le32(inum);
	trun->old_size = cpu_to_le64(old_size);
	trun->new_size = cpu_to_le64(new_size);
	zero_trun_node_unused(trun);

	/* Non-zero when the new size cuts through the middle of a block */
	dlen = new_size & (UBIFS_BLOCK_SIZE - 1);
	if (dlen) {
		/* Get last data block so it can be truncated */
		dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
		blk = new_size >> UBIFS_BLOCK_SHIFT;
		data_key_init(c, &key, inum, blk);
		dbg_jnl("last block key %s", DBGKEY(&key));
		err = ubifs_tnc_lookup(c, &key, dn);
		if (err == -ENOENT)
			dlen = 0; /* Not found (so it is a hole) */
		else if (err)
			goto out_free;
		else {
			if (le32_to_cpu(dn->size) <= dlen)
				dlen = 0; /* Nothing to do */
			else {
				int compr_type = le16_to_cpu(dn->compr_type);

				if (compr_type != UBIFS_COMPR_NONE) {
					err = recomp_data_node(dn, &dlen);
					if (err)
						goto out_free;
				} else {
					dn->size = cpu_to_le32(dlen);
					dlen += UBIFS_DATA_NODE_SZ;
				}
				zero_data_node_unused(dn);
			}
		}
	}

	/* Must make reservation before allocating sequence numbers */
	len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ;
	if (dlen)
		len += dlen;
	err = make_reservation(c, BASEHD, len);
	if (err)
		goto out_free;

	pack_inode(c, ino, inode, 0, 0);
	ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
	if (dlen)
		ubifs_prep_grp_node(c, dn, dlen, 1);

	err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
	if (err)
		goto out_release;
	if (!sync)
		ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum);
	release_head(c, BASEHD);

	if (dlen) {
		/* @key still holds the last data block's key from above */
		sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
		err = ubifs_tnc_add(c, &key, lnum, sz, dlen);
		if (err)
			goto out_ro;
	}

	ino_key_init(c, &key, inum);
	err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ);
	if (err)
		goto out_ro;

	/* Truncation nodes are replayed, not indexed - account as dirt */
	err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ);
	if (err)
		goto out_ro;

	/* Remove TNC entries for all data blocks beyond the new size */
	bit = new_size & (UBIFS_BLOCK_SIZE - 1);
	blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0);
	data_key_init(c, &key, inum, blk);

	bit = old_size & (UBIFS_BLOCK_SIZE - 1);
	blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
	data_key_init(c, &to_key, inum, blk);

	err = ubifs_tnc_remove_range(c, &key, &to_key);
	if (err)
		goto out_ro;

	finish_reservation(c);
	spin_lock(&ui->ui_lock);
	ui->synced_i_size = ui->ui_size;
	spin_unlock(&ui->ui_lock);
	mark_inode_clean(c, ui);
	kfree(ino);
	return 0;

out_release:
	release_head(c, BASEHD);
out_ro:
	ubifs_ro_mode(c, err);
	finish_reservation(c);
out_free:
	kfree(ino);
	return err;
}
| 1191 | |||
| 1192 | #ifdef CONFIG_UBIFS_FS_XATTR | ||
| 1193 | |||
/**
 * ubifs_jnl_delete_xattr - delete an extended attribute.
 * @c: UBIFS file-system description object
 * @host: host inode
 * @inode: extended attribute inode
 * @nm: extended attribute entry name
 *
 * This function deletes an extended attribute, which is very similar to
 * un-linking regular files - it writes a deletion xentry, a deletion inode and
 * updates the target inode. Returns zero in case of success and a negative
 * error code in case of failure.
 */
int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
			   const struct inode *inode, const struct qstr *nm)
{
	int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen;
	struct ubifs_dent_node *xent;
	struct ubifs_ino_node *ino;
	union ubifs_key xent_key, key1, key2;
	int sync = IS_DIRSYNC(host);
	struct ubifs_inode *host_ui = ubifs_inode(host);

	dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d",
		host->i_ino, inode->i_ino, nm->name,
		ubifs_inode(inode)->data_len);
	ubifs_assert(inode->i_nlink == 0);
	ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));

	/*
	 * Since we are deleting the inode, we do not bother to attach any data
	 * to it and assume its length is %UBIFS_INO_NODE_SZ.
	 */
	xlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
	aligned_xlen = ALIGN(xlen, 8);
	hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
	/* Total journal write: deletion xentry + xattr inode + host inode */
	len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);

	xent = kmalloc(len, GFP_NOFS);
	if (!xent)
		return -ENOMEM;

	/* Make reservation before allocating sequence numbers */
	err = make_reservation(c, BASEHD, len);
	if (err) {
		kfree(xent);
		return err;
	}

	/* An inum of zero marks this xentry as a deletion entry */
	xent->ch.node_type = UBIFS_XENT_NODE;
	xent_key_init(c, &xent_key, host->i_ino, nm);
	key_write(c, &xent_key, xent->key);
	xent->inum = 0;
	xent->type = get_dent_type(inode->i_mode);
	xent->nlen = cpu_to_le16(nm->len);
	memcpy(xent->name, nm->name, nm->len);
	xent->name[nm->len] = '\0';
	zero_dent_node_unused(xent);
	ubifs_prep_grp_node(c, xent, xlen, 0);

	/* Pack the xattr inode and then the host inode after the xentry */
	ino = (void *)xent + aligned_xlen;
	pack_inode(c, ino, inode, 0, 1);
	ino = (void *)ino + UBIFS_INO_NODE_SZ;
	pack_inode(c, ino, host, 1, 0);

	err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
	if (!sync && !err)
		ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino);
	release_head(c, BASEHD);
	kfree(xent);
	if (err)
		goto out_ro;

	/* Remove the extended attribute entry from TNC */
	err = ubifs_tnc_remove_nm(c, &xent_key, nm);
	if (err)
		goto out_ro;
	err = ubifs_add_dirt(c, lnum, xlen);
	if (err)
		goto out_ro;

	/*
	 * Remove all nodes belonging to the extended attribute inode from TNC.
	 * Well, there actually must be only one node - the inode itself.
	 */
	lowest_ino_key(c, &key1, inode->i_ino);
	highest_ino_key(c, &key2, inode->i_ino);
	err = ubifs_tnc_remove_range(c, &key1, &key2);
	if (err)
		goto out_ro;
	err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ);
	if (err)
		goto out_ro;

	/* And update TNC with the new host inode position */
	ino_key_init(c, &key1, host->i_ino);
	err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen);
	if (err)
		goto out_ro;

	finish_reservation(c);
	/* The host inode is now fully on the media */
	spin_lock(&host_ui->ui_lock);
	host_ui->synced_i_size = host_ui->ui_size;
	spin_unlock(&host_ui->ui_lock);
	mark_inode_clean(c, host_ui);
	return 0;

out_ro:
	/* Journal and TNC may be inconsistent now - go read-only */
	ubifs_ro_mode(c, err);
	finish_reservation(c);
	return err;
}
| 1305 | |||
| 1306 | /** | ||
| 1307 | * ubifs_jnl_change_xattr - change an extended attribute. | ||
| 1308 | * @c: UBIFS file-system description object | ||
| 1309 | * @inode: extended attribute inode | ||
| 1310 | * @host: host inode | ||
| 1311 | * | ||
| 1312 | * This function writes the updated version of an extended attribute inode and | ||
| 1313 | * the host inode tho the journal (to the base head). The host inode is written | ||
| 1314 | * after the extended attribute inode in order to guarantee that the extended | ||
| 1315 | * attribute will be flushed when the inode is synchronized by 'fsync()' and | ||
| 1316 | * consequently, the write-buffer is synchronized. This function returns zero | ||
| 1317 | * in case of success and a negative error code in case of failure. | ||
| 1318 | */ | ||
| 1319 | int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, | ||
| 1320 | const struct inode *host) | ||
| 1321 | { | ||
| 1322 | int err, len1, len2, aligned_len, aligned_len1, lnum, offs; | ||
| 1323 | struct ubifs_inode *host_ui = ubifs_inode(inode); | ||
| 1324 | struct ubifs_ino_node *ino; | ||
| 1325 | union ubifs_key key; | ||
| 1326 | int sync = IS_DIRSYNC(host); | ||
| 1327 | |||
| 1328 | dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino); | ||
| 1329 | ubifs_assert(host->i_nlink > 0); | ||
| 1330 | ubifs_assert(inode->i_nlink > 0); | ||
| 1331 | ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); | ||
| 1332 | |||
| 1333 | len1 = UBIFS_INO_NODE_SZ + host_ui->data_len; | ||
| 1334 | len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len; | ||
| 1335 | aligned_len1 = ALIGN(len1, 8); | ||
| 1336 | aligned_len = aligned_len1 + ALIGN(len2, 8); | ||
| 1337 | |||
| 1338 | ino = kmalloc(aligned_len, GFP_NOFS); | ||
| 1339 | if (!ino) | ||
| 1340 | return -ENOMEM; | ||
| 1341 | |||
| 1342 | /* Make reservation before allocating sequence numbers */ | ||
| 1343 | err = make_reservation(c, BASEHD, aligned_len); | ||
| 1344 | if (err) | ||
| 1345 | goto out_free; | ||
| 1346 | |||
| 1347 | pack_inode(c, ino, host, 0, 0); | ||
| 1348 | pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); | ||
| 1349 | |||
| 1350 | err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); | ||
| 1351 | if (!sync && !err) { | ||
| 1352 | struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; | ||
| 1353 | |||
| 1354 | ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino); | ||
| 1355 | ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); | ||
| 1356 | } | ||
| 1357 | release_head(c, BASEHD); | ||
| 1358 | if (err) | ||
| 1359 | goto out_ro; | ||
| 1360 | |||
| 1361 | ino_key_init(c, &key, host->i_ino); | ||
| 1362 | err = ubifs_tnc_add(c, &key, lnum, offs, len1); | ||
| 1363 | if (err) | ||
| 1364 | goto out_ro; | ||
| 1365 | |||
| 1366 | ino_key_init(c, &key, inode->i_ino); | ||
| 1367 | err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2); | ||
| 1368 | if (err) | ||
| 1369 | goto out_ro; | ||
| 1370 | |||
| 1371 | finish_reservation(c); | ||
| 1372 | spin_lock(&host_ui->ui_lock); | ||
| 1373 | host_ui->synced_i_size = host_ui->ui_size; | ||
| 1374 | spin_unlock(&host_ui->ui_lock); | ||
| 1375 | mark_inode_clean(c, host_ui); | ||
| 1376 | kfree(ino); | ||
| 1377 | return 0; | ||
| 1378 | |||
| 1379 | out_ro: | ||
| 1380 | ubifs_ro_mode(c, err); | ||
| 1381 | finish_reservation(c); | ||
| 1382 | out_free: | ||
| 1383 | kfree(ino); | ||
| 1384 | return err; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | #endif /* CONFIG_UBIFS_FS_XATTR */ | ||
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h new file mode 100644 index 000000000000..8f7476007549 --- /dev/null +++ b/fs/ubifs/key.h | |||
| @@ -0,0 +1,533 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
 * This header contains various key-related definitions and helper functions.
| 25 | * UBIFS allows several key schemes, so we access key fields only via these | ||
| 26 | * helpers. At the moment only one key scheme is supported. | ||
| 27 | * | ||
| 28 | * Simple key scheme | ||
| 29 | * ~~~~~~~~~~~~~~~~~ | ||
| 30 | * | ||
| 31 | * Keys are 64-bits long. First 32-bits are inode number (parent inode number | ||
| 32 | * in case of direntry key). Next 3 bits are node type. The last 29 bits are | ||
| 33 | * 4KiB offset in case of inode node, and direntry hash in case of a direntry | ||
| 34 | * node. We use "r5" hash borrowed from reiserfs. | ||
| 35 | */ | ||
| 36 | |||
| 37 | #ifndef __UBIFS_KEY_H__ | ||
| 38 | #define __UBIFS_KEY_H__ | ||
| 39 | |||
| 40 | /** | ||
| 41 | * key_r5_hash - R5 hash function (borrowed from reiserfs). | ||
| 42 | * @s: direntry name | ||
| 43 | * @len: name length | ||
| 44 | */ | ||
| 45 | static inline uint32_t key_r5_hash(const char *s, int len) | ||
| 46 | { | ||
| 47 | uint32_t a = 0; | ||
| 48 | const signed char *str = (const signed char *)s; | ||
| 49 | |||
| 50 | while (*str) { | ||
| 51 | a += *str << 4; | ||
| 52 | a += *str >> 4; | ||
| 53 | a *= 11; | ||
| 54 | str++; | ||
| 55 | } | ||
| 56 | |||
| 57 | a &= UBIFS_S_KEY_HASH_MASK; | ||
| 58 | |||
| 59 | /* | ||
| 60 | * We use hash values as offset in directories, so values %0 and %1 are | ||
| 61 | * reserved for "." and "..". %2 is reserved for "end of readdir" | ||
| 62 | * marker. | ||
| 63 | */ | ||
| 64 | if (unlikely(a >= 0 && a <= 2)) | ||
| 65 | a += 3; | ||
| 66 | return a; | ||
| 67 | } | ||
| 68 | |||
| 69 | /** | ||
| 70 | * key_test_hash - testing hash function. | ||
| 71 | * @str: direntry name | ||
| 72 | * @len: name length | ||
| 73 | */ | ||
| 74 | static inline uint32_t key_test_hash(const char *str, int len) | ||
| 75 | { | ||
| 76 | uint32_t a = 0; | ||
| 77 | |||
| 78 | len = min_t(uint32_t, len, 4); | ||
| 79 | memcpy(&a, str, len); | ||
| 80 | a &= UBIFS_S_KEY_HASH_MASK; | ||
| 81 | if (unlikely(a >= 0 && a <= 2)) | ||
| 82 | a += 3; | ||
| 83 | return a; | ||
| 84 | } | ||
| 85 | |||
| 86 | /** | ||
| 87 | * ino_key_init - initialize inode key. | ||
| 88 | * @c: UBIFS file-system description object | ||
| 89 | * @key: key to initialize | ||
| 90 | * @inum: inode number | ||
| 91 | */ | ||
| 92 | static inline void ino_key_init(const struct ubifs_info *c, | ||
| 93 | union ubifs_key *key, ino_t inum) | ||
| 94 | { | ||
| 95 | key->u32[0] = inum; | ||
| 96 | key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; | ||
| 97 | } | ||
| 98 | |||
| 99 | /** | ||
| 100 | * ino_key_init_flash - initialize on-flash inode key. | ||
| 101 | * @c: UBIFS file-system description object | ||
| 102 | * @k: key to initialize | ||
| 103 | * @inum: inode number | ||
| 104 | */ | ||
| 105 | static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, | ||
| 106 | ino_t inum) | ||
| 107 | { | ||
| 108 | union ubifs_key *key = k; | ||
| 109 | |||
| 110 | key->j32[0] = cpu_to_le32(inum); | ||
| 111 | key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); | ||
| 112 | memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); | ||
| 113 | } | ||
| 114 | |||
| 115 | /** | ||
| 116 | * lowest_ino_key - get the lowest possible inode key. | ||
| 117 | * @c: UBIFS file-system description object | ||
| 118 | * @key: key to initialize | ||
| 119 | * @inum: inode number | ||
| 120 | */ | ||
| 121 | static inline void lowest_ino_key(const struct ubifs_info *c, | ||
| 122 | union ubifs_key *key, ino_t inum) | ||
| 123 | { | ||
| 124 | key->u32[0] = inum; | ||
| 125 | key->u32[1] = 0; | ||
| 126 | } | ||
| 127 | |||
| 128 | /** | ||
| 129 | * highest_ino_key - get the highest possible inode key. | ||
| 130 | * @c: UBIFS file-system description object | ||
| 131 | * @key: key to initialize | ||
| 132 | * @inum: inode number | ||
| 133 | */ | ||
| 134 | static inline void highest_ino_key(const struct ubifs_info *c, | ||
| 135 | union ubifs_key *key, ino_t inum) | ||
| 136 | { | ||
| 137 | key->u32[0] = inum; | ||
| 138 | key->u32[1] = 0xffffffff; | ||
| 139 | } | ||
| 140 | |||
| 141 | /** | ||
| 142 | * dent_key_init - initialize directory entry key. | ||
| 143 | * @c: UBIFS file-system description object | ||
| 144 | * @key: key to initialize | ||
| 145 | * @inum: parent inode number | ||
| 146 | * @nm: direntry name and length | ||
| 147 | */ | ||
| 148 | static inline void dent_key_init(const struct ubifs_info *c, | ||
| 149 | union ubifs_key *key, ino_t inum, | ||
| 150 | const struct qstr *nm) | ||
| 151 | { | ||
| 152 | uint32_t hash = c->key_hash(nm->name, nm->len); | ||
| 153 | |||
| 154 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
| 155 | key->u32[0] = inum; | ||
| 156 | key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); | ||
| 157 | } | ||
| 158 | |||
| 159 | /** | ||
| 160 | * dent_key_init_hash - initialize directory entry key without re-calculating | ||
| 161 | * hash function. | ||
| 162 | * @c: UBIFS file-system description object | ||
| 163 | * @key: key to initialize | ||
| 164 | * @inum: parent inode number | ||
| 165 | * @hash: direntry name hash | ||
| 166 | */ | ||
| 167 | static inline void dent_key_init_hash(const struct ubifs_info *c, | ||
| 168 | union ubifs_key *key, ino_t inum, | ||
| 169 | uint32_t hash) | ||
| 170 | { | ||
| 171 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
| 172 | key->u32[0] = inum; | ||
| 173 | key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); | ||
| 174 | } | ||
| 175 | |||
| 176 | /** | ||
| 177 | * dent_key_init_flash - initialize on-flash directory entry key. | ||
| 178 | * @c: UBIFS file-system description object | ||
| 179 | * @k: key to initialize | ||
| 180 | * @inum: parent inode number | ||
| 181 | * @nm: direntry name and length | ||
| 182 | */ | ||
| 183 | static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, | ||
| 184 | ino_t inum, const struct qstr *nm) | ||
| 185 | { | ||
| 186 | union ubifs_key *key = k; | ||
| 187 | uint32_t hash = c->key_hash(nm->name, nm->len); | ||
| 188 | |||
| 189 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
| 190 | key->j32[0] = cpu_to_le32(inum); | ||
| 191 | key->j32[1] = cpu_to_le32(hash | | ||
| 192 | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); | ||
| 193 | memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); | ||
| 194 | } | ||
| 195 | |||
| 196 | /** | ||
| 197 | * lowest_dent_key - get the lowest possible directory entry key. | ||
| 198 | * @c: UBIFS file-system description object | ||
| 199 | * @key: where to store the lowest key | ||
| 200 | * @inum: parent inode number | ||
| 201 | */ | ||
| 202 | static inline void lowest_dent_key(const struct ubifs_info *c, | ||
| 203 | union ubifs_key *key, ino_t inum) | ||
| 204 | { | ||
| 205 | key->u32[0] = inum; | ||
| 206 | key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; | ||
| 207 | } | ||
| 208 | |||
| 209 | /** | ||
| 210 | * xent_key_init - initialize extended attribute entry key. | ||
| 211 | * @c: UBIFS file-system description object | ||
| 212 | * @key: key to initialize | ||
| 213 | * @inum: host inode number | ||
| 214 | * @nm: extended attribute entry name and length | ||
| 215 | */ | ||
| 216 | static inline void xent_key_init(const struct ubifs_info *c, | ||
| 217 | union ubifs_key *key, ino_t inum, | ||
| 218 | const struct qstr *nm) | ||
| 219 | { | ||
| 220 | uint32_t hash = c->key_hash(nm->name, nm->len); | ||
| 221 | |||
| 222 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
| 223 | key->u32[0] = inum; | ||
| 224 | key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); | ||
| 225 | } | ||
| 226 | |||
| 227 | /** | ||
| 228 | * xent_key_init_hash - initialize extended attribute entry key without | ||
| 229 | * re-calculating hash function. | ||
| 230 | * @c: UBIFS file-system description object | ||
| 231 | * @key: key to initialize | ||
| 232 | * @inum: host inode number | ||
| 233 | * @hash: extended attribute entry name hash | ||
| 234 | */ | ||
| 235 | static inline void xent_key_init_hash(const struct ubifs_info *c, | ||
| 236 | union ubifs_key *key, ino_t inum, | ||
| 237 | uint32_t hash) | ||
| 238 | { | ||
| 239 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
| 240 | key->u32[0] = inum; | ||
| 241 | key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); | ||
| 242 | } | ||
| 243 | |||
| 244 | /** | ||
| 245 | * xent_key_init_flash - initialize on-flash extended attribute entry key. | ||
| 246 | * @c: UBIFS file-system description object | ||
| 247 | * @k: key to initialize | ||
| 248 | * @inum: host inode number | ||
| 249 | * @nm: extended attribute entry name and length | ||
| 250 | */ | ||
| 251 | static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, | ||
| 252 | ino_t inum, const struct qstr *nm) | ||
| 253 | { | ||
| 254 | union ubifs_key *key = k; | ||
| 255 | uint32_t hash = c->key_hash(nm->name, nm->len); | ||
| 256 | |||
| 257 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
| 258 | key->j32[0] = cpu_to_le32(inum); | ||
| 259 | key->j32[1] = cpu_to_le32(hash | | ||
| 260 | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); | ||
| 261 | memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); | ||
| 262 | } | ||
| 263 | |||
| 264 | /** | ||
| 265 | * lowest_xent_key - get the lowest possible extended attribute entry key. | ||
| 266 | * @c: UBIFS file-system description object | ||
| 267 | * @key: where to store the lowest key | ||
| 268 | * @inum: host inode number | ||
| 269 | */ | ||
| 270 | static inline void lowest_xent_key(const struct ubifs_info *c, | ||
| 271 | union ubifs_key *key, ino_t inum) | ||
| 272 | { | ||
| 273 | key->u32[0] = inum; | ||
| 274 | key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; | ||
| 275 | } | ||
| 276 | |||
| 277 | /** | ||
| 278 | * data_key_init - initialize data key. | ||
| 279 | * @c: UBIFS file-system description object | ||
| 280 | * @key: key to initialize | ||
| 281 | * @inum: inode number | ||
| 282 | * @block: block number | ||
| 283 | */ | ||
| 284 | static inline void data_key_init(const struct ubifs_info *c, | ||
| 285 | union ubifs_key *key, ino_t inum, | ||
| 286 | unsigned int block) | ||
| 287 | { | ||
| 288 | ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); | ||
| 289 | key->u32[0] = inum; | ||
| 290 | key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); | ||
| 291 | } | ||
| 292 | |||
| 293 | /** | ||
| 294 | * data_key_init_flash - initialize on-flash data key. | ||
| 295 | * @c: UBIFS file-system description object | ||
| 296 | * @k: key to initialize | ||
| 297 | * @inum: inode number | ||
| 298 | * @block: block number | ||
| 299 | */ | ||
| 300 | static inline void data_key_init_flash(const struct ubifs_info *c, void *k, | ||
| 301 | ino_t inum, unsigned int block) | ||
| 302 | { | ||
| 303 | union ubifs_key *key = k; | ||
| 304 | |||
| 305 | ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); | ||
| 306 | key->j32[0] = cpu_to_le32(inum); | ||
| 307 | key->j32[1] = cpu_to_le32(block | | ||
| 308 | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); | ||
| 309 | memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); | ||
| 310 | } | ||
| 311 | |||
| 312 | /** | ||
| 313 | * trun_key_init - initialize truncation node key. | ||
| 314 | * @c: UBIFS file-system description object | ||
| 315 | * @key: key to initialize | ||
| 316 | * @inum: inode number | ||
| 317 | * | ||
| 318 | * Note, UBIFS does not have truncation keys on the media and this function is | ||
| 319 | * only used for purposes of replay. | ||
| 320 | */ | ||
| 321 | static inline void trun_key_init(const struct ubifs_info *c, | ||
| 322 | union ubifs_key *key, ino_t inum) | ||
| 323 | { | ||
| 324 | key->u32[0] = inum; | ||
| 325 | key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; | ||
| 326 | } | ||
| 327 | |||
| 328 | /** | ||
| 329 | * key_type - get key type. | ||
| 330 | * @c: UBIFS file-system description object | ||
| 331 | * @key: key to get type of | ||
| 332 | */ | ||
| 333 | static inline int key_type(const struct ubifs_info *c, | ||
| 334 | const union ubifs_key *key) | ||
| 335 | { | ||
| 336 | return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; | ||
| 337 | } | ||
| 338 | |||
| 339 | /** | ||
| 340 | * key_type_flash - get type of a on-flash formatted key. | ||
| 341 | * @c: UBIFS file-system description object | ||
| 342 | * @k: key to get type of | ||
| 343 | */ | ||
| 344 | static inline int key_type_flash(const struct ubifs_info *c, const void *k) | ||
| 345 | { | ||
| 346 | const union ubifs_key *key = k; | ||
| 347 | |||
| 348 | return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS; | ||
| 349 | } | ||
| 350 | |||
| 351 | /** | ||
| 352 | * key_inum - fetch inode number from key. | ||
| 353 | * @c: UBIFS file-system description object | ||
| 354 | * @k: key to fetch inode number from | ||
| 355 | */ | ||
| 356 | static inline ino_t key_inum(const struct ubifs_info *c, const void *k) | ||
| 357 | { | ||
| 358 | const union ubifs_key *key = k; | ||
| 359 | |||
| 360 | return key->u32[0]; | ||
| 361 | } | ||
| 362 | |||
| 363 | /** | ||
| 364 | * key_inum_flash - fetch inode number from an on-flash formatted key. | ||
| 365 | * @c: UBIFS file-system description object | ||
| 366 | * @k: key to fetch inode number from | ||
| 367 | */ | ||
| 368 | static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) | ||
| 369 | { | ||
| 370 | const union ubifs_key *key = k; | ||
| 371 | |||
| 372 | return le32_to_cpu(key->j32[0]); | ||
| 373 | } | ||
| 374 | |||
| 375 | /** | ||
| 376 | * key_hash - get directory entry hash. | ||
| 377 | * @c: UBIFS file-system description object | ||
| 378 | * @key: the key to get hash from | ||
| 379 | */ | ||
| 380 | static inline int key_hash(const struct ubifs_info *c, | ||
| 381 | const union ubifs_key *key) | ||
| 382 | { | ||
| 383 | return key->u32[1] & UBIFS_S_KEY_HASH_MASK; | ||
| 384 | } | ||
| 385 | |||
| 386 | /** | ||
| 387 | * key_hash_flash - get directory entry hash from an on-flash formatted key. | ||
| 388 | * @c: UBIFS file-system description object | ||
| 389 | * @k: the key to get hash from | ||
| 390 | */ | ||
| 391 | static inline int key_hash_flash(const struct ubifs_info *c, const void *k) | ||
| 392 | { | ||
| 393 | const union ubifs_key *key = k; | ||
| 394 | |||
| 395 | return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; | ||
| 396 | } | ||
| 397 | |||
| 398 | /** | ||
| 399 | * key_block - get data block number. | ||
| 400 | * @c: UBIFS file-system description object | ||
| 401 | * @key: the key to get the block number from | ||
| 402 | */ | ||
| 403 | static inline unsigned int key_block(const struct ubifs_info *c, | ||
| 404 | const union ubifs_key *key) | ||
| 405 | { | ||
| 406 | return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; | ||
| 407 | } | ||
| 408 | |||
| 409 | /** | ||
| 410 | * key_block_flash - get data block number from an on-flash formatted key. | ||
| 411 | * @c: UBIFS file-system description object | ||
| 412 | * @k: the key to get the block number from | ||
| 413 | */ | ||
| 414 | static inline unsigned int key_block_flash(const struct ubifs_info *c, | ||
| 415 | const void *k) | ||
| 416 | { | ||
| 417 | const union ubifs_key *key = k; | ||
| 418 | |||
| 419 | return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK; | ||
| 420 | } | ||
| 421 | |||
| 422 | /** | ||
| 423 | * key_read - transform a key to in-memory format. | ||
| 424 | * @c: UBIFS file-system description object | ||
| 425 | * @from: the key to transform | ||
| 426 | * @to: the key to store the result | ||
| 427 | */ | ||
| 428 | static inline void key_read(const struct ubifs_info *c, const void *from, | ||
| 429 | union ubifs_key *to) | ||
| 430 | { | ||
| 431 | const union ubifs_key *f = from; | ||
| 432 | |||
| 433 | to->u32[0] = le32_to_cpu(f->j32[0]); | ||
| 434 | to->u32[1] = le32_to_cpu(f->j32[1]); | ||
| 435 | } | ||
| 436 | |||
| 437 | /** | ||
| 438 | * key_write - transform a key from in-memory format. | ||
| 439 | * @c: UBIFS file-system description object | ||
| 440 | * @from: the key to transform | ||
| 441 | * @to: the key to store the result | ||
| 442 | */ | ||
| 443 | static inline void key_write(const struct ubifs_info *c, | ||
| 444 | const union ubifs_key *from, void *to) | ||
| 445 | { | ||
| 446 | union ubifs_key *t = to; | ||
| 447 | |||
| 448 | t->j32[0] = cpu_to_le32(from->u32[0]); | ||
| 449 | t->j32[1] = cpu_to_le32(from->u32[1]); | ||
| 450 | memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); | ||
| 451 | } | ||
| 452 | |||
| 453 | /** | ||
| 454 | * key_write_idx - transform a key from in-memory format for the index. | ||
| 455 | * @c: UBIFS file-system description object | ||
| 456 | * @from: the key to transform | ||
| 457 | * @to: the key to store the result | ||
| 458 | */ | ||
| 459 | static inline void key_write_idx(const struct ubifs_info *c, | ||
| 460 | const union ubifs_key *from, void *to) | ||
| 461 | { | ||
| 462 | union ubifs_key *t = to; | ||
| 463 | |||
| 464 | t->j32[0] = cpu_to_le32(from->u32[0]); | ||
| 465 | t->j32[1] = cpu_to_le32(from->u32[1]); | ||
| 466 | } | ||
| 467 | |||
/**
 * key_copy - copy a key.
 * @c: UBIFS file-system description object
 * @from: the key to copy from
 * @to: the key to copy to
 */
static inline void key_copy(const struct ubifs_info *c,
			    const union ubifs_key *from, union ubifs_key *to)
{
	/* In the simple key format the whole key fits in one 64-bit word */
	to->u64[0] = from->u64[0];
}
| 479 | |||
| 480 | /** | ||
| 481 | * keys_cmp - compare keys. | ||
| 482 | * @c: UBIFS file-system description object | ||
| 483 | * @key1: the first key to compare | ||
| 484 | * @key2: the second key to compare | ||
| 485 | * | ||
| 486 | * This function compares 2 keys and returns %-1 if @key1 is less than | ||
| 487 | * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. | ||
| 488 | */ | ||
| 489 | static inline int keys_cmp(const struct ubifs_info *c, | ||
| 490 | const union ubifs_key *key1, | ||
| 491 | const union ubifs_key *key2) | ||
| 492 | { | ||
| 493 | if (key1->u32[0] < key2->u32[0]) | ||
| 494 | return -1; | ||
| 495 | if (key1->u32[0] > key2->u32[0]) | ||
| 496 | return 1; | ||
| 497 | if (key1->u32[1] < key2->u32[1]) | ||
| 498 | return -1; | ||
| 499 | if (key1->u32[1] > key2->u32[1]) | ||
| 500 | return 1; | ||
| 501 | |||
| 502 | return 0; | ||
| 503 | } | ||
| 504 | |||
| 505 | /** | ||
| 506 | * is_hash_key - is a key vulnerable to hash collisions. | ||
| 507 | * @c: UBIFS file-system description object | ||
| 508 | * @key: key | ||
| 509 | * | ||
| 510 | * This function returns %1 if @key is a hashed key or %0 otherwise. | ||
| 511 | */ | ||
| 512 | static inline int is_hash_key(const struct ubifs_info *c, | ||
| 513 | const union ubifs_key *key) | ||
| 514 | { | ||
| 515 | int type = key_type(c, key); | ||
| 516 | |||
| 517 | return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; | ||
| 518 | } | ||
| 519 | |||
| 520 | /** | ||
| 521 | * key_max_inode_size - get maximum file size allowed by current key format. | ||
| 522 | * @c: UBIFS file-system description object | ||
| 523 | */ | ||
| 524 | static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) | ||
| 525 | { | ||
| 526 | switch (c->key_fmt) { | ||
| 527 | case UBIFS_SIMPLE_KEY_FMT: | ||
| 528 | return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; | ||
| 529 | default: | ||
| 530 | return 0; | ||
| 531 | } | ||
| 532 | } | ||
| 533 | #endif /* !__UBIFS_KEY_H__ */ | ||
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c new file mode 100644 index 000000000000..36857b9ed59e --- /dev/null +++ b/fs/ubifs/log.c | |||
| @@ -0,0 +1,805 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file is a part of UBIFS journal implementation and contains various | ||
| 25 | * functions which manipulate the log. The log is a fixed area on the flash | ||
| 26 | * which does not contain any data but refers to buds. The log is a part of the | ||
| 27 | * journal. | ||
| 28 | */ | ||
| 29 | |||
| 30 | #include "ubifs.h" | ||
| 31 | |||
| 32 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 33 | static int dbg_check_bud_bytes(struct ubifs_info *c); | ||
| 34 | #else | ||
| 35 | #define dbg_check_bud_bytes(c) 0 | ||
| 36 | #endif | ||
| 37 | |||
| 38 | /** | ||
| 39 | * ubifs_search_bud - search bud LEB. | ||
| 40 | * @c: UBIFS file-system description object | ||
| 41 | * @lnum: logical eraseblock number to search | ||
| 42 | * | ||
| 43 | * This function searches bud LEB @lnum. Returns bud description object in case | ||
| 44 | * of success and %NULL if there is no bud with this LEB number. | ||
| 45 | */ | ||
| 46 | struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) | ||
| 47 | { | ||
| 48 | struct rb_node *p; | ||
| 49 | struct ubifs_bud *bud; | ||
| 50 | |||
| 51 | spin_lock(&c->buds_lock); | ||
| 52 | p = c->buds.rb_node; | ||
| 53 | while (p) { | ||
| 54 | bud = rb_entry(p, struct ubifs_bud, rb); | ||
| 55 | if (lnum < bud->lnum) | ||
| 56 | p = p->rb_left; | ||
| 57 | else if (lnum > bud->lnum) | ||
| 58 | p = p->rb_right; | ||
| 59 | else { | ||
| 60 | spin_unlock(&c->buds_lock); | ||
| 61 | return bud; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | spin_unlock(&c->buds_lock); | ||
| 65 | return NULL; | ||
| 66 | } | ||
| 67 | |||
| 68 | /** | ||
| 69 | * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one. | ||
| 70 | * @c: UBIFS file-system description object | ||
| 71 | * @lnum: logical eraseblock number to search | ||
| 72 | * | ||
| 73 | * This functions returns the wbuf for @lnum or %NULL if there is not one. | ||
| 74 | */ | ||
| 75 | struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) | ||
| 76 | { | ||
| 77 | struct rb_node *p; | ||
| 78 | struct ubifs_bud *bud; | ||
| 79 | int jhead; | ||
| 80 | |||
| 81 | if (!c->jheads) | ||
| 82 | return NULL; | ||
| 83 | |||
| 84 | spin_lock(&c->buds_lock); | ||
| 85 | p = c->buds.rb_node; | ||
| 86 | while (p) { | ||
| 87 | bud = rb_entry(p, struct ubifs_bud, rb); | ||
| 88 | if (lnum < bud->lnum) | ||
| 89 | p = p->rb_left; | ||
| 90 | else if (lnum > bud->lnum) | ||
| 91 | p = p->rb_right; | ||
| 92 | else { | ||
| 93 | jhead = bud->jhead; | ||
| 94 | spin_unlock(&c->buds_lock); | ||
| 95 | return &c->jheads[jhead].wbuf; | ||
| 96 | } | ||
| 97 | } | ||
| 98 | spin_unlock(&c->buds_lock); | ||
| 99 | return NULL; | ||
| 100 | } | ||
| 101 | |||
| 102 | /** | ||
| 103 | * next_log_lnum - switch to the next log LEB. | ||
| 104 | * @c: UBIFS file-system description object | ||
| 105 | * @lnum: current log LEB | ||
| 106 | */ | ||
| 107 | static inline int next_log_lnum(const struct ubifs_info *c, int lnum) | ||
| 108 | { | ||
| 109 | lnum += 1; | ||
| 110 | if (lnum > c->log_last) | ||
| 111 | lnum = UBIFS_LOG_LNUM; | ||
| 112 | |||
| 113 | return lnum; | ||
| 114 | } | ||
| 115 | |||
| 116 | /** | ||
| 117 | * empty_log_bytes - calculate amount of empty space in the log. | ||
| 118 | * @c: UBIFS file-system description object | ||
| 119 | */ | ||
| 120 | static inline long long empty_log_bytes(const struct ubifs_info *c) | ||
| 121 | { | ||
| 122 | long long h, t; | ||
| 123 | |||
| 124 | h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; | ||
| 125 | t = (long long)c->ltail_lnum * c->leb_size; | ||
| 126 | |||
| 127 | if (h >= t) | ||
| 128 | return c->log_bytes - h + t; | ||
| 129 | else | ||
| 130 | return t - h; | ||
| 131 | } | ||
| 132 | |||
/**
 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
 * @c: UBIFS file-system description object
 * @bud: the bud to add
 *
 * The caller is expected to have filled in @bud (LEB number, start offset and
 * journal head number). This function inserts it into the @c->buds RB-tree,
 * which is keyed by LEB number, and appends it to the corresponding journal
 * head's list of buds. It also accounts the bud's space in @c->bud_bytes.
 */
void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
{
	struct rb_node **p, *parent = NULL;
	struct ubifs_bud *b;
	struct ubifs_jhead *jhead;

	spin_lock(&c->buds_lock);
	/* Find the insertion point - LEB numbers are unique in this tree */
	p = &c->buds.rb_node;
	while (*p) {
		parent = *p;
		b = rb_entry(parent, struct ubifs_bud, rb);
		ubifs_assert(bud->lnum != b->lnum);
		if (bud->lnum < b->lnum)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	rb_link_node(&bud->rb, parent, p);
	rb_insert_color(&bud->rb, &c->buds);
	if (c->jheads) {
		jhead = &c->jheads[bud->jhead];
		list_add_tail(&bud->list, &jhead->buds_list);
	} else
		/*
		 * Journal heads may legitimately be absent only while
		 * replaying the journal during a read-only mount.
		 */
		ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));

	/*
	 * Note, although this is a new bud, we anyway account this space now,
	 * before any data has been written to it, because this is about to
	 * guarantee fixed mount time, and this bud will anyway be read and
	 * scanned.
	 */
	c->bud_bytes += c->leb_size - bud->start;

	dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
		bud->start, bud->jhead, c->bud_bytes);
	spin_unlock(&c->buds_lock);
}
| 176 | |||
| 177 | /** | ||
| 178 | * ubifs_create_buds_lists - create journal head buds lists for remount rw. | ||
| 179 | * @c: UBIFS file-system description object | ||
| 180 | */ | ||
| 181 | void ubifs_create_buds_lists(struct ubifs_info *c) | ||
| 182 | { | ||
| 183 | struct rb_node *p; | ||
| 184 | |||
| 185 | spin_lock(&c->buds_lock); | ||
| 186 | p = rb_first(&c->buds); | ||
| 187 | while (p) { | ||
| 188 | struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); | ||
| 189 | struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; | ||
| 190 | |||
| 191 | list_add_tail(&bud->list, &jhead->buds_list); | ||
| 192 | p = rb_next(p); | ||
| 193 | } | ||
| 194 | spin_unlock(&c->buds_lock); | ||
| 195 | } | ||
| 196 | |||
/**
 * ubifs_add_bud_to_log - add a new bud to the log.
 * @c: UBIFS file-system description object
 * @jhead: journal head the bud belongs to
 * @lnum: LEB number of the bud
 * @offs: starting offset of the bud
 *
 * This function writes a reference node for the new bud LEB @lnum to the log,
 * and adds it to the buds tree. It also makes sure that log size does not
 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
 * %-EAGAIN if commit is required, and a negative error code in case of
 * failure.
 */
int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
{
	int err;
	struct ubifs_bud *bud;
	struct ubifs_ref_node *ref;

	/* Allocate before taking @c->log_mutex to keep the hold time short */
	bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
	if (!bud)
		return -ENOMEM;
	ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
	if (!ref) {
		kfree(bud);
		return -ENOMEM;
	}

	mutex_lock(&c->log_mutex);

	if (c->ro_media) {
		err = -EROFS;
		goto out_unlock;
	}

	/* Make sure we have enough space in the log */
	if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
		dbg_log("not enough log space - %lld, required %d",
			empty_log_bytes(c), c->min_log_bytes);
		ubifs_commit_required(c);
		err = -EAGAIN;
		goto out_unlock;
	}

	/*
	 * Make sure the amount of space in buds will not exceed the
	 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
	 * limits.
	 *
	 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
	 * because we are holding @c->log_mutex. All @c->bud_bytes updates take
	 * place when both @c->log_mutex and @c->buds_lock are locked.
	 */
	if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
		dbg_log("bud bytes %lld (%lld max), require commit",
			c->bud_bytes, c->max_bud_bytes);
		ubifs_commit_required(c);
		err = -EAGAIN;
		goto out_unlock;
	}

	/*
	 * If the journal is full enough - start background commit. Note, it is
	 * OK to read 'c->cmt_state' without spinlock because integer reads
	 * are atomic in the kernel.
	 */
	if (c->bud_bytes >= c->bg_bud_bytes &&
	    c->cmt_state == COMMIT_RESTING) {
		dbg_log("bud bytes %lld (%lld max), initiate BG commit",
			c->bud_bytes, c->max_bud_bytes);
		ubifs_request_bg_commit(c);
	}

	bud->lnum = lnum;
	bud->start = offs;
	bud->jhead = jhead;

	ref->ch.node_type = UBIFS_REF_NODE;
	ref->lnum = cpu_to_le32(bud->lnum);
	ref->offs = cpu_to_le32(bud->start);
	ref->jhead = cpu_to_le32(jhead);

	/* The reference node does not fit - move the log head to a new LEB */
	if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
		c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	if (c->lhead_offs == 0) {
		/* Must ensure next log LEB has been unmapped */
		err = ubifs_leb_unmap(c, c->lhead_lnum);
		if (err)
			goto out_unlock;
	}

	if (bud->start == 0) {
		/*
		 * Before writing the LEB reference which refers an empty LEB
		 * to the log, we have to make sure it is mapped, because
		 * otherwise we'd risk to refer an LEB with garbage in case of
		 * an unclean reboot, because the target LEB might have been
		 * unmapped, but not yet physically erased.
		 */
		err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
		if (err)
			goto out_unlock;
	}

	dbg_log("write ref LEB %d:%d",
		c->lhead_lnum, c->lhead_offs);
	err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
			       c->lhead_offs, UBI_SHORTTERM);
	if (err)
		goto out_unlock;

	c->lhead_offs += c->ref_node_alsz;

	ubifs_add_bud(c, bud);

	mutex_unlock(&c->log_mutex);
	kfree(ref);
	return 0;

out_unlock:
	mutex_unlock(&c->log_mutex);
	kfree(ref);
	kfree(bud);
	return err;
}
| 325 | |||
/**
 * remove_buds - remove used buds.
 * @c: UBIFS file-system description object
 *
 * This function removes used buds from the buds tree. It does not remove the
 * buds which are pointed to by journal heads. It also accumulates, in
 * @c->cmt_bud_bytes, the amount of bud space which this commit makes
 * reclaimable.
 */
static void remove_buds(struct ubifs_info *c)
{
	struct rb_node *p;

	ubifs_assert(list_empty(&c->old_buds));
	c->cmt_bud_bytes = 0;
	spin_lock(&c->buds_lock);
	p = rb_first(&c->buds);
	while (p) {
		/* Grab the next node first - @p may be erased below */
		struct rb_node *p1 = p;
		struct ubifs_bud *bud;
		struct ubifs_wbuf *wbuf;

		p = rb_next(p);
		bud = rb_entry(p1, struct ubifs_bud, rb);
		wbuf = &c->jheads[bud->jhead].wbuf;

		if (wbuf->lnum == bud->lnum) {
			/*
			 * Do not remove buds which are pointed to by journal
			 * heads (non-closed buds).
			 */
			c->cmt_bud_bytes += wbuf->offs - bud->start;
			dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
				"cmt_bud_bytes %lld", bud->lnum, bud->start,
				bud->jhead, wbuf->offs - bud->start,
				c->cmt_bud_bytes);
			bud->start = wbuf->offs;
		} else {
			c->cmt_bud_bytes += c->leb_size - bud->start;
			dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
				"cmt_bud_bytes %lld", bud->lnum, bud->start,
				bud->jhead, c->leb_size - bud->start,
				c->cmt_bud_bytes);
			rb_erase(p1, &c->buds);
			list_del(&bud->list);
			/*
			 * If the commit does not finish, the recovery will need
			 * to replay the journal, in which case the old buds
			 * must be unchanged. Do not release them until post
			 * commit i.e. do not allow them to be garbage
			 * collected.
			 */
			list_add(&bud->list, &c->old_buds);
		}
	}
	spin_unlock(&c->buds_lock);
}
| 381 | |||
/**
 * ubifs_log_start_commit - start commit.
 * @c: UBIFS file-system description object
 * @ltail_lnum: return new log tail LEB number
 *
 * The commit operation starts with writing "commit start" node to the log and
 * reference nodes for all journal heads which will define new journal after
 * the commit has been finished. The commit start and reference nodes are
 * written in one go to the nearest empty log LEB (hence, when commit is
 * finished UBIFS may safely unmap all the previous log LEBs). This function
 * returns zero in case of success and a negative error code in case of
 * failure.
 */
int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
{
	void *buf;
	struct ubifs_cs_node *cs;
	struct ubifs_ref_node *ref;
	int err, i, max_len, len;

	err = dbg_check_bud_bytes(c);
	if (err)
		return err;

	/* Worst case: a CS node plus one reference node per journal head */
	max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
	max_len = ALIGN(max_len, c->min_io_size);
	buf = cs = kmalloc(max_len, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	cs->ch.node_type = UBIFS_CS_NODE;
	cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
	ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);

	/*
	 * Note, we do not lock 'c->log_mutex' because this is the commit start
	 * phase and we are exclusively using the log. And we do not lock
	 * write-buffer because nobody can write to the file-system at this
	 * phase.
	 */

	len = UBIFS_CS_NODE_SZ;
	for (i = 0; i < c->jhead_cnt; i++) {
		int lnum = c->jheads[i].wbuf.lnum;
		int offs = c->jheads[i].wbuf.offs;

		/* Skip journal heads which have no bud at the moment */
		if (lnum == -1 || offs == c->leb_size)
			continue;

		dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
		ref = buf + len;
		ref->ch.node_type = UBIFS_REF_NODE;
		ref->lnum = cpu_to_le32(lnum);
		ref->offs = cpu_to_le32(offs);
		ref->jhead = cpu_to_le32(i);

		ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
		len += UBIFS_REF_NODE_SZ;
	}

	/* Pad up to the minimal I/O unit boundary */
	ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);

	/* Switch to the next log LEB */
	if (c->lhead_offs) {
		c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	if (c->lhead_offs == 0) {
		/* Must ensure next LEB has been unmapped */
		err = ubifs_leb_unmap(c, c->lhead_lnum);
		if (err)
			goto out;
	}

	len = ALIGN(len, c->min_io_size);
	dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
	err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
	if (err)
		goto out;

	/* The new log tail is the LEB holding the commit start node */
	*ltail_lnum = c->lhead_lnum;

	c->lhead_offs += len;
	if (c->lhead_offs == c->leb_size) {
		c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	remove_buds(c);

	/*
	 * We have started the commit and now users may use the rest of the log
	 * for new writes.
	 */
	c->min_log_bytes = 0;

out:
	kfree(buf);
	return err;
}
| 483 | |||
/**
 * ubifs_log_end_commit - end commit.
 * @c: UBIFS file-system description object
 * @ltail_lnum: new log tail LEB number
 *
 * This function is called when the commit operation has finished. It
 * moves log tail to new position and unmaps LEBs which contain obsolete data.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 */
int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
{
	int err;

	/*
	 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
	 * writes during commit. It is only during the short commit start phase
	 * that writers are blocked.
	 */
	mutex_lock(&c->log_mutex);

	dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
		c->ltail_lnum, ltail_lnum);

	c->ltail_lnum = ltail_lnum;
	/*
	 * The commit is finished and from now on it must be guaranteed that
	 * there is always enough space for the next commit.
	 */
	c->min_log_bytes = c->leb_size;

	/* The space of the buds committed by this commit is now reclaimed */
	spin_lock(&c->buds_lock);
	c->bud_bytes -= c->cmt_bud_bytes;
	spin_unlock(&c->buds_lock);

	err = dbg_check_bud_bytes(c);

	mutex_unlock(&c->log_mutex);
	return err;
}
| 524 | |||
/**
 * ubifs_log_post_commit - things to do after commit is completed.
 * @c: UBIFS file-system description object
 * @old_ltail_lnum: old log tail LEB number
 *
 * Release buds only after commit is completed, because they must be unchanged
 * if recovery is needed.
 *
 * Unmap log LEBs only after commit is completed, because they may be needed for
 * recovery.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
{
	int lnum, err = 0;

	/* Return the old buds (saved by remove_buds()) to the LEB pool */
	while (!list_empty(&c->old_buds)) {
		struct ubifs_bud *bud;

		bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
		err = ubifs_return_leb(c, bud->lnum);
		if (err)
			return err;
		list_del(&bud->list);
		kfree(bud);
	}
	mutex_lock(&c->log_mutex);
	/* Unmap the log LEBs between the old and the new tail */
	for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
	     lnum = next_log_lnum(c, lnum)) {
		dbg_log("unmap log LEB %d", lnum);
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			goto out;
	}
out:
	mutex_unlock(&c->log_mutex);
	return err;
}
| 564 | |||
/**
 * struct done_ref - references that have been done.
 * @rb: rb-tree node
 * @lnum: LEB number
 *
 * Used by log consolidation to remember which LEBs already have a reference
 * node written, so duplicates can be omitted.
 */
struct done_ref {
	struct rb_node rb;
	int lnum;
};
| 574 | |||
| 575 | /** | ||
| 576 | * done_already - determine if a reference has been done already. | ||
| 577 | * @done_tree: rb-tree to store references that have been done | ||
| 578 | * @lnum: LEB number of reference | ||
| 579 | * | ||
| 580 | * This function returns %1 if the reference has been done, %0 if not, otherwise | ||
| 581 | * a negative error code is returned. | ||
| 582 | */ | ||
| 583 | static int done_already(struct rb_root *done_tree, int lnum) | ||
| 584 | { | ||
| 585 | struct rb_node **p = &done_tree->rb_node, *parent = NULL; | ||
| 586 | struct done_ref *dr; | ||
| 587 | |||
| 588 | while (*p) { | ||
| 589 | parent = *p; | ||
| 590 | dr = rb_entry(parent, struct done_ref, rb); | ||
| 591 | if (lnum < dr->lnum) | ||
| 592 | p = &(*p)->rb_left; | ||
| 593 | else if (lnum > dr->lnum) | ||
| 594 | p = &(*p)->rb_right; | ||
| 595 | else | ||
| 596 | return 1; | ||
| 597 | } | ||
| 598 | |||
| 599 | dr = kzalloc(sizeof(struct done_ref), GFP_NOFS); | ||
| 600 | if (!dr) | ||
| 601 | return -ENOMEM; | ||
| 602 | |||
| 603 | dr->lnum = lnum; | ||
| 604 | |||
| 605 | rb_link_node(&dr->rb, parent, p); | ||
| 606 | rb_insert_color(&dr->rb, done_tree); | ||
| 607 | |||
| 608 | return 0; | ||
| 609 | } | ||
| 610 | |||
/**
 * destroy_done_tree - destroy the done tree.
 * @done_tree: done tree to destroy
 *
 * Frees every node using a non-recursive post-order walk: descend to a leaf,
 * free it, and clear the parent's link to it so the parent itself eventually
 * becomes a leaf.
 */
static void destroy_done_tree(struct rb_root *done_tree)
{
	struct rb_node *this = done_tree->rb_node;
	struct done_ref *dr;

	while (this) {
		/* Descend as far as possible before freeing anything */
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		dr = rb_entry(this, struct done_ref, rb);
		this = rb_parent(this);
		if (this) {
			/* Detach the freed child so it is not visited again */
			if (this->rb_left == &dr->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		kfree(dr);
	}
}
| 639 | |||
/**
 * add_node - add a node to the consolidated log.
 * @c: UBIFS file-system description object
 * @buf: buffer to which to add (must be @c->leb_size bytes)
 * @lnum: LEB number to which to write is passed and returned here
 * @offs: offset to where to write is passed and returned here
 * @node: node to add
 *
 * If the node does not fit in the current LEB, the buffered data is padded,
 * flushed to @lnum, and accumulation continues at the start of the next log
 * LEB. This function returns %0 on success and a negative error code on
 * failure.
 */
static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
		    void *node)
{
	struct ubifs_ch *ch = node;
	int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;

	if (len > remains) {
		/* Flush the full buffer and move on to the next log LEB */
		int sz = ALIGN(*offs, c->min_io_size), err;

		ubifs_pad(c, buf + *offs, sz - *offs);
		err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
		if (err)
			return err;
		*lnum = next_log_lnum(c, *lnum);
		*offs = 0;
	}
	memcpy(buf + *offs, node, len);
	/* Nodes are kept 8-byte aligned within a LEB */
	*offs += ALIGN(len, 8);
	return 0;
}
| 670 | |||
/**
 * ubifs_consolidate_log - consolidate the log.
 * @c: UBIFS file-system description object
 *
 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
 * needed for commit. This function rewrites the reference nodes in the log
 * omitting duplicates, and failed CS nodes, and leaving no gaps.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_consolidate_log(struct ubifs_info *c)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	struct rb_root done_tree = RB_ROOT;
	int lnum, err, first = 1, write_lnum, offs = 0;
	void *buf;

	dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
		  c->lhead_lnum);
	buf = vmalloc(c->leb_size);
	if (!buf)
		return -ENOMEM;
	/* Scan from the tail, rewriting in place starting at the tail LEB */
	lnum = c->ltail_lnum;
	write_lnum = lnum;
	while (1) {
		sleb = ubifs_scan(c, lnum, 0, c->sbuf);
		if (IS_ERR(sleb)) {
			err = PTR_ERR(sleb);
			goto out_free;
		}
		list_for_each_entry(snod, &sleb->nodes, list) {
			switch (snod->type) {
			case UBIFS_REF_NODE: {
				struct ubifs_ref_node *ref = snod->node;
				int ref_lnum = le32_to_cpu(ref->lnum);

				/*
				 * Keep only the first reference to each bud
				 * LEB; done_already() returns 1 on duplicates.
				 */
				err = done_already(&done_tree, ref_lnum);
				if (err < 0)
					goto out_scan;
				if (err != 1) {
					err = add_node(c, buf, &write_lnum,
						       &offs, snod->node);
					if (err)
						goto out_scan;
				}
				break;
			}
			case UBIFS_CS_NODE:
				/* Keep only the first (valid) CS node */
				if (!first)
					break;
				err = add_node(c, buf, &write_lnum, &offs,
					       snod->node);
				if (err)
					goto out_scan;
				first = 0;
				break;
			}
		}
		ubifs_scan_destroy(sleb);
		if (lnum == c->lhead_lnum)
			break;
		lnum = next_log_lnum(c, lnum);
	}
	if (offs) {
		/* Flush the last partially-filled buffer */
		int sz = ALIGN(offs, c->min_io_size);

		ubifs_pad(c, buf + offs, sz - offs);
		err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
		if (err)
			goto out_free;
		offs = ALIGN(offs, c->min_io_size);
	}
	destroy_done_tree(&done_tree);
	vfree(buf);
	if (write_lnum == c->lhead_lnum) {
		/* Consolidation did not free even one LEB */
		ubifs_err("log is too full");
		return -EINVAL;
	}
	/* Unmap remaining LEBs */
	lnum = write_lnum;
	do {
		lnum = next_log_lnum(c, lnum);
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
	} while (lnum != c->lhead_lnum);
	c->lhead_lnum = write_lnum;
	c->lhead_offs = offs;
	dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
	return 0;

out_scan:
	ubifs_scan_destroy(sleb);
out_free:
	destroy_done_tree(&done_tree);
	vfree(buf);
	return err;
}
| 770 | |||
| 771 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 772 | |||
| 773 | /** | ||
| 774 | * dbg_check_bud_bytes - make sure bud bytes calculation are all right. | ||
| 775 | * @c: UBIFS file-system description object | ||
| 776 | * | ||
| 777 | * This function makes sure the amount of flash space used by closed buds | ||
| 778 | * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in | ||
| 779 | * case of failure. | ||
| 780 | */ | ||
| 781 | static int dbg_check_bud_bytes(struct ubifs_info *c) | ||
| 782 | { | ||
| 783 | int i, err = 0; | ||
| 784 | struct ubifs_bud *bud; | ||
| 785 | long long bud_bytes = 0; | ||
| 786 | |||
| 787 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
| 788 | return 0; | ||
| 789 | |||
| 790 | spin_lock(&c->buds_lock); | ||
| 791 | for (i = 0; i < c->jhead_cnt; i++) | ||
| 792 | list_for_each_entry(bud, &c->jheads[i].buds_list, list) | ||
| 793 | bud_bytes += c->leb_size - bud->start; | ||
| 794 | |||
| 795 | if (c->bud_bytes != bud_bytes) { | ||
| 796 | ubifs_err("bad bud_bytes %lld, calculated %lld", | ||
| 797 | c->bud_bytes, bud_bytes); | ||
| 798 | err = -EINVAL; | ||
| 799 | } | ||
| 800 | spin_unlock(&c->buds_lock); | ||
| 801 | |||
| 802 | return err; | ||
| 803 | } | ||
| 804 | |||
| 805 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c new file mode 100644 index 000000000000..2ba93da71b65 --- /dev/null +++ b/fs/ubifs/lprops.c | |||
| @@ -0,0 +1,1357 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements the functions that access LEB properties and their | ||
| 25 | * categories. LEBs are categorized based on the needs of UBIFS, and the | ||
| 26 | * categories are stored as either heaps or lists to provide a fast way of | ||
| 27 | * finding a LEB in a particular category. For example, UBIFS may need to find | ||
| 28 | * an empty LEB for the journal, or a very dirty LEB for garbage collection. | ||
| 29 | */ | ||
| 30 | |||
| 31 | #include "ubifs.h" | ||
| 32 | |||
| 33 | /** | ||
| 34 | * get_heap_comp_val - get the LEB properties value for heap comparisons. | ||
| 35 | * @lprops: LEB properties | ||
| 36 | * @cat: LEB category | ||
| 37 | */ | ||
| 38 | static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat) | ||
| 39 | { | ||
| 40 | switch (cat) { | ||
| 41 | case LPROPS_FREE: | ||
| 42 | return lprops->free; | ||
| 43 | case LPROPS_DIRTY_IDX: | ||
| 44 | return lprops->free + lprops->dirty; | ||
| 45 | default: | ||
| 46 | return lprops->dirty; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
/**
 * move_up_lpt_heap - move a new heap entry up as far as possible.
 * @c: UBIFS file-system description object
 * @heap: LEB category heap
 * @lprops: LEB properties to move
 * @cat: LEB category
 *
 * New entries to a heap are added at the bottom and then moved up until the
 * parent's value is greater. In the case of LPT's category heaps, the value
 * is either the amount of free space or the amount of dirty space, depending
 * on the category.
 */
static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
			     struct ubifs_lprops *lprops, int cat)
{
	int val1, val2, hpos;

	hpos = lprops->hpos;
	if (!hpos)
		return; /* Already top of the heap */
	val1 = get_heap_comp_val(lprops, cat);
	/* Compare to parent and, if greater, move up the heap */
	do {
		/* Parent of element at hpos lives at (hpos - 1) / 2 */
		int ppos = (hpos - 1) / 2;

		val2 = get_heap_comp_val(heap->arr[ppos], cat);
		if (val2 >= val1)
			return;
		/* Greater than parent so move up: swap with the parent */
		heap->arr[ppos]->hpos = hpos;
		heap->arr[hpos] = heap->arr[ppos];
		heap->arr[ppos] = lprops;
		lprops->hpos = ppos;
		hpos = ppos;
	} while (hpos);
}
| 86 | |||
/**
 * adjust_lpt_heap - move a changed heap entry up or down the heap.
 * @c: UBIFS file-system description object
 * @heap: LEB category heap
 * @lprops: LEB properties to move
 * @hpos: heap position of @lprops
 * @cat: LEB category
 *
 * Changed entries in a heap are moved up or down until the parent's value is
 * greater. In the case of LPT's category heaps, the value is either the amount
 * of free space or the amount of dirty space, depending on the category.
 */
static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
			    struct ubifs_lprops *lprops, int hpos, int cat)
{
	int val1, val2, val3, cpos;

	val1 = get_heap_comp_val(lprops, cat);
	/* Compare to parent and, if greater than parent, move up the heap */
	if (hpos) {
		int ppos = (hpos - 1) / 2;

		val2 = get_heap_comp_val(heap->arr[ppos], cat);
		if (val1 > val2) {
			/* Greater than parent so move up */
			while (1) {
				heap->arr[ppos]->hpos = hpos;
				heap->arr[hpos] = heap->arr[ppos];
				heap->arr[ppos] = lprops;
				lprops->hpos = ppos;
				hpos = ppos;
				if (!hpos)
					return;
				ppos = (hpos - 1) / 2;
				val2 = get_heap_comp_val(heap->arr[ppos], cat);
				if (val1 <= val2)
					return;
				/* Still greater than parent so keep going */
			}
		}
	}
	/* Not greater than parent, so compare to children */
	while (1) {
		/* Compare to left child (at hpos * 2 + 1) */
		cpos = hpos * 2 + 1;
		if (cpos >= heap->cnt)
			return;
		val2 = get_heap_comp_val(heap->arr[cpos], cat);
		if (val1 < val2) {
			/* Less than left child, so promote biggest child */
			if (cpos + 1 < heap->cnt) {
				val3 = get_heap_comp_val(heap->arr[cpos + 1],
							 cat);
				if (val3 > val2)
					cpos += 1; /* Right child is bigger */
			}
			heap->arr[cpos]->hpos = hpos;
			heap->arr[hpos] = heap->arr[cpos];
			heap->arr[cpos] = lprops;
			lprops->hpos = cpos;
			hpos = cpos;
			continue;
		}
		/* Compare to right child */
		cpos += 1;
		if (cpos >= heap->cnt)
			return;
		val3 = get_heap_comp_val(heap->arr[cpos], cat);
		if (val1 < val3) {
			/* Less than right child, so promote right child */
			heap->arr[cpos]->hpos = hpos;
			heap->arr[hpos] = heap->arr[cpos];
			heap->arr[cpos] = lprops;
			lprops->hpos = cpos;
			hpos = cpos;
			continue;
		}
		return;
	}
}
| 167 | |||
| 168 | /** | ||
| 169 | * add_to_lpt_heap - add LEB properties to a LEB category heap. | ||
| 170 | * @c: UBIFS file-system description object | ||
| 171 | * @lprops: LEB properties to add | ||
| 172 | * @cat: LEB category | ||
| 173 | * | ||
| 174 | * This function returns %1 if @lprops is added to the heap for LEB category | ||
| 175 | * @cat, otherwise %0 is returned because the heap is full. | ||
| 176 | */ | ||
| 177 | static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops, | ||
| 178 | int cat) | ||
| 179 | { | ||
| 180 | struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; | ||
| 181 | |||
| 182 | if (heap->cnt >= heap->max_cnt) { | ||
| 183 | const int b = LPT_HEAP_SZ / 2 - 1; | ||
| 184 | int cpos, val1, val2; | ||
| 185 | |||
| 186 | /* Compare to some other LEB on the bottom of heap */ | ||
| 187 | /* Pick a position kind of randomly */ | ||
| 188 | cpos = (((size_t)lprops >> 4) & b) + b; | ||
| 189 | ubifs_assert(cpos >= b); | ||
| 190 | ubifs_assert(cpos < LPT_HEAP_SZ); | ||
| 191 | ubifs_assert(cpos < heap->cnt); | ||
| 192 | |||
| 193 | val1 = get_heap_comp_val(lprops, cat); | ||
| 194 | val2 = get_heap_comp_val(heap->arr[cpos], cat); | ||
| 195 | if (val1 > val2) { | ||
| 196 | struct ubifs_lprops *lp; | ||
| 197 | |||
| 198 | lp = heap->arr[cpos]; | ||
| 199 | lp->flags &= ~LPROPS_CAT_MASK; | ||
| 200 | lp->flags |= LPROPS_UNCAT; | ||
| 201 | list_add(&lp->list, &c->uncat_list); | ||
| 202 | lprops->hpos = cpos; | ||
| 203 | heap->arr[cpos] = lprops; | ||
| 204 | move_up_lpt_heap(c, heap, lprops, cat); | ||
| 205 | dbg_check_heap(c, heap, cat, lprops->hpos); | ||
| 206 | return 1; /* Added to heap */ | ||
| 207 | } | ||
| 208 | dbg_check_heap(c, heap, cat, -1); | ||
| 209 | return 0; /* Not added to heap */ | ||
| 210 | } else { | ||
| 211 | lprops->hpos = heap->cnt++; | ||
| 212 | heap->arr[lprops->hpos] = lprops; | ||
| 213 | move_up_lpt_heap(c, heap, lprops, cat); | ||
| 214 | dbg_check_heap(c, heap, cat, lprops->hpos); | ||
| 215 | return 1; /* Added to heap */ | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | /** | ||
| 220 | * remove_from_lpt_heap - remove LEB properties from a LEB category heap. | ||
| 221 | * @c: UBIFS file-system description object | ||
| 222 | * @lprops: LEB properties to remove | ||
| 223 | * @cat: LEB category | ||
| 224 | */ | ||
| 225 | static void remove_from_lpt_heap(struct ubifs_info *c, | ||
| 226 | struct ubifs_lprops *lprops, int cat) | ||
| 227 | { | ||
| 228 | struct ubifs_lpt_heap *heap; | ||
| 229 | int hpos = lprops->hpos; | ||
| 230 | |||
| 231 | heap = &c->lpt_heap[cat - 1]; | ||
| 232 | ubifs_assert(hpos >= 0 && hpos < heap->cnt); | ||
| 233 | ubifs_assert(heap->arr[hpos] == lprops); | ||
| 234 | heap->cnt -= 1; | ||
| 235 | if (hpos < heap->cnt) { | ||
| 236 | heap->arr[hpos] = heap->arr[heap->cnt]; | ||
| 237 | heap->arr[hpos]->hpos = hpos; | ||
| 238 | adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat); | ||
| 239 | } | ||
| 240 | dbg_check_heap(c, heap, cat, -1); | ||
| 241 | } | ||
| 242 | |||
| 243 | /** | ||
| 244 | * lpt_heap_replace - replace lprops in a category heap. | ||
| 245 | * @c: UBIFS file-system description object | ||
| 246 | * @old_lprops: LEB properties to replace | ||
| 247 | * @new_lprops: LEB properties with which to replace | ||
| 248 | * @cat: LEB category | ||
| 249 | * | ||
| 250 | * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) | ||
| 251 | * and the lprops that the pnode contains. When that happens, references in | ||
| 252 | * the category heaps to those lprops must be updated to point to the new | ||
| 253 | * lprops. This function does that. | ||
| 254 | */ | ||
| 255 | static void lpt_heap_replace(struct ubifs_info *c, | ||
| 256 | struct ubifs_lprops *old_lprops, | ||
| 257 | struct ubifs_lprops *new_lprops, int cat) | ||
| 258 | { | ||
| 259 | struct ubifs_lpt_heap *heap; | ||
| 260 | int hpos = new_lprops->hpos; | ||
| 261 | |||
| 262 | heap = &c->lpt_heap[cat - 1]; | ||
| 263 | heap->arr[hpos] = new_lprops; | ||
| 264 | } | ||
| 265 | |||
| 266 | /** | ||
| 267 | * ubifs_add_to_cat - add LEB properties to a category list or heap. | ||
| 268 | * @c: UBIFS file-system description object | ||
| 269 | * @lprops: LEB properties to add | ||
| 270 | * @cat: LEB category to which to add | ||
| 271 | * | ||
| 272 | * LEB properties are categorized to enable fast find operations. | ||
| 273 | */ | ||
| 274 | void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, | ||
| 275 | int cat) | ||
| 276 | { | ||
| 277 | switch (cat) { | ||
| 278 | case LPROPS_DIRTY: | ||
| 279 | case LPROPS_DIRTY_IDX: | ||
| 280 | case LPROPS_FREE: | ||
| 281 | if (add_to_lpt_heap(c, lprops, cat)) | ||
| 282 | break; | ||
| 283 | /* No more room on heap so make it uncategorized */ | ||
| 284 | cat = LPROPS_UNCAT; | ||
| 285 | /* Fall through */ | ||
| 286 | case LPROPS_UNCAT: | ||
| 287 | list_add(&lprops->list, &c->uncat_list); | ||
| 288 | break; | ||
| 289 | case LPROPS_EMPTY: | ||
| 290 | list_add(&lprops->list, &c->empty_list); | ||
| 291 | break; | ||
| 292 | case LPROPS_FREEABLE: | ||
| 293 | list_add(&lprops->list, &c->freeable_list); | ||
| 294 | c->freeable_cnt += 1; | ||
| 295 | break; | ||
| 296 | case LPROPS_FRDI_IDX: | ||
| 297 | list_add(&lprops->list, &c->frdi_idx_list); | ||
| 298 | break; | ||
| 299 | default: | ||
| 300 | ubifs_assert(0); | ||
| 301 | } | ||
| 302 | lprops->flags &= ~LPROPS_CAT_MASK; | ||
| 303 | lprops->flags |= cat; | ||
| 304 | } | ||
| 305 | |||
| 306 | /** | ||
| 307 | * ubifs_remove_from_cat - remove LEB properties from a category list or heap. | ||
| 308 | * @c: UBIFS file-system description object | ||
| 309 | * @lprops: LEB properties to remove | ||
| 310 | * @cat: LEB category from which to remove | ||
| 311 | * | ||
| 312 | * LEB properties are categorized to enable fast find operations. | ||
| 313 | */ | ||
| 314 | static void ubifs_remove_from_cat(struct ubifs_info *c, | ||
| 315 | struct ubifs_lprops *lprops, int cat) | ||
| 316 | { | ||
| 317 | switch (cat) { | ||
| 318 | case LPROPS_DIRTY: | ||
| 319 | case LPROPS_DIRTY_IDX: | ||
| 320 | case LPROPS_FREE: | ||
| 321 | remove_from_lpt_heap(c, lprops, cat); | ||
| 322 | break; | ||
| 323 | case LPROPS_FREEABLE: | ||
| 324 | c->freeable_cnt -= 1; | ||
| 325 | ubifs_assert(c->freeable_cnt >= 0); | ||
| 326 | /* Fall through */ | ||
| 327 | case LPROPS_UNCAT: | ||
| 328 | case LPROPS_EMPTY: | ||
| 329 | case LPROPS_FRDI_IDX: | ||
| 330 | ubifs_assert(!list_empty(&lprops->list)); | ||
| 331 | list_del(&lprops->list); | ||
| 332 | break; | ||
| 333 | default: | ||
| 334 | ubifs_assert(0); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | |||
| 338 | /** | ||
| 339 | * ubifs_replace_cat - replace lprops in a category list or heap. | ||
| 340 | * @c: UBIFS file-system description object | ||
| 341 | * @old_lprops: LEB properties to replace | ||
| 342 | * @new_lprops: LEB properties with which to replace | ||
| 343 | * | ||
| 344 | * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) | ||
| 345 | * and the lprops that the pnode contains. When that happens, references in | ||
| 346 | * category lists and heaps must be replaced. This function does that. | ||
| 347 | */ | ||
| 348 | void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, | ||
| 349 | struct ubifs_lprops *new_lprops) | ||
| 350 | { | ||
| 351 | int cat; | ||
| 352 | |||
| 353 | cat = new_lprops->flags & LPROPS_CAT_MASK; | ||
| 354 | switch (cat) { | ||
| 355 | case LPROPS_DIRTY: | ||
| 356 | case LPROPS_DIRTY_IDX: | ||
| 357 | case LPROPS_FREE: | ||
| 358 | lpt_heap_replace(c, old_lprops, new_lprops, cat); | ||
| 359 | break; | ||
| 360 | case LPROPS_UNCAT: | ||
| 361 | case LPROPS_EMPTY: | ||
| 362 | case LPROPS_FREEABLE: | ||
| 363 | case LPROPS_FRDI_IDX: | ||
| 364 | list_replace(&old_lprops->list, &new_lprops->list); | ||
| 365 | break; | ||
| 366 | default: | ||
| 367 | ubifs_assert(0); | ||
| 368 | } | ||
| 369 | } | ||
| 370 | |||
| 371 | /** | ||
| 372 | * ubifs_ensure_cat - ensure LEB properties are categorized. | ||
| 373 | * @c: UBIFS file-system description object | ||
| 374 | * @lprops: LEB properties | ||
| 375 | * | ||
| 376 | * A LEB may have fallen off of the bottom of a heap, and ended up as | ||
| 377 | * uncategorized even though it has enough space for us now. If that is the case | ||
| 378 | * this function will put the LEB back onto a heap. | ||
| 379 | */ | ||
| 380 | void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) | ||
| 381 | { | ||
| 382 | int cat = lprops->flags & LPROPS_CAT_MASK; | ||
| 383 | |||
| 384 | if (cat != LPROPS_UNCAT) | ||
| 385 | return; | ||
| 386 | cat = ubifs_categorize_lprops(c, lprops); | ||
| 387 | if (cat == LPROPS_UNCAT) | ||
| 388 | return; | ||
| 389 | ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT); | ||
| 390 | ubifs_add_to_cat(c, lprops, cat); | ||
| 391 | } | ||
| 392 | |||
| 393 | /** | ||
| 394 | * ubifs_categorize_lprops - categorize LEB properties. | ||
| 395 | * @c: UBIFS file-system description object | ||
| 396 | * @lprops: LEB properties to categorize | ||
| 397 | * | ||
| 398 | * LEB properties are categorized to enable fast find operations. This function | ||
| 399 | * returns the LEB category to which the LEB properties belong. Note however | ||
| 400 | * that if the LEB category is stored as a heap and the heap is full, the | ||
| 401 | * LEB properties may have their category changed to %LPROPS_UNCAT. | ||
| 402 | */ | ||
| 403 | int ubifs_categorize_lprops(const struct ubifs_info *c, | ||
| 404 | const struct ubifs_lprops *lprops) | ||
| 405 | { | ||
| 406 | if (lprops->flags & LPROPS_TAKEN) | ||
| 407 | return LPROPS_UNCAT; | ||
| 408 | |||
| 409 | if (lprops->free == c->leb_size) { | ||
| 410 | ubifs_assert(!(lprops->flags & LPROPS_INDEX)); | ||
| 411 | return LPROPS_EMPTY; | ||
| 412 | } | ||
| 413 | |||
| 414 | if (lprops->free + lprops->dirty == c->leb_size) { | ||
| 415 | if (lprops->flags & LPROPS_INDEX) | ||
| 416 | return LPROPS_FRDI_IDX; | ||
| 417 | else | ||
| 418 | return LPROPS_FREEABLE; | ||
| 419 | } | ||
| 420 | |||
| 421 | if (lprops->flags & LPROPS_INDEX) { | ||
| 422 | if (lprops->dirty + lprops->free >= c->min_idx_node_sz) | ||
| 423 | return LPROPS_DIRTY_IDX; | ||
| 424 | } else { | ||
| 425 | if (lprops->dirty >= c->dead_wm && | ||
| 426 | lprops->dirty > lprops->free) | ||
| 427 | return LPROPS_DIRTY; | ||
| 428 | if (lprops->free > 0) | ||
| 429 | return LPROPS_FREE; | ||
| 430 | } | ||
| 431 | |||
| 432 | return LPROPS_UNCAT; | ||
| 433 | } | ||
| 434 | |||
| 435 | /** | ||
| 436 | * change_category - change LEB properties category. | ||
| 437 | * @c: UBIFS file-system description object | ||
| 438 | * @lprops: LEB properties to recategorize | ||
| 439 | * | ||
| 440 | * LEB properties are categorized to enable fast find operations. When the LEB | ||
| 441 | * properties change they must be recategorized. | ||
| 442 | */ | ||
| 443 | static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) | ||
| 444 | { | ||
| 445 | int old_cat = lprops->flags & LPROPS_CAT_MASK; | ||
| 446 | int new_cat = ubifs_categorize_lprops(c, lprops); | ||
| 447 | |||
| 448 | if (old_cat == new_cat) { | ||
| 449 | struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; | ||
| 450 | |||
| 451 | /* lprops on a heap now must be moved up or down */ | ||
| 452 | if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) | ||
| 453 | return; /* Not on a heap */ | ||
| 454 | heap = &c->lpt_heap[new_cat - 1]; | ||
| 455 | adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); | ||
| 456 | } else { | ||
| 457 | ubifs_remove_from_cat(c, lprops, old_cat); | ||
| 458 | ubifs_add_to_cat(c, lprops, new_cat); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
/**
 * ubifs_get_lprops - get reference to LEB properties.
 * @c: the UBIFS file-system description object
 *
 * This function locks lprops. Lprops have to be unlocked by
 * 'ubifs_release_lprops()'. Note this may sleep, so it must not be called
 * from atomic context.
 */
void ubifs_get_lprops(struct ubifs_info *c)
{
	mutex_lock(&c->lp_mutex);
}
| 473 | |||
| 474 | /** | ||
| 475 | * calc_dark - calculate LEB dark space size. | ||
| 476 | * @c: the UBIFS file-system description object | ||
| 477 | * @spc: amount of free and dirty space in the LEB | ||
| 478 | * | ||
| 479 | * This function calculates amount of dark space in an LEB which has @spc bytes | ||
| 480 | * of free and dirty space. Returns the calculations result. | ||
| 481 | * | ||
| 482 | * Dark space is the space which is not always usable - it depends on which | ||
| 483 | * nodes are written in which order. E.g., if an LEB has only 512 free bytes, | ||
| 484 | * it is dark space, because it cannot fit a large data node. So UBIFS cannot | ||
| 485 | * count on this LEB and treat these 512 bytes as usable because it is not true | ||
| 486 | * if, for example, only big chunks of uncompressible data will be written to | ||
| 487 | * the FS. | ||
| 488 | */ | ||
| 489 | static int calc_dark(struct ubifs_info *c, int spc) | ||
| 490 | { | ||
| 491 | ubifs_assert(!(spc & 7)); | ||
| 492 | |||
| 493 | if (spc < c->dark_wm) | ||
| 494 | return spc; | ||
| 495 | |||
| 496 | /* | ||
| 497 | * If we have slightly more space then the dark space watermark, we can | ||
| 498 | * anyway safely assume it we'll be able to write a node of the | ||
| 499 | * smallest size there. | ||
| 500 | */ | ||
| 501 | if (spc - c->dark_wm < MIN_WRITE_SZ) | ||
| 502 | return spc - MIN_WRITE_SZ; | ||
| 503 | |||
| 504 | return c->dark_wm; | ||
| 505 | } | ||
| 506 | |||
/**
 * is_lprops_dirty - determine if LEB properties are dirty.
 * @c: the UBIFS file-system description object
 * @lprops: LEB properties to test
 *
 * Returns %1 if the pnode containing @lprops is dirty (and not being
 * copied-on-write), i.e. the lprops may be modified in place; %0 otherwise.
 */
static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
{
	struct ubifs_pnode *pnode;
	int pos;

	/* Index of @lprops within its pnode's lprops[] array */
	pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1);
	/* Back up @pos entries to lprops[0], then recover the pnode */
	pnode = (struct ubifs_pnode *)container_of(lprops - pos,
						   struct ubifs_pnode,
						   lprops[0]);
	return !test_bit(COW_ZNODE, &pnode->flags) &&
	       test_bit(DIRTY_CNODE, &pnode->flags);
}
| 524 | |||
/**
 * ubifs_change_lp - change LEB properties.
 * @c: the UBIFS file-system description object
 * @lp: LEB properties to change
 * @free: new free space amount
 * @dirty: new dirty space amount
 * @flags: new flags
 * @idx_gc_cnt: change to the count of idx_gc list
 *
 * This function changes LEB properties. This function does not change a LEB
 * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC.
 *
 * This function returns a pointer to the updated LEB properties on success
 * and a negative error code on failure. N.B. the LEB properties may have had to
 * be copied (due to COW) and consequently the pointer returned may not be the
 * same as the pointer passed.
 *
 * Must be called with @c->lp_mutex held.
 */
const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
					   const struct ubifs_lprops *lp,
					   int free, int dirty, int flags,
					   int idx_gc_cnt)
{
	/*
	 * This is the only function that is allowed to change lprops, so we
	 * discard the const qualifier.
	 */
	struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;

	dbg_lp("LEB %d, free %d, dirty %d, flags %d",
	       lprops->lnum, free, dirty, flags);

	ubifs_assert(mutex_is_locked(&c->lp_mutex));
	ubifs_assert(c->lst.empty_lebs >= 0 &&
		     c->lst.empty_lebs <= c->main_lebs);
	ubifs_assert(c->freeable_cnt >= 0);
	ubifs_assert(c->freeable_cnt <= c->main_lebs);
	ubifs_assert(c->lst.taken_empty_lebs >= 0);
	ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs);
	/* All space amounts are kept 8-byte aligned */
	ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7));
	ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7));
	ubifs_assert(!(c->lst.total_used & 7));
	ubifs_assert(free == LPROPS_NC || free >= 0);
	ubifs_assert(dirty == LPROPS_NC || dirty >= 0);

	/* COW the pnode if it is not already dirty (see is_lprops_dirty) */
	if (!is_lprops_dirty(c, lprops)) {
		lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum);
		if (IS_ERR(lprops))
			return lprops;
	} else
		ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum));

	ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));

	spin_lock(&c->space_lock);

	/* Retract the old contribution to taken_empty_lebs ... */
	if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
		c->lst.taken_empty_lebs -= 1;

	/* ... and to the dead/dark/used totals (non-index LEBs only) */
	if (!(lprops->flags & LPROPS_INDEX)) {
		int old_spc;

		old_spc = lprops->free + lprops->dirty;
		if (old_spc < c->dead_wm)
			c->lst.total_dead -= old_spc;
		else
			c->lst.total_dark -= calc_dark(c, old_spc);

		c->lst.total_used -= c->leb_size - old_spc;
	}

	if (free != LPROPS_NC) {
		free = ALIGN(free, 8);
		c->lst.total_free += free - lprops->free;

		/* Increase or decrease empty LEBs counter if needed */
		if (free == c->leb_size) {
			if (lprops->free != c->leb_size)
				c->lst.empty_lebs += 1;
		} else if (lprops->free == c->leb_size)
			c->lst.empty_lebs -= 1;
		lprops->free = free;
	}

	if (dirty != LPROPS_NC) {
		dirty = ALIGN(dirty, 8);
		c->lst.total_dirty += dirty - lprops->dirty;
		lprops->dirty = dirty;
	}

	if (flags != LPROPS_NC) {
		/* Take care about indexing LEBs counter if needed */
		if ((lprops->flags & LPROPS_INDEX)) {
			if (!(flags & LPROPS_INDEX))
				c->lst.idx_lebs -= 1;
		} else if (flags & LPROPS_INDEX)
			c->lst.idx_lebs += 1;
		lprops->flags = flags;
	}

	/* Re-add the contribution based on the new values */
	if (!(lprops->flags & LPROPS_INDEX)) {
		int new_spc;

		new_spc = lprops->free + lprops->dirty;
		if (new_spc < c->dead_wm)
			c->lst.total_dead += new_spc;
		else
			c->lst.total_dark += calc_dark(c, new_spc);

		c->lst.total_used += c->leb_size - new_spc;
	}

	if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
		c->lst.taken_empty_lebs += 1;

	/* New values may imply a new category (list/heap) */
	change_category(c, lprops);

	c->idx_gc_cnt += idx_gc_cnt;

	spin_unlock(&c->space_lock);

	return lprops;
}
| 647 | |||
| 648 | /** | ||
| 649 | * ubifs_release_lprops - release lprops lock. | ||
| 650 | * @c: the UBIFS file-system description object | ||
| 651 | * | ||
| 652 | * This function has to be called after each 'ubifs_get_lprops()' call to | ||
| 653 | * unlock lprops. | ||
| 654 | */ | ||
| 655 | void ubifs_release_lprops(struct ubifs_info *c) | ||
| 656 | { | ||
| 657 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 658 | ubifs_assert(c->lst.empty_lebs >= 0 && | ||
| 659 | c->lst.empty_lebs <= c->main_lebs); | ||
| 660 | |||
| 661 | mutex_unlock(&c->lp_mutex); | ||
| 662 | } | ||
| 663 | |||
| 664 | /** | ||
| 665 | * ubifs_get_lp_stats - get lprops statistics. | ||
| 666 | * @c: UBIFS file-system description object | ||
| 667 | * @st: return statistics | ||
| 668 | */ | ||
| 669 | void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) | ||
| 670 | { | ||
| 671 | spin_lock(&c->space_lock); | ||
| 672 | memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); | ||
| 673 | spin_unlock(&c->space_lock); | ||
| 674 | } | ||
| 675 | |||
| 676 | /** | ||
| 677 | * ubifs_change_one_lp - change LEB properties. | ||
| 678 | * @c: the UBIFS file-system description object | ||
| 679 | * @lnum: LEB to change properties for | ||
| 680 | * @free: amount of free space | ||
| 681 | * @dirty: amount of dirty space | ||
| 682 | * @flags_set: flags to set | ||
| 683 | * @flags_clean: flags to clean | ||
| 684 | * @idx_gc_cnt: change to the count of idx_gc list | ||
| 685 | * | ||
| 686 | * This function changes properties of LEB @lnum. It is a helper wrapper over | ||
| 687 | * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the | ||
| 688 | * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and | ||
| 689 | * a negative error code in case of failure. | ||
| 690 | */ | ||
| 691 | int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, | ||
| 692 | int flags_set, int flags_clean, int idx_gc_cnt) | ||
| 693 | { | ||
| 694 | int err = 0, flags; | ||
| 695 | const struct ubifs_lprops *lp; | ||
| 696 | |||
| 697 | ubifs_get_lprops(c); | ||
| 698 | |||
| 699 | lp = ubifs_lpt_lookup_dirty(c, lnum); | ||
| 700 | if (IS_ERR(lp)) { | ||
| 701 | err = PTR_ERR(lp); | ||
| 702 | goto out; | ||
| 703 | } | ||
| 704 | |||
| 705 | flags = (lp->flags | flags_set) & ~flags_clean; | ||
| 706 | lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); | ||
| 707 | if (IS_ERR(lp)) | ||
| 708 | err = PTR_ERR(lp); | ||
| 709 | |||
| 710 | out: | ||
| 711 | ubifs_release_lprops(c); | ||
| 712 | return err; | ||
| 713 | } | ||
| 714 | |||
| 715 | /** | ||
| 716 | * ubifs_update_one_lp - update LEB properties. | ||
| 717 | * @c: the UBIFS file-system description object | ||
| 718 | * @lnum: LEB to change properties for | ||
| 719 | * @free: amount of free space | ||
| 720 | * @dirty: amount of dirty space to add | ||
| 721 | * @flags_set: flags to set | ||
| 722 | * @flags_clean: flags to clean | ||
| 723 | * | ||
| 724 | * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to | ||
| 725 | * current dirty space, not substitutes it. | ||
| 726 | */ | ||
| 727 | int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, | ||
| 728 | int flags_set, int flags_clean) | ||
| 729 | { | ||
| 730 | int err = 0, flags; | ||
| 731 | const struct ubifs_lprops *lp; | ||
| 732 | |||
| 733 | ubifs_get_lprops(c); | ||
| 734 | |||
| 735 | lp = ubifs_lpt_lookup_dirty(c, lnum); | ||
| 736 | if (IS_ERR(lp)) { | ||
| 737 | err = PTR_ERR(lp); | ||
| 738 | goto out; | ||
| 739 | } | ||
| 740 | |||
| 741 | flags = (lp->flags | flags_set) & ~flags_clean; | ||
| 742 | lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); | ||
| 743 | if (IS_ERR(lp)) | ||
| 744 | err = PTR_ERR(lp); | ||
| 745 | |||
| 746 | out: | ||
| 747 | ubifs_release_lprops(c); | ||
| 748 | return err; | ||
| 749 | } | ||
| 750 | |||
| 751 | /** | ||
| 752 | * ubifs_read_one_lp - read LEB properties. | ||
| 753 | * @c: the UBIFS file-system description object | ||
| 754 | * @lnum: LEB to read properties for | ||
| 755 | * @lp: where to store read properties | ||
| 756 | * | ||
| 757 | * This helper function reads properties of a LEB @lnum and stores them in @lp. | ||
| 758 | * Returns zero in case of success and a negative error code in case of | ||
| 759 | * failure. | ||
| 760 | */ | ||
| 761 | int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) | ||
| 762 | { | ||
| 763 | int err = 0; | ||
| 764 | const struct ubifs_lprops *lpp; | ||
| 765 | |||
| 766 | ubifs_get_lprops(c); | ||
| 767 | |||
| 768 | lpp = ubifs_lpt_lookup(c, lnum); | ||
| 769 | if (IS_ERR(lpp)) { | ||
| 770 | err = PTR_ERR(lpp); | ||
| 771 | goto out; | ||
| 772 | } | ||
| 773 | |||
| 774 | memcpy(lp, lpp, sizeof(struct ubifs_lprops)); | ||
| 775 | |||
| 776 | out: | ||
| 777 | ubifs_release_lprops(c); | ||
| 778 | return err; | ||
| 779 | } | ||
| 780 | |||
| 781 | /** | ||
| 782 | * ubifs_fast_find_free - try to find a LEB with free space quickly. | ||
| 783 | * @c: the UBIFS file-system description object | ||
| 784 | * | ||
| 785 | * This function returns LEB properties for a LEB with free space or %NULL if | ||
| 786 | * the function is unable to find a LEB quickly. | ||
| 787 | */ | ||
| 788 | const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) | ||
| 789 | { | ||
| 790 | struct ubifs_lprops *lprops; | ||
| 791 | struct ubifs_lpt_heap *heap; | ||
| 792 | |||
| 793 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 794 | |||
| 795 | heap = &c->lpt_heap[LPROPS_FREE - 1]; | ||
| 796 | if (heap->cnt == 0) | ||
| 797 | return NULL; | ||
| 798 | |||
| 799 | lprops = heap->arr[0]; | ||
| 800 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 801 | ubifs_assert(!(lprops->flags & LPROPS_INDEX)); | ||
| 802 | return lprops; | ||
| 803 | } | ||
| 804 | |||
| 805 | /** | ||
| 806 | * ubifs_fast_find_empty - try to find an empty LEB quickly. | ||
| 807 | * @c: the UBIFS file-system description object | ||
| 808 | * | ||
| 809 | * This function returns LEB properties for an empty LEB or %NULL if the | ||
| 810 | * function is unable to find an empty LEB quickly. | ||
| 811 | */ | ||
| 812 | const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) | ||
| 813 | { | ||
| 814 | struct ubifs_lprops *lprops; | ||
| 815 | |||
| 816 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 817 | |||
| 818 | if (list_empty(&c->empty_list)) | ||
| 819 | return NULL; | ||
| 820 | |||
| 821 | lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); | ||
| 822 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 823 | ubifs_assert(!(lprops->flags & LPROPS_INDEX)); | ||
| 824 | ubifs_assert(lprops->free == c->leb_size); | ||
| 825 | return lprops; | ||
| 826 | } | ||
| 827 | |||
| 828 | /** | ||
| 829 | * ubifs_fast_find_freeable - try to find a freeable LEB quickly. | ||
| 830 | * @c: the UBIFS file-system description object | ||
| 831 | * | ||
| 832 | * This function returns LEB properties for a freeable LEB or %NULL if the | ||
| 833 | * function is unable to find a freeable LEB quickly. | ||
| 834 | */ | ||
| 835 | const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) | ||
| 836 | { | ||
| 837 | struct ubifs_lprops *lprops; | ||
| 838 | |||
| 839 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 840 | |||
| 841 | if (list_empty(&c->freeable_list)) | ||
| 842 | return NULL; | ||
| 843 | |||
| 844 | lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); | ||
| 845 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 846 | ubifs_assert(!(lprops->flags & LPROPS_INDEX)); | ||
| 847 | ubifs_assert(lprops->free + lprops->dirty == c->leb_size); | ||
| 848 | ubifs_assert(c->freeable_cnt > 0); | ||
| 849 | return lprops; | ||
| 850 | } | ||
| 851 | |||
| 852 | /** | ||
| 853 | * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. | ||
| 854 | * @c: the UBIFS file-system description object | ||
| 855 | * | ||
| 856 | * This function returns LEB properties for a freeable index LEB or %NULL if the | ||
| 857 | * function is unable to find a freeable index LEB quickly. | ||
| 858 | */ | ||
| 859 | const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) | ||
| 860 | { | ||
| 861 | struct ubifs_lprops *lprops; | ||
| 862 | |||
| 863 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 864 | |||
| 865 | if (list_empty(&c->frdi_idx_list)) | ||
| 866 | return NULL; | ||
| 867 | |||
| 868 | lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); | ||
| 869 | ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); | ||
| 870 | ubifs_assert((lprops->flags & LPROPS_INDEX)); | ||
| 871 | ubifs_assert(lprops->free + lprops->dirty == c->leb_size); | ||
| 872 | return lprops; | ||
| 873 | } | ||
| 874 | |||
| 875 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 876 | |||
/**
 * dbg_check_cats - check category heaps and lists.
 * @c: UBIFS file-system description object
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int dbg_check_cats(struct ubifs_info *c)
{
	struct ubifs_lprops *lprops;
	struct list_head *pos;
	int i, cat;

	/* Skip unless general or lprops checking is enabled */
	if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
		return 0;

	/* Empty list: every entry must be fully free and not taken */
	list_for_each_entry(lprops, &c->empty_list, list) {
		if (lprops->free != c->leb_size) {
			ubifs_err("non-empty LEB %d on empty list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
		if (lprops->flags & LPROPS_TAKEN) {
			ubifs_err("taken LEB %d on empty list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
	}

	/* Freeable list: free + dirty must cover the whole LEB */
	i = 0;
	list_for_each_entry(lprops, &c->freeable_list, list) {
		if (lprops->free + lprops->dirty != c->leb_size) {
			ubifs_err("non-freeable LEB %d on freeable list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
		if (lprops->flags & LPROPS_TAKEN) {
			ubifs_err("taken LEB %d on freeable list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
		i += 1;
	}
	/* The counted length must match the cached freeable_cnt */
	if (i != c->freeable_cnt) {
		ubifs_err("freeable list count %d expected %d", i,
			  c->freeable_cnt);
		return -EINVAL;
	}

	/* idx_gc list length must match the cached idx_gc_cnt */
	i = 0;
	list_for_each(pos, &c->idx_gc)
		i += 1;
	if (i != c->idx_gc_cnt) {
		ubifs_err("idx_gc list count %d expected %d", i,
			  c->idx_gc_cnt);
		return -EINVAL;
	}

	/* frdi_idx list: freeable, not taken, and must be index LEBs */
	list_for_each_entry(lprops, &c->frdi_idx_list, list) {
		if (lprops->free + lprops->dirty != c->leb_size) {
			ubifs_err("non-freeable LEB %d on frdi_idx list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
		if (lprops->flags & LPROPS_TAKEN) {
			ubifs_err("taken LEB %d on frdi_idx list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
		if (!(lprops->flags & LPROPS_INDEX)) {
			ubifs_err("non-index LEB %d on frdi_idx list "
				  "(free %d dirty %d flags %d)", lprops->lnum,
				  lprops->free, lprops->dirty, lprops->flags);
			return -EINVAL;
		}
	}

	/* Heaps: entries must be non-NULL, back-reference their slot, and
	 * not be taken */
	for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) {
		struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];

		for (i = 0; i < heap->cnt; i++) {
			lprops = heap->arr[i];
			if (!lprops) {
				ubifs_err("null ptr in LPT heap cat %d", cat);
				return -EINVAL;
			}
			if (lprops->hpos != i) {
				ubifs_err("bad ptr in LPT heap cat %d", cat);
				return -EINVAL;
			}
			if (lprops->flags & LPROPS_TAKEN) {
				ubifs_err("taken LEB in LPT heap cat %d", cat);
				return -EINVAL;
			}
		}
	}

	return 0;
}
| 981 | |||
| 982 | void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, | ||
| 983 | int add_pos) | ||
| 984 | { | ||
| 985 | int i = 0, j, err = 0; | ||
| 986 | |||
| 987 | if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) | ||
| 988 | return; | ||
| 989 | |||
| 990 | for (i = 0; i < heap->cnt; i++) { | ||
| 991 | struct ubifs_lprops *lprops = heap->arr[i]; | ||
| 992 | struct ubifs_lprops *lp; | ||
| 993 | |||
| 994 | if (i != add_pos) | ||
| 995 | if ((lprops->flags & LPROPS_CAT_MASK) != cat) { | ||
| 996 | err = 1; | ||
| 997 | goto out; | ||
| 998 | } | ||
| 999 | if (lprops->hpos != i) { | ||
| 1000 | err = 2; | ||
| 1001 | goto out; | ||
| 1002 | } | ||
| 1003 | lp = ubifs_lpt_lookup(c, lprops->lnum); | ||
| 1004 | if (IS_ERR(lp)) { | ||
| 1005 | err = 3; | ||
| 1006 | goto out; | ||
| 1007 | } | ||
| 1008 | if (lprops != lp) { | ||
| 1009 | dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", | ||
| 1010 | (size_t)lprops, (size_t)lp, lprops->lnum, | ||
| 1011 | lp->lnum); | ||
| 1012 | err = 4; | ||
| 1013 | goto out; | ||
| 1014 | } | ||
| 1015 | for (j = 0; j < i; j++) { | ||
| 1016 | lp = heap->arr[j]; | ||
| 1017 | if (lp == lprops) { | ||
| 1018 | err = 5; | ||
| 1019 | goto out; | ||
| 1020 | } | ||
| 1021 | if (lp->lnum == lprops->lnum) { | ||
| 1022 | err = 6; | ||
| 1023 | goto out; | ||
| 1024 | } | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | out: | ||
| 1028 | if (err) { | ||
| 1029 | dbg_msg("failed cat %d hpos %d err %d", cat, i, err); | ||
| 1030 | dbg_dump_stack(); | ||
| 1031 | dbg_dump_heap(c, heap, cat); | ||
| 1032 | } | ||
| 1033 | } | ||
| 1034 | |||
/**
 * struct scan_check_data - data provided to scan callback function.
 * @lst: LEB properties statistics accumulated over the scanned LEBs
 * @err: error code (%0 while the scan has found no problems)
 */
struct scan_check_data {
	struct ubifs_lp_stats lst;
	int err;
};
| 1044 | |||
/**
 * scan_check_cb - scan callback.
 * @c: the UBIFS file-system description object
 * @lp: LEB properties to scan
 * @in_tree: whether the LEB properties are in main memory
 * @data: information passed to and from the caller of the scan
 *
 * This function scans LEB @lp->lnum and cross-checks the on-flash contents
 * against the recorded LEB properties, accumulating per-LEB statistics into
 * @data->lst. On inconsistency, @data->err is set to %-EINVAL.
 *
 * This function returns a code that indicates whether the scan should continue
 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
 * (%LPT_SCAN_STOP).
 */
static int scan_check_cb(struct ubifs_info *c,
			 const struct ubifs_lprops *lp, int in_tree,
			 struct scan_check_data *data)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	struct ubifs_lp_stats *lst = &data->lst;
	int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;

	/* The recorded category must match a fresh categorization */
	cat = lp->flags & LPROPS_CAT_MASK;
	if (cat != LPROPS_UNCAT) {
		cat = ubifs_categorize_lprops(c, lp);
		if (cat != (lp->flags & LPROPS_CAT_MASK)) {
			ubifs_err("bad LEB category %d expected %d",
				  (lp->flags & LPROPS_CAT_MASK), cat);
			goto out;
		}
	}

	/* Check lp is on its category list (if it has one) */
	if (in_tree) {
		struct list_head *list = NULL;

		switch (cat) {
		case LPROPS_EMPTY:
			list = &c->empty_list;
			break;
		case LPROPS_FREEABLE:
			list = &c->freeable_list;
			break;
		case LPROPS_FRDI_IDX:
			list = &c->frdi_idx_list;
			break;
		case LPROPS_UNCAT:
			list = &c->uncat_list;
			break;
		}
		if (list) {
			struct ubifs_lprops *lprops;
			int found = 0;

			list_for_each_entry(lprops, list, list) {
				if (lprops == lp) {
					found = 1;
					break;
				}
			}
			if (!found) {
				ubifs_err("bad LPT list (category %d)", cat);
				goto out;
			}
		}
	}

	/* Check lp is on its category heap (if it has one) */
	if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) {
		struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];

		/*
		 * NOTE(review): if lp->hpos were -1, the second comparison
		 * would access heap->arr[-1] before the check fails -
		 * presumably a categorized LEB always has a valid hpos;
		 * verify.
		 */
		if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
		    lp != heap->arr[lp->hpos]) {
			ubifs_err("bad LPT heap (category %d)", cat);
			goto out;
		}
	}

	sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
	if (IS_ERR(sleb)) {
		/*
		 * After an unclean unmount, empty and freeable LEBs
		 * may contain garbage.
		 */
		if (lp->free == c->leb_size) {
			ubifs_err("scan errors were in empty LEB "
				  "- continuing checking");
			lst->empty_lebs += 1;
			lst->total_free += c->leb_size;
			lst->total_dark += calc_dark(c, c->leb_size);
			return LPT_SCAN_CONTINUE;
		}

		if (lp->free + lp->dirty == c->leb_size &&
		    !(lp->flags & LPROPS_INDEX)) {
			ubifs_err("scan errors were in freeable LEB "
				  "- continuing checking");
			lst->total_free += lp->free;
			lst->total_dirty += lp->dirty;
			lst->total_dark += calc_dark(c, c->leb_size);
			return LPT_SCAN_CONTINUE;
		}
		data->err = PTR_ERR(sleb);
		return LPT_SCAN_STOP;
	}

	/*
	 * Walk the scanned nodes. -1 means "LEB kind not decided yet"; the
	 * first node decides whether this is an index LEB or a data LEB.
	 */
	is_idx = -1;
	list_for_each_entry(snod, &sleb->nodes, list) {
		int found, level = 0;

		cond_resched();

		if (is_idx == -1)
			is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;

		/* Index and data nodes must not be mixed in one LEB */
		if (is_idx && snod->type != UBIFS_IDX_NODE) {
			ubifs_err("indexing node in data LEB %d:%d",
				  lnum, snod->offs);
			goto out_destroy;
		}

		if (snod->type == UBIFS_IDX_NODE) {
			struct ubifs_idx_node *idx = snod->node;

			key_read(c, ubifs_idx_key(c, idx), &snod->key);
			level = le16_to_cpu(idx->level);
		}

		/* Only nodes still referenced by the TNC count as used */
		found = ubifs_tnc_has_node(c, &snod->key, level, lnum,
					   snod->offs, is_idx);
		if (found) {
			if (found < 0)
				goto out_destroy;
			used += ALIGN(snod->len, 8);
		}
	}

	free = c->leb_size - sleb->endpt;
	dirty = sleb->endpt - used;

	if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
	    dirty < 0) {
		ubifs_err("bad calculated accounting for LEB %d: "
			  "free %d, dirty %d", lnum, free, dirty);
		goto out_destroy;
	}

	if (lp->free + lp->dirty == c->leb_size &&
	    free + dirty == c->leb_size)
		if ((is_idx && !(lp->flags & LPROPS_INDEX)) ||
		    (!is_idx && free == c->leb_size) ||
		    lp->free == c->leb_size) {
			/*
			 * Empty or freeable LEBs could contain index
			 * nodes from an uncompleted commit due to an
			 * unclean unmount. Or they could be empty for
			 * the same reason. Or it may simply not have been
			 * unmapped.
			 */
			free = lp->free;
			dirty = lp->dirty;
			is_idx = 0;
		}

	if (is_idx && lp->free + lp->dirty == free + dirty &&
	    lnum != c->ihead_lnum) {
		/*
		 * After an unclean unmount, an index LEB could have a different
		 * amount of free space than the value recorded by lprops. That
		 * is because the in-the-gaps method may use free space or
		 * create free space (as a side-effect of using ubi_leb_change
		 * and not writing the whole LEB). The incorrect free space
		 * value is not a problem because the index is only ever
		 * allocated empty LEBs, so there will never be an attempt to
		 * write to the free space at the end of an index LEB - except
		 * by the in-the-gaps method for which it is not a problem.
		 */
		free = lp->free;
		dirty = lp->dirty;
	}

	if (lp->free != free || lp->dirty != dirty)
		goto out_print;

	if (is_idx && !(lp->flags & LPROPS_INDEX)) {
		if (free == c->leb_size)
			/* Free but not unmapped LEB, it's fine */
			is_idx = 0;
		else {
			ubifs_err("indexing node without indexing "
				  "flag");
			goto out_print;
		}
	}

	if (!is_idx && (lp->flags & LPROPS_INDEX)) {
		ubifs_err("data node with indexing flag");
		goto out_print;
	}

	/* Fold this LEB into the overall statistics */
	if (free == c->leb_size)
		lst->empty_lebs += 1;

	if (is_idx)
		lst->idx_lebs += 1;

	if (!(lp->flags & LPROPS_INDEX))
		lst->total_used += c->leb_size - free - dirty;
	lst->total_free += free;
	lst->total_dirty += dirty;

	if (!(lp->flags & LPROPS_INDEX)) {
		int spc = free + dirty;

		/* Reclaimable space below the dead watermark is "dead" */
		if (spc < c->dead_wm)
			lst->total_dead += spc;
		else
			lst->total_dark += calc_dark(c, spc);
	}

	ubifs_scan_destroy(sleb);

	return LPT_SCAN_CONTINUE;

out_print:
	ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
		  "should be free %d, dirty %d",
		  lnum, lp->free, lp->dirty, lp->flags, free, dirty);
	dbg_dump_leb(c, lnum);
out_destroy:
	ubifs_scan_destroy(sleb);
out:
	data->err = -EINVAL;
	return LPT_SCAN_STOP;
}
| 1279 | |||
| 1280 | /** | ||
| 1281 | * dbg_check_lprops - check all LEB properties. | ||
| 1282 | * @c: UBIFS file-system description object | ||
| 1283 | * | ||
| 1284 | * This function checks all LEB properties and makes sure they are all correct. | ||
| 1285 | * It returns zero if everything is fine, %-EINVAL if there is an inconsistency | ||
| 1286 | * and other negative error codes in case of other errors. This function is | ||
| 1287 | * called while the file system is locked (because of commit start), so no | ||
| 1288 | * additional locking is required. Note that locking the LPT mutex would cause | ||
| 1289 | * a circular lock dependency with the TNC mutex. | ||
| 1290 | */ | ||
| 1291 | int dbg_check_lprops(struct ubifs_info *c) | ||
| 1292 | { | ||
| 1293 | int i, err; | ||
| 1294 | struct scan_check_data data; | ||
| 1295 | struct ubifs_lp_stats *lst = &data.lst; | ||
| 1296 | |||
| 1297 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) | ||
| 1298 | return 0; | ||
| 1299 | |||
| 1300 | /* | ||
| 1301 | * As we are going to scan the media, the write buffers have to be | ||
| 1302 | * synchronized. | ||
| 1303 | */ | ||
| 1304 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 1305 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); | ||
| 1306 | if (err) | ||
| 1307 | return err; | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | memset(lst, 0, sizeof(struct ubifs_lp_stats)); | ||
| 1311 | |||
| 1312 | data.err = 0; | ||
| 1313 | err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, | ||
| 1314 | (ubifs_lpt_scan_callback)scan_check_cb, | ||
| 1315 | &data); | ||
| 1316 | if (err && err != -ENOSPC) | ||
| 1317 | goto out; | ||
| 1318 | if (data.err) { | ||
| 1319 | err = data.err; | ||
| 1320 | goto out; | ||
| 1321 | } | ||
| 1322 | |||
| 1323 | if (lst->empty_lebs != c->lst.empty_lebs || | ||
| 1324 | lst->idx_lebs != c->lst.idx_lebs || | ||
| 1325 | lst->total_free != c->lst.total_free || | ||
| 1326 | lst->total_dirty != c->lst.total_dirty || | ||
| 1327 | lst->total_used != c->lst.total_used) { | ||
| 1328 | ubifs_err("bad overall accounting"); | ||
| 1329 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " | ||
| 1330 | "total_free %lld, total_dirty %lld, total_used %lld", | ||
| 1331 | lst->empty_lebs, lst->idx_lebs, lst->total_free, | ||
| 1332 | lst->total_dirty, lst->total_used); | ||
| 1333 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " | ||
| 1334 | "total_free %lld, total_dirty %lld, total_used %lld", | ||
| 1335 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, | ||
| 1336 | c->lst.total_dirty, c->lst.total_used); | ||
| 1337 | err = -EINVAL; | ||
| 1338 | goto out; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (lst->total_dead != c->lst.total_dead || | ||
| 1342 | lst->total_dark != c->lst.total_dark) { | ||
| 1343 | ubifs_err("bad dead/dark space accounting"); | ||
| 1344 | ubifs_err("calculated: total_dead %lld, total_dark %lld", | ||
| 1345 | lst->total_dead, lst->total_dark); | ||
| 1346 | ubifs_err("read from lprops: total_dead %lld, total_dark %lld", | ||
| 1347 | c->lst.total_dead, c->lst.total_dark); | ||
| 1348 | err = -EINVAL; | ||
| 1349 | goto out; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | err = dbg_check_cats(c); | ||
| 1353 | out: | ||
| 1354 | return err; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c new file mode 100644 index 000000000000..9ff2463177e5 --- /dev/null +++ b/fs/ubifs/lpt.c | |||
| @@ -0,0 +1,2243 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements the LEB properties tree (LPT) area. The LPT area | ||
| 25 | * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and | ||
| 26 | * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits | ||
| 27 | * between the log and the orphan area. | ||
| 28 | * | ||
| 29 | * The LPT area is like a miniature self-contained file system. It is required | ||
| 30 | * that it never runs out of space, is fast to access and update, and scales | ||
| 31 | * logarithmically. The LEB properties tree is implemented as a wandering tree | ||
| 32 | * much like the TNC, and the LPT area has its own garbage collection. | ||
| 33 | * | ||
| 34 | * The LPT has two slightly different forms called the "small model" and the | ||
| 35 | * "big model". The small model is used when the entire LEB properties table | ||
| 36 | * can be written into a single eraseblock. In that case, garbage collection | ||
| 37 | * consists of just writing the whole table, which therefore makes all other | ||
| 38 | * eraseblocks reusable. In the case of the big model, dirty eraseblocks are | ||
| 39 | * selected for garbage collection, which consists of marking the nodes in | ||
| 40 | * that LEB as dirty, and then only the dirty nodes are written out. Also, in | ||
| 41 | * the case of the big model, a table of LEB numbers is saved so that the entire | ||
| 42 | * LPT does not have to be scanned looking for empty eraseblocks when UBIFS is first | ||
| 43 | * mounted. | ||
| 44 | */ | ||
| 45 | |||
| 46 | #include <linux/crc16.h> | ||
| 47 | #include "ubifs.h" | ||
| 48 | |||
/**
 * do_calc_lpt_geom - calculate sizes for the LPT area.
 * @c: the UBIFS file-system description object
 *
 * Calculate the sizes of LPT bit fields, nodes, and tree, based on the
 * properties of the flash and whether LPT is "big" (c->big_lpt).
 */
static void do_calc_lpt_geom(struct ubifs_info *c)
{
	int i, n, bits, per_leb_wastage, max_pnode_cnt;
	long long sz, tot_wastage;

	/* Worst-case pnode count if the FS is resized up to c->max_leb_cnt */
	n = c->main_lebs + c->max_leb_cnt - c->leb_cnt;
	max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);

	/* Smallest tree height whose leaf level can hold max_pnode_cnt */
	c->lpt_hght = 1;
	n = UBIFS_LPT_FANOUT;
	while (n < max_pnode_cnt) {
		c->lpt_hght += 1;
		n <<= UBIFS_LPT_FANOUT_SHIFT;
	}

	c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);

	/* Sum the nnode count over every internal level of the tree */
	n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT);
	c->nnode_cnt = n;
	for (i = 1; i < c->lpt_hght; i++) {
		n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
		c->nnode_cnt += n;
	}

	/*
	 * Bit widths. Space values are packed in units of 8 bytes (see the
	 * ">> 3" in ubifs_pack_pnode()), hence the "- 3".
	 */
	c->space_bits = fls(c->leb_size) - 3;
	c->lpt_lnum_bits = fls(c->lpt_lebs);
	c->lpt_offs_bits = fls(c->leb_size - 1);
	c->lpt_spc_bits = fls(c->leb_size);

	/* Bits needed to number any pnode */
	n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT);
	c->pcnt_bits = fls(n - 1);

	/* Bits needed to hold any LEB number */
	c->lnum_bits = fls(c->max_leb_cnt - 1);

	/* Packed pnode: CRC + type + (big model) number + per-LEB fields */
	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
	       (c->big_lpt ? c->pcnt_bits : 0) +
	       (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT;
	c->pnode_sz = (bits + 7) / 8;

	/* Packed nnode: CRC + type + (big model) number + branch pointers */
	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
	       (c->big_lpt ? c->pcnt_bits : 0) +
	       (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT;
	c->nnode_sz = (bits + 7) / 8;

	/* Packed ltab: CRC + type + a free/dirty pair per LPT LEB */
	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
	       c->lpt_lebs * c->lpt_spc_bits * 2;
	c->ltab_sz = (bits + 7) / 8;

	/* Packed lsave: CRC + type + lsave_cnt LEB numbers */
	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
	       c->lnum_bits * c->lsave_cnt;
	c->lsave_sz = (bits + 7) / 8;

	/* Calculate the minimum LPT size */
	c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
	c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
	c->lpt_sz += c->ltab_sz;
	c->lpt_sz += c->lsave_sz;

	/* Add wastage: unusable space at the end of each LPT LEB */
	sz = c->lpt_sz;
	per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz);
	sz += per_leb_wastage;
	tot_wastage = per_leb_wastage;
	while (sz > c->leb_size) {
		sz += per_leb_wastage;
		sz -= c->leb_size;
		tot_wastage += per_leb_wastage;
	}
	tot_wastage += ALIGN(sz, c->min_io_size) - sz;
	c->lpt_sz += tot_wastage;
}
| 127 | |||
/**
 * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area.
 * @c: the UBIFS file-system description object
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_calc_lpt_geom(struct ubifs_info *c)
{
	int lebs_needed;
	uint64_t sz;

	do_calc_lpt_geom(c);

	/* Verify that lpt_lebs is big enough */
	sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
	sz += c->leb_size - 1;
	do_div(sz, c->leb_size);
	lebs_needed = sz;
	if (lebs_needed > c->lpt_lebs) {
		ubifs_err("too few LPT LEBs");
		return -EINVAL;
	}

	/* Verify that ltab fits in a single LEB (since ltab is a single node) */
	if (c->ltab_sz > c->leb_size) {
		ubifs_err("LPT ltab too big");
		return -EINVAL;
	}

	/* NOTE(review): flag consumed elsewhere; set only for the big model */
	c->check_lpt_free = c->big_lpt;

	return 0;
}
| 161 | |||
| 162 | /** | ||
| 163 | * calc_dflt_lpt_geom - calculate default LPT geometry. | ||
| 164 | * @c: the UBIFS file-system description object | ||
| 165 | * @main_lebs: number of main area LEBs is passed and returned here | ||
| 166 | * @big_lpt: whether the LPT area is "big" is returned here | ||
| 167 | * | ||
| 168 | * The size of the LPT area depends on parameters that themselves are dependent | ||
| 169 | * on the size of the LPT area. This function, successively recalculates the LPT | ||
| 170 | * area geometry until the parameters and resultant geometry are consistent. | ||
| 171 | * | ||
| 172 | * This function returns %0 on success and a negative error code on failure. | ||
| 173 | */ | ||
| 174 | static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, | ||
| 175 | int *big_lpt) | ||
| 176 | { | ||
| 177 | int i, lebs_needed; | ||
| 178 | uint64_t sz; | ||
| 179 | |||
| 180 | /* Start by assuming the minimum number of LPT LEBs */ | ||
| 181 | c->lpt_lebs = UBIFS_MIN_LPT_LEBS; | ||
| 182 | c->main_lebs = *main_lebs - c->lpt_lebs; | ||
| 183 | if (c->main_lebs <= 0) | ||
| 184 | return -EINVAL; | ||
| 185 | |||
| 186 | /* And assume we will use the small LPT model */ | ||
| 187 | c->big_lpt = 0; | ||
| 188 | |||
| 189 | /* | ||
| 190 | * Calculate the geometry based on assumptions above and then see if it | ||
| 191 | * makes sense | ||
| 192 | */ | ||
| 193 | do_calc_lpt_geom(c); | ||
| 194 | |||
| 195 | /* Small LPT model must have lpt_sz < leb_size */ | ||
| 196 | if (c->lpt_sz > c->leb_size) { | ||
| 197 | /* Nope, so try again using big LPT model */ | ||
| 198 | c->big_lpt = 1; | ||
| 199 | do_calc_lpt_geom(c); | ||
| 200 | } | ||
| 201 | |||
| 202 | /* Now check there are enough LPT LEBs */ | ||
| 203 | for (i = 0; i < 64 ; i++) { | ||
| 204 | sz = c->lpt_sz * 4; /* Allow 4 times the size */ | ||
| 205 | sz += c->leb_size - 1; | ||
| 206 | do_div(sz, c->leb_size); | ||
| 207 | lebs_needed = sz; | ||
| 208 | if (lebs_needed > c->lpt_lebs) { | ||
| 209 | /* Not enough LPT LEBs so try again with more */ | ||
| 210 | c->lpt_lebs = lebs_needed; | ||
| 211 | c->main_lebs = *main_lebs - c->lpt_lebs; | ||
| 212 | if (c->main_lebs <= 0) | ||
| 213 | return -EINVAL; | ||
| 214 | do_calc_lpt_geom(c); | ||
| 215 | continue; | ||
| 216 | } | ||
| 217 | if (c->ltab_sz > c->leb_size) { | ||
| 218 | ubifs_err("LPT ltab too big"); | ||
| 219 | return -EINVAL; | ||
| 220 | } | ||
| 221 | *main_lebs = c->main_lebs; | ||
| 222 | *big_lpt = c->big_lpt; | ||
| 223 | return 0; | ||
| 224 | } | ||
| 225 | return -EINVAL; | ||
| 226 | } | ||
| 227 | |||
/**
 * pack_bits - pack bit fields end-to-end.
 * @addr: address at which to pack (passed and next address returned)
 * @pos: bit position at which to pack (passed and next position returned)
 * @val: value to pack
 * @nrbits: number of bits of value to pack (1-32)
 *
 * Bits are packed low-order first: the least significant bits of @val land
 * in the lowest free bits of the current byte (mirrored by
 * ubifs_unpack_bits()).
 */
static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits)
{
	uint8_t *p = *addr;
	int b = *pos;

	ubifs_assert(nrbits > 0);
	ubifs_assert(nrbits <= 32);
	ubifs_assert(*pos >= 0);
	ubifs_assert(*pos < 8);
	ubifs_assert((val >> nrbits) == 0 || nrbits == 32);
	if (b) {
		/*
		 * Mid-byte start: OR the low bits of @val into the free high
		 * bits of the current byte, then spill whole bytes. After
		 * "nrbits += b", @nrbits counts bits from the start of the
		 * current byte.
		 */
		*p |= ((uint8_t)val) << b;
		nrbits += b;
		if (nrbits > 8) {
			*++p = (uint8_t)(val >>= (8 - b));
			if (nrbits > 16) {
				*++p = (uint8_t)(val >>= 8);
				if (nrbits > 24) {
					*++p = (uint8_t)(val >>= 8);
					if (nrbits > 32)
						*++p = (uint8_t)(val >>= 8);
				}
			}
		}
	} else {
		/* Byte-aligned start: whole bytes are simply assigned */
		*p = (uint8_t)val;
		if (nrbits > 8) {
			*++p = (uint8_t)(val >>= 8);
			if (nrbits > 16) {
				*++p = (uint8_t)(val >>= 8);
				if (nrbits > 24)
					*++p = (uint8_t)(val >>= 8);
			}
		}
	}
	/* Advance the cursor; a multiple of 8 bits starts a fresh byte */
	b = nrbits & 7;
	if (b == 0)
		p++;
	*addr = p;
	*pos = b;
}
| 276 | |||
/**
 * ubifs_unpack_bits - unpack bit fields.
 * @addr: address at which to unpack (passed and next address returned)
 * @pos: bit position at which to unpack (passed and next position returned)
 * @nrbits: number of bits of value to unpack (1-32)
 *
 * This function returns the value unpacked.
 */
uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
{
	/* Shift amount used below to discard bits above @nrbits */
	const int k = 32 - nrbits;
	uint8_t *p = *addr;
	int b = *pos;
	uint32_t val;

	ubifs_assert(nrbits > 0);
	ubifs_assert(nrbits <= 32);
	ubifs_assert(*pos >= 0);
	ubifs_assert(*pos < 8);
	if (b) {
		/*
		 * Mid-byte start: assemble the four following bytes, then
		 * splice in the remaining high bits of the current byte.
		 * NOTE(review): this reads up to p[4], which may be beyond
		 * the bytes that actually hold the value - presumably
		 * callers' buffers always have slack after the packed data;
		 * verify.
		 */
		val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) |
		      ((uint32_t)p[4] << 24);
		val <<= (8 - b);
		val |= *p >> b;
		nrbits += b;
	} else
		val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
		      ((uint32_t)p[3] << 24);
	/* Keep only the requested low bits */
	val <<= k;
	val >>= k;
	/* Advance the cursor past the consumed bits */
	b = nrbits & 7;
	p += nrbits / 8;
	*addr = p;
	*pos = b;
	ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
	return val;
}
| 314 | |||
| 315 | /** | ||
| 316 | * ubifs_pack_pnode - pack all the bit fields of a pnode. | ||
| 317 | * @c: UBIFS file-system description object | ||
| 318 | * @buf: buffer into which to pack | ||
| 319 | * @pnode: pnode to pack | ||
| 320 | */ | ||
| 321 | void ubifs_pack_pnode(struct ubifs_info *c, void *buf, | ||
| 322 | struct ubifs_pnode *pnode) | ||
| 323 | { | ||
| 324 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 325 | int i, pos = 0; | ||
| 326 | uint16_t crc; | ||
| 327 | |||
| 328 | pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS); | ||
| 329 | if (c->big_lpt) | ||
| 330 | pack_bits(&addr, &pos, pnode->num, c->pcnt_bits); | ||
| 331 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 332 | pack_bits(&addr, &pos, pnode->lprops[i].free >> 3, | ||
| 333 | c->space_bits); | ||
| 334 | pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3, | ||
| 335 | c->space_bits); | ||
| 336 | if (pnode->lprops[i].flags & LPROPS_INDEX) | ||
| 337 | pack_bits(&addr, &pos, 1, 1); | ||
| 338 | else | ||
| 339 | pack_bits(&addr, &pos, 0, 1); | ||
| 340 | } | ||
| 341 | crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, | ||
| 342 | c->pnode_sz - UBIFS_LPT_CRC_BYTES); | ||
| 343 | addr = buf; | ||
| 344 | pos = 0; | ||
| 345 | pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); | ||
| 346 | } | ||
| 347 | |||
| 348 | /** | ||
| 349 | * ubifs_pack_nnode - pack all the bit fields of a nnode. | ||
| 350 | * @c: UBIFS file-system description object | ||
| 351 | * @buf: buffer into which to pack | ||
| 352 | * @nnode: nnode to pack | ||
| 353 | */ | ||
| 354 | void ubifs_pack_nnode(struct ubifs_info *c, void *buf, | ||
| 355 | struct ubifs_nnode *nnode) | ||
| 356 | { | ||
| 357 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 358 | int i, pos = 0; | ||
| 359 | uint16_t crc; | ||
| 360 | |||
| 361 | pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS); | ||
| 362 | if (c->big_lpt) | ||
| 363 | pack_bits(&addr, &pos, nnode->num, c->pcnt_bits); | ||
| 364 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 365 | int lnum = nnode->nbranch[i].lnum; | ||
| 366 | |||
| 367 | if (lnum == 0) | ||
| 368 | lnum = c->lpt_last + 1; | ||
| 369 | pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits); | ||
| 370 | pack_bits(&addr, &pos, nnode->nbranch[i].offs, | ||
| 371 | c->lpt_offs_bits); | ||
| 372 | } | ||
| 373 | crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, | ||
| 374 | c->nnode_sz - UBIFS_LPT_CRC_BYTES); | ||
| 375 | addr = buf; | ||
| 376 | pos = 0; | ||
| 377 | pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); | ||
| 378 | } | ||
| 379 | |||
| 380 | /** | ||
| 381 | * ubifs_pack_ltab - pack the LPT's own lprops table. | ||
| 382 | * @c: UBIFS file-system description object | ||
| 383 | * @buf: buffer into which to pack | ||
| 384 | * @ltab: LPT's own lprops table to pack | ||
| 385 | */ | ||
| 386 | void ubifs_pack_ltab(struct ubifs_info *c, void *buf, | ||
| 387 | struct ubifs_lpt_lprops *ltab) | ||
| 388 | { | ||
| 389 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 390 | int i, pos = 0; | ||
| 391 | uint16_t crc; | ||
| 392 | |||
| 393 | pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS); | ||
| 394 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 395 | pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits); | ||
| 396 | pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits); | ||
| 397 | } | ||
| 398 | crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, | ||
| 399 | c->ltab_sz - UBIFS_LPT_CRC_BYTES); | ||
| 400 | addr = buf; | ||
| 401 | pos = 0; | ||
| 402 | pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * ubifs_pack_lsave - pack the LPT's save table. | ||
| 407 | * @c: UBIFS file-system description object | ||
| 408 | * @buf: buffer into which to pack | ||
| 409 | * @lsave: LPT's save table to pack | ||
| 410 | */ | ||
| 411 | void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave) | ||
| 412 | { | ||
| 413 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 414 | int i, pos = 0; | ||
| 415 | uint16_t crc; | ||
| 416 | |||
| 417 | pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS); | ||
| 418 | for (i = 0; i < c->lsave_cnt; i++) | ||
| 419 | pack_bits(&addr, &pos, lsave[i], c->lnum_bits); | ||
| 420 | crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, | ||
| 421 | c->lsave_sz - UBIFS_LPT_CRC_BYTES); | ||
| 422 | addr = buf; | ||
| 423 | pos = 0; | ||
| 424 | pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); | ||
| 425 | } | ||
| 426 | |||
| 427 | /** | ||
| 428 | * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. | ||
| 429 | * @c: UBIFS file-system description object | ||
| 430 | * @lnum: LEB number to which to add dirty space | ||
| 431 | * @dirty: amount of dirty space to add | ||
| 432 | */ | ||
| 433 | void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) | ||
| 434 | { | ||
| 435 | if (!dirty || !lnum) | ||
| 436 | return; | ||
| 437 | dbg_lp("LEB %d add %d to %d", | ||
| 438 | lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); | ||
| 439 | ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); | ||
| 440 | c->ltab[lnum - c->lpt_first].dirty += dirty; | ||
| 441 | } | ||
| 442 | |||
| 443 | /** | ||
| 444 | * set_ltab - set LPT LEB properties. | ||
| 445 | * @c: UBIFS file-system description object | ||
| 446 | * @lnum: LEB number | ||
| 447 | * @free: amount of free space | ||
| 448 | * @dirty: amount of dirty space | ||
| 449 | */ | ||
| 450 | static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty) | ||
| 451 | { | ||
| 452 | dbg_lp("LEB %d free %d dirty %d to %d %d", | ||
| 453 | lnum, c->ltab[lnum - c->lpt_first].free, | ||
| 454 | c->ltab[lnum - c->lpt_first].dirty, free, dirty); | ||
| 455 | ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); | ||
| 456 | c->ltab[lnum - c->lpt_first].free = free; | ||
| 457 | c->ltab[lnum - c->lpt_first].dirty = dirty; | ||
| 458 | } | ||
| 459 | |||
| 460 | /** | ||
| 461 | * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. | ||
| 462 | * @c: UBIFS file-system description object | ||
| 463 | * @nnode: nnode for which to add dirt | ||
| 464 | */ | ||
| 465 | void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) | ||
| 466 | { | ||
| 467 | struct ubifs_nnode *np = nnode->parent; | ||
| 468 | |||
| 469 | if (np) | ||
| 470 | ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, | ||
| 471 | c->nnode_sz); | ||
| 472 | else { | ||
| 473 | ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); | ||
| 474 | if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { | ||
| 475 | c->lpt_drty_flgs |= LTAB_DIRTY; | ||
| 476 | ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); | ||
| 477 | } | ||
| 478 | } | ||
| 479 | } | ||
| 480 | |||
| 481 | /** | ||
| 482 | * add_pnode_dirt - add dirty space to LPT LEB properties. | ||
| 483 | * @c: UBIFS file-system description object | ||
| 484 | * @pnode: pnode for which to add dirt | ||
| 485 | */ | ||
| 486 | static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) | ||
| 487 | { | ||
| 488 | ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, | ||
| 489 | c->pnode_sz); | ||
| 490 | } | ||
| 491 | |||
| 492 | /** | ||
| 493 | * calc_nnode_num - calculate nnode number. | ||
| 494 | * @row: the row in the tree (root is zero) | ||
| 495 | * @col: the column in the row (leftmost is zero) | ||
| 496 | * | ||
| 497 | * The nnode number is a number that uniquely identifies a nnode and can be used | ||
| 498 | * easily to traverse the tree from the root to that nnode. | ||
| 499 | * | ||
| 500 | * This function calculates and returns the nnode number for the nnode at @row | ||
| 501 | * and @col. | ||
| 502 | */ | ||
| 503 | static int calc_nnode_num(int row, int col) | ||
| 504 | { | ||
| 505 | int num, bits; | ||
| 506 | |||
| 507 | num = 1; | ||
| 508 | while (row--) { | ||
| 509 | bits = (col & (UBIFS_LPT_FANOUT - 1)); | ||
| 510 | col >>= UBIFS_LPT_FANOUT_SHIFT; | ||
| 511 | num <<= UBIFS_LPT_FANOUT_SHIFT; | ||
| 512 | num |= bits; | ||
| 513 | } | ||
| 514 | return num; | ||
| 515 | } | ||
| 516 | |||
| 517 | /** | ||
| 518 | * calc_nnode_num_from_parent - calculate nnode number. | ||
| 519 | * @c: UBIFS file-system description object | ||
| 520 | * @parent: parent nnode | ||
| 521 | * @iip: index in parent | ||
| 522 | * | ||
| 523 | * The nnode number is a number that uniquely identifies a nnode and can be used | ||
| 524 | * easily to traverse the tree from the root to that nnode. | ||
| 525 | * | ||
| 526 | * This function calculates and returns the nnode number based on the parent's | ||
| 527 | * nnode number and the index in parent. | ||
| 528 | */ | ||
| 529 | static int calc_nnode_num_from_parent(struct ubifs_info *c, | ||
| 530 | struct ubifs_nnode *parent, int iip) | ||
| 531 | { | ||
| 532 | int num, shft; | ||
| 533 | |||
| 534 | if (!parent) | ||
| 535 | return 1; | ||
| 536 | shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; | ||
| 537 | num = parent->num ^ (1 << shft); | ||
| 538 | num |= (UBIFS_LPT_FANOUT + iip) << shft; | ||
| 539 | return num; | ||
| 540 | } | ||
| 541 | |||
| 542 | /** | ||
| 543 | * calc_pnode_num_from_parent - calculate pnode number. | ||
| 544 | * @c: UBIFS file-system description object | ||
| 545 | * @parent: parent nnode | ||
| 546 | * @iip: index in parent | ||
| 547 | * | ||
| 548 | * The pnode number is a number that uniquely identifies a pnode and can be used | ||
| 549 | * easily to traverse the tree from the root to that pnode. | ||
| 550 | * | ||
| 551 | * This function calculates and returns the pnode number based on the parent's | ||
| 552 | * nnode number and the index in parent. | ||
| 553 | */ | ||
| 554 | static int calc_pnode_num_from_parent(struct ubifs_info *c, | ||
| 555 | struct ubifs_nnode *parent, int iip) | ||
| 556 | { | ||
| 557 | int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; | ||
| 558 | |||
| 559 | for (i = 0; i < n; i++) { | ||
| 560 | num <<= UBIFS_LPT_FANOUT_SHIFT; | ||
| 561 | num |= pnum & (UBIFS_LPT_FANOUT - 1); | ||
| 562 | pnum >>= UBIFS_LPT_FANOUT_SHIFT; | ||
| 563 | } | ||
| 564 | num <<= UBIFS_LPT_FANOUT_SHIFT; | ||
| 565 | num |= iip; | ||
| 566 | return num; | ||
| 567 | } | ||
| 568 | |||
| 569 | /** | ||
| 570 | * ubifs_create_dflt_lpt - create default LPT. | ||
| 571 | * @c: UBIFS file-system description object | ||
| 572 | * @main_lebs: number of main area LEBs is passed and returned here | ||
| 573 | * @lpt_first: LEB number of first LPT LEB | ||
| 574 | * @lpt_lebs: number of LEBs for LPT is passed and returned here | ||
| 575 | * @big_lpt: use big LPT model is passed and returned here | ||
| 576 | * | ||
| 577 | * This function returns %0 on success and a negative error code on failure. | ||
| 578 | */ | ||
| 579 | int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, | ||
| 580 | int *lpt_lebs, int *big_lpt) | ||
| 581 | { | ||
| 582 | int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row; | ||
| 583 | int blnum, boffs, bsz, bcnt; | ||
| 584 | struct ubifs_pnode *pnode = NULL; | ||
| 585 | struct ubifs_nnode *nnode = NULL; | ||
| 586 | void *buf = NULL, *p; | ||
| 587 | struct ubifs_lpt_lprops *ltab = NULL; | ||
| 588 | int *lsave = NULL; | ||
| 589 | |||
| 590 | err = calc_dflt_lpt_geom(c, main_lebs, big_lpt); | ||
| 591 | if (err) | ||
| 592 | return err; | ||
| 593 | *lpt_lebs = c->lpt_lebs; | ||
| 594 | |||
| 595 | /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */ | ||
| 596 | c->lpt_first = lpt_first; | ||
| 597 | /* Needed by 'set_ltab()' */ | ||
| 598 | c->lpt_last = lpt_first + c->lpt_lebs - 1; | ||
| 599 | /* Needed by 'ubifs_pack_lsave()' */ | ||
| 600 | c->main_first = c->leb_cnt - *main_lebs; | ||
| 601 | |||
| 602 | lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL); | ||
| 603 | pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); | ||
| 604 | nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); | ||
| 605 | buf = vmalloc(c->leb_size); | ||
| 606 | ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); | ||
| 607 | if (!pnode || !nnode || !buf || !ltab || !lsave) { | ||
| 608 | err = -ENOMEM; | ||
| 609 | goto out; | ||
| 610 | } | ||
| 611 | |||
| 612 | ubifs_assert(!c->ltab); | ||
| 613 | c->ltab = ltab; /* Needed by set_ltab */ | ||
| 614 | |||
| 615 | /* Initialize LPT's own lprops */ | ||
| 616 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 617 | ltab[i].free = c->leb_size; | ||
| 618 | ltab[i].dirty = 0; | ||
| 619 | ltab[i].tgc = 0; | ||
| 620 | ltab[i].cmt = 0; | ||
| 621 | } | ||
| 622 | |||
| 623 | lnum = lpt_first; | ||
| 624 | p = buf; | ||
| 625 | /* Number of leaf nodes (pnodes) */ | ||
| 626 | cnt = c->pnode_cnt; | ||
| 627 | |||
| 628 | /* | ||
| 629 | * The first pnode contains the LEB properties for the LEBs that contain | ||
| 630 | * the root inode node and the root index node of the index tree. | ||
| 631 | */ | ||
| 632 | node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8); | ||
| 633 | iopos = ALIGN(node_sz, c->min_io_size); | ||
| 634 | pnode->lprops[0].free = c->leb_size - iopos; | ||
| 635 | pnode->lprops[0].dirty = iopos - node_sz; | ||
| 636 | pnode->lprops[0].flags = LPROPS_INDEX; | ||
| 637 | |||
| 638 | node_sz = UBIFS_INO_NODE_SZ; | ||
| 639 | iopos = ALIGN(node_sz, c->min_io_size); | ||
| 640 | pnode->lprops[1].free = c->leb_size - iopos; | ||
| 641 | pnode->lprops[1].dirty = iopos - node_sz; | ||
| 642 | |||
| 643 | for (i = 2; i < UBIFS_LPT_FANOUT; i++) | ||
| 644 | pnode->lprops[i].free = c->leb_size; | ||
| 645 | |||
| 646 | /* Add first pnode */ | ||
| 647 | ubifs_pack_pnode(c, p, pnode); | ||
| 648 | p += c->pnode_sz; | ||
| 649 | len = c->pnode_sz; | ||
| 650 | pnode->num += 1; | ||
| 651 | |||
| 652 | /* Reset pnode values for remaining pnodes */ | ||
| 653 | pnode->lprops[0].free = c->leb_size; | ||
| 654 | pnode->lprops[0].dirty = 0; | ||
| 655 | pnode->lprops[0].flags = 0; | ||
| 656 | |||
| 657 | pnode->lprops[1].free = c->leb_size; | ||
| 658 | pnode->lprops[1].dirty = 0; | ||
| 659 | |||
| 660 | /* | ||
| 661 | * To calculate the internal node branches, we keep information about | ||
| 662 | * the level below. | ||
| 663 | */ | ||
| 664 | blnum = lnum; /* LEB number of level below */ | ||
| 665 | boffs = 0; /* Offset of level below */ | ||
| 666 | bcnt = cnt; /* Number of nodes in level below */ | ||
| 667 | bsz = c->pnode_sz; /* Size of nodes in level below */ | ||
| 668 | |||
| 669 | /* Add all remaining pnodes */ | ||
| 670 | for (i = 1; i < cnt; i++) { | ||
| 671 | if (len + c->pnode_sz > c->leb_size) { | ||
| 672 | alen = ALIGN(len, c->min_io_size); | ||
| 673 | set_ltab(c, lnum, c->leb_size - alen, alen - len); | ||
| 674 | memset(p, 0xff, alen - len); | ||
| 675 | err = ubi_leb_change(c->ubi, lnum++, buf, alen, | ||
| 676 | UBI_SHORTTERM); | ||
| 677 | if (err) | ||
| 678 | goto out; | ||
| 679 | p = buf; | ||
| 680 | len = 0; | ||
| 681 | } | ||
| 682 | ubifs_pack_pnode(c, p, pnode); | ||
| 683 | p += c->pnode_sz; | ||
| 684 | len += c->pnode_sz; | ||
| 685 | /* | ||
| 686 | * pnodes are simply numbered left to right starting at zero, | ||
| 687 | * which means the pnode number can be used easily to traverse | ||
| 688 | * down the tree to the corresponding pnode. | ||
| 689 | */ | ||
| 690 | pnode->num += 1; | ||
| 691 | } | ||
| 692 | |||
| 693 | row = 0; | ||
| 694 | for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT) | ||
| 695 | row += 1; | ||
| 696 | /* Add all nnodes, one level at a time */ | ||
| 697 | while (1) { | ||
| 698 | /* Number of internal nodes (nnodes) at next level */ | ||
| 699 | cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT); | ||
| 700 | for (i = 0; i < cnt; i++) { | ||
| 701 | if (len + c->nnode_sz > c->leb_size) { | ||
| 702 | alen = ALIGN(len, c->min_io_size); | ||
| 703 | set_ltab(c, lnum, c->leb_size - alen, | ||
| 704 | alen - len); | ||
| 705 | memset(p, 0xff, alen - len); | ||
| 706 | err = ubi_leb_change(c->ubi, lnum++, buf, alen, | ||
| 707 | UBI_SHORTTERM); | ||
| 708 | if (err) | ||
| 709 | goto out; | ||
| 710 | p = buf; | ||
| 711 | len = 0; | ||
| 712 | } | ||
| 713 | /* Only 1 nnode at this level, so it is the root */ | ||
| 714 | if (cnt == 1) { | ||
| 715 | c->lpt_lnum = lnum; | ||
| 716 | c->lpt_offs = len; | ||
| 717 | } | ||
| 718 | /* Set branches to the level below */ | ||
| 719 | for (j = 0; j < UBIFS_LPT_FANOUT; j++) { | ||
| 720 | if (bcnt) { | ||
| 721 | if (boffs + bsz > c->leb_size) { | ||
| 722 | blnum += 1; | ||
| 723 | boffs = 0; | ||
| 724 | } | ||
| 725 | nnode->nbranch[j].lnum = blnum; | ||
| 726 | nnode->nbranch[j].offs = boffs; | ||
| 727 | boffs += bsz; | ||
| 728 | bcnt--; | ||
| 729 | } else { | ||
| 730 | nnode->nbranch[j].lnum = 0; | ||
| 731 | nnode->nbranch[j].offs = 0; | ||
| 732 | } | ||
| 733 | } | ||
| 734 | nnode->num = calc_nnode_num(row, i); | ||
| 735 | ubifs_pack_nnode(c, p, nnode); | ||
| 736 | p += c->nnode_sz; | ||
| 737 | len += c->nnode_sz; | ||
| 738 | } | ||
| 739 | /* Only 1 nnode at this level, so it is the root */ | ||
| 740 | if (cnt == 1) | ||
| 741 | break; | ||
| 742 | /* Update the information about the level below */ | ||
| 743 | bcnt = cnt; | ||
| 744 | bsz = c->nnode_sz; | ||
| 745 | row -= 1; | ||
| 746 | } | ||
| 747 | |||
| 748 | if (*big_lpt) { | ||
| 749 | /* Need to add LPT's save table */ | ||
| 750 | if (len + c->lsave_sz > c->leb_size) { | ||
| 751 | alen = ALIGN(len, c->min_io_size); | ||
| 752 | set_ltab(c, lnum, c->leb_size - alen, alen - len); | ||
| 753 | memset(p, 0xff, alen - len); | ||
| 754 | err = ubi_leb_change(c->ubi, lnum++, buf, alen, | ||
| 755 | UBI_SHORTTERM); | ||
| 756 | if (err) | ||
| 757 | goto out; | ||
| 758 | p = buf; | ||
| 759 | len = 0; | ||
| 760 | } | ||
| 761 | |||
| 762 | c->lsave_lnum = lnum; | ||
| 763 | c->lsave_offs = len; | ||
| 764 | |||
| 765 | for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++) | ||
| 766 | lsave[i] = c->main_first + i; | ||
| 767 | for (; i < c->lsave_cnt; i++) | ||
| 768 | lsave[i] = c->main_first; | ||
| 769 | |||
| 770 | ubifs_pack_lsave(c, p, lsave); | ||
| 771 | p += c->lsave_sz; | ||
| 772 | len += c->lsave_sz; | ||
| 773 | } | ||
| 774 | |||
| 775 | /* Need to add LPT's own LEB properties table */ | ||
| 776 | if (len + c->ltab_sz > c->leb_size) { | ||
| 777 | alen = ALIGN(len, c->min_io_size); | ||
| 778 | set_ltab(c, lnum, c->leb_size - alen, alen - len); | ||
| 779 | memset(p, 0xff, alen - len); | ||
| 780 | err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); | ||
| 781 | if (err) | ||
| 782 | goto out; | ||
| 783 | p = buf; | ||
| 784 | len = 0; | ||
| 785 | } | ||
| 786 | |||
| 787 | c->ltab_lnum = lnum; | ||
| 788 | c->ltab_offs = len; | ||
| 789 | |||
| 790 | /* Update ltab before packing it */ | ||
| 791 | len += c->ltab_sz; | ||
| 792 | alen = ALIGN(len, c->min_io_size); | ||
| 793 | set_ltab(c, lnum, c->leb_size - alen, alen - len); | ||
| 794 | |||
| 795 | ubifs_pack_ltab(c, p, ltab); | ||
| 796 | p += c->ltab_sz; | ||
| 797 | |||
| 798 | /* Write remaining buffer */ | ||
| 799 | memset(p, 0xff, alen - len); | ||
| 800 | err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); | ||
| 801 | if (err) | ||
| 802 | goto out; | ||
| 803 | |||
| 804 | c->nhead_lnum = lnum; | ||
| 805 | c->nhead_offs = ALIGN(len, c->min_io_size); | ||
| 806 | |||
| 807 | dbg_lp("space_bits %d", c->space_bits); | ||
| 808 | dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); | ||
| 809 | dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); | ||
| 810 | dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); | ||
| 811 | dbg_lp("pcnt_bits %d", c->pcnt_bits); | ||
| 812 | dbg_lp("lnum_bits %d", c->lnum_bits); | ||
| 813 | dbg_lp("pnode_sz %d", c->pnode_sz); | ||
| 814 | dbg_lp("nnode_sz %d", c->nnode_sz); | ||
| 815 | dbg_lp("ltab_sz %d", c->ltab_sz); | ||
| 816 | dbg_lp("lsave_sz %d", c->lsave_sz); | ||
| 817 | dbg_lp("lsave_cnt %d", c->lsave_cnt); | ||
| 818 | dbg_lp("lpt_hght %d", c->lpt_hght); | ||
| 819 | dbg_lp("big_lpt %d", c->big_lpt); | ||
| 820 | dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); | ||
| 821 | dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); | ||
| 822 | dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); | ||
| 823 | if (c->big_lpt) | ||
| 824 | dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); | ||
| 825 | out: | ||
| 826 | c->ltab = NULL; | ||
| 827 | kfree(lsave); | ||
| 828 | vfree(ltab); | ||
| 829 | vfree(buf); | ||
| 830 | kfree(nnode); | ||
| 831 | kfree(pnode); | ||
| 832 | return err; | ||
| 833 | } | ||
| 834 | |||
| 835 | /** | ||
| 836 | * update_cats - add LEB properties of a pnode to LEB category lists and heaps. | ||
| 837 | * @c: UBIFS file-system description object | ||
| 838 | * @pnode: pnode | ||
| 839 | * | ||
| 840 | * When a pnode is loaded into memory, the LEB properties it contains are added, | ||
| 841 | * by this function, to the LEB category lists and heaps. | ||
| 842 | */ | ||
| 843 | static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) | ||
| 844 | { | ||
| 845 | int i; | ||
| 846 | |||
| 847 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 848 | int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; | ||
| 849 | int lnum = pnode->lprops[i].lnum; | ||
| 850 | |||
| 851 | if (!lnum) | ||
| 852 | return; | ||
| 853 | ubifs_add_to_cat(c, &pnode->lprops[i], cat); | ||
| 854 | } | ||
| 855 | } | ||
| 856 | |||
| 857 | /** | ||
| 858 | * replace_cats - add LEB properties of a pnode to LEB category lists and heaps. | ||
| 859 | * @c: UBIFS file-system description object | ||
| 860 | * @old_pnode: pnode copied | ||
| 861 | * @new_pnode: pnode copy | ||
| 862 | * | ||
| 863 | * During commit it is sometimes necessary to copy a pnode | ||
| 864 | * (see dirty_cow_pnode). When that happens, references in | ||
| 865 | * category lists and heaps must be replaced. This function does that. | ||
| 866 | */ | ||
| 867 | static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, | ||
| 868 | struct ubifs_pnode *new_pnode) | ||
| 869 | { | ||
| 870 | int i; | ||
| 871 | |||
| 872 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 873 | if (!new_pnode->lprops[i].lnum) | ||
| 874 | return; | ||
| 875 | ubifs_replace_cat(c, &old_pnode->lprops[i], | ||
| 876 | &new_pnode->lprops[i]); | ||
| 877 | } | ||
| 878 | } | ||
| 879 | |||
| 880 | /** | ||
| 881 | * check_lpt_crc - check LPT node crc is correct. | ||
| 882 | * @c: UBIFS file-system description object | ||
| 883 | * @buf: buffer containing node | ||
| 884 | * @len: length of node | ||
| 885 | * | ||
| 886 | * This function returns %0 on success and a negative error code on failure. | ||
| 887 | */ | ||
| 888 | static int check_lpt_crc(void *buf, int len) | ||
| 889 | { | ||
| 890 | int pos = 0; | ||
| 891 | uint8_t *addr = buf; | ||
| 892 | uint16_t crc, calc_crc; | ||
| 893 | |||
| 894 | crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); | ||
| 895 | calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, | ||
| 896 | len - UBIFS_LPT_CRC_BYTES); | ||
| 897 | if (crc != calc_crc) { | ||
| 898 | ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, | ||
| 899 | calc_crc); | ||
| 900 | dbg_dump_stack(); | ||
| 901 | return -EINVAL; | ||
| 902 | } | ||
| 903 | return 0; | ||
| 904 | } | ||
| 905 | |||
| 906 | /** | ||
| 907 | * check_lpt_type - check LPT node type is correct. | ||
| 908 | * @c: UBIFS file-system description object | ||
| 909 | * @addr: address of type bit field is passed and returned updated here | ||
| 910 | * @pos: position of type bit field is passed and returned updated here | ||
| 911 | * @type: expected type | ||
| 912 | * | ||
| 913 | * This function returns %0 on success and a negative error code on failure. | ||
| 914 | */ | ||
| 915 | static int check_lpt_type(uint8_t **addr, int *pos, int type) | ||
| 916 | { | ||
| 917 | int node_type; | ||
| 918 | |||
| 919 | node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); | ||
| 920 | if (node_type != type) { | ||
| 921 | ubifs_err("invalid type (%d) in LPT node type %d", node_type, | ||
| 922 | type); | ||
| 923 | dbg_dump_stack(); | ||
| 924 | return -EINVAL; | ||
| 925 | } | ||
| 926 | return 0; | ||
| 927 | } | ||
| 928 | |||
| 929 | /** | ||
| 930 | * unpack_pnode - unpack a pnode. | ||
| 931 | * @c: UBIFS file-system description object | ||
| 932 | * @buf: buffer containing packed pnode to unpack | ||
| 933 | * @pnode: pnode structure to fill | ||
| 934 | * | ||
| 935 | * This function returns %0 on success and a negative error code on failure. | ||
| 936 | */ | ||
| 937 | static int unpack_pnode(struct ubifs_info *c, void *buf, | ||
| 938 | struct ubifs_pnode *pnode) | ||
| 939 | { | ||
| 940 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 941 | int i, pos = 0, err; | ||
| 942 | |||
| 943 | err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); | ||
| 944 | if (err) | ||
| 945 | return err; | ||
| 946 | if (c->big_lpt) | ||
| 947 | pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); | ||
| 948 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 949 | struct ubifs_lprops * const lprops = &pnode->lprops[i]; | ||
| 950 | |||
| 951 | lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); | ||
| 952 | lprops->free <<= 3; | ||
| 953 | lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); | ||
| 954 | lprops->dirty <<= 3; | ||
| 955 | |||
| 956 | if (ubifs_unpack_bits(&addr, &pos, 1)) | ||
| 957 | lprops->flags = LPROPS_INDEX; | ||
| 958 | else | ||
| 959 | lprops->flags = 0; | ||
| 960 | lprops->flags |= ubifs_categorize_lprops(c, lprops); | ||
| 961 | } | ||
| 962 | err = check_lpt_crc(buf, c->pnode_sz); | ||
| 963 | return err; | ||
| 964 | } | ||
| 965 | |||
| 966 | /** | ||
| 967 | * unpack_nnode - unpack a nnode. | ||
| 968 | * @c: UBIFS file-system description object | ||
| 969 | * @buf: buffer containing packed nnode to unpack | ||
| 970 | * @nnode: nnode structure to fill | ||
| 971 | * | ||
| 972 | * This function returns %0 on success and a negative error code on failure. | ||
| 973 | */ | ||
| 974 | static int unpack_nnode(struct ubifs_info *c, void *buf, | ||
| 975 | struct ubifs_nnode *nnode) | ||
| 976 | { | ||
| 977 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 978 | int i, pos = 0, err; | ||
| 979 | |||
| 980 | err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); | ||
| 981 | if (err) | ||
| 982 | return err; | ||
| 983 | if (c->big_lpt) | ||
| 984 | nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); | ||
| 985 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 986 | int lnum; | ||
| 987 | |||
| 988 | lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + | ||
| 989 | c->lpt_first; | ||
| 990 | if (lnum == c->lpt_last + 1) | ||
| 991 | lnum = 0; | ||
| 992 | nnode->nbranch[i].lnum = lnum; | ||
| 993 | nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, | ||
| 994 | c->lpt_offs_bits); | ||
| 995 | } | ||
| 996 | err = check_lpt_crc(buf, c->nnode_sz); | ||
| 997 | return err; | ||
| 998 | } | ||
| 999 | |||
| 1000 | /** | ||
| 1001 | * unpack_ltab - unpack the LPT's own lprops table. | ||
| 1002 | * @c: UBIFS file-system description object | ||
| 1003 | * @buf: buffer from which to unpack | ||
| 1004 | * | ||
| 1005 | * This function returns %0 on success and a negative error code on failure. | ||
| 1006 | */ | ||
| 1007 | static int unpack_ltab(struct ubifs_info *c, void *buf) | ||
| 1008 | { | ||
| 1009 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 1010 | int i, pos = 0, err; | ||
| 1011 | |||
| 1012 | err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); | ||
| 1013 | if (err) | ||
| 1014 | return err; | ||
| 1015 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 1016 | int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); | ||
| 1017 | int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); | ||
| 1018 | |||
| 1019 | if (free < 0 || free > c->leb_size || dirty < 0 || | ||
| 1020 | dirty > c->leb_size || free + dirty > c->leb_size) | ||
| 1021 | return -EINVAL; | ||
| 1022 | |||
| 1023 | c->ltab[i].free = free; | ||
| 1024 | c->ltab[i].dirty = dirty; | ||
| 1025 | c->ltab[i].tgc = 0; | ||
| 1026 | c->ltab[i].cmt = 0; | ||
| 1027 | } | ||
| 1028 | err = check_lpt_crc(buf, c->ltab_sz); | ||
| 1029 | return err; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | /** | ||
| 1033 | * unpack_lsave - unpack the LPT's save table. | ||
| 1034 | * @c: UBIFS file-system description object | ||
| 1035 | * @buf: buffer from which to unpack | ||
| 1036 | * | ||
| 1037 | * This function returns %0 on success and a negative error code on failure. | ||
| 1038 | */ | ||
| 1039 | static int unpack_lsave(struct ubifs_info *c, void *buf) | ||
| 1040 | { | ||
| 1041 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 1042 | int i, pos = 0, err; | ||
| 1043 | |||
| 1044 | err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); | ||
| 1045 | if (err) | ||
| 1046 | return err; | ||
| 1047 | for (i = 0; i < c->lsave_cnt; i++) { | ||
| 1048 | int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits); | ||
| 1049 | |||
| 1050 | if (lnum < c->main_first || lnum >= c->leb_cnt) | ||
| 1051 | return -EINVAL; | ||
| 1052 | c->lsave[i] = lnum; | ||
| 1053 | } | ||
| 1054 | err = check_lpt_crc(buf, c->lsave_sz); | ||
| 1055 | return err; | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | /** | ||
| 1059 | * validate_nnode - validate a nnode. | ||
| 1060 | * @c: UBIFS file-system description object | ||
| 1061 | * @nnode: nnode to validate | ||
| 1062 | * @parent: parent nnode (or NULL for the root nnode) | ||
| 1063 | * @iip: index in parent | ||
| 1064 | * | ||
| 1065 | * This function returns %0 on success and a negative error code on failure. | ||
| 1066 | */ | ||
| 1067 | static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, | ||
| 1068 | struct ubifs_nnode *parent, int iip) | ||
| 1069 | { | ||
| 1070 | int i, lvl, max_offs; | ||
| 1071 | |||
| 1072 | if (c->big_lpt) { | ||
| 1073 | int num = calc_nnode_num_from_parent(c, parent, iip); | ||
| 1074 | |||
| 1075 | if (nnode->num != num) | ||
| 1076 | return -EINVAL; | ||
| 1077 | } | ||
| 1078 | lvl = parent ? parent->level - 1 : c->lpt_hght; | ||
| 1079 | if (lvl < 1) | ||
| 1080 | return -EINVAL; | ||
| 1081 | if (lvl == 1) | ||
| 1082 | max_offs = c->leb_size - c->pnode_sz; | ||
| 1083 | else | ||
| 1084 | max_offs = c->leb_size - c->nnode_sz; | ||
| 1085 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 1086 | int lnum = nnode->nbranch[i].lnum; | ||
| 1087 | int offs = nnode->nbranch[i].offs; | ||
| 1088 | |||
| 1089 | if (lnum == 0) { | ||
| 1090 | if (offs != 0) | ||
| 1091 | return -EINVAL; | ||
| 1092 | continue; | ||
| 1093 | } | ||
| 1094 | if (lnum < c->lpt_first || lnum > c->lpt_last) | ||
| 1095 | return -EINVAL; | ||
| 1096 | if (offs < 0 || offs > max_offs) | ||
| 1097 | return -EINVAL; | ||
| 1098 | } | ||
| 1099 | return 0; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | /** | ||
| 1103 | * validate_pnode - validate a pnode. | ||
| 1104 | * @c: UBIFS file-system description object | ||
| 1105 | * @pnode: pnode to validate | ||
| 1106 | * @parent: parent nnode | ||
| 1107 | * @iip: index in parent | ||
| 1108 | * | ||
| 1109 | * This function returns %0 on success and a negative error code on failure. | ||
| 1110 | */ | ||
| 1111 | static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | ||
| 1112 | struct ubifs_nnode *parent, int iip) | ||
| 1113 | { | ||
| 1114 | int i; | ||
| 1115 | |||
| 1116 | if (c->big_lpt) { | ||
| 1117 | int num = calc_pnode_num_from_parent(c, parent, iip); | ||
| 1118 | |||
| 1119 | if (pnode->num != num) | ||
| 1120 | return -EINVAL; | ||
| 1121 | } | ||
| 1122 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 1123 | int free = pnode->lprops[i].free; | ||
| 1124 | int dirty = pnode->lprops[i].dirty; | ||
| 1125 | |||
| 1126 | if (free < 0 || free > c->leb_size || free % c->min_io_size || | ||
| 1127 | (free & 7)) | ||
| 1128 | return -EINVAL; | ||
| 1129 | if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) | ||
| 1130 | return -EINVAL; | ||
| 1131 | if (dirty + free > c->leb_size) | ||
| 1132 | return -EINVAL; | ||
| 1133 | } | ||
| 1134 | return 0; | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | /** | ||
| 1138 | * set_pnode_lnum - set LEB numbers on a pnode. | ||
| 1139 | * @c: UBIFS file-system description object | ||
| 1140 | * @pnode: pnode to update | ||
| 1141 | * | ||
| 1142 | * This function calculates the LEB numbers for the LEB properties it contains | ||
| 1143 | * based on the pnode number. | ||
| 1144 | */ | ||
| 1145 | static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) | ||
| 1146 | { | ||
| 1147 | int i, lnum; | ||
| 1148 | |||
| 1149 | lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; | ||
| 1150 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 1151 | if (lnum >= c->leb_cnt) | ||
| 1152 | return; | ||
| 1153 | pnode->lprops[i].lnum = lnum++; | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | /** | ||
| 1158 | * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. | ||
| 1159 | * @c: UBIFS file-system description object | ||
| 1160 | * @parent: parent nnode (or NULL for the root) | ||
| 1161 | * @iip: index in parent | ||
| 1162 | * | ||
| 1163 | * This function returns %0 on success and a negative error code on failure. | ||
| 1164 | */ | ||
| 1165 | int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) | ||
| 1166 | { | ||
| 1167 | struct ubifs_nbranch *branch = NULL; | ||
| 1168 | struct ubifs_nnode *nnode = NULL; | ||
| 1169 | void *buf = c->lpt_nod_buf; | ||
| 1170 | int err, lnum, offs; | ||
| 1171 | |||
| 1172 | if (parent) { | ||
| 1173 | branch = &parent->nbranch[iip]; | ||
| 1174 | lnum = branch->lnum; | ||
| 1175 | offs = branch->offs; | ||
| 1176 | } else { | ||
| 1177 | lnum = c->lpt_lnum; | ||
| 1178 | offs = c->lpt_offs; | ||
| 1179 | } | ||
| 1180 | nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); | ||
| 1181 | if (!nnode) { | ||
| 1182 | err = -ENOMEM; | ||
| 1183 | goto out; | ||
| 1184 | } | ||
| 1185 | if (lnum == 0) { | ||
| 1186 | /* | ||
| 1187 | * This nnode was not written which just means that the LEB | ||
| 1188 | * properties in the subtree below it describe empty LEBs. We | ||
| 1189 | * make the nnode as though we had read it, which in fact means | ||
| 1190 | * doing almost nothing. | ||
| 1191 | */ | ||
| 1192 | if (c->big_lpt) | ||
| 1193 | nnode->num = calc_nnode_num_from_parent(c, parent, iip); | ||
| 1194 | } else { | ||
| 1195 | err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); | ||
| 1196 | if (err) | ||
| 1197 | goto out; | ||
| 1198 | err = unpack_nnode(c, buf, nnode); | ||
| 1199 | if (err) | ||
| 1200 | goto out; | ||
| 1201 | } | ||
| 1202 | err = validate_nnode(c, nnode, parent, iip); | ||
| 1203 | if (err) | ||
| 1204 | goto out; | ||
| 1205 | if (!c->big_lpt) | ||
| 1206 | nnode->num = calc_nnode_num_from_parent(c, parent, iip); | ||
| 1207 | if (parent) { | ||
| 1208 | branch->nnode = nnode; | ||
| 1209 | nnode->level = parent->level - 1; | ||
| 1210 | } else { | ||
| 1211 | c->nroot = nnode; | ||
| 1212 | nnode->level = c->lpt_hght; | ||
| 1213 | } | ||
| 1214 | nnode->parent = parent; | ||
| 1215 | nnode->iip = iip; | ||
| 1216 | return 0; | ||
| 1217 | |||
| 1218 | out: | ||
| 1219 | ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); | ||
| 1220 | kfree(nnode); | ||
| 1221 | return err; | ||
| 1222 | } | ||
| 1223 | |||
/**
 * read_pnode - read a pnode from flash and link it to the tree in memory.
 * @c: UBIFS file-system description object
 * @parent: parent nnode
 * @iip: index in parent
 *
 * The pnode location is taken from @parent's branch @iip. An lnum of zero
 * means the pnode was never written, in which case it is synthesized as a
 * pnode full of empty-LEB properties. On success the new pnode is linked
 * into the in-memory LPT under @parent.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
{
	struct ubifs_nbranch *branch;
	struct ubifs_pnode *pnode = NULL;
	void *buf = c->lpt_nod_buf;
	int err, lnum, offs;

	branch = &parent->nbranch[iip];
	lnum = branch->lnum;
	offs = branch->offs;
	pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
	if (!pnode) {
		err = -ENOMEM;
		goto out;
	}
	if (lnum == 0) {
		/*
		 * This pnode was not written which just means that the LEB
		 * properties in it describe empty LEBs. We make the pnode as
		 * though we had read it.
		 */
		int i;

		if (c->big_lpt)
			pnode->num = calc_pnode_num_from_parent(c, parent, iip);
		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
			struct ubifs_lprops * const lprops = &pnode->lprops[i];

			/* Whole LEB free, then categorize accordingly */
			lprops->free = c->leb_size;
			lprops->flags = ubifs_categorize_lprops(c, lprops);
		}
	} else {
		/* Read the packed pnode into the shared node buffer */
		err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
		if (err)
			goto out;
		err = unpack_pnode(c, buf, pnode);
		if (err)
			goto out;
	}
	err = validate_pnode(c, pnode, parent, iip);
	if (err)
		goto out;
	/* In the small-LPT model, node numbers are not stored on flash */
	if (!c->big_lpt)
		pnode->num = calc_pnode_num_from_parent(c, parent, iip);
	/* Link the new pnode into the in-memory tree */
	branch->pnode = pnode;
	pnode->parent = parent;
	pnode->iip = iip;
	set_pnode_lnum(c, pnode);
	c->pnodes_have += 1;	/* one more pnode cached in memory */
	return 0;

out:
	ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
	dbg_dump_pnode(c, pnode, parent, iip);
	dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
	kfree(pnode);
	return err;
}
| 1290 | |||
| 1291 | /** | ||
| 1292 | * read_ltab - read LPT's own lprops table. | ||
| 1293 | * @c: UBIFS file-system description object | ||
| 1294 | * | ||
| 1295 | * This function returns %0 on success and a negative error code on failure. | ||
| 1296 | */ | ||
| 1297 | static int read_ltab(struct ubifs_info *c) | ||
| 1298 | { | ||
| 1299 | int err; | ||
| 1300 | void *buf; | ||
| 1301 | |||
| 1302 | buf = vmalloc(c->ltab_sz); | ||
| 1303 | if (!buf) | ||
| 1304 | return -ENOMEM; | ||
| 1305 | err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); | ||
| 1306 | if (err) | ||
| 1307 | goto out; | ||
| 1308 | err = unpack_ltab(c, buf); | ||
| 1309 | out: | ||
| 1310 | vfree(buf); | ||
| 1311 | return err; | ||
| 1312 | } | ||
| 1313 | |||
/**
 * read_lsave - read LPT's save table.
 * @c: UBIFS file-system description object
 *
 * Reads the lsave table from flash into @c->lsave and then looks up every
 * LEB it names, which pre-loads the corresponding pnodes into the in-memory
 * LPT cache.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int read_lsave(struct ubifs_info *c)
{
	int err, i;
	void *buf;

	buf = vmalloc(c->lsave_sz);
	if (!buf)
		return -ENOMEM;
	err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
	if (err)
		goto out;
	err = unpack_lsave(c, buf);
	if (err)
		goto out;
	for (i = 0; i < c->lsave_cnt; i++) {
		int lnum = c->lsave[i];

		/*
		 * Due to automatic resizing, the values in the lsave table
		 * could be beyond the volume size - just ignore them.
		 */
		if (lnum >= c->leb_cnt)
			continue;
		/*
		 * NOTE(review): the lookup result is intentionally discarded -
		 * the call is made only for its cache-warming side effect, and
		 * any read error would presumably surface on a later lookup.
		 */
		ubifs_lpt_lookup(c, lnum);
	}
out:
	vfree(buf);
	return err;
}
| 1349 | |||
| 1350 | /** | ||
| 1351 | * ubifs_get_nnode - get a nnode. | ||
| 1352 | * @c: UBIFS file-system description object | ||
| 1353 | * @parent: parent nnode (or NULL for the root) | ||
| 1354 | * @iip: index in parent | ||
| 1355 | * | ||
| 1356 | * This function returns a pointer to the nnode on success or a negative error | ||
| 1357 | * code on failure. | ||
| 1358 | */ | ||
| 1359 | struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, | ||
| 1360 | struct ubifs_nnode *parent, int iip) | ||
| 1361 | { | ||
| 1362 | struct ubifs_nbranch *branch; | ||
| 1363 | struct ubifs_nnode *nnode; | ||
| 1364 | int err; | ||
| 1365 | |||
| 1366 | branch = &parent->nbranch[iip]; | ||
| 1367 | nnode = branch->nnode; | ||
| 1368 | if (nnode) | ||
| 1369 | return nnode; | ||
| 1370 | err = ubifs_read_nnode(c, parent, iip); | ||
| 1371 | if (err) | ||
| 1372 | return ERR_PTR(err); | ||
| 1373 | return branch->nnode; | ||
| 1374 | } | ||
| 1375 | |||
| 1376 | /** | ||
| 1377 | * ubifs_get_pnode - get a pnode. | ||
| 1378 | * @c: UBIFS file-system description object | ||
| 1379 | * @parent: parent nnode | ||
| 1380 | * @iip: index in parent | ||
| 1381 | * | ||
| 1382 | * This function returns a pointer to the pnode on success or a negative error | ||
| 1383 | * code on failure. | ||
| 1384 | */ | ||
| 1385 | struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, | ||
| 1386 | struct ubifs_nnode *parent, int iip) | ||
| 1387 | { | ||
| 1388 | struct ubifs_nbranch *branch; | ||
| 1389 | struct ubifs_pnode *pnode; | ||
| 1390 | int err; | ||
| 1391 | |||
| 1392 | branch = &parent->nbranch[iip]; | ||
| 1393 | pnode = branch->pnode; | ||
| 1394 | if (pnode) | ||
| 1395 | return pnode; | ||
| 1396 | err = read_pnode(c, parent, iip); | ||
| 1397 | if (err) | ||
| 1398 | return ERR_PTR(err); | ||
| 1399 | update_cats(c, branch->pnode); | ||
| 1400 | return branch->pnode; | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | /** | ||
| 1404 | * ubifs_lpt_lookup - lookup LEB properties in the LPT. | ||
| 1405 | * @c: UBIFS file-system description object | ||
| 1406 | * @lnum: LEB number to lookup | ||
| 1407 | * | ||
| 1408 | * This function returns a pointer to the LEB properties on success or a | ||
| 1409 | * negative error code on failure. | ||
| 1410 | */ | ||
| 1411 | struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) | ||
| 1412 | { | ||
| 1413 | int err, i, h, iip, shft; | ||
| 1414 | struct ubifs_nnode *nnode; | ||
| 1415 | struct ubifs_pnode *pnode; | ||
| 1416 | |||
| 1417 | if (!c->nroot) { | ||
| 1418 | err = ubifs_read_nnode(c, NULL, 0); | ||
| 1419 | if (err) | ||
| 1420 | return ERR_PTR(err); | ||
| 1421 | } | ||
| 1422 | nnode = c->nroot; | ||
| 1423 | i = lnum - c->main_first; | ||
| 1424 | shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; | ||
| 1425 | for (h = 1; h < c->lpt_hght; h++) { | ||
| 1426 | iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); | ||
| 1427 | shft -= UBIFS_LPT_FANOUT_SHIFT; | ||
| 1428 | nnode = ubifs_get_nnode(c, nnode, iip); | ||
| 1429 | if (IS_ERR(nnode)) | ||
| 1430 | return ERR_PTR(PTR_ERR(nnode)); | ||
| 1431 | } | ||
| 1432 | iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); | ||
| 1433 | shft -= UBIFS_LPT_FANOUT_SHIFT; | ||
| 1434 | pnode = ubifs_get_pnode(c, nnode, iip); | ||
| 1435 | if (IS_ERR(pnode)) | ||
| 1436 | return ERR_PTR(PTR_ERR(pnode)); | ||
| 1437 | iip = (i & (UBIFS_LPT_FANOUT - 1)); | ||
| 1438 | dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, | ||
| 1439 | pnode->lprops[iip].free, pnode->lprops[iip].dirty, | ||
| 1440 | pnode->lprops[iip].flags); | ||
| 1441 | return &pnode->lprops[iip]; | ||
| 1442 | } | ||
| 1443 | |||
/**
 * dirty_cow_nnode - ensure a nnode is not being committed.
 * @c: UBIFS file-system description object
 * @nnode: nnode to check
 *
 * If @nnode is not part of an in-progress commit it is simply marked dirty
 * (and its dirty space accounted). If it is being committed, a copy is made
 * so the committed version stays immutable; the copy replaces @nnode in the
 * in-memory tree and the original is marked obsolete.
 *
 * Returns dirtied nnode on success or negative error code on failure.
 */
static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c,
					   struct ubifs_nnode *nnode)
{
	struct ubifs_nnode *n;
	int i;

	if (!test_bit(COW_CNODE, &nnode->flags)) {
		/* nnode is not being committed */
		if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
			/* First time it becomes dirty - account for it */
			c->dirty_nn_cnt += 1;
			ubifs_add_nnode_dirt(c, nnode);
		}
		return nnode;
	}

	/* nnode is being committed, so copy it */
	n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS);
	if (unlikely(!n))
		return ERR_PTR(-ENOMEM);

	memcpy(n, nnode, sizeof(struct ubifs_nnode));
	n->cnext = NULL;
	/* The copy is dirty and is not part of the commit */
	__set_bit(DIRTY_CNODE, &n->flags);
	__clear_bit(COW_CNODE, &n->flags);

	/* The children now have new parent */
	for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
		struct ubifs_nbranch *branch = &n->nbranch[i];

		if (branch->cnode)
			branch->cnode->parent = n;
	}

	/* The original will be freed once the commit finishes with it */
	ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags));
	__set_bit(OBSOLETE_CNODE, &nnode->flags);

	c->dirty_nn_cnt += 1;
	ubifs_add_nnode_dirt(c, nnode);
	/* Splice the copy into the tree in place of the original */
	if (nnode->parent)
		nnode->parent->nbranch[n->iip].nnode = n;
	else
		c->nroot = n;
	return n;
}
| 1495 | |||
/**
 * dirty_cow_pnode - ensure a pnode is not being committed.
 * @c: UBIFS file-system description object
 * @pnode: pnode to check
 *
 * If @pnode is not part of an in-progress commit it is simply marked dirty.
 * Otherwise a copy is made, spliced into the in-memory tree in place of the
 * original, and the original is marked obsolete.
 *
 * Returns dirtied pnode on success or negative error code on failure.
 */
static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c,
					   struct ubifs_pnode *pnode)
{
	struct ubifs_pnode *p;

	if (!test_bit(COW_CNODE, &pnode->flags)) {
		/* pnode is not being committed */
		if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) {
			/* First time it becomes dirty - account for it */
			c->dirty_pn_cnt += 1;
			add_pnode_dirt(c, pnode);
		}
		return pnode;
	}

	/* pnode is being committed, so copy it */
	p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
	if (unlikely(!p))
		return ERR_PTR(-ENOMEM);

	memcpy(p, pnode, sizeof(struct ubifs_pnode));
	p->cnext = NULL;
	__set_bit(DIRTY_CNODE, &p->flags);
	__clear_bit(COW_CNODE, &p->flags);
	/* Category lists/heaps must now point at the copy's lprops */
	replace_cats(c, pnode, p);

	ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags));
	__set_bit(OBSOLETE_CNODE, &pnode->flags);

	c->dirty_pn_cnt += 1;
	add_pnode_dirt(c, pnode);
	/*
	 * Unlike the nnode case there is no root check here - the LPT root
	 * is an nnode, so a pnode evidently always has a parent.
	 */
	pnode->parent->nbranch[p->iip].pnode = p;
	return p;
}
| 1536 | |||
| 1537 | /** | ||
| 1538 | * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. | ||
| 1539 | * @c: UBIFS file-system description object | ||
| 1540 | * @lnum: LEB number to lookup | ||
| 1541 | * | ||
| 1542 | * This function returns a pointer to the LEB properties on success or a | ||
| 1543 | * negative error code on failure. | ||
| 1544 | */ | ||
| 1545 | struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) | ||
| 1546 | { | ||
| 1547 | int err, i, h, iip, shft; | ||
| 1548 | struct ubifs_nnode *nnode; | ||
| 1549 | struct ubifs_pnode *pnode; | ||
| 1550 | |||
| 1551 | if (!c->nroot) { | ||
| 1552 | err = ubifs_read_nnode(c, NULL, 0); | ||
| 1553 | if (err) | ||
| 1554 | return ERR_PTR(err); | ||
| 1555 | } | ||
| 1556 | nnode = c->nroot; | ||
| 1557 | nnode = dirty_cow_nnode(c, nnode); | ||
| 1558 | if (IS_ERR(nnode)) | ||
| 1559 | return ERR_PTR(PTR_ERR(nnode)); | ||
| 1560 | i = lnum - c->main_first; | ||
| 1561 | shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; | ||
| 1562 | for (h = 1; h < c->lpt_hght; h++) { | ||
| 1563 | iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); | ||
| 1564 | shft -= UBIFS_LPT_FANOUT_SHIFT; | ||
| 1565 | nnode = ubifs_get_nnode(c, nnode, iip); | ||
| 1566 | if (IS_ERR(nnode)) | ||
| 1567 | return ERR_PTR(PTR_ERR(nnode)); | ||
| 1568 | nnode = dirty_cow_nnode(c, nnode); | ||
| 1569 | if (IS_ERR(nnode)) | ||
| 1570 | return ERR_PTR(PTR_ERR(nnode)); | ||
| 1571 | } | ||
| 1572 | iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); | ||
| 1573 | shft -= UBIFS_LPT_FANOUT_SHIFT; | ||
| 1574 | pnode = ubifs_get_pnode(c, nnode, iip); | ||
| 1575 | if (IS_ERR(pnode)) | ||
| 1576 | return ERR_PTR(PTR_ERR(pnode)); | ||
| 1577 | pnode = dirty_cow_pnode(c, pnode); | ||
| 1578 | if (IS_ERR(pnode)) | ||
| 1579 | return ERR_PTR(PTR_ERR(pnode)); | ||
| 1580 | iip = (i & (UBIFS_LPT_FANOUT - 1)); | ||
| 1581 | dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, | ||
| 1582 | pnode->lprops[iip].free, pnode->lprops[iip].dirty, | ||
| 1583 | pnode->lprops[iip].flags); | ||
| 1584 | ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags)); | ||
| 1585 | return &pnode->lprops[iip]; | ||
| 1586 | } | ||
| 1587 | |||
/**
 * lpt_init_rd - initialize the LPT for reading.
 * @c: UBIFS file-system description object
 *
 * Allocates the in-memory ltab, the node buffer, and the category heaps,
 * then reads the ltab from flash.
 *
 * NOTE(review): on allocation failure, earlier allocations are not freed
 * here - presumably the caller's error path frees them; confirm.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int lpt_init_rd(struct ubifs_info *c)
{
	int err, i;

	c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
	if (!c->ltab)
		return -ENOMEM;

	/* One buffer big enough for either node type, reused for all reads */
	i = max_t(int, c->nnode_sz, c->pnode_sz);
	c->lpt_nod_buf = kmalloc(i, GFP_KERNEL);
	if (!c->lpt_nod_buf)
		return -ENOMEM;

	for (i = 0; i < LPROPS_HEAP_CNT; i++) {
		c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ,
					     GFP_KERNEL);
		if (!c->lpt_heap[i].arr)
			return -ENOMEM;
		c->lpt_heap[i].cnt = 0;
		c->lpt_heap[i].max_cnt = LPT_HEAP_SZ;
	}

	c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL);
	if (!c->dirty_idx.arr)
		return -ENOMEM;
	c->dirty_idx.cnt = 0;
	c->dirty_idx.max_cnt = LPT_HEAP_SZ;

	err = read_ltab(c);
	if (err)
		return err;

	/* Dump the LPT geometry for debugging */
	dbg_lp("space_bits %d", c->space_bits);
	dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
	dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
	dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
	dbg_lp("pcnt_bits %d", c->pcnt_bits);
	dbg_lp("lnum_bits %d", c->lnum_bits);
	dbg_lp("pnode_sz %d", c->pnode_sz);
	dbg_lp("nnode_sz %d", c->nnode_sz);
	dbg_lp("ltab_sz %d", c->ltab_sz);
	dbg_lp("lsave_sz %d", c->lsave_sz);
	dbg_lp("lsave_cnt %d", c->lsave_cnt);
	dbg_lp("lpt_hght %d", c->lpt_hght);
	dbg_lp("big_lpt %d", c->big_lpt);
	dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
	dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
	if (c->big_lpt)
		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);

	return 0;
}
| 1647 | |||
/**
 * lpt_init_wr - initialize the LPT for writing.
 * @c: UBIFS file-system description object
 *
 * 'lpt_init_rd()' must have been called already.
 *
 * Allocates the commit copy of the ltab and the LEB-sized write buffer,
 * reads the lsave table when the big-LPT model is in use, and unmaps all
 * completely free LPT LEBs so they can be reused.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int lpt_init_wr(struct ubifs_info *c)
{
	int err, i;

	/* Shadow copy of the ltab used during commit */
	c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
	if (!c->ltab_cmt)
		return -ENOMEM;

	c->lpt_buf = vmalloc(c->leb_size);
	if (!c->lpt_buf)
		return -ENOMEM;

	/* The lsave table only exists in the big-LPT model */
	if (c->big_lpt) {
		c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS);
		if (!c->lsave)
			return -ENOMEM;
		err = read_lsave(c);
		if (err)
			return err;
	}

	/* Unmap LPT LEBs that are entirely free so they may be rewritten */
	for (i = 0; i < c->lpt_lebs; i++)
		if (c->ltab[i].free == c->leb_size) {
			err = ubifs_leb_unmap(c, i + c->lpt_first);
			if (err)
				return err;
		}

	return 0;
}
| 1686 | |||
/**
 * ubifs_lpt_init - initialize the LPT.
 * @c: UBIFS file-system description object
 * @rd: whether to initialize lpt for reading
 * @wr: whether to initialize lpt for writing
 *
 * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true
 * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is
 * true.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
{
	int err = 0;

	if (rd)
		err = lpt_init_rd(c);
	/* Write-side setup builds on the read-side state */
	if (!err && wr)
		err = lpt_init_wr(c);

	return err;
}
| 1717 | |||
/**
 * struct lpt_scan_node - somewhere to put nodes while we scan LPT.
 * @nnode: where to keep a nnode
 * @pnode: where to keep a pnode
 * @cnode: where to keep a cnode
 * @in_tree: is the node in the tree in memory
 * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in
 * the tree
 * @ptr.pnode: ditto for pnode
 * @ptr.cnode: ditto for cnode
 *
 * One entry of the scan "path" - holds either the node storage itself (when
 * the node is not part of the in-memory tree) or, via @ptr, a reference to
 * the tree-resident node. The anonymous union lets one slot serve any node
 * type, since a path level holds exactly one kind at a time.
 */
struct lpt_scan_node {
	union {
		struct ubifs_nnode nnode;
		struct ubifs_pnode pnode;
		struct ubifs_cnode cnode;
	};
	int in_tree;
	union {
		struct ubifs_nnode *nnode;
		struct ubifs_pnode *pnode;
		struct ubifs_cnode *cnode;
	} ptr;
};
| 1742 | |||
/**
 * scan_get_nnode - for the scan, get a nnode from either the tree or flash.
 * @c: the UBIFS file-system description object
 * @path: where to put the nnode
 * @parent: parent of the nnode
 * @iip: index in parent of the nnode
 *
 * Unlike 'ubifs_get_nnode()', a node read from flash is stored in @path
 * rather than being linked into the in-memory tree, and @path->in_tree
 * records which case occurred.
 *
 * This function returns a pointer to the nnode on success or a negative error
 * code on failure.
 */
static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
					  struct lpt_scan_node *path,
					  struct ubifs_nnode *parent, int iip)
{
	struct ubifs_nbranch *branch;
	struct ubifs_nnode *nnode;
	void *buf = c->lpt_nod_buf;
	int err;

	branch = &parent->nbranch[iip];
	nnode = branch->nnode;
	if (nnode) {
		/* Already cached in the tree - just reference it */
		path->in_tree = 1;
		path->ptr.nnode = nnode;
		return nnode;
	}
	/* Not in the tree - materialize it in the path slot instead */
	nnode = &path->nnode;
	path->in_tree = 0;
	path->ptr.nnode = nnode;
	memset(nnode, 0, sizeof(struct ubifs_nnode));
	if (branch->lnum == 0) {
		/*
		 * This nnode was not written which just means that the LEB
		 * properties in the subtree below it describe empty LEBs. We
		 * make the nnode as though we had read it, which in fact means
		 * doing almost nothing.
		 */
		if (c->big_lpt)
			nnode->num = calc_nnode_num_from_parent(c, parent, iip);
	} else {
		err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
			       c->nnode_sz);
		if (err)
			return ERR_PTR(err);
		err = unpack_nnode(c, buf, nnode);
		if (err)
			return ERR_PTR(err);
	}
	err = validate_nnode(c, nnode, parent, iip);
	if (err)
		return ERR_PTR(err);
	/* In the small-LPT model, node numbers are not stored on flash */
	if (!c->big_lpt)
		nnode->num = calc_nnode_num_from_parent(c, parent, iip);
	nnode->level = parent->level - 1;
	nnode->parent = parent;
	nnode->iip = iip;
	return nnode;
}
| 1801 | |||
/**
 * scan_get_pnode - for the scan, get a pnode from either the tree or flash.
 * @c: the UBIFS file-system description object
 * @path: where to put the pnode
 * @parent: parent of the pnode
 * @iip: index in parent of the pnode
 *
 * Unlike 'ubifs_get_pnode()', a node read from flash is stored in @path
 * rather than being linked into the in-memory tree, and @path->in_tree
 * records which case occurred.
 *
 * This function returns a pointer to the pnode on success or a negative error
 * code on failure.
 */
static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
					  struct lpt_scan_node *path,
					  struct ubifs_nnode *parent, int iip)
{
	struct ubifs_nbranch *branch;
	struct ubifs_pnode *pnode;
	void *buf = c->lpt_nod_buf;
	int err;

	branch = &parent->nbranch[iip];
	pnode = branch->pnode;
	if (pnode) {
		/* Already cached in the tree - just reference it */
		path->in_tree = 1;
		path->ptr.pnode = pnode;
		return pnode;
	}
	/* Not in the tree - materialize it in the path slot instead */
	pnode = &path->pnode;
	path->in_tree = 0;
	path->ptr.pnode = pnode;
	memset(pnode, 0, sizeof(struct ubifs_pnode));
	if (branch->lnum == 0) {
		/*
		 * This pnode was not written which just means that the LEB
		 * properties in it describe empty LEBs. We make the pnode as
		 * though we had read it.
		 */
		int i;

		if (c->big_lpt)
			pnode->num = calc_pnode_num_from_parent(c, parent, iip);
		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
			struct ubifs_lprops * const lprops = &pnode->lprops[i];

			/* Whole LEB free, then categorize accordingly */
			lprops->free = c->leb_size;
			lprops->flags = ubifs_categorize_lprops(c, lprops);
		}
	} else {
		/* Sanity-check the location before reading from flash */
		ubifs_assert(branch->lnum >= c->lpt_first &&
			     branch->lnum <= c->lpt_last);
		ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
		err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
			       c->pnode_sz);
		if (err)
			return ERR_PTR(err);
		err = unpack_pnode(c, buf, pnode);
		if (err)
			return ERR_PTR(err);
	}
	err = validate_pnode(c, pnode, parent, iip);
	if (err)
		return ERR_PTR(err);
	/* In the small-LPT model, node numbers are not stored on flash */
	if (!c->big_lpt)
		pnode->num = calc_pnode_num_from_parent(c, parent, iip);
	pnode->parent = parent;
	pnode->iip = iip;
	set_pnode_lnum(c, pnode);
	return pnode;
}
| 1870 | |||
/**
 * ubifs_lpt_scan_nolock - scan the LPT.
 * @c: the UBIFS file-system description object
 * @start_lnum: LEB number from which to start scanning
 * @end_lnum: LEB number at which to stop scanning
 * @scan_cb: callback function called for each lprops
 * @data: data to be passed to the callback function
 *
 * Iterates over the lprops of main-area LEBs from @start_lnum to @end_lnum
 * (wrapping around at the end of the main area), invoking @scan_cb for each.
 * Nodes visited along the way are held in a temporary "path" array and are
 * only linked into the in-memory tree if the callback requests it with
 * %LPT_SCAN_ADD; %LPT_SCAN_STOP ends the scan. If @start_lnum is -1 the scan
 * begins just after @end_lnum.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
			  ubifs_lpt_scan_callback scan_cb, void *data)
{
	int err = 0, i, h, iip, shft;
	struct ubifs_nnode *nnode;
	struct ubifs_pnode *pnode;
	struct lpt_scan_node *path;

	if (start_lnum == -1) {
		/* Start just after end_lnum, wrapping to the main area start */
		start_lnum = end_lnum + 1;
		if (start_lnum >= c->leb_cnt)
			start_lnum = c->main_first;
	}

	ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt);
	ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt);

	if (!c->nroot) {
		err = ubifs_read_nnode(c, NULL, 0);
		if (err)
			return err;
	}

	/* One path slot per tree level, root at index 0 */
	path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1),
		       GFP_NOFS);
	if (!path)
		return -ENOMEM;

	path[0].ptr.nnode = c->nroot;
	path[0].in_tree = 1;
again:
	/* Descend to the pnode containing start_lnum */
	nnode = c->nroot;
	i = start_lnum - c->main_first;
	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
	for (h = 1; h < c->lpt_hght; h++) {
		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
		shft -= UBIFS_LPT_FANOUT_SHIFT;
		nnode = scan_get_nnode(c, path + h, nnode, iip);
		if (IS_ERR(nnode)) {
			err = PTR_ERR(nnode);
			goto out;
		}
	}
	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
	shft -= UBIFS_LPT_FANOUT_SHIFT;
	pnode = scan_get_pnode(c, path + h, nnode, iip);
	if (IS_ERR(pnode)) {
		err = PTR_ERR(pnode);
		goto out;
	}
	iip = (i & (UBIFS_LPT_FANOUT - 1));

	/* Loop for each lprops */
	while (1) {
		struct ubifs_lprops *lprops = &pnode->lprops[iip];
		int ret, lnum = lprops->lnum;

		ret = scan_cb(c, lprops, path[h].in_tree, data);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret & LPT_SCAN_ADD) {
			/* Add all the nodes in path to the tree in memory */
			for (h = 1; h < c->lpt_hght; h++) {
				const size_t sz = sizeof(struct ubifs_nnode);
				struct ubifs_nnode *parent;

				if (path[h].in_tree)
					continue;
				nnode = kmalloc(sz, GFP_NOFS);
				if (!nnode) {
					err = -ENOMEM;
					goto out;
				}
				/* Copy out of the path slot and link it in */
				memcpy(nnode, &path[h].nnode, sz);
				parent = nnode->parent;
				parent->nbranch[nnode->iip].nnode = nnode;
				path[h].ptr.nnode = nnode;
				path[h].in_tree = 1;
				/* The next level's parent pointer moved too */
				path[h + 1].cnode.parent = nnode;
			}
			if (path[h].in_tree)
				ubifs_ensure_cat(c, lprops);
			else {
				const size_t sz = sizeof(struct ubifs_pnode);
				struct ubifs_nnode *parent;

				pnode = kmalloc(sz, GFP_NOFS);
				if (!pnode) {
					err = -ENOMEM;
					goto out;
				}
				memcpy(pnode, &path[h].pnode, sz);
				parent = pnode->parent;
				parent->nbranch[pnode->iip].pnode = pnode;
				path[h].ptr.pnode = pnode;
				path[h].in_tree = 1;
				update_cats(c, pnode);
				c->pnodes_have += 1;
			}
			/* Debug-only consistency checks of the whole LPT */
			err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)
						  c->nroot, 0, 0);
			if (err)
				goto out;
			err = dbg_check_cats(c);
			if (err)
				goto out;
		}
		if (ret & LPT_SCAN_STOP) {
			err = 0;
			break;
		}
		/* Get the next lprops */
		if (lnum == end_lnum) {
			/*
			 * We got to the end without finding what we were
			 * looking for
			 */
			err = -ENOSPC;
			goto out;
		}
		if (lnum + 1 >= c->leb_cnt) {
			/* Wrap-around to the beginning */
			start_lnum = c->main_first;
			goto again;
		}
		if (iip + 1 < UBIFS_LPT_FANOUT) {
			/* Next lprops is in the same pnode */
			iip += 1;
			continue;
		}
		/* We need to get the next pnode. Go up until we can go right */
		iip = pnode->iip;
		while (1) {
			h -= 1;
			ubifs_assert(h >= 0);
			nnode = path[h].ptr.nnode;
			if (iip + 1 < UBIFS_LPT_FANOUT)
				break;
			iip = nnode->iip;
		}
		/* Go right */
		iip += 1;
		/* Descend to the pnode */
		h += 1;
		for (; h < c->lpt_hght; h++) {
			nnode = scan_get_nnode(c, path + h, nnode, iip);
			if (IS_ERR(nnode)) {
				err = PTR_ERR(nnode);
				goto out;
			}
			/* After the first step, always take leftmost branch */
			iip = 0;
		}
		pnode = scan_get_pnode(c, path + h, nnode, iip);
		if (IS_ERR(pnode)) {
			err = PTR_ERR(pnode);
			goto out;
		}
		iip = 0;
	}
out:
	kfree(path);
	return err;
}
| 2047 | |||
| 2048 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 2049 | |||
/**
 * dbg_chk_pnode - check a pnode.
 * @c: the UBIFS file-system description object
 * @pnode: pnode to check
 * @col: pnode column
 *
 * Verifies that @pnode's number matches its computed column, and that every
 * in-range lprops in it has a consistent LEB number, a category compatible
 * with its flags, membership in the matching category heap or list, and
 * free/dirty amounts that match its category.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
			 int col)
{
	int i;

	if (pnode->num != col) {
		dbg_err("pnode num %d expected %d parent num %d iip %d",
			pnode->num, col, pnode->parent->num, pnode->iip);
		return -EINVAL;
	}
	for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
		struct ubifs_lprops *lp, *lprops = &pnode->lprops[i];
		/* LEB number this slot corresponds to in the main area */
		int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i +
			   c->main_first;
		int found, cat = lprops->flags & LPROPS_CAT_MASK;
		struct ubifs_lpt_heap *heap;
		struct list_head *list = NULL;

		/* The last pnode may cover slots past the end of the media */
		if (lnum >= c->leb_cnt)
			continue;
		if (lprops->lnum != lnum) {
			dbg_err("bad LEB number %d expected %d",
				lprops->lnum, lnum);
			return -EINVAL;
		}
		/* Taken LEBs must be uncategorized and are checked no further */
		if (lprops->flags & LPROPS_TAKEN) {
			if (cat != LPROPS_UNCAT) {
				dbg_err("LEB %d taken but not uncat %d",
					lprops->lnum, cat);
				return -EINVAL;
			}
			continue;
		}
		/* Category must be compatible with the index flag */
		if (lprops->flags & LPROPS_INDEX) {
			switch (cat) {
			case LPROPS_UNCAT:
			case LPROPS_DIRTY_IDX:
			case LPROPS_FRDI_IDX:
				break;
			default:
				dbg_err("LEB %d index but cat %d",
					lprops->lnum, cat);
				return -EINVAL;
			}
		} else {
			switch (cat) {
			case LPROPS_UNCAT:
			case LPROPS_DIRTY:
			case LPROPS_FREE:
			case LPROPS_EMPTY:
			case LPROPS_FREEABLE:
				break;
			default:
				dbg_err("LEB %d not index but cat %d",
					lprops->lnum, cat);
				return -EINVAL;
			}
		}
		/* Pick the list the lprops should be on (heap cats skip this) */
		switch (cat) {
		case LPROPS_UNCAT:
			list = &c->uncat_list;
			break;
		case LPROPS_EMPTY:
			list = &c->empty_list;
			break;
		case LPROPS_FREEABLE:
			list = &c->freeable_list;
			break;
		case LPROPS_FRDI_IDX:
			list = &c->frdi_idx_list;
			break;
		}
		found = 0;
		/* Check membership in the matching category heap or list */
		switch (cat) {
		case LPROPS_DIRTY:
		case LPROPS_DIRTY_IDX:
		case LPROPS_FREE:
			/* Heap categories are 1-based, lpt_heap[] is 0-based */
			heap = &c->lpt_heap[cat - 1];
			if (lprops->hpos < heap->cnt &&
			    heap->arr[lprops->hpos] == lprops)
				found = 1;
			break;
		case LPROPS_UNCAT:
		case LPROPS_EMPTY:
		case LPROPS_FREEABLE:
		case LPROPS_FRDI_IDX:
			list_for_each_entry(lp, list, list)
				if (lprops == lp) {
					found = 1;
					break;
				}
			break;
		}
		if (!found) {
			dbg_err("LEB %d cat %d not found in cat heap/list",
				lprops->lnum, cat);
			return -EINVAL;
		}
		/* Free/dirty amounts must match the category */
		switch (cat) {
		case LPROPS_EMPTY:
			if (lprops->free != c->leb_size) {
				dbg_err("LEB %d cat %d free %d dirty %d",
					lprops->lnum, cat, lprops->free,
					lprops->dirty);
				return -EINVAL;
			}
			/*
			 * NOTE(review): no break here - empty LEBs also get
			 * the free + dirty == leb_size check below, which for
			 * them implies dirty == 0. Looks intentional, but
			 * confirm the fall-through is deliberate.
			 */
		case LPROPS_FREEABLE:
		case LPROPS_FRDI_IDX:
			if (lprops->free + lprops->dirty != c->leb_size) {
				dbg_err("LEB %d cat %d free %d dirty %d",
					lprops->lnum, cat, lprops->free,
					lprops->dirty);
				return -EINVAL;
			}
		}
	}
	return 0;
}
| 2176 | |||
/**
 * dbg_check_lpt_nodes - check nnodes and pnodes.
 * @c: the UBIFS file-system description object
 * @cnode: next cnode (nnode or pnode) to check
 * @row: row of cnode (root is zero)
 * @col: column of cnode (leftmost is zero)
 *
 * Iterative depth-first walk of the in-memory LPT starting at @cnode:
 * nnodes have their node number checked against the (row, col) position,
 * pnodes are fully validated by 'dbg_chk_pnode()'. Only does anything when
 * the %UBIFS_CHK_LPROPS debugging check is enabled.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
			int row, int col)
{
	struct ubifs_nnode *nnode, *nn;
	struct ubifs_cnode *cn;
	int num, iip = 0, err;

	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
		return 0;

	while (cnode) {
		ubifs_assert(row >= 0);
		nnode = cnode->parent;
		if (cnode->level) {
			/* cnode is a nnode */
			num = calc_nnode_num(row, col);
			if (cnode->num != num) {
				dbg_err("nnode num %d expected %d "
					"parent num %d iip %d", cnode->num, num,
					(nnode ? nnode->num : 0), cnode->iip);
				return -EINVAL;
			}
			nn = (struct ubifs_nnode *)cnode;
			/* Find the next child in memory, starting at iip */
			while (iip < UBIFS_LPT_FANOUT) {
				cn = nn->nbranch[iip].cnode;
				if (cn) {
					/* Go down */
					row += 1;
					/* Child's column in the wider row */
					col <<= UBIFS_LPT_FANOUT_SHIFT;
					col += iip;
					iip = 0;
					cnode = cn;
					break;
				}
				/* Go right */
				iip += 1;
			}
			/* Descended to a child - process it first */
			if (iip < UBIFS_LPT_FANOUT)
				continue;
		} else {
			struct ubifs_pnode *pnode;

			/* cnode is a pnode */
			pnode = (struct ubifs_pnode *)cnode;
			err = dbg_chk_pnode(c, pnode, col);
			if (err)
				return err;
		}
		/* Go up and to the right */
		row -= 1;
		col >>= UBIFS_LPT_FANOUT_SHIFT;
		/* Resume the parent's child scan after this child's slot */
		iip = cnode->iip + 1;
		cnode = (struct ubifs_cnode *)nnode;
	}
	return 0;
}
| 2242 | |||
| 2243 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c new file mode 100644 index 000000000000..5f0b83e20af6 --- /dev/null +++ b/fs/ubifs/lpt_commit.c | |||
| @@ -0,0 +1,1648 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements commit-related functionality of the LEB properties | ||
| 25 | * subsystem. | ||
| 26 | */ | ||
| 27 | |||
| 28 | #include <linux/crc16.h> | ||
| 29 | #include "ubifs.h" | ||
| 30 | |||
| 31 | /** | ||
| 32 | * first_dirty_cnode - find first dirty cnode. | ||
| 33 | * @c: UBIFS file-system description object | ||
| 34 | * @nnode: nnode at which to start | ||
| 35 | * | ||
| 36 | * This function returns the first dirty cnode or %NULL if there is not one. | ||
| 37 | */ | ||
| 38 | static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode) | ||
| 39 | { | ||
| 40 | ubifs_assert(nnode); | ||
| 41 | while (1) { | ||
| 42 | int i, cont = 0; | ||
| 43 | |||
| 44 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 45 | struct ubifs_cnode *cnode; | ||
| 46 | |||
| 47 | cnode = nnode->nbranch[i].cnode; | ||
| 48 | if (cnode && | ||
| 49 | test_bit(DIRTY_CNODE, &cnode->flags)) { | ||
| 50 | if (cnode->level == 0) | ||
| 51 | return cnode; | ||
| 52 | nnode = (struct ubifs_nnode *)cnode; | ||
| 53 | cont = 1; | ||
| 54 | break; | ||
| 55 | } | ||
| 56 | } | ||
| 57 | if (!cont) | ||
| 58 | return (struct ubifs_cnode *)nnode; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | /** | ||
| 63 | * next_dirty_cnode - find next dirty cnode. | ||
| 64 | * @cnode: cnode from which to begin searching | ||
| 65 | * | ||
| 66 | * This function returns the next dirty cnode or %NULL if there is not one. | ||
| 67 | */ | ||
| 68 | static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode) | ||
| 69 | { | ||
| 70 | struct ubifs_nnode *nnode; | ||
| 71 | int i; | ||
| 72 | |||
| 73 | ubifs_assert(cnode); | ||
| 74 | nnode = cnode->parent; | ||
| 75 | if (!nnode) | ||
| 76 | return NULL; | ||
| 77 | for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) { | ||
| 78 | cnode = nnode->nbranch[i].cnode; | ||
| 79 | if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) { | ||
| 80 | if (cnode->level == 0) | ||
| 81 | return cnode; /* cnode is a pnode */ | ||
| 82 | /* cnode is a nnode */ | ||
| 83 | return first_dirty_cnode((struct ubifs_nnode *)cnode); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | return (struct ubifs_cnode *)nnode; | ||
| 87 | } | ||
| 88 | |||
| 89 | /** | ||
| 90 | * get_cnodes_to_commit - create list of dirty cnodes to commit. | ||
| 91 | * @c: UBIFS file-system description object | ||
| 92 | * | ||
| 93 | * This function returns the number of cnodes to commit. | ||
| 94 | */ | ||
| 95 | static int get_cnodes_to_commit(struct ubifs_info *c) | ||
| 96 | { | ||
| 97 | struct ubifs_cnode *cnode, *cnext; | ||
| 98 | int cnt = 0; | ||
| 99 | |||
| 100 | if (!c->nroot) | ||
| 101 | return 0; | ||
| 102 | |||
| 103 | if (!test_bit(DIRTY_CNODE, &c->nroot->flags)) | ||
| 104 | return 0; | ||
| 105 | |||
| 106 | c->lpt_cnext = first_dirty_cnode(c->nroot); | ||
| 107 | cnode = c->lpt_cnext; | ||
| 108 | if (!cnode) | ||
| 109 | return 0; | ||
| 110 | cnt += 1; | ||
| 111 | while (1) { | ||
| 112 | ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); | ||
| 113 | __set_bit(COW_ZNODE, &cnode->flags); | ||
| 114 | cnext = next_dirty_cnode(cnode); | ||
| 115 | if (!cnext) { | ||
| 116 | cnode->cnext = c->lpt_cnext; | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | cnode->cnext = cnext; | ||
| 120 | cnode = cnext; | ||
| 121 | cnt += 1; | ||
| 122 | } | ||
| 123 | dbg_cmt("committing %d cnodes", cnt); | ||
| 124 | dbg_lp("committing %d cnodes", cnt); | ||
| 125 | ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt); | ||
| 126 | return cnt; | ||
| 127 | } | ||
| 128 | |||
| 129 | /** | ||
| 130 | * upd_ltab - update LPT LEB properties. | ||
| 131 | * @c: UBIFS file-system description object | ||
| 132 | * @lnum: LEB number | ||
| 133 | * @free: amount of free space | ||
| 134 | * @dirty: amount of dirty space to add | ||
| 135 | */ | ||
| 136 | static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty) | ||
| 137 | { | ||
| 138 | dbg_lp("LEB %d free %d dirty %d to %d +%d", | ||
| 139 | lnum, c->ltab[lnum - c->lpt_first].free, | ||
| 140 | c->ltab[lnum - c->lpt_first].dirty, free, dirty); | ||
| 141 | ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); | ||
| 142 | c->ltab[lnum - c->lpt_first].free = free; | ||
| 143 | c->ltab[lnum - c->lpt_first].dirty += dirty; | ||
| 144 | } | ||
| 145 | |||
| 146 | /** | ||
| 147 | * alloc_lpt_leb - allocate an LPT LEB that is empty. | ||
| 148 | * @c: UBIFS file-system description object | ||
| 149 | * @lnum: LEB number is passed and returned here | ||
| 150 | * | ||
| 151 | * This function finds the next empty LEB in the ltab starting from @lnum. If a | ||
| 152 | * an empty LEB is found it is returned in @lnum and the function returns %0. | ||
| 153 | * Otherwise the function returns -ENOSPC. Note however, that LPT is designed | ||
| 154 | * never to run out of space. | ||
| 155 | */ | ||
| 156 | static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) | ||
| 157 | { | ||
| 158 | int i, n; | ||
| 159 | |||
| 160 | n = *lnum - c->lpt_first + 1; | ||
| 161 | for (i = n; i < c->lpt_lebs; i++) { | ||
| 162 | if (c->ltab[i].tgc || c->ltab[i].cmt) | ||
| 163 | continue; | ||
| 164 | if (c->ltab[i].free == c->leb_size) { | ||
| 165 | c->ltab[i].cmt = 1; | ||
| 166 | *lnum = i + c->lpt_first; | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | for (i = 0; i < n; i++) { | ||
| 172 | if (c->ltab[i].tgc || c->ltab[i].cmt) | ||
| 173 | continue; | ||
| 174 | if (c->ltab[i].free == c->leb_size) { | ||
| 175 | c->ltab[i].cmt = 1; | ||
| 176 | *lnum = i + c->lpt_first; | ||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | } | ||
| 180 | dbg_err("last LEB %d", *lnum); | ||
| 181 | dump_stack(); | ||
| 182 | return -ENOSPC; | ||
| 183 | } | ||
| 184 | |||
/**
 * layout_cnodes - layout cnodes for commit.
 * @c: UBIFS file-system description object
 *
 * Walks the circular list of dirty cnodes built by 'get_cnodes_to_commit()'
 * and assigns each one a position (LEB number and offset) in the LPT area,
 * recording it in the parent's branch (or in @c->lpt_lnum/@c->lpt_offs for
 * the root). Space is also reserved for the lsave table ("big" model only)
 * and the ltab table. Nothing is written here; 'write_cnodes()' later
 * replays the identical allocation sequence via 'realloc_lpt_leb()'.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int layout_cnodes(struct ubifs_info *c)
{
	int lnum, offs, len, alen, done_lsave, done_ltab, err;
	struct ubifs_cnode *cnode;

	cnode = c->lpt_cnext;
	if (!cnode)
		return 0;
	/* Continue from the current LPT write head */
	lnum = c->nhead_lnum;
	offs = c->nhead_offs;
	/* Try to place lsave and ltab nicely */
	done_lsave = !c->big_lpt;
	done_ltab = 0;
	if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
		done_lsave = 1;
		c->lsave_lnum = lnum;
		c->lsave_offs = offs;
		offs += c->lsave_sz;
	}

	if (offs + c->ltab_sz <= c->leb_size) {
		done_ltab = 1;
		c->ltab_lnum = lnum;
		c->ltab_offs = offs;
		offs += c->ltab_sz;
	}

	do {
		if (cnode->level) {
			len = c->nnode_sz;
			c->dirty_nn_cnt -= 1;
		} else {
			len = c->pnode_sz;
			c->dirty_pn_cnt -= 1;
		}
		/*
		 * Node does not fit in this LEB - account the wasted tail as
		 * dirty and move to a fresh LEB. Loop because lsave/ltab get
		 * first claim on the start of each new LEB.
		 */
		while (offs + len > c->leb_size) {
			alen = ALIGN(offs, c->min_io_size);
			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
			err = alloc_lpt_leb(c, &lnum);
			if (err)
				return err;
			offs = 0;
			ubifs_assert(lnum >= c->lpt_first &&
				     lnum <= c->lpt_last);
			/* Try to place lsave and ltab nicely */
			if (!done_lsave) {
				done_lsave = 1;
				c->lsave_lnum = lnum;
				c->lsave_offs = offs;
				offs += c->lsave_sz;
				continue;
			}
			if (!done_ltab) {
				done_ltab = 1;
				c->ltab_lnum = lnum;
				c->ltab_offs = offs;
				offs += c->ltab_sz;
				continue;
			}
			break;
		}
		/* Record where this cnode will be written */
		if (cnode->parent) {
			cnode->parent->nbranch[cnode->iip].lnum = lnum;
			cnode->parent->nbranch[cnode->iip].offs = offs;
		} else {
			c->lpt_lnum = lnum;
			c->lpt_offs = offs;
		}
		offs += len;
		cnode = cnode->cnext;
	} while (cnode && cnode != c->lpt_cnext);

	/* Make sure to place LPT's save table */
	if (!done_lsave) {
		if (offs + c->lsave_sz > c->leb_size) {
			alen = ALIGN(offs, c->min_io_size);
			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
			err = alloc_lpt_leb(c, &lnum);
			if (err)
				return err;
			offs = 0;
			ubifs_assert(lnum >= c->lpt_first &&
				     lnum <= c->lpt_last);
		}
		done_lsave = 1;
		c->lsave_lnum = lnum;
		c->lsave_offs = offs;
		offs += c->lsave_sz;
	}

	/* Make sure to place LPT's own lprops table */
	if (!done_ltab) {
		if (offs + c->ltab_sz > c->leb_size) {
			alen = ALIGN(offs, c->min_io_size);
			upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
			err = alloc_lpt_leb(c, &lnum);
			if (err)
				return err;
			offs = 0;
			ubifs_assert(lnum >= c->lpt_first &&
				     lnum <= c->lpt_last);
		}
		done_ltab = 1;
		c->ltab_lnum = lnum;
		c->ltab_offs = offs;
		offs += c->ltab_sz;
	}

	/* Account the unused, padded tail of the last LEB */
	alen = ALIGN(offs, c->min_io_size);
	upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
	return 0;
}
| 303 | |||
| 304 | /** | ||
| 305 | * realloc_lpt_leb - allocate an LPT LEB that is empty. | ||
| 306 | * @c: UBIFS file-system description object | ||
| 307 | * @lnum: LEB number is passed and returned here | ||
| 308 | * | ||
| 309 | * This function duplicates exactly the results of the function alloc_lpt_leb. | ||
| 310 | * It is used during end commit to reallocate the same LEB numbers that were | ||
| 311 | * allocated by alloc_lpt_leb during start commit. | ||
| 312 | * | ||
| 313 | * This function finds the next LEB that was allocated by the alloc_lpt_leb | ||
| 314 | * function starting from @lnum. If a LEB is found it is returned in @lnum and | ||
| 315 | * the function returns %0. Otherwise the function returns -ENOSPC. | ||
| 316 | * Note however, that LPT is designed never to run out of space. | ||
| 317 | */ | ||
| 318 | static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) | ||
| 319 | { | ||
| 320 | int i, n; | ||
| 321 | |||
| 322 | n = *lnum - c->lpt_first + 1; | ||
| 323 | for (i = n; i < c->lpt_lebs; i++) | ||
| 324 | if (c->ltab[i].cmt) { | ||
| 325 | c->ltab[i].cmt = 0; | ||
| 326 | *lnum = i + c->lpt_first; | ||
| 327 | return 0; | ||
| 328 | } | ||
| 329 | |||
| 330 | for (i = 0; i < n; i++) | ||
| 331 | if (c->ltab[i].cmt) { | ||
| 332 | c->ltab[i].cmt = 0; | ||
| 333 | *lnum = i + c->lpt_first; | ||
| 334 | return 0; | ||
| 335 | } | ||
| 336 | dbg_err("last LEB %d", *lnum); | ||
| 337 | dump_stack(); | ||
| 338 | return -ENOSPC; | ||
| 339 | } | ||
| 340 | |||
/**
 * write_cnodes - write cnodes for commit.
 * @c: UBIFS file-system description object
 *
 * Writes out the dirty cnodes (plus the ltab and, for the "big" model, the
 * lsave table) to the positions chosen by 'layout_cnodes()'. Data is packed
 * into @c->lpt_buf and flushed a LEB at a time; 'realloc_lpt_leb()' replays
 * the exact LEB allocation sequence of 'alloc_lpt_leb()' so the on-flash
 * positions match those already recorded in the node branches.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int write_cnodes(struct ubifs_info *c)
{
	int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave;
	struct ubifs_cnode *cnode;
	void *buf = c->lpt_buf;

	cnode = c->lpt_cnext;
	if (!cnode)
		return 0;
	/* Continue from the current LPT write head */
	lnum = c->nhead_lnum;
	offs = c->nhead_offs;
	/* 'from' marks the start of not-yet-flushed data in 'buf' */
	from = offs;
	/* Ensure empty LEB is unmapped */
	if (offs == 0) {
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
	}
	/* Try to place lsave and ltab nicely */
	done_lsave = !c->big_lpt;
	done_ltab = 0;
	if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
		done_lsave = 1;
		ubifs_pack_lsave(c, buf + offs, c->lsave);
		offs += c->lsave_sz;
	}

	if (offs + c->ltab_sz <= c->leb_size) {
		done_ltab = 1;
		ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
		offs += c->ltab_sz;
	}

	/* Loop for each cnode */
	do {
		if (cnode->level)
			len = c->nnode_sz;
		else
			len = c->pnode_sz;
		/*
		 * Node does not fit - flush the buffered data (padded to the
		 * min I/O unit with 0xff) and move to the next reserved LEB.
		 * Loop because lsave/ltab get first claim on each new LEB,
		 * mirroring layout_cnodes().
		 */
		while (offs + len > c->leb_size) {
			wlen = offs - from;
			if (wlen) {
				alen = ALIGN(wlen, c->min_io_size);
				memset(buf + offs, 0xff, alen - wlen);
				err = ubifs_leb_write(c, lnum, buf + from, from,
						      alen, UBI_SHORTTERM);
				if (err)
					return err;
			}
			err = realloc_lpt_leb(c, &lnum);
			if (err)
				return err;
			offs = 0;
			from = 0;
			ubifs_assert(lnum >= c->lpt_first &&
				     lnum <= c->lpt_last);
			err = ubifs_leb_unmap(c, lnum);
			if (err)
				return err;
			/* Try to place lsave and ltab nicely */
			if (!done_lsave) {
				done_lsave = 1;
				ubifs_pack_lsave(c, buf + offs, c->lsave);
				offs += c->lsave_sz;
				continue;
			}
			if (!done_ltab) {
				done_ltab = 1;
				ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
				offs += c->ltab_sz;
				continue;
			}
			break;
		}
		if (cnode->level)
			ubifs_pack_nnode(c, buf + offs,
					 (struct ubifs_nnode *)cnode);
		else
			ubifs_pack_pnode(c, buf + offs,
					 (struct ubifs_pnode *)cnode);
		/*
		 * The reason for the barriers is the same as in case of TNC.
		 * See comment in 'write_index()'. 'dirty_cow_nnode()' and
		 * 'dirty_cow_pnode()' are the functions for which this is
		 * important.
		 */
		clear_bit(DIRTY_CNODE, &cnode->flags);
		smp_mb__before_clear_bit();
		clear_bit(COW_ZNODE, &cnode->flags);
		smp_mb__after_clear_bit();
		offs += len;
		cnode = cnode->cnext;
	} while (cnode && cnode != c->lpt_cnext);

	/* Make sure to place LPT's save table */
	if (!done_lsave) {
		if (offs + c->lsave_sz > c->leb_size) {
			/* Flush current LEB and start a new one */
			wlen = offs - from;
			alen = ALIGN(wlen, c->min_io_size);
			memset(buf + offs, 0xff, alen - wlen);
			err = ubifs_leb_write(c, lnum, buf + from, from, alen,
					      UBI_SHORTTERM);
			if (err)
				return err;
			err = realloc_lpt_leb(c, &lnum);
			if (err)
				return err;
			offs = 0;
			ubifs_assert(lnum >= c->lpt_first &&
				     lnum <= c->lpt_last);
			err = ubifs_leb_unmap(c, lnum);
			if (err)
				return err;
		}
		done_lsave = 1;
		ubifs_pack_lsave(c, buf + offs, c->lsave);
		offs += c->lsave_sz;
	}

	/* Make sure to place LPT's own lprops table */
	if (!done_ltab) {
		if (offs + c->ltab_sz > c->leb_size) {
			/* Flush current LEB and start a new one */
			wlen = offs - from;
			alen = ALIGN(wlen, c->min_io_size);
			memset(buf + offs, 0xff, alen - wlen);
			err = ubifs_leb_write(c, lnum, buf + from, from, alen,
					      UBI_SHORTTERM);
			if (err)
				return err;
			err = realloc_lpt_leb(c, &lnum);
			if (err)
				return err;
			offs = 0;
			ubifs_assert(lnum >= c->lpt_first &&
				     lnum <= c->lpt_last);
			err = ubifs_leb_unmap(c, lnum);
			if (err)
				return err;
		}
		done_ltab = 1;
		ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
		offs += c->ltab_sz;
	}

	/* Write remaining data in buffer */
	wlen = offs - from;
	alen = ALIGN(wlen, c->min_io_size);
	memset(buf + offs, 0xff, alen - wlen);
	err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
	if (err)
		return err;
	/* Record the new LPT write head */
	c->nhead_lnum = lnum;
	c->nhead_offs = ALIGN(offs, c->min_io_size);

	dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
	dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
	if (c->big_lpt)
		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
	return 0;
}
| 508 | |||
| 509 | /** | ||
| 510 | * next_pnode - find next pnode. | ||
| 511 | * @c: UBIFS file-system description object | ||
| 512 | * @pnode: pnode | ||
| 513 | * | ||
| 514 | * This function returns the next pnode or %NULL if there are no more pnodes. | ||
| 515 | */ | ||
| 516 | static struct ubifs_pnode *next_pnode(struct ubifs_info *c, | ||
| 517 | struct ubifs_pnode *pnode) | ||
| 518 | { | ||
| 519 | struct ubifs_nnode *nnode; | ||
| 520 | int iip; | ||
| 521 | |||
| 522 | /* Try to go right */ | ||
| 523 | nnode = pnode->parent; | ||
| 524 | iip = pnode->iip + 1; | ||
| 525 | if (iip < UBIFS_LPT_FANOUT) { | ||
| 526 | /* We assume here that LEB zero is never an LPT LEB */ | ||
| 527 | if (nnode->nbranch[iip].lnum) | ||
| 528 | return ubifs_get_pnode(c, nnode, iip); | ||
| 529 | else | ||
| 530 | return NULL; | ||
| 531 | } | ||
| 532 | |||
| 533 | /* Go up while can't go right */ | ||
| 534 | do { | ||
| 535 | iip = nnode->iip + 1; | ||
| 536 | nnode = nnode->parent; | ||
| 537 | if (!nnode) | ||
| 538 | return NULL; | ||
| 539 | /* We assume here that LEB zero is never an LPT LEB */ | ||
| 540 | } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); | ||
| 541 | |||
| 542 | /* Go right */ | ||
| 543 | nnode = ubifs_get_nnode(c, nnode, iip); | ||
| 544 | if (IS_ERR(nnode)) | ||
| 545 | return (void *)nnode; | ||
| 546 | |||
| 547 | /* Go down to level 1 */ | ||
| 548 | while (nnode->level > 1) { | ||
| 549 | nnode = ubifs_get_nnode(c, nnode, 0); | ||
| 550 | if (IS_ERR(nnode)) | ||
| 551 | return (void *)nnode; | ||
| 552 | } | ||
| 553 | |||
| 554 | return ubifs_get_pnode(c, nnode, 0); | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * pnode_lookup - lookup a pnode in the LPT. | ||
| 559 | * @c: UBIFS file-system description object | ||
| 560 | * @i: pnode number (0 to main_lebs - 1) | ||
| 561 | * | ||
| 562 | * This function returns a pointer to the pnode on success or a negative | ||
| 563 | * error code on failure. | ||
| 564 | */ | ||
| 565 | static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) | ||
| 566 | { | ||
| 567 | int err, h, iip, shft; | ||
| 568 | struct ubifs_nnode *nnode; | ||
| 569 | |||
| 570 | if (!c->nroot) { | ||
| 571 | err = ubifs_read_nnode(c, NULL, 0); | ||
| 572 | if (err) | ||
| 573 | return ERR_PTR(err); | ||
| 574 | } | ||
| 575 | i <<= UBIFS_LPT_FANOUT_SHIFT; | ||
| 576 | nnode = c->nroot; | ||
| 577 | shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; | ||
| 578 | for (h = 1; h < c->lpt_hght; h++) { | ||
| 579 | iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); | ||
| 580 | shft -= UBIFS_LPT_FANOUT_SHIFT; | ||
| 581 | nnode = ubifs_get_nnode(c, nnode, iip); | ||
| 582 | if (IS_ERR(nnode)) | ||
| 583 | return ERR_PTR(PTR_ERR(nnode)); | ||
| 584 | } | ||
| 585 | iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); | ||
| 586 | return ubifs_get_pnode(c, nnode, iip); | ||
| 587 | } | ||
| 588 | |||
| 589 | /** | ||
| 590 | * add_pnode_dirt - add dirty space to LPT LEB properties. | ||
| 591 | * @c: UBIFS file-system description object | ||
| 592 | * @pnode: pnode for which to add dirt | ||
| 593 | */ | ||
| 594 | static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) | ||
| 595 | { | ||
| 596 | ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, | ||
| 597 | c->pnode_sz); | ||
| 598 | } | ||
| 599 | |||
| 600 | /** | ||
| 601 | * do_make_pnode_dirty - mark a pnode dirty. | ||
| 602 | * @c: UBIFS file-system description object | ||
| 603 | * @pnode: pnode to mark dirty | ||
| 604 | */ | ||
| 605 | static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode) | ||
| 606 | { | ||
| 607 | /* Assumes cnext list is empty i.e. not called during commit */ | ||
| 608 | if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { | ||
| 609 | struct ubifs_nnode *nnode; | ||
| 610 | |||
| 611 | c->dirty_pn_cnt += 1; | ||
| 612 | add_pnode_dirt(c, pnode); | ||
| 613 | /* Mark parent and ancestors dirty too */ | ||
| 614 | nnode = pnode->parent; | ||
| 615 | while (nnode) { | ||
| 616 | if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { | ||
| 617 | c->dirty_nn_cnt += 1; | ||
| 618 | ubifs_add_nnode_dirt(c, nnode); | ||
| 619 | nnode = nnode->parent; | ||
| 620 | } else | ||
| 621 | break; | ||
| 622 | } | ||
| 623 | } | ||
| 624 | } | ||
| 625 | |||
/**
 * make_tree_dirty - mark the entire LEB properties tree dirty.
 * @c: UBIFS file-system description object
 *
 * This function is used by the "small" LPT model to cause the entire LEB
 * properties tree to be written. The "small" LPT model does not use LPT
 * garbage collection because it is more efficient to write the entire tree
 * (because it is small).
 *
 * This function returns %0 on success and a negative error code on failure.
 *
 * Fix: the return value of pnode_lookup() was previously not checked for an
 * error, so an ERR_PTR would have been dereferenced by do_make_pnode_dirty().
 */
static int make_tree_dirty(struct ubifs_info *c)
{
	struct ubifs_pnode *pnode;

	pnode = pnode_lookup(c, 0);
	if (IS_ERR(pnode))
		return PTR_ERR(pnode);

	while (pnode) {
		do_make_pnode_dirty(c, pnode);
		pnode = next_pnode(c, pnode);
		if (IS_ERR(pnode))
			return PTR_ERR(pnode);
	}
	return 0;
}
| 650 | |||
| 651 | /** | ||
| 652 | * need_write_all - determine if the LPT area is running out of free space. | ||
| 653 | * @c: UBIFS file-system description object | ||
| 654 | * | ||
| 655 | * This function returns %1 if the LPT area is running out of free space and %0 | ||
| 656 | * if it is not. | ||
| 657 | */ | ||
| 658 | static int need_write_all(struct ubifs_info *c) | ||
| 659 | { | ||
| 660 | long long free = 0; | ||
| 661 | int i; | ||
| 662 | |||
| 663 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 664 | if (i + c->lpt_first == c->nhead_lnum) | ||
| 665 | free += c->leb_size - c->nhead_offs; | ||
| 666 | else if (c->ltab[i].free == c->leb_size) | ||
| 667 | free += c->leb_size; | ||
| 668 | else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size) | ||
| 669 | free += c->leb_size; | ||
| 670 | } | ||
| 671 | /* Less than twice the size left */ | ||
| 672 | if (free <= c->lpt_sz * 2) | ||
| 673 | return 1; | ||
| 674 | return 0; | ||
| 675 | } | ||
| 676 | |||
| 677 | /** | ||
| 678 | * lpt_tgc_start - start trivial garbage collection of LPT LEBs. | ||
| 679 | * @c: UBIFS file-system description object | ||
| 680 | * | ||
| 681 | * LPT trivial garbage collection is where a LPT LEB contains only dirty and | ||
| 682 | * free space and so may be reused as soon as the next commit is completed. | ||
| 683 | * This function is called during start commit to mark LPT LEBs for trivial GC. | ||
| 684 | */ | ||
| 685 | static void lpt_tgc_start(struct ubifs_info *c) | ||
| 686 | { | ||
| 687 | int i; | ||
| 688 | |||
| 689 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 690 | if (i + c->lpt_first == c->nhead_lnum) | ||
| 691 | continue; | ||
| 692 | if (c->ltab[i].dirty > 0 && | ||
| 693 | c->ltab[i].free + c->ltab[i].dirty == c->leb_size) { | ||
| 694 | c->ltab[i].tgc = 1; | ||
| 695 | c->ltab[i].free = c->leb_size; | ||
| 696 | c->ltab[i].dirty = 0; | ||
| 697 | dbg_lp("LEB %d", i + c->lpt_first); | ||
| 698 | } | ||
| 699 | } | ||
| 700 | } | ||
| 701 | |||
| 702 | /** | ||
| 703 | * lpt_tgc_end - end trivial garbage collection of LPT LEBs. | ||
| 704 | * @c: UBIFS file-system description object | ||
| 705 | * | ||
| 706 | * LPT trivial garbage collection is where a LPT LEB contains only dirty and | ||
| 707 | * free space and so may be reused as soon as the next commit is completed. | ||
| 708 | * This function is called after the commit is completed (master node has been | ||
| 709 | * written) and unmaps LPT LEBs that were marked for trivial GC. | ||
| 710 | */ | ||
| 711 | static int lpt_tgc_end(struct ubifs_info *c) | ||
| 712 | { | ||
| 713 | int i, err; | ||
| 714 | |||
| 715 | for (i = 0; i < c->lpt_lebs; i++) | ||
| 716 | if (c->ltab[i].tgc) { | ||
| 717 | err = ubifs_leb_unmap(c, i + c->lpt_first); | ||
| 718 | if (err) | ||
| 719 | return err; | ||
| 720 | c->ltab[i].tgc = 0; | ||
| 721 | dbg_lp("LEB %d", i + c->lpt_first); | ||
| 722 | } | ||
| 723 | return 0; | ||
| 724 | } | ||
| 725 | |||
| 726 | /** | ||
| 727 | * populate_lsave - fill the lsave array with important LEB numbers. | ||
| 728 | * @c: the UBIFS file-system description object | ||
| 729 | * | ||
| 730 | * This function is only called for the "big" model. It records a small number | ||
| 731 | * of LEB numbers of important LEBs. Important LEBs are ones that are (from | ||
| 732 | * most important to least important): empty, freeable, freeable index, dirty | ||
| 733 | * index, dirty or free. Upon mount, we read this list of LEB numbers and bring | ||
| 734 | * their pnodes into memory. That will stop us from having to scan the LPT | ||
| 735 | * straight away. For the "small" model we assume that scanning the LPT is no | ||
| 736 | * big deal. | ||
| 737 | */ | ||
| 738 | static void populate_lsave(struct ubifs_info *c) | ||
| 739 | { | ||
| 740 | struct ubifs_lprops *lprops; | ||
| 741 | struct ubifs_lpt_heap *heap; | ||
| 742 | int i, cnt = 0; | ||
| 743 | |||
| 744 | ubifs_assert(c->big_lpt); | ||
| 745 | if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { | ||
| 746 | c->lpt_drty_flgs |= LSAVE_DIRTY; | ||
| 747 | ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); | ||
| 748 | } | ||
| 749 | list_for_each_entry(lprops, &c->empty_list, list) { | ||
| 750 | c->lsave[cnt++] = lprops->lnum; | ||
| 751 | if (cnt >= c->lsave_cnt) | ||
| 752 | return; | ||
| 753 | } | ||
| 754 | list_for_each_entry(lprops, &c->freeable_list, list) { | ||
| 755 | c->lsave[cnt++] = lprops->lnum; | ||
| 756 | if (cnt >= c->lsave_cnt) | ||
| 757 | return; | ||
| 758 | } | ||
| 759 | list_for_each_entry(lprops, &c->frdi_idx_list, list) { | ||
| 760 | c->lsave[cnt++] = lprops->lnum; | ||
| 761 | if (cnt >= c->lsave_cnt) | ||
| 762 | return; | ||
| 763 | } | ||
| 764 | heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; | ||
| 765 | for (i = 0; i < heap->cnt; i++) { | ||
| 766 | c->lsave[cnt++] = heap->arr[i]->lnum; | ||
| 767 | if (cnt >= c->lsave_cnt) | ||
| 768 | return; | ||
| 769 | } | ||
| 770 | heap = &c->lpt_heap[LPROPS_DIRTY - 1]; | ||
| 771 | for (i = 0; i < heap->cnt; i++) { | ||
| 772 | c->lsave[cnt++] = heap->arr[i]->lnum; | ||
| 773 | if (cnt >= c->lsave_cnt) | ||
| 774 | return; | ||
| 775 | } | ||
| 776 | heap = &c->lpt_heap[LPROPS_FREE - 1]; | ||
| 777 | for (i = 0; i < heap->cnt; i++) { | ||
| 778 | c->lsave[cnt++] = heap->arr[i]->lnum; | ||
| 779 | if (cnt >= c->lsave_cnt) | ||
| 780 | return; | ||
| 781 | } | ||
| 782 | /* Fill it up completely */ | ||
| 783 | while (cnt < c->lsave_cnt) | ||
| 784 | c->lsave[cnt++] = c->main_first; | ||
| 785 | } | ||
| 786 | |||
| 787 | /** | ||
| 788 | * nnode_lookup - lookup a nnode in the LPT. | ||
| 789 | * @c: UBIFS file-system description object | ||
| 790 | * @i: nnode number | ||
| 791 | * | ||
| 792 | * This function returns a pointer to the nnode on success or a negative | ||
| 793 | * error code on failure. | ||
| 794 | */ | ||
| 795 | static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i) | ||
| 796 | { | ||
| 797 | int err, iip; | ||
| 798 | struct ubifs_nnode *nnode; | ||
| 799 | |||
| 800 | if (!c->nroot) { | ||
| 801 | err = ubifs_read_nnode(c, NULL, 0); | ||
| 802 | if (err) | ||
| 803 | return ERR_PTR(err); | ||
| 804 | } | ||
| 805 | nnode = c->nroot; | ||
| 806 | while (1) { | ||
| 807 | iip = i & (UBIFS_LPT_FANOUT - 1); | ||
| 808 | i >>= UBIFS_LPT_FANOUT_SHIFT; | ||
| 809 | if (!i) | ||
| 810 | break; | ||
| 811 | nnode = ubifs_get_nnode(c, nnode, iip); | ||
| 812 | if (IS_ERR(nnode)) | ||
| 813 | return nnode; | ||
| 814 | } | ||
| 815 | return nnode; | ||
| 816 | } | ||
| 817 | |||
| 818 | /** | ||
| 819 | * make_nnode_dirty - find a nnode and, if found, make it dirty. | ||
| 820 | * @c: UBIFS file-system description object | ||
| 821 | * @node_num: nnode number of nnode to make dirty | ||
| 822 | * @lnum: LEB number where nnode was written | ||
| 823 | * @offs: offset where nnode was written | ||
| 824 | * | ||
| 825 | * This function is used by LPT garbage collection. LPT garbage collection is | ||
| 826 | * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection | ||
| 827 | * simply involves marking all the nodes in the LEB being garbage-collected as | ||
| 828 | * dirty. The dirty nodes are written next commit, after which the LEB is free | ||
| 829 | * to be reused. | ||
| 830 | * | ||
| 831 | * This function returns %0 on success and a negative error code on failure. | ||
| 832 | */ | ||
| 833 | static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum, | ||
| 834 | int offs) | ||
| 835 | { | ||
| 836 | struct ubifs_nnode *nnode; | ||
| 837 | |||
| 838 | nnode = nnode_lookup(c, node_num); | ||
| 839 | if (IS_ERR(nnode)) | ||
| 840 | return PTR_ERR(nnode); | ||
| 841 | if (nnode->parent) { | ||
| 842 | struct ubifs_nbranch *branch; | ||
| 843 | |||
| 844 | branch = &nnode->parent->nbranch[nnode->iip]; | ||
| 845 | if (branch->lnum != lnum || branch->offs != offs) | ||
| 846 | return 0; /* nnode is obsolete */ | ||
| 847 | } else if (c->lpt_lnum != lnum || c->lpt_offs != offs) | ||
| 848 | return 0; /* nnode is obsolete */ | ||
| 849 | /* Assumes cnext list is empty i.e. not called during commit */ | ||
| 850 | if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { | ||
| 851 | c->dirty_nn_cnt += 1; | ||
| 852 | ubifs_add_nnode_dirt(c, nnode); | ||
| 853 | /* Mark parent and ancestors dirty too */ | ||
| 854 | nnode = nnode->parent; | ||
| 855 | while (nnode) { | ||
| 856 | if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { | ||
| 857 | c->dirty_nn_cnt += 1; | ||
| 858 | ubifs_add_nnode_dirt(c, nnode); | ||
| 859 | nnode = nnode->parent; | ||
| 860 | } else | ||
| 861 | break; | ||
| 862 | } | ||
| 863 | } | ||
| 864 | return 0; | ||
| 865 | } | ||
| 866 | |||
| 867 | /** | ||
| 868 | * make_pnode_dirty - find a pnode and, if found, make it dirty. | ||
| 869 | * @c: UBIFS file-system description object | ||
| 870 | * @node_num: pnode number of pnode to make dirty | ||
| 871 | * @lnum: LEB number where pnode was written | ||
| 872 | * @offs: offset where pnode was written | ||
| 873 | * | ||
| 874 | * This function is used by LPT garbage collection. LPT garbage collection is | ||
| 875 | * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection | ||
| 876 | * simply involves marking all the nodes in the LEB being garbage-collected as | ||
| 877 | * dirty. The dirty nodes are written next commit, after which the LEB is free | ||
| 878 | * to be reused. | ||
| 879 | * | ||
| 880 | * This function returns %0 on success and a negative error code on failure. | ||
| 881 | */ | ||
| 882 | static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum, | ||
| 883 | int offs) | ||
| 884 | { | ||
| 885 | struct ubifs_pnode *pnode; | ||
| 886 | struct ubifs_nbranch *branch; | ||
| 887 | |||
| 888 | pnode = pnode_lookup(c, node_num); | ||
| 889 | if (IS_ERR(pnode)) | ||
| 890 | return PTR_ERR(pnode); | ||
| 891 | branch = &pnode->parent->nbranch[pnode->iip]; | ||
| 892 | if (branch->lnum != lnum || branch->offs != offs) | ||
| 893 | return 0; | ||
| 894 | do_make_pnode_dirty(c, pnode); | ||
| 895 | return 0; | ||
| 896 | } | ||
| 897 | |||
| 898 | /** | ||
| 899 | * make_ltab_dirty - make ltab node dirty. | ||
| 900 | * @c: UBIFS file-system description object | ||
| 901 | * @lnum: LEB number where ltab was written | ||
| 902 | * @offs: offset where ltab was written | ||
| 903 | * | ||
| 904 | * This function is used by LPT garbage collection. LPT garbage collection is | ||
| 905 | * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection | ||
| 906 | * simply involves marking all the nodes in the LEB being garbage-collected as | ||
| 907 | * dirty. The dirty nodes are written next commit, after which the LEB is free | ||
| 908 | * to be reused. | ||
| 909 | * | ||
| 910 | * This function returns %0 on success and a negative error code on failure. | ||
| 911 | */ | ||
| 912 | static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs) | ||
| 913 | { | ||
| 914 | if (lnum != c->ltab_lnum || offs != c->ltab_offs) | ||
| 915 | return 0; /* This ltab node is obsolete */ | ||
| 916 | if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { | ||
| 917 | c->lpt_drty_flgs |= LTAB_DIRTY; | ||
| 918 | ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); | ||
| 919 | } | ||
| 920 | return 0; | ||
| 921 | } | ||
| 922 | |||
| 923 | /** | ||
| 924 | * make_lsave_dirty - make lsave node dirty. | ||
| 925 | * @c: UBIFS file-system description object | ||
| 926 | * @lnum: LEB number where lsave was written | ||
| 927 | * @offs: offset where lsave was written | ||
| 928 | * | ||
| 929 | * This function is used by LPT garbage collection. LPT garbage collection is | ||
| 930 | * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection | ||
| 931 | * simply involves marking all the nodes in the LEB being garbage-collected as | ||
| 932 | * dirty. The dirty nodes are written next commit, after which the LEB is free | ||
| 933 | * to be reused. | ||
| 934 | * | ||
| 935 | * This function returns %0 on success and a negative error code on failure. | ||
| 936 | */ | ||
| 937 | static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs) | ||
| 938 | { | ||
| 939 | if (lnum != c->lsave_lnum || offs != c->lsave_offs) | ||
| 940 | return 0; /* This lsave node is obsolete */ | ||
| 941 | if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { | ||
| 942 | c->lpt_drty_flgs |= LSAVE_DIRTY; | ||
| 943 | ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); | ||
| 944 | } | ||
| 945 | return 0; | ||
| 946 | } | ||
| 947 | |||
| 948 | /** | ||
| 949 | * make_node_dirty - make node dirty. | ||
| 950 | * @c: UBIFS file-system description object | ||
| 951 | * @node_type: LPT node type | ||
| 952 | * @node_num: node number | ||
| 953 | * @lnum: LEB number where node was written | ||
| 954 | * @offs: offset where node was written | ||
| 955 | * | ||
| 956 | * This function is used by LPT garbage collection. LPT garbage collection is | ||
| 957 | * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection | ||
| 958 | * simply involves marking all the nodes in the LEB being garbage-collected as | ||
| 959 | * dirty. The dirty nodes are written next commit, after which the LEB is free | ||
| 960 | * to be reused. | ||
| 961 | * | ||
| 962 | * This function returns %0 on success and a negative error code on failure. | ||
| 963 | */ | ||
| 964 | static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, | ||
| 965 | int lnum, int offs) | ||
| 966 | { | ||
| 967 | switch (node_type) { | ||
| 968 | case UBIFS_LPT_NNODE: | ||
| 969 | return make_nnode_dirty(c, node_num, lnum, offs); | ||
| 970 | case UBIFS_LPT_PNODE: | ||
| 971 | return make_pnode_dirty(c, node_num, lnum, offs); | ||
| 972 | case UBIFS_LPT_LTAB: | ||
| 973 | return make_ltab_dirty(c, lnum, offs); | ||
| 974 | case UBIFS_LPT_LSAVE: | ||
| 975 | return make_lsave_dirty(c, lnum, offs); | ||
| 976 | } | ||
| 977 | return -EINVAL; | ||
| 978 | } | ||
| 979 | |||
| 980 | /** | ||
| 981 | * get_lpt_node_len - return the length of a node based on its type. | ||
| 982 | * @c: UBIFS file-system description object | ||
| 983 | * @node_type: LPT node type | ||
| 984 | */ | ||
| 985 | static int get_lpt_node_len(struct ubifs_info *c, int node_type) | ||
| 986 | { | ||
| 987 | switch (node_type) { | ||
| 988 | case UBIFS_LPT_NNODE: | ||
| 989 | return c->nnode_sz; | ||
| 990 | case UBIFS_LPT_PNODE: | ||
| 991 | return c->pnode_sz; | ||
| 992 | case UBIFS_LPT_LTAB: | ||
| 993 | return c->ltab_sz; | ||
| 994 | case UBIFS_LPT_LSAVE: | ||
| 995 | return c->lsave_sz; | ||
| 996 | } | ||
| 997 | return 0; | ||
| 998 | } | ||
| 999 | |||
| 1000 | /** | ||
| 1001 | * get_pad_len - return the length of padding in a buffer. | ||
| 1002 | * @c: UBIFS file-system description object | ||
| 1003 | * @buf: buffer | ||
| 1004 | * @len: length of buffer | ||
| 1005 | */ | ||
| 1006 | static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) | ||
| 1007 | { | ||
| 1008 | int offs, pad_len; | ||
| 1009 | |||
| 1010 | if (c->min_io_size == 1) | ||
| 1011 | return 0; | ||
| 1012 | offs = c->leb_size - len; | ||
| 1013 | pad_len = ALIGN(offs, c->min_io_size) - offs; | ||
| 1014 | return pad_len; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | /** | ||
| 1018 | * get_lpt_node_type - return type (and node number) of a node in a buffer. | ||
| 1019 | * @c: UBIFS file-system description object | ||
| 1020 | * @buf: buffer | ||
| 1021 | * @node_num: node number is returned here | ||
| 1022 | */ | ||
| 1023 | static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) | ||
| 1024 | { | ||
| 1025 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 1026 | int pos = 0, node_type; | ||
| 1027 | |||
| 1028 | node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); | ||
| 1029 | *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); | ||
| 1030 | return node_type; | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | /** | ||
| 1034 | * is_a_node - determine if a buffer contains a node. | ||
| 1035 | * @c: UBIFS file-system description object | ||
| 1036 | * @buf: buffer | ||
| 1037 | * @len: length of buffer | ||
| 1038 | * | ||
| 1039 | * This function returns %1 if the buffer contains a node or %0 if it does not. | ||
| 1040 | */ | ||
| 1041 | static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) | ||
| 1042 | { | ||
| 1043 | uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; | ||
| 1044 | int pos = 0, node_type, node_len; | ||
| 1045 | uint16_t crc, calc_crc; | ||
| 1046 | |||
| 1047 | node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); | ||
| 1048 | if (node_type == UBIFS_LPT_NOT_A_NODE) | ||
| 1049 | return 0; | ||
| 1050 | node_len = get_lpt_node_len(c, node_type); | ||
| 1051 | if (!node_len || node_len > len) | ||
| 1052 | return 0; | ||
| 1053 | pos = 0; | ||
| 1054 | addr = buf; | ||
| 1055 | crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); | ||
| 1056 | calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, | ||
| 1057 | node_len - UBIFS_LPT_CRC_BYTES); | ||
| 1058 | if (crc != calc_crc) | ||
| 1059 | return 0; | ||
| 1060 | return 1; | ||
| 1061 | } | ||
| 1062 | |||
| 1063 | |||
| 1064 | /** | ||
| 1065 | * lpt_gc_lnum - garbage collect a LPT LEB. | ||
| 1066 | * @c: UBIFS file-system description object | ||
| 1067 | * @lnum: LEB number to garbage collect | ||
| 1068 | * | ||
| 1069 | * LPT garbage collection is used only for the "big" LPT model | ||
| 1070 | * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes | ||
| 1071 | * in the LEB being garbage-collected as dirty. The dirty nodes are written | ||
| 1072 | * next commit, after which the LEB is free to be reused. | ||
| 1073 | * | ||
| 1074 | * This function returns %0 on success and a negative error code on failure. | ||
| 1075 | */ | ||
| 1076 | static int lpt_gc_lnum(struct ubifs_info *c, int lnum) | ||
| 1077 | { | ||
| 1078 | int err, len = c->leb_size, node_type, node_num, node_len, offs; | ||
| 1079 | void *buf = c->lpt_buf; | ||
| 1080 | |||
| 1081 | dbg_lp("LEB %d", lnum); | ||
| 1082 | err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); | ||
| 1083 | if (err) { | ||
| 1084 | ubifs_err("cannot read LEB %d, error %d", lnum, err); | ||
| 1085 | return err; | ||
| 1086 | } | ||
| 1087 | while (1) { | ||
| 1088 | if (!is_a_node(c, buf, len)) { | ||
| 1089 | int pad_len; | ||
| 1090 | |||
| 1091 | pad_len = get_pad_len(c, buf, len); | ||
| 1092 | if (pad_len) { | ||
| 1093 | buf += pad_len; | ||
| 1094 | len -= pad_len; | ||
| 1095 | continue; | ||
| 1096 | } | ||
| 1097 | return 0; | ||
| 1098 | } | ||
| 1099 | node_type = get_lpt_node_type(c, buf, &node_num); | ||
| 1100 | node_len = get_lpt_node_len(c, node_type); | ||
| 1101 | offs = c->leb_size - len; | ||
| 1102 | ubifs_assert(node_len != 0); | ||
| 1103 | mutex_lock(&c->lp_mutex); | ||
| 1104 | err = make_node_dirty(c, node_type, node_num, lnum, offs); | ||
| 1105 | mutex_unlock(&c->lp_mutex); | ||
| 1106 | if (err) | ||
| 1107 | return err; | ||
| 1108 | buf += node_len; | ||
| 1109 | len -= node_len; | ||
| 1110 | } | ||
| 1111 | return 0; | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | /** | ||
| 1115 | * lpt_gc - LPT garbage collection. | ||
| 1116 | * @c: UBIFS file-system description object | ||
| 1117 | * | ||
| 1118 | * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. | ||
| 1119 | * Returns %0 on success and a negative error code on failure. | ||
| 1120 | */ | ||
| 1121 | static int lpt_gc(struct ubifs_info *c) | ||
| 1122 | { | ||
| 1123 | int i, lnum = -1, dirty = 0; | ||
| 1124 | |||
| 1125 | mutex_lock(&c->lp_mutex); | ||
| 1126 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 1127 | ubifs_assert(!c->ltab[i].tgc); | ||
| 1128 | if (i + c->lpt_first == c->nhead_lnum || | ||
| 1129 | c->ltab[i].free + c->ltab[i].dirty == c->leb_size) | ||
| 1130 | continue; | ||
| 1131 | if (c->ltab[i].dirty > dirty) { | ||
| 1132 | dirty = c->ltab[i].dirty; | ||
| 1133 | lnum = i + c->lpt_first; | ||
| 1134 | } | ||
| 1135 | } | ||
| 1136 | mutex_unlock(&c->lp_mutex); | ||
| 1137 | if (lnum == -1) | ||
| 1138 | return -ENOSPC; | ||
| 1139 | return lpt_gc_lnum(c, lnum); | ||
| 1140 | } | ||
| 1141 | |||
| 1142 | /** | ||
| 1143 | * ubifs_lpt_start_commit - UBIFS commit starts. | ||
| 1144 | * @c: the UBIFS file-system description object | ||
| 1145 | * | ||
| 1146 | * This function has to be called when UBIFS starts the commit operation. | ||
| 1147 | * This function "freezes" all currently dirty LEB properties and does not | ||
| 1148 | * change them anymore. Further changes are saved and tracked separately | ||
| 1149 | * because they are not part of this commit. This function returns zero in case | ||
| 1150 | * of success and a negative error code in case of failure. | ||
| 1151 | */ | ||
| 1152 | int ubifs_lpt_start_commit(struct ubifs_info *c) | ||
| 1153 | { | ||
| 1154 | int err, cnt; | ||
| 1155 | |||
| 1156 | dbg_lp(""); | ||
| 1157 | |||
| 1158 | mutex_lock(&c->lp_mutex); | ||
| 1159 | err = dbg_check_ltab(c); | ||
| 1160 | if (err) | ||
| 1161 | goto out; | ||
| 1162 | |||
| 1163 | if (c->check_lpt_free) { | ||
| 1164 | /* | ||
| 1165 | * We ensure there is enough free space in | ||
| 1166 | * ubifs_lpt_post_commit() by marking nodes dirty. That | ||
| 1167 | * information is lost when we unmount, so we also need | ||
| 1168 | * to check free space once after mounting also. | ||
| 1169 | */ | ||
| 1170 | c->check_lpt_free = 0; | ||
| 1171 | while (need_write_all(c)) { | ||
| 1172 | mutex_unlock(&c->lp_mutex); | ||
| 1173 | err = lpt_gc(c); | ||
| 1174 | if (err) | ||
| 1175 | return err; | ||
| 1176 | mutex_lock(&c->lp_mutex); | ||
| 1177 | } | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | lpt_tgc_start(c); | ||
| 1181 | |||
| 1182 | if (!c->dirty_pn_cnt) { | ||
| 1183 | dbg_cmt("no cnodes to commit"); | ||
| 1184 | err = 0; | ||
| 1185 | goto out; | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | if (!c->big_lpt && need_write_all(c)) { | ||
| 1189 | /* If needed, write everything */ | ||
| 1190 | err = make_tree_dirty(c); | ||
| 1191 | if (err) | ||
| 1192 | goto out; | ||
| 1193 | lpt_tgc_start(c); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | if (c->big_lpt) | ||
| 1197 | populate_lsave(c); | ||
| 1198 | |||
| 1199 | cnt = get_cnodes_to_commit(c); | ||
| 1200 | ubifs_assert(cnt != 0); | ||
| 1201 | |||
| 1202 | err = layout_cnodes(c); | ||
| 1203 | if (err) | ||
| 1204 | goto out; | ||
| 1205 | |||
| 1206 | /* Copy the LPT's own lprops for end commit to write */ | ||
| 1207 | memcpy(c->ltab_cmt, c->ltab, | ||
| 1208 | sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); | ||
| 1209 | c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY); | ||
| 1210 | |||
| 1211 | out: | ||
| 1212 | mutex_unlock(&c->lp_mutex); | ||
| 1213 | return err; | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | /** | ||
| 1217 | * free_obsolete_cnodes - free obsolete cnodes for commit end. | ||
| 1218 | * @c: UBIFS file-system description object | ||
| 1219 | */ | ||
| 1220 | static void free_obsolete_cnodes(struct ubifs_info *c) | ||
| 1221 | { | ||
| 1222 | struct ubifs_cnode *cnode, *cnext; | ||
| 1223 | |||
| 1224 | cnext = c->lpt_cnext; | ||
| 1225 | if (!cnext) | ||
| 1226 | return; | ||
| 1227 | do { | ||
| 1228 | cnode = cnext; | ||
| 1229 | cnext = cnode->cnext; | ||
| 1230 | if (test_bit(OBSOLETE_CNODE, &cnode->flags)) | ||
| 1231 | kfree(cnode); | ||
| 1232 | else | ||
| 1233 | cnode->cnext = NULL; | ||
| 1234 | } while (cnext != c->lpt_cnext); | ||
| 1235 | c->lpt_cnext = NULL; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | /** | ||
| 1239 | * ubifs_lpt_end_commit - finish the commit operation. | ||
| 1240 | * @c: the UBIFS file-system description object | ||
| 1241 | * | ||
| 1242 | * This function has to be called when the commit operation finishes. It | ||
| 1243 | * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to | ||
| 1244 | * the media. Returns zero in case of success and a negative error code in case | ||
| 1245 | * of failure. | ||
| 1246 | */ | ||
| 1247 | int ubifs_lpt_end_commit(struct ubifs_info *c) | ||
| 1248 | { | ||
| 1249 | int err; | ||
| 1250 | |||
| 1251 | dbg_lp(""); | ||
| 1252 | |||
| 1253 | if (!c->lpt_cnext) | ||
| 1254 | return 0; | ||
| 1255 | |||
| 1256 | err = write_cnodes(c); | ||
| 1257 | if (err) | ||
| 1258 | return err; | ||
| 1259 | |||
| 1260 | mutex_lock(&c->lp_mutex); | ||
| 1261 | free_obsolete_cnodes(c); | ||
| 1262 | mutex_unlock(&c->lp_mutex); | ||
| 1263 | |||
| 1264 | return 0; | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | /** | ||
| 1268 | * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC. | ||
| 1269 | * @c: UBIFS file-system description object | ||
| 1270 | * | ||
| 1271 | * LPT trivial GC is completed after a commit. Also LPT GC is done after a | ||
| 1272 | * commit for the "big" LPT model. | ||
| 1273 | */ | ||
| 1274 | int ubifs_lpt_post_commit(struct ubifs_info *c) | ||
| 1275 | { | ||
| 1276 | int err; | ||
| 1277 | |||
| 1278 | mutex_lock(&c->lp_mutex); | ||
| 1279 | err = lpt_tgc_end(c); | ||
| 1280 | if (err) | ||
| 1281 | goto out; | ||
| 1282 | if (c->big_lpt) | ||
| 1283 | while (need_write_all(c)) { | ||
| 1284 | mutex_unlock(&c->lp_mutex); | ||
| 1285 | err = lpt_gc(c); | ||
| 1286 | if (err) | ||
| 1287 | return err; | ||
| 1288 | mutex_lock(&c->lp_mutex); | ||
| 1289 | } | ||
| 1290 | out: | ||
| 1291 | mutex_unlock(&c->lp_mutex); | ||
| 1292 | return err; | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | /** | ||
| 1296 | * first_nnode - find the first nnode in memory. | ||
| 1297 | * @c: UBIFS file-system description object | ||
| 1298 | * @hght: height of tree where nnode found is returned here | ||
| 1299 | * | ||
| 1300 | * This function returns a pointer to the nnode found or %NULL if no nnode is | ||
| 1301 | * found. This function is a helper to 'ubifs_lpt_free()'. | ||
| 1302 | */ | ||
| 1303 | static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght) | ||
| 1304 | { | ||
| 1305 | struct ubifs_nnode *nnode; | ||
| 1306 | int h, i, found; | ||
| 1307 | |||
| 1308 | nnode = c->nroot; | ||
| 1309 | *hght = 0; | ||
| 1310 | if (!nnode) | ||
| 1311 | return NULL; | ||
| 1312 | for (h = 1; h < c->lpt_hght; h++) { | ||
| 1313 | found = 0; | ||
| 1314 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 1315 | if (nnode->nbranch[i].nnode) { | ||
| 1316 | found = 1; | ||
| 1317 | nnode = nnode->nbranch[i].nnode; | ||
| 1318 | *hght = h; | ||
| 1319 | break; | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | if (!found) | ||
| 1323 | break; | ||
| 1324 | } | ||
| 1325 | return nnode; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | /** | ||
| 1329 | * next_nnode - find the next nnode in memory. | ||
| 1330 | * @c: UBIFS file-system description object | ||
| 1331 | * @nnode: nnode from which to start. | ||
| 1332 | * @hght: height of tree where nnode is, is passed and returned here | ||
| 1333 | * | ||
| 1334 | * This function returns a pointer to the nnode found or %NULL if no nnode is | ||
| 1335 | * found. This function is a helper to 'ubifs_lpt_free()'. | ||
| 1336 | */ | ||
| 1337 | static struct ubifs_nnode *next_nnode(struct ubifs_info *c, | ||
| 1338 | struct ubifs_nnode *nnode, int *hght) | ||
| 1339 | { | ||
| 1340 | struct ubifs_nnode *parent; | ||
| 1341 | int iip, h, i, found; | ||
| 1342 | |||
| 1343 | parent = nnode->parent; | ||
| 1344 | if (!parent) | ||
| 1345 | return NULL; | ||
| 1346 | if (nnode->iip == UBIFS_LPT_FANOUT - 1) { | ||
| 1347 | *hght -= 1; | ||
| 1348 | return parent; | ||
| 1349 | } | ||
| 1350 | for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { | ||
| 1351 | nnode = parent->nbranch[iip].nnode; | ||
| 1352 | if (nnode) | ||
| 1353 | break; | ||
| 1354 | } | ||
| 1355 | if (!nnode) { | ||
| 1356 | *hght -= 1; | ||
| 1357 | return parent; | ||
| 1358 | } | ||
| 1359 | for (h = *hght + 1; h < c->lpt_hght; h++) { | ||
| 1360 | found = 0; | ||
| 1361 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | ||
| 1362 | if (nnode->nbranch[i].nnode) { | ||
| 1363 | found = 1; | ||
| 1364 | nnode = nnode->nbranch[i].nnode; | ||
| 1365 | *hght = h; | ||
| 1366 | break; | ||
| 1367 | } | ||
| 1368 | } | ||
| 1369 | if (!found) | ||
| 1370 | break; | ||
| 1371 | } | ||
| 1372 | return nnode; | ||
| 1373 | } | ||
| 1374 | |||
| 1375 | /** | ||
| 1376 | * ubifs_lpt_free - free resources owned by the LPT. | ||
| 1377 | * @c: UBIFS file-system description object | ||
| 1378 | * @wr_only: free only resources used for writing | ||
| 1379 | */ | ||
| 1380 | void ubifs_lpt_free(struct ubifs_info *c, int wr_only) | ||
| 1381 | { | ||
| 1382 | struct ubifs_nnode *nnode; | ||
| 1383 | int i, hght; | ||
| 1384 | |||
| 1385 | /* Free write-only things first */ | ||
| 1386 | |||
| 1387 | free_obsolete_cnodes(c); /* Leftover from a failed commit */ | ||
| 1388 | |||
| 1389 | vfree(c->ltab_cmt); | ||
| 1390 | c->ltab_cmt = NULL; | ||
| 1391 | vfree(c->lpt_buf); | ||
| 1392 | c->lpt_buf = NULL; | ||
| 1393 | kfree(c->lsave); | ||
| 1394 | c->lsave = NULL; | ||
| 1395 | |||
| 1396 | if (wr_only) | ||
| 1397 | return; | ||
| 1398 | |||
| 1399 | /* Now free the rest */ | ||
| 1400 | |||
| 1401 | nnode = first_nnode(c, &hght); | ||
| 1402 | while (nnode) { | ||
| 1403 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) | ||
| 1404 | kfree(nnode->nbranch[i].nnode); | ||
| 1405 | nnode = next_nnode(c, nnode, &hght); | ||
| 1406 | } | ||
| 1407 | for (i = 0; i < LPROPS_HEAP_CNT; i++) | ||
| 1408 | kfree(c->lpt_heap[i].arr); | ||
| 1409 | kfree(c->dirty_idx.arr); | ||
| 1410 | kfree(c->nroot); | ||
| 1411 | vfree(c->ltab); | ||
| 1412 | kfree(c->lpt_nod_buf); | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 1416 | |||
| 1417 | /** | ||
| 1418 | * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. | ||
| 1419 | * @buf: buffer | ||
| 1420 | * @len: buffer length | ||
| 1421 | */ | ||
| 1422 | static int dbg_is_all_ff(uint8_t *buf, int len) | ||
| 1423 | { | ||
| 1424 | int i; | ||
| 1425 | |||
| 1426 | for (i = 0; i < len; i++) | ||
| 1427 | if (buf[i] != 0xff) | ||
| 1428 | return 0; | ||
| 1429 | return 1; | ||
| 1430 | } | ||
| 1431 | |||
| 1432 | /** | ||
| 1433 | * dbg_is_nnode_dirty - determine if a nnode is dirty. | ||
| 1434 | * @c: the UBIFS file-system description object | ||
| 1435 | * @lnum: LEB number where nnode was written | ||
| 1436 | * @offs: offset where nnode was written | ||
| 1437 | */ | ||
| 1438 | static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) | ||
| 1439 | { | ||
| 1440 | struct ubifs_nnode *nnode; | ||
| 1441 | int hght; | ||
| 1442 | |||
| 1443 | /* Entire tree is in memory so first_nnode / next_nnode are ok */ | ||
| 1444 | nnode = first_nnode(c, &hght); | ||
| 1445 | for (; nnode; nnode = next_nnode(c, nnode, &hght)) { | ||
| 1446 | struct ubifs_nbranch *branch; | ||
| 1447 | |||
| 1448 | cond_resched(); | ||
| 1449 | if (nnode->parent) { | ||
| 1450 | branch = &nnode->parent->nbranch[nnode->iip]; | ||
| 1451 | if (branch->lnum != lnum || branch->offs != offs) | ||
| 1452 | continue; | ||
| 1453 | if (test_bit(DIRTY_CNODE, &nnode->flags)) | ||
| 1454 | return 1; | ||
| 1455 | return 0; | ||
| 1456 | } else { | ||
| 1457 | if (c->lpt_lnum != lnum || c->lpt_offs != offs) | ||
| 1458 | continue; | ||
| 1459 | if (test_bit(DIRTY_CNODE, &nnode->flags)) | ||
| 1460 | return 1; | ||
| 1461 | return 0; | ||
| 1462 | } | ||
| 1463 | } | ||
| 1464 | return 1; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | /** | ||
| 1468 | * dbg_is_pnode_dirty - determine if a pnode is dirty. | ||
| 1469 | * @c: the UBIFS file-system description object | ||
| 1470 | * @lnum: LEB number where pnode was written | ||
| 1471 | * @offs: offset where pnode was written | ||
| 1472 | */ | ||
| 1473 | static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs) | ||
| 1474 | { | ||
| 1475 | int i, cnt; | ||
| 1476 | |||
| 1477 | cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); | ||
| 1478 | for (i = 0; i < cnt; i++) { | ||
| 1479 | struct ubifs_pnode *pnode; | ||
| 1480 | struct ubifs_nbranch *branch; | ||
| 1481 | |||
| 1482 | cond_resched(); | ||
| 1483 | pnode = pnode_lookup(c, i); | ||
| 1484 | if (IS_ERR(pnode)) | ||
| 1485 | return PTR_ERR(pnode); | ||
| 1486 | branch = &pnode->parent->nbranch[pnode->iip]; | ||
| 1487 | if (branch->lnum != lnum || branch->offs != offs) | ||
| 1488 | continue; | ||
| 1489 | if (test_bit(DIRTY_CNODE, &pnode->flags)) | ||
| 1490 | return 1; | ||
| 1491 | return 0; | ||
| 1492 | } | ||
| 1493 | return 1; | ||
| 1494 | } | ||
| 1495 | |||
| 1496 | /** | ||
| 1497 | * dbg_is_ltab_dirty - determine if a ltab node is dirty. | ||
| 1498 | * @c: the UBIFS file-system description object | ||
| 1499 | * @lnum: LEB number where ltab node was written | ||
| 1500 | * @offs: offset where ltab node was written | ||
| 1501 | */ | ||
| 1502 | static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs) | ||
| 1503 | { | ||
| 1504 | if (lnum != c->ltab_lnum || offs != c->ltab_offs) | ||
| 1505 | return 1; | ||
| 1506 | return (c->lpt_drty_flgs & LTAB_DIRTY) != 0; | ||
| 1507 | } | ||
| 1508 | |||
| 1509 | /** | ||
| 1510 | * dbg_is_lsave_dirty - determine if a lsave node is dirty. | ||
| 1511 | * @c: the UBIFS file-system description object | ||
| 1512 | * @lnum: LEB number where lsave node was written | ||
| 1513 | * @offs: offset where lsave node was written | ||
| 1514 | */ | ||
| 1515 | static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs) | ||
| 1516 | { | ||
| 1517 | if (lnum != c->lsave_lnum || offs != c->lsave_offs) | ||
| 1518 | return 1; | ||
| 1519 | return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0; | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | /** | ||
| 1523 | * dbg_is_node_dirty - determine if a node is dirty. | ||
| 1524 | * @c: the UBIFS file-system description object | ||
| 1525 | * @node_type: node type | ||
| 1526 | * @lnum: LEB number where node was written | ||
| 1527 | * @offs: offset where node was written | ||
| 1528 | */ | ||
| 1529 | static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum, | ||
| 1530 | int offs) | ||
| 1531 | { | ||
| 1532 | switch (node_type) { | ||
| 1533 | case UBIFS_LPT_NNODE: | ||
| 1534 | return dbg_is_nnode_dirty(c, lnum, offs); | ||
| 1535 | case UBIFS_LPT_PNODE: | ||
| 1536 | return dbg_is_pnode_dirty(c, lnum, offs); | ||
| 1537 | case UBIFS_LPT_LTAB: | ||
| 1538 | return dbg_is_ltab_dirty(c, lnum, offs); | ||
| 1539 | case UBIFS_LPT_LSAVE: | ||
| 1540 | return dbg_is_lsave_dirty(c, lnum, offs); | ||
| 1541 | } | ||
| 1542 | return 1; | ||
| 1543 | } | ||
| 1544 | |||
/**
 * dbg_check_ltab_lnum - check the ltab for a LPT LEB number.
 * @c: the UBIFS file-system description object
 * @lnum: LEB number to check
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
{
	/* 'len' counts down the unscanned bytes remaining in the LEB */
	int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
	int ret;
	void *buf = c->dbg_buf;

	dbg_lp("LEB %d", lnum);
	/* Read the whole LEB into the debug buffer and walk it node by node */
	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
	if (err) {
		dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
		return err;
	}
	while (1) {
		if (!is_a_node(c, buf, len)) {
			int i, pad_len;

			/* Padding is accounted as dirty space */
			pad_len = get_pad_len(c, buf, len);
			if (pad_len) {
				buf += pad_len;
				len -= pad_len;
				dirty += pad_len;
				continue;
			}
			/* Past the last node: the rest must be erased (0xFF) */
			if (!dbg_is_all_ff(buf, len)) {
				dbg_msg("invalid empty space in LEB %d at %d",
					lnum, c->leb_size - len);
				err = -EINVAL;
			}
			/*
			 * Compare the observed free/dirty totals against the
			 * ltab entry for this LEB. Note, 'err' is still 0
			 * here unless a mismatch was found above.
			 */
			i = lnum - c->lpt_first;
			if (len != c->ltab[i].free) {
				dbg_msg("invalid free space in LEB %d "
					"(free %d, expected %d)",
					lnum, len, c->ltab[i].free);
				err = -EINVAL;
			}
			if (dirty != c->ltab[i].dirty) {
				dbg_msg("invalid dirty space in LEB %d "
					"(dirty %d, expected %d)",
					lnum, dirty, c->ltab[i].dirty);
				err = -EINVAL;
			}
			return err;
		}
		/* A valid node: obsolete (dirty) nodes add to the dirty total */
		node_type = get_lpt_node_type(c, buf, &node_num);
		node_len = get_lpt_node_len(c, node_type);
		ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
		if (ret == 1)
			dirty += node_len;
		buf += node_len;
		len -= node_len;
	}
}
| 1605 | |||
| 1606 | /** | ||
| 1607 | * dbg_check_ltab - check the free and dirty space in the ltab. | ||
| 1608 | * @c: the UBIFS file-system description object | ||
| 1609 | * | ||
| 1610 | * This function returns %0 on success and a negative error code on failure. | ||
| 1611 | */ | ||
| 1612 | int dbg_check_ltab(struct ubifs_info *c) | ||
| 1613 | { | ||
| 1614 | int lnum, err, i, cnt; | ||
| 1615 | |||
| 1616 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) | ||
| 1617 | return 0; | ||
| 1618 | |||
| 1619 | /* Bring the entire tree into memory */ | ||
| 1620 | cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); | ||
| 1621 | for (i = 0; i < cnt; i++) { | ||
| 1622 | struct ubifs_pnode *pnode; | ||
| 1623 | |||
| 1624 | pnode = pnode_lookup(c, i); | ||
| 1625 | if (IS_ERR(pnode)) | ||
| 1626 | return PTR_ERR(pnode); | ||
| 1627 | cond_resched(); | ||
| 1628 | } | ||
| 1629 | |||
| 1630 | /* Check nodes */ | ||
| 1631 | err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0); | ||
| 1632 | if (err) | ||
| 1633 | return err; | ||
| 1634 | |||
| 1635 | /* Check each LEB */ | ||
| 1636 | for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { | ||
| 1637 | err = dbg_check_ltab_lnum(c, lnum); | ||
| 1638 | if (err) { | ||
| 1639 | dbg_err("failed at LEB %d", lnum); | ||
| 1640 | return err; | ||
| 1641 | } | ||
| 1642 | } | ||
| 1643 | |||
| 1644 | dbg_lp("succeeded"); | ||
| 1645 | return 0; | ||
| 1646 | } | ||
| 1647 | |||
| 1648 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c new file mode 100644 index 000000000000..71d5493bf565 --- /dev/null +++ b/fs/ubifs/master.c | |||
| @@ -0,0 +1,387 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* This file implements reading and writing the master node */ | ||
| 24 | |||
| 25 | #include "ubifs.h" | ||
| 26 | |||
| 27 | /** | ||
| 28 | * scan_for_master - search the valid master node. | ||
| 29 | * @c: UBIFS file-system description object | ||
| 30 | * | ||
 * This function scans the master node LEBs and searches for the latest master
| 32 | * node. Returns zero in case of success and a negative error code in case of | ||
| 33 | * failure. | ||
| 34 | */ | ||
static int scan_for_master(struct ubifs_info *c)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	int lnum, offs = 0, nodes_cnt;

	lnum = UBIFS_MST_LNUM;

	/* Scan the first master LEB and take the last node found there */
	sleb = ubifs_scan(c, lnum, 0, c->sbuf);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);
	nodes_cnt = sleb->nodes_cnt;
	if (nodes_cnt > 0) {
		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
				  list);
		if (snod->type != UBIFS_MST_NODE)
			goto out;
		/* Keep a copy of the candidate master node */
		memcpy(c->mst_node, snod->node, snod->len);
		offs = snod->offs;
	}
	ubifs_scan_destroy(sleb);

	lnum += 1;

	/*
	 * Scan the second master LEB. The two copies must agree in node
	 * count, position, and contents (apart from the common header) —
	 * any mismatch is treated as a failure (the caller falls back to
	 * master-node recovery).
	 */
	sleb = ubifs_scan(c, lnum, 0, c->sbuf);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);
	if (sleb->nodes_cnt != nodes_cnt)
		goto out;
	if (!sleb->nodes_cnt)
		goto out;
	snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
	if (snod->type != UBIFS_MST_NODE)
		goto out;
	if (snod->offs != offs)
		goto out;
	/* Compare everything after the common header */
	if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
		   (void *)snod->node + UBIFS_CH_SZ,
		   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
		goto out;
	c->mst_offs = offs;
	ubifs_scan_destroy(sleb);
	return 0;

out:
	ubifs_scan_destroy(sleb);
	return -EINVAL;
}
| 83 | |||
| 84 | /** | ||
| 85 | * validate_master - validate master node. | ||
| 86 | * @c: UBIFS file-system description object | ||
| 87 | * | ||
| 88 | * This function validates data which was read from master node. Returns zero | ||
| 89 | * if the data is all right and %-EINVAL if not. | ||
| 90 | */ | ||
static int validate_master(const struct ubifs_info *c)
{
	long long main_sz;
	/*
	 * 'err' is a small check number identifying which sanity check
	 * failed; it is printed in the error message at 'out' and must not
	 * be renumbered.
	 */
	int err;

	if (c->max_sqnum >= SQNUM_WATERMARK) {
		err = 1;
		goto out;
	}

	if (c->cmt_no >= c->max_sqnum) {
		err = 2;
		goto out;
	}

	if (c->highest_inum >= INUM_WATERMARK) {
		err = 3;
		goto out;
	}

	/* Log head must lie inside the log area, min_io_size-aligned */
	if (c->lhead_lnum < UBIFS_LOG_LNUM ||
	    c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
	    c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
	    c->lhead_offs & (c->min_io_size - 1)) {
		err = 4;
		goto out;
	}

	/* Index root must be in the main area, 8-byte aligned */
	if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
	    c->zroot.offs >= c->leb_size || c->zroot.offs & 7) {
		err = 5;
		goto out;
	}

	if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
	    c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) {
		err = 6;
		goto out;
	}

	if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) {
		err = 7;
		goto out;
	}

	/* Index head: in the main area, min_io_size and 8-byte aligned */
	if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
	    c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
	    c->ihead_offs > c->leb_size || c->ihead_offs & 7) {
		err = 8;
		goto out;
	}

	main_sz = (long long)c->main_lebs * c->leb_size;
	if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
		err = 9;
		goto out;
	}

	/* LPT root, nnode head, ltab and lsave must be in the LPT area */
	if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
	    c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) {
		err = 10;
		goto out;
	}

	if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
	    c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
	    c->nhead_offs > c->leb_size) {
		err = 11;
		goto out;
	}

	if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
	    c->ltab_offs < 0 ||
	    c->ltab_offs + c->ltab_sz > c->leb_size) {
		err = 12;
		goto out;
	}

	/* lsave only exists on "big" LPT layouts */
	if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
	    c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
	    c->lsave_offs + c->lsave_sz > c->leb_size)) {
		err = 13;
		goto out;
	}

	if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) {
		err = 14;
		goto out;
	}

	/* LEB-property statistics: counts, ranges, 8-byte alignment */
	if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) {
		err = 15;
		goto out;
	}

	if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) {
		err = 16;
		goto out;
	}

	if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
	    c->lst.total_free & 7) {
		err = 17;
		goto out;
	}

	if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) {
		err = 18;
		goto out;
	}

	if (c->lst.total_used < 0 || (c->lst.total_used & 7)) {
		err = 19;
		goto out;
	}

	/* The aggregate totals must fit inside the main area */
	if (c->lst.total_free + c->lst.total_dirty +
	    c->lst.total_used > main_sz) {
		err = 20;
		goto out;
	}

	if (c->lst.total_dead + c->lst.total_dark +
	    c->lst.total_used + c->old_idx_sz > main_sz) {
		err = 21;
		goto out;
	}

	if (c->lst.total_dead < 0 ||
	    c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
	    c->lst.total_dead & 7) {
		err = 22;
		goto out;
	}

	if (c->lst.total_dark < 0 ||
	    c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
	    c->lst.total_dark & 7) {
		err = 23;
		goto out;
	}

	return 0;

out:
	ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
	dbg_dump_node(c, c->mst_node);
	return -EINVAL;
}
| 240 | |||
| 241 | /** | ||
| 242 | * ubifs_read_master - read master node. | ||
| 243 | * @c: UBIFS file-system description object | ||
| 244 | * | ||
| 245 | * This function finds and reads the master node during file-system mount. If | ||
| 246 | * the flash is empty, it creates default master node as well. Returns zero in | ||
| 247 | * case of success and a negative error code in case of failure. | ||
| 248 | */ | ||
int ubifs_read_master(struct ubifs_info *c)
{
	int err, old_leb_cnt;

	c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
	if (!c->mst_node)
		return -ENOMEM;

	/* Find the valid master node; fall back to recovery on failure */
	err = scan_for_master(c);
	if (err) {
		err = ubifs_recover_master_node(c);
		if (err)
			/*
			 * Note, we do not free 'c->mst_node' here because the
			 * unmount routine will take care of this.
			 */
			return err;
	}

	/* Make sure that the recovery flag is clear */
	c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);

	/* Unpack the on-flash (little-endian) fields into the FS state */
	c->max_sqnum       = le64_to_cpu(c->mst_node->ch.sqnum);
	c->highest_inum    = le64_to_cpu(c->mst_node->highest_inum);
	c->cmt_no          = le64_to_cpu(c->mst_node->cmt_no);
	c->zroot.lnum      = le32_to_cpu(c->mst_node->root_lnum);
	c->zroot.offs      = le32_to_cpu(c->mst_node->root_offs);
	c->zroot.len       = le32_to_cpu(c->mst_node->root_len);
	c->lhead_lnum      = le32_to_cpu(c->mst_node->log_lnum);
	c->gc_lnum         = le32_to_cpu(c->mst_node->gc_lnum);
	c->ihead_lnum      = le32_to_cpu(c->mst_node->ihead_lnum);
	c->ihead_offs      = le32_to_cpu(c->mst_node->ihead_offs);
	c->old_idx_sz      = le64_to_cpu(c->mst_node->index_size);
	c->lpt_lnum        = le32_to_cpu(c->mst_node->lpt_lnum);
	c->lpt_offs        = le32_to_cpu(c->mst_node->lpt_offs);
	c->nhead_lnum      = le32_to_cpu(c->mst_node->nhead_lnum);
	c->nhead_offs      = le32_to_cpu(c->mst_node->nhead_offs);
	c->ltab_lnum       = le32_to_cpu(c->mst_node->ltab_lnum);
	c->ltab_offs       = le32_to_cpu(c->mst_node->ltab_offs);
	c->lsave_lnum      = le32_to_cpu(c->mst_node->lsave_lnum);
	c->lsave_offs      = le32_to_cpu(c->mst_node->lsave_offs);
	c->lscan_lnum      = le32_to_cpu(c->mst_node->lscan_lnum);
	c->lst.empty_lebs  = le32_to_cpu(c->mst_node->empty_lebs);
	c->lst.idx_lebs    = le32_to_cpu(c->mst_node->idx_lebs);
	old_leb_cnt        = le32_to_cpu(c->mst_node->leb_cnt);
	c->lst.total_free  = le64_to_cpu(c->mst_node->total_free);
	c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
	c->lst.total_used  = le64_to_cpu(c->mst_node->total_used);
	c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);
	c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);

	c->calc_idx_sz = c->old_idx_sz;

	if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
		c->no_orphs = 1;

	if (old_leb_cnt != c->leb_cnt) {
		/* The file system has been resized */
		int growth = c->leb_cnt - old_leb_cnt;

		/* Only growing is allowed, and never below the minimum */
		if (c->leb_cnt < old_leb_cnt ||
		    c->leb_cnt < UBIFS_MIN_LEB_CNT) {
			ubifs_err("bad leb_cnt on master node");
			dbg_dump_node(c, c->mst_node);
			return -EINVAL;
		}

		dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
			old_leb_cnt, c->leb_cnt);
		/* New LEBs start out empty/free/dark */
		c->lst.empty_lebs += growth;
		c->lst.total_free += growth * (long long)c->leb_size;
		c->lst.total_dark += growth * (long long)c->dark_wm;

		/*
		 * Reflect changes back onto the master node. N.B. the master
		 * node gets written immediately whenever mounting (or
		 * remounting) in read-write mode, so we do not need to write it
		 * here.
		 */
		c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
		c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
		c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
		c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
	}

	/* Sanity-check everything we just read */
	err = validate_master(c);
	if (err)
		return err;

	err = dbg_old_index_check_init(c, &c->zroot);

	return err;
}
| 342 | |||
| 343 | /** | ||
| 344 | * ubifs_write_master - write master node. | ||
| 345 | * @c: UBIFS file-system description object | ||
| 346 | * | ||
| 347 | * This function writes the master node. The caller has to take the | ||
| 348 | * @c->mst_mutex lock before calling this function. Returns zero in case of | ||
| 349 | * success and a negative error code in case of failure. The master node is | ||
| 350 | * written twice to enable recovery. | ||
| 351 | */ | ||
int ubifs_write_master(struct ubifs_info *c)
{
	int err, lnum, offs, len;

	if (c->ro_media)
		return -EINVAL;

	/* Advance past the previous copy; wrap to offset 0 on LEB overflow */
	lnum = UBIFS_MST_LNUM;
	offs = c->mst_offs + c->mst_node_alsz;
	len = UBIFS_MST_NODE_SZ;

	if (offs + UBIFS_MST_NODE_SZ > c->leb_size) {
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
		offs = 0;
	}

	c->mst_offs = offs;
	c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);

	/* First copy, in UBIFS_MST_LNUM */
	err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
	if (err)
		return err;

	lnum += 1;

	/* Second copy at the same offset in the next LEB (for recovery) */
	if (offs == 0) {
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
	}
	err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);

	return err;
}
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h new file mode 100644 index 000000000000..4beccfc256d2 --- /dev/null +++ b/fs/ubifs/misc.h | |||
| @@ -0,0 +1,342 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file contains miscellaneous helper functions. | ||
| 25 | */ | ||
| 26 | |||
| 27 | #ifndef __UBIFS_MISC_H__ | ||
| 28 | #define __UBIFS_MISC_H__ | ||
| 29 | |||
| 30 | /** | ||
| 31 | * ubifs_zn_dirty - check if znode is dirty. | ||
| 32 | * @znode: znode to check | ||
| 33 | * | ||
| 34 | * This helper function returns %1 if @znode is dirty and %0 otherwise. | ||
| 35 | */ | ||
| 36 | static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) | ||
| 37 | { | ||
| 38 | return !!test_bit(DIRTY_ZNODE, &znode->flags); | ||
| 39 | } | ||
| 40 | |||
| 41 | /** | ||
| 42 | * ubifs_wake_up_bgt - wake up background thread. | ||
| 43 | * @c: UBIFS file-system description object | ||
| 44 | */ | ||
| 45 | static inline void ubifs_wake_up_bgt(struct ubifs_info *c) | ||
| 46 | { | ||
| 47 | if (c->bgt && !c->need_bgt) { | ||
| 48 | c->need_bgt = 1; | ||
| 49 | wake_up_process(c->bgt); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | /** | ||
| 54 | * ubifs_tnc_find_child - find next child in znode. | ||
| 55 | * @znode: znode to search at | ||
| 56 | * @start: the zbranch index to start at | ||
| 57 | * | ||
| 58 | * This helper function looks for znode child starting at index @start. Returns | ||
| 59 | * the child or %NULL if no children were found. | ||
| 60 | */ | ||
| 61 | static inline struct ubifs_znode * | ||
| 62 | ubifs_tnc_find_child(struct ubifs_znode *znode, int start) | ||
| 63 | { | ||
| 64 | while (start < znode->child_cnt) { | ||
| 65 | if (znode->zbranch[start].znode) | ||
| 66 | return znode->zbranch[start].znode; | ||
| 67 | start += 1; | ||
| 68 | } | ||
| 69 | |||
| 70 | return NULL; | ||
| 71 | } | ||
| 72 | |||
| 73 | /** | ||
| 74 | * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. | ||
| 75 | * @inode: the VFS 'struct inode' pointer | ||
| 76 | */ | ||
| 77 | static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) | ||
| 78 | { | ||
| 79 | return container_of(inode, struct ubifs_inode, vfs_inode); | ||
| 80 | } | ||
| 81 | |||
| 82 | /** | ||
 * ubifs_ro_mode - switch UBIFS to read-only mode.
| 84 | * @c: UBIFS file-system description object | ||
| 85 | * @err: error code which is the reason of switching to R/O mode | ||
| 86 | */ | ||
| 87 | static inline void ubifs_ro_mode(struct ubifs_info *c, int err) | ||
| 88 | { | ||
| 89 | if (!c->ro_media) { | ||
| 90 | c->ro_media = 1; | ||
| 91 | ubifs_warn("switched to read-only mode, error %d", err); | ||
| 92 | dbg_dump_stack(); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | /** | ||
| 97 | * ubifs_compr_present - check if compressor was compiled in. | ||
| 98 | * @compr_type: compressor type to check | ||
| 99 | * | ||
 * This function returns %1 if the compressor of type @compr_type is present,
 * and %0 if not.
| 102 | */ | ||
| 103 | static inline int ubifs_compr_present(int compr_type) | ||
| 104 | { | ||
| 105 | ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); | ||
| 106 | return !!ubifs_compressors[compr_type]->capi_name; | ||
| 107 | } | ||
| 108 | |||
| 109 | /** | ||
| 110 | * ubifs_compr_name - get compressor name string by its type. | ||
| 111 | * @compr_type: compressor type | ||
| 112 | * | ||
| 113 | * This function returns compressor type string. | ||
| 114 | */ | ||
| 115 | static inline const char *ubifs_compr_name(int compr_type) | ||
| 116 | { | ||
| 117 | ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); | ||
| 118 | return ubifs_compressors[compr_type]->name; | ||
| 119 | } | ||
| 120 | |||
| 121 | /** | ||
| 122 | * ubifs_wbuf_sync - synchronize write-buffer. | ||
| 123 | * @wbuf: write-buffer to synchronize | ||
| 124 | * | ||
 * This is the same as 'ubifs_wbuf_sync_nolock()' but it does not assume
| 126 | * that the write-buffer is already locked. | ||
| 127 | */ | ||
static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
{
	int err;

	/* Take the write-buffer lock, sync, then release */
	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
	err = ubifs_wbuf_sync_nolock(wbuf);
	mutex_unlock(&wbuf->io_mutex);
	return err;
}
| 137 | |||
| 138 | /** | ||
| 139 | * ubifs_leb_unmap - unmap an LEB. | ||
| 140 | * @c: UBIFS file-system description object | ||
| 141 | * @lnum: LEB number to unmap | ||
| 142 | * | ||
| 143 | * This function returns %0 on success and a negative error code on failure. | ||
| 144 | */ | ||
| 145 | static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) | ||
| 146 | { | ||
| 147 | int err; | ||
| 148 | |||
| 149 | if (c->ro_media) | ||
| 150 | return -EROFS; | ||
| 151 | err = ubi_leb_unmap(c->ubi, lnum); | ||
| 152 | if (err) { | ||
| 153 | ubifs_err("unmap LEB %d failed, error %d", lnum, err); | ||
| 154 | return err; | ||
| 155 | } | ||
| 156 | |||
| 157 | return 0; | ||
| 158 | } | ||
| 159 | |||
| 160 | /** | ||
| 161 | * ubifs_leb_write - write to a LEB. | ||
| 162 | * @c: UBIFS file-system description object | ||
| 163 | * @lnum: LEB number to write | ||
| 164 | * @buf: buffer to write from | ||
| 165 | * @offs: offset within LEB to write to | ||
| 166 | * @len: length to write | ||
| 167 | * @dtype: data type | ||
| 168 | * | ||
| 169 | * This function returns %0 on success and a negative error code on failure. | ||
| 170 | */ | ||
| 171 | static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, | ||
| 172 | const void *buf, int offs, int len, int dtype) | ||
| 173 | { | ||
| 174 | int err; | ||
| 175 | |||
| 176 | if (c->ro_media) | ||
| 177 | return -EROFS; | ||
| 178 | err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); | ||
| 179 | if (err) { | ||
| 180 | ubifs_err("writing %d bytes at %d:%d, error %d", | ||
| 181 | len, lnum, offs, err); | ||
| 182 | return err; | ||
| 183 | } | ||
| 184 | |||
| 185 | return 0; | ||
| 186 | } | ||
| 187 | |||
| 188 | /** | ||
| 189 | * ubifs_leb_change - atomic LEB change. | ||
| 190 | * @c: UBIFS file-system description object | ||
| 191 | * @lnum: LEB number to write | ||
| 192 | * @buf: buffer to write from | ||
| 193 | * @len: length to write | ||
| 194 | * @dtype: data type | ||
| 195 | * | ||
| 196 | * This function returns %0 on success and a negative error code on failure. | ||
| 197 | */ | ||
| 198 | static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, | ||
| 199 | const void *buf, int len, int dtype) | ||
| 200 | { | ||
| 201 | int err; | ||
| 202 | |||
| 203 | if (c->ro_media) | ||
| 204 | return -EROFS; | ||
| 205 | err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); | ||
| 206 | if (err) { | ||
| 207 | ubifs_err("changing %d bytes in LEB %d, error %d", | ||
| 208 | len, lnum, err); | ||
| 209 | return err; | ||
| 210 | } | ||
| 211 | |||
| 212 | return 0; | ||
| 213 | } | ||
| 214 | |||
| 215 | /** | ||
| 216 | * ubifs_encode_dev - encode device node IDs. | ||
| 217 | * @dev: UBIFS device node information | ||
| 218 | * @rdev: device IDs to encode | ||
| 219 | * | ||
| 220 | * This is a helper function which encodes major/minor numbers of a device node | ||
| 221 | * into UBIFS device node description. We use standard Linux "new" and "huge" | ||
| 222 | * encodings. | ||
| 223 | */ | ||
| 224 | static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) | ||
| 225 | { | ||
| 226 | if (new_valid_dev(rdev)) { | ||
| 227 | dev->new = cpu_to_le32(new_encode_dev(rdev)); | ||
| 228 | return sizeof(dev->new); | ||
| 229 | } else { | ||
| 230 | dev->huge = cpu_to_le64(huge_encode_dev(rdev)); | ||
| 231 | return sizeof(dev->huge); | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | /** | ||
| 236 | * ubifs_add_dirt - add dirty space to LEB properties. | ||
| 237 | * @c: the UBIFS file-system description object | ||
| 238 | * @lnum: LEB to add dirty space for | ||
| 239 | * @dirty: dirty space to add | ||
| 240 | * | ||
| 241 | * This is a helper function which increased amount of dirty LEB space. Returns | ||
| 242 | * zero in case of success and a negative error code in case of failure. | ||
| 243 | */ | ||
| 244 | static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty) | ||
| 245 | { | ||
| 246 | return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0); | ||
| 247 | } | ||
| 248 | |||
| 249 | /** | ||
| 250 | * ubifs_return_leb - return LEB to lprops. | ||
| 251 | * @c: the UBIFS file-system description object | ||
| 252 | * @lnum: LEB to return | ||
| 253 | * | ||
| 254 | * This helper function cleans the "taken" flag of a logical eraseblock in the | ||
| 255 | * lprops. Returns zero in case of success and a negative error code in case of | ||
| 256 | * failure. | ||
| 257 | */ | ||
| 258 | static inline int ubifs_return_leb(struct ubifs_info *c, int lnum) | ||
| 259 | { | ||
| 260 | return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 261 | LPROPS_TAKEN, 0); | ||
| 262 | } | ||
| 263 | |||
| 264 | /** | ||
| 265 | * ubifs_idx_node_sz - return index node size. | ||
| 266 | * @c: the UBIFS file-system description object | ||
| 267 | * @child_cnt: number of children of this index node | ||
| 268 | */ | ||
| 269 | static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt) | ||
| 270 | { | ||
| 271 | return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt; | ||
| 272 | } | ||
| 273 | |||
| 274 | /** | ||
| 275 | * ubifs_idx_branch - return pointer to an index branch. | ||
| 276 | * @c: the UBIFS file-system description object | ||
| 277 | * @idx: index node | ||
| 278 | * @bnum: branch number | ||
| 279 | */ | ||
| 280 | static inline | ||
| 281 | struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, | ||
| 282 | const struct ubifs_idx_node *idx, | ||
| 283 | int bnum) | ||
| 284 | { | ||
| 285 | return (struct ubifs_branch *)((void *)idx->branches + | ||
| 286 | (UBIFS_BRANCH_SZ + c->key_len) * bnum); | ||
| 287 | } | ||
| 288 | |||
| 289 | /** | ||
| 290 | * ubifs_idx_key - return pointer to an index key. | ||
| 291 | * @c: the UBIFS file-system description object | ||
| 292 | * @idx: index node | ||
| 293 | */ | ||
| 294 | static inline void *ubifs_idx_key(const struct ubifs_info *c, | ||
| 295 | const struct ubifs_idx_node *idx) | ||
| 296 | { | ||
| 297 | return (void *)((struct ubifs_branch *)idx->branches)->key; | ||
| 298 | } | ||
| 299 | |||
| 300 | /** | ||
| 301 | * ubifs_reported_space - calculate reported free space. | ||
| 302 | * @c: the UBIFS file-system description object | ||
| 303 | * @free: amount of free space | ||
| 304 | * | ||
 * This function calculates the amount of free space which will be reported to
 * user-space. User-space applications tend to expect that if the file-system
 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
 * are able to write a file of size N. UBIFS attaches node headers to each data
 * node and it has to write indexing nodes as well. This introduces additional
 * overhead, and UBIFS has to report slightly less free space to meet the
 * above expectation.
| 312 | * | ||
| 313 | * This function assumes free space is made up of uncompressed data nodes and | ||
| 314 | * full index nodes (one per data node, doubled because we always allow enough | ||
| 315 | * space to write the index twice). | ||
| 316 | * | ||
| 317 | * Note, the calculation is pessimistic, which means that most of the time | ||
| 318 | * UBIFS reports less space than it actually has. | ||
| 319 | */ | ||
| 320 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
| 321 | uint64_t free) | ||
| 322 | { | ||
| 323 | int divisor, factor; | ||
| 324 | |||
| 325 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); | ||
| 326 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
| 327 | do_div(free, divisor); | ||
| 328 | |||
| 329 | return free * factor; | ||
| 330 | } | ||
| 331 | |||
| 332 | /** | ||
| 333 | * ubifs_current_time - round current time to time granularity. | ||
| 334 | * @inode: inode | ||
| 335 | */ | ||
| 336 | static inline struct timespec ubifs_current_time(struct inode *inode) | ||
| 337 | { | ||
| 338 | return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? | ||
| 339 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | ||
| 340 | } | ||
| 341 | |||
| 342 | #endif /* __UBIFS_MISC_H__ */ | ||
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c new file mode 100644 index 000000000000..3afeb9242c6a --- /dev/null +++ b/fs/ubifs/orphan.c | |||
| @@ -0,0 +1,958 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Author: Adrian Hunter | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include "ubifs.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * An orphan is an inode number whose inode node has been committed to the index | ||
| 26 | * with a link count of zero. That happens when an open file is deleted | ||
| 27 | * (unlinked) and then a commit is run. In the normal course of events the inode | ||
| 28 | * would be deleted when the file is closed. However in the case of an unclean | ||
| 29 | * unmount, orphans need to be accounted for. After an unclean unmount, the | ||
| 30 | * orphans' inodes must be deleted which means either scanning the entire index | ||
| 31 | * looking for them, or keeping a list on flash somewhere. This unit implements | ||
| 32 | * the latter approach. | ||
| 33 | * | ||
| 34 | * The orphan area is a fixed number of LEBs situated between the LPT area and | ||
| 35 | * the main area. The number of orphan area LEBs is specified when the file | ||
| 36 | * system is created. The minimum number is 1. The size of the orphan area | ||
| 37 | * should be so that it can hold the maximum number of orphans that are expected | ||
| 38 | * to ever exist at one time. | ||
| 39 | * | ||
| 40 | * The number of orphans that can fit in a LEB is: | ||
| 41 | * | ||
| 42 | * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) | ||
| 43 | * | ||
| 44 | * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. | ||
| 45 | * | ||
| 46 | * Orphans are accumulated in a rb-tree. When an inode's link count drops to | ||
| 47 | * zero, the inode number is added to the rb-tree. It is removed from the tree | ||
| 48 | * when the inode is deleted. Any new orphans that are in the orphan tree when | ||
| 49 | * the commit is run, are written to the orphan area in 1 or more orph nodes. | ||
| 50 | * If the orphan area is full, it is consolidated to make space. There is | ||
| 51 | * always enough space because validation prevents the user from creating more | ||
| 52 | * than the maximum number of orphans allowed. | ||
| 53 | */ | ||
| 54 | |||
| 55 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 56 | static int dbg_check_orphans(struct ubifs_info *c); | ||
| 57 | #else | ||
| 58 | #define dbg_check_orphans(c) 0 | ||
| 59 | #endif | ||
| 60 | |||
| 61 | /** | ||
| 62 | * ubifs_add_orphan - add an orphan. | ||
| 63 | * @c: UBIFS file-system description object | ||
| 64 | * @inum: orphan inode number | ||
| 65 | * | ||
| 66 | * Add an orphan. This function is called when an inodes link count drops to | ||
| 67 | * zero. | ||
| 68 | */ | ||
int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
{
	struct ubifs_orphan *orphan, *o;
	struct rb_node **p, *parent = NULL;

	/* Allocate before taking the spinlock; GFP_NOFS to avoid recursion
	 * into the file system during reclaim */
	orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
	if (!orphan)
		return -ENOMEM;
	orphan->inum = inum;
	orphan->new = 1;

	spin_lock(&c->orphan_lock);
	if (c->tot_orphans >= c->max_orphans) {
		/* The orphan area could not hold another one - refuse */
		spin_unlock(&c->orphan_lock);
		kfree(orphan);
		return -ENFILE;
	}
	/* Standard rb-tree insertion keyed by inode number */
	p = &c->orph_tree.rb_node;
	while (*p) {
		parent = *p;
		o = rb_entry(parent, struct ubifs_orphan, rb);
		if (inum < o->inum)
			p = &(*p)->rb_left;
		else if (inum > o->inum)
			p = &(*p)->rb_right;
		else {
			/* Already present - report but treat as success */
			dbg_err("orphaned twice");
			spin_unlock(&c->orphan_lock);
			kfree(orphan);
			return 0;
		}
	}
	c->tot_orphans += 1;
	c->new_orphans += 1;
	rb_link_node(&orphan->rb, parent, p);
	rb_insert_color(&orphan->rb, &c->orph_tree);
	/* New orphans stay on orph_new until the next commit picks them up */
	list_add_tail(&orphan->list, &c->orph_list);
	list_add_tail(&orphan->new_list, &c->orph_new);
	spin_unlock(&c->orphan_lock);
	dbg_gen("ino %lu", inum);
	return 0;
}
| 111 | |||
| 112 | /** | ||
| 113 | * ubifs_delete_orphan - delete an orphan. | ||
| 114 | * @c: UBIFS file-system description object | ||
| 115 | * @inum: orphan inode number | ||
| 116 | * | ||
| 117 | * Delete an orphan. This function is called when an inode is deleted. | ||
| 118 | */ | ||
void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
{
	struct ubifs_orphan *o;
	struct rb_node *p;

	spin_lock(&c->orphan_lock);
	p = c->orph_tree.rb_node;
	while (p) {
		o = rb_entry(p, struct ubifs_orphan, rb);
		if (inum < o->inum)
			p = p->rb_left;
		else if (inum > o->inum)
			p = p->rb_right;
		else {
			if (o->dnext) {
				/* Already scheduled for deletion */
				spin_unlock(&c->orphan_lock);
				dbg_gen("deleted twice ino %lu", inum);
				return;
			}
			if (o->cnext) {
				/*
				 * Being committed right now, so it cannot be
				 * removed from the tree; defer the deletion to
				 * erase_deleted() at the end of the commit.
				 */
				o->dnext = c->orph_dnext;
				c->orph_dnext = o;
				spin_unlock(&c->orphan_lock);
				dbg_gen("delete later ino %lu", inum);
				return;
			}
			rb_erase(p, &c->orph_tree);
			list_del(&o->list);
			c->tot_orphans -= 1;
			if (o->new) {
				/* Never committed - also leave orph_new */
				list_del(&o->new_list);
				c->new_orphans -= 1;
			}
			spin_unlock(&c->orphan_lock);
			kfree(o);
			dbg_gen("inum %lu", inum);
			return;
		}
	}
	spin_unlock(&c->orphan_lock);
	/* Deleting an orphan that was never added indicates a bug */
	dbg_err("missing orphan ino %lu", inum);
	dbg_dump_stack();
}
| 162 | |||
| 163 | /** | ||
| 164 | * ubifs_orphan_start_commit - start commit of orphans. | ||
| 165 | * @c: UBIFS file-system description object | ||
| 166 | * | ||
| 167 | * Start commit of orphans. | ||
| 168 | */ | ||
| 169 | int ubifs_orphan_start_commit(struct ubifs_info *c) | ||
| 170 | { | ||
| 171 | struct ubifs_orphan *orphan, **last; | ||
| 172 | |||
| 173 | spin_lock(&c->orphan_lock); | ||
| 174 | last = &c->orph_cnext; | ||
| 175 | list_for_each_entry(orphan, &c->orph_new, new_list) { | ||
| 176 | ubifs_assert(orphan->new); | ||
| 177 | orphan->new = 0; | ||
| 178 | *last = orphan; | ||
| 179 | last = &orphan->cnext; | ||
| 180 | } | ||
| 181 | *last = orphan->cnext; | ||
| 182 | c->cmt_orphans = c->new_orphans; | ||
| 183 | c->new_orphans = 0; | ||
| 184 | dbg_cmt("%d orphans to commit", c->cmt_orphans); | ||
| 185 | INIT_LIST_HEAD(&c->orph_new); | ||
| 186 | if (c->tot_orphans == 0) | ||
| 187 | c->no_orphs = 1; | ||
| 188 | else | ||
| 189 | c->no_orphs = 0; | ||
| 190 | spin_unlock(&c->orphan_lock); | ||
| 191 | return 0; | ||
| 192 | } | ||
| 193 | |||
| 194 | /** | ||
| 195 | * avail_orphs - calculate available space. | ||
| 196 | * @c: UBIFS file-system description object | ||
| 197 | * | ||
| 198 | * This function returns the number of orphans that can be written in the | ||
| 199 | * available space. | ||
| 200 | */ | ||
| 201 | static int avail_orphs(struct ubifs_info *c) | ||
| 202 | { | ||
| 203 | int avail_lebs, avail, gap; | ||
| 204 | |||
| 205 | avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; | ||
| 206 | avail = avail_lebs * | ||
| 207 | ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); | ||
| 208 | gap = c->leb_size - c->ohead_offs; | ||
| 209 | if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) | ||
| 210 | avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); | ||
| 211 | return avail; | ||
| 212 | } | ||
| 213 | |||
| 214 | /** | ||
| 215 | * tot_avail_orphs - calculate total space. | ||
| 216 | * @c: UBIFS file-system description object | ||
| 217 | * | ||
| 218 | * This function returns the number of orphans that can be written in half | ||
| 219 | * the total space. That leaves half the space for adding new orphans. | ||
| 220 | */ | ||
| 221 | static int tot_avail_orphs(struct ubifs_info *c) | ||
| 222 | { | ||
| 223 | int avail_lebs, avail; | ||
| 224 | |||
| 225 | avail_lebs = c->orph_lebs; | ||
| 226 | avail = avail_lebs * | ||
| 227 | ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); | ||
| 228 | return avail / 2; | ||
| 229 | } | ||
| 230 | |||
| 231 | /** | ||
| 232 | * do_write_orph_node - write a node | ||
| 233 | * @c: UBIFS file-system description object | ||
| 234 | * @len: length of node | ||
| 235 | * @atomic: write atomically | ||
| 236 | * | ||
| 237 | * This function writes a node to the orphan head from the orphan buffer. If | ||
| 238 | * %atomic is not zero, then the write is done atomically. On success, %0 is | ||
| 239 | * returned, otherwise a negative error code is returned. | ||
| 240 | */ | ||
| 241 | static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) | ||
| 242 | { | ||
| 243 | int err = 0; | ||
| 244 | |||
| 245 | if (atomic) { | ||
| 246 | ubifs_assert(c->ohead_offs == 0); | ||
| 247 | ubifs_prepare_node(c, c->orph_buf, len, 1); | ||
| 248 | len = ALIGN(len, c->min_io_size); | ||
| 249 | err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len, | ||
| 250 | UBI_SHORTTERM); | ||
| 251 | } else { | ||
| 252 | if (c->ohead_offs == 0) { | ||
| 253 | /* Ensure LEB has been unmapped */ | ||
| 254 | err = ubifs_leb_unmap(c, c->ohead_lnum); | ||
| 255 | if (err) | ||
| 256 | return err; | ||
| 257 | } | ||
| 258 | err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, | ||
| 259 | c->ohead_offs, UBI_SHORTTERM); | ||
| 260 | } | ||
| 261 | return err; | ||
| 262 | } | ||
| 263 | |||
| 264 | /** | ||
| 265 | * write_orph_node - write an orph node | ||
| 266 | * @c: UBIFS file-system description object | ||
| 267 | * @atomic: write atomically | ||
| 268 | * | ||
| 269 | * This function builds an orph node from the cnext list and writes it to the | ||
| 270 | * orphan head. On success, %0 is returned, otherwise a negative error code | ||
| 271 | * is returned. | ||
| 272 | */ | ||
static int write_orph_node(struct ubifs_info *c, int atomic)
{
	struct ubifs_orphan *orphan, *cnext;
	struct ubifs_orph_node *orph;
	int gap, err, len, cnt, i;

	ubifs_assert(c->cmt_orphans > 0);
	gap = c->leb_size - c->ohead_offs;
	if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) {
		/* Not even one inode number fits - advance to the next LEB */
		c->ohead_lnum += 1;
		c->ohead_offs = 0;
		gap = c->leb_size;
		if (c->ohead_lnum > c->orph_last) {
			/*
			 * We limit the number of orphans so that this should
			 * never happen.
			 */
			ubifs_err("out of space in orphan area");
			return -EINVAL;
		}
	}
	/* Put as many of the pending orphans in this node as the gap allows */
	cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
	if (cnt > c->cmt_orphans)
		cnt = c->cmt_orphans;
	len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64);
	ubifs_assert(c->orph_buf);
	orph = c->orph_buf;
	orph->ch.node_type = UBIFS_ORPH_NODE;
	spin_lock(&c->orphan_lock);
	/* Consume 'cnt' entries from the head of the cnext list */
	cnext = c->orph_cnext;
	for (i = 0; i < cnt; i++) {
		orphan = cnext;
		orph->inos[i] = cpu_to_le64(orphan->inum);
		cnext = orphan->cnext;
		/* Clearing cnext lets ubifs_delete_orphan() free it again */
		orphan->cnext = NULL;
	}
	c->orph_cnext = cnext;
	c->cmt_orphans -= cnt;
	spin_unlock(&c->orphan_lock);
	if (c->cmt_orphans)
		orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
	else
		/* Mark the last node of the commit */
		orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
	ubifs_assert(c->ohead_offs + len <= c->leb_size);
	ubifs_assert(c->ohead_lnum >= c->orph_first);
	ubifs_assert(c->ohead_lnum <= c->orph_last);
	err = do_write_orph_node(c, len, atomic);
	/* Advance the head past what was written, kept 8-byte aligned */
	c->ohead_offs += ALIGN(len, c->min_io_size);
	c->ohead_offs = ALIGN(c->ohead_offs, 8);
	return err;
}
| 325 | |||
| 326 | /** | ||
| 327 | * write_orph_nodes - write orph nodes until there are no more to commit | ||
| 328 | * @c: UBIFS file-system description object | ||
| 329 | * @atomic: write atomically | ||
| 330 | * | ||
| 331 | * This function writes orph nodes for all the orphans to commit. On success, | ||
| 332 | * %0 is returned, otherwise a negative error code is returned. | ||
| 333 | */ | ||
| 334 | static int write_orph_nodes(struct ubifs_info *c, int atomic) | ||
| 335 | { | ||
| 336 | int err; | ||
| 337 | |||
| 338 | while (c->cmt_orphans > 0) { | ||
| 339 | err = write_orph_node(c, atomic); | ||
| 340 | if (err) | ||
| 341 | return err; | ||
| 342 | } | ||
| 343 | if (atomic) { | ||
| 344 | int lnum; | ||
| 345 | |||
| 346 | /* Unmap any unused LEBs after consolidation */ | ||
| 347 | lnum = c->ohead_lnum + 1; | ||
| 348 | for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { | ||
| 349 | err = ubifs_leb_unmap(c, lnum); | ||
| 350 | if (err) | ||
| 351 | return err; | ||
| 352 | } | ||
| 353 | } | ||
| 354 | return 0; | ||
| 355 | } | ||
| 356 | |||
| 357 | /** | ||
| 358 | * consolidate - consolidate the orphan area. | ||
| 359 | * @c: UBIFS file-system description object | ||
| 360 | * | ||
| 361 | * This function enables consolidation by putting all the orphans into the list | ||
| 362 | * to commit. The list is in the order that the orphans were added, and the | ||
| 363 | * LEBs are written atomically in order, so at no time can orphans be lost by | ||
| 364 | * an unclean unmount. | ||
| 365 | * | ||
| 366 | * This function returns %0 on success and a negative error code on failure. | ||
| 367 | */ | ||
| 368 | static int consolidate(struct ubifs_info *c) | ||
| 369 | { | ||
| 370 | int tot_avail = tot_avail_orphs(c), err = 0; | ||
| 371 | |||
| 372 | spin_lock(&c->orphan_lock); | ||
| 373 | dbg_cmt("there is space for %d orphans and there are %d", | ||
| 374 | tot_avail, c->tot_orphans); | ||
| 375 | if (c->tot_orphans - c->new_orphans <= tot_avail) { | ||
| 376 | struct ubifs_orphan *orphan, **last; | ||
| 377 | int cnt = 0; | ||
| 378 | |||
| 379 | /* Change the cnext list to include all non-new orphans */ | ||
| 380 | last = &c->orph_cnext; | ||
| 381 | list_for_each_entry(orphan, &c->orph_list, list) { | ||
| 382 | if (orphan->new) | ||
| 383 | continue; | ||
| 384 | *last = orphan; | ||
| 385 | last = &orphan->cnext; | ||
| 386 | cnt += 1; | ||
| 387 | } | ||
| 388 | *last = orphan->cnext; | ||
| 389 | ubifs_assert(cnt == c->tot_orphans - c->new_orphans); | ||
| 390 | c->cmt_orphans = cnt; | ||
| 391 | c->ohead_lnum = c->orph_first; | ||
| 392 | c->ohead_offs = 0; | ||
| 393 | } else { | ||
| 394 | /* | ||
| 395 | * We limit the number of orphans so that this should | ||
| 396 | * never happen. | ||
| 397 | */ | ||
| 398 | ubifs_err("out of space in orphan area"); | ||
| 399 | err = -EINVAL; | ||
| 400 | } | ||
| 401 | spin_unlock(&c->orphan_lock); | ||
| 402 | return err; | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * commit_orphans - commit orphans. | ||
| 407 | * @c: UBIFS file-system description object | ||
| 408 | * | ||
| 409 | * This function commits orphans to flash. On success, %0 is returned, | ||
| 410 | * otherwise a negative error code is returned. | ||
| 411 | */ | ||
| 412 | static int commit_orphans(struct ubifs_info *c) | ||
| 413 | { | ||
| 414 | int avail, atomic = 0, err; | ||
| 415 | |||
| 416 | ubifs_assert(c->cmt_orphans > 0); | ||
| 417 | avail = avail_orphs(c); | ||
| 418 | if (avail < c->cmt_orphans) { | ||
| 419 | /* Not enough space to write new orphans, so consolidate */ | ||
| 420 | err = consolidate(c); | ||
| 421 | if (err) | ||
| 422 | return err; | ||
| 423 | atomic = 1; | ||
| 424 | } | ||
| 425 | err = write_orph_nodes(c, atomic); | ||
| 426 | return err; | ||
| 427 | } | ||
| 428 | |||
| 429 | /** | ||
| 430 | * erase_deleted - erase the orphans marked for deletion. | ||
| 431 | * @c: UBIFS file-system description object | ||
| 432 | * | ||
| 433 | * During commit, the orphans being committed cannot be deleted, so they are | ||
| 434 | * marked for deletion and deleted by this function. Also, the recovery | ||
| 435 | * adds killed orphans to the deletion list, and therefore they are deleted | ||
| 436 | * here too. | ||
| 437 | */ | ||
static void erase_deleted(struct ubifs_info *c)
{
	struct ubifs_orphan *orphan, *dnext;

	spin_lock(&c->orphan_lock);
	/* Walk the singly-linked deletion list threaded through 'dnext' */
	dnext = c->orph_dnext;
	while (dnext) {
		orphan = dnext;
		dnext = orphan->dnext;
		ubifs_assert(!orphan->new);
		rb_erase(&orphan->rb, &c->orph_tree);
		list_del(&orphan->list);
		c->tot_orphans -= 1;
		dbg_gen("deleting orphan ino %lu", orphan->inum);
		kfree(orphan);
	}
	c->orph_dnext = NULL;
	spin_unlock(&c->orphan_lock);
}
| 457 | |||
| 458 | /** | ||
| 459 | * ubifs_orphan_end_commit - end commit of orphans. | ||
| 460 | * @c: UBIFS file-system description object | ||
| 461 | * | ||
| 462 | * End commit of orphans. | ||
| 463 | */ | ||
| 464 | int ubifs_orphan_end_commit(struct ubifs_info *c) | ||
| 465 | { | ||
| 466 | int err; | ||
| 467 | |||
| 468 | if (c->cmt_orphans != 0) { | ||
| 469 | err = commit_orphans(c); | ||
| 470 | if (err) | ||
| 471 | return err; | ||
| 472 | } | ||
| 473 | erase_deleted(c); | ||
| 474 | err = dbg_check_orphans(c); | ||
| 475 | return err; | ||
| 476 | } | ||
| 477 | |||
| 478 | /** | ||
| 479 | * clear_orphans - erase all LEBs used for orphans. | ||
| 480 | * @c: UBIFS file-system description object | ||
| 481 | * | ||
| 482 | * If recovery is not required, then the orphans from the previous session | ||
| 483 | * are not needed. This function locates the LEBs used to record | ||
| 484 | * orphans, and un-maps them. | ||
| 485 | */ | ||
| 486 | static int clear_orphans(struct ubifs_info *c) | ||
| 487 | { | ||
| 488 | int lnum, err; | ||
| 489 | |||
| 490 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { | ||
| 491 | err = ubifs_leb_unmap(c, lnum); | ||
| 492 | if (err) | ||
| 493 | return err; | ||
| 494 | } | ||
| 495 | c->ohead_lnum = c->orph_first; | ||
| 496 | c->ohead_offs = 0; | ||
| 497 | return 0; | ||
| 498 | } | ||
| 499 | |||
| 500 | /** | ||
| 501 | * insert_dead_orphan - insert an orphan. | ||
| 502 | * @c: UBIFS file-system description object | ||
| 503 | * @inum: orphan inode number | ||
| 504 | * | ||
| 505 | * This function is a helper to the 'do_kill_orphans()' function. The orphan | ||
| 506 | * must be kept until the next commit, so it is added to the rb-tree and the | ||
| 507 | * deletion list. | ||
| 508 | */ | ||
static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
{
	struct ubifs_orphan *orphan, *o;
	struct rb_node **p, *parent = NULL;

	/* GFP_KERNEL is fine here: called only at mount time */
	orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL);
	if (!orphan)
		return -ENOMEM;
	orphan->inum = inum;

	/* Standard rb-tree insertion keyed by inode number */
	p = &c->orph_tree.rb_node;
	while (*p) {
		parent = *p;
		o = rb_entry(parent, struct ubifs_orphan, rb);
		if (inum < o->inum)
			p = &(*p)->rb_left;
		else if (inum > o->inum)
			p = &(*p)->rb_right;
		else {
			/* Already added - no problem */
			kfree(orphan);
			return 0;
		}
	}
	c->tot_orphans += 1;
	rb_link_node(&orphan->rb, parent, p);
	rb_insert_color(&orphan->rb, &c->orph_tree);
	list_add_tail(&orphan->list, &c->orph_list);
	/* Dead orphans are queued for deletion at the next commit */
	orphan->dnext = c->orph_dnext;
	c->orph_dnext = orphan;
	dbg_mnt("ino %lu, new %d, tot %d",
		inum, c->new_orphans, c->tot_orphans);
	return 0;
}
| 543 | |||
| 544 | /** | ||
| 545 | * do_kill_orphans - remove orphan inodes from the index. | ||
| 546 | * @c: UBIFS file-system description object | ||
| 547 | * @sleb: scanned LEB | ||
| 548 | * @last_cmt_no: cmt_no of last orph node read is passed and returned here | ||
| 549 | * @outofdate: whether the LEB is out of date is returned here | ||
| 550 | * @last_flagged: whether the end orph node is encountered | ||
| 551 | * | ||
| 552 | * This function is a helper to the 'kill_orphans()' function. It goes through | ||
| 553 | * every orphan node in a LEB and for every inode number recorded, removes | ||
| 554 | * all keys for that inode from the TNC. | ||
| 555 | */ | ||
static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
			   unsigned long long *last_cmt_no, int *outofdate,
			   int *last_flagged)
{
	struct ubifs_scan_node *snod;
	struct ubifs_orph_node *orph;
	unsigned long long cmt_no;
	ino_t inum;
	int i, n, err, first = 1;

	list_for_each_entry(snod, &sleb->nodes, list) {
		if (snod->type != UBIFS_ORPH_NODE) {
			/* Only orph nodes may live in the orphan area */
			ubifs_err("invalid node type %d in orphan area at "
				  "%d:%d", snod->type, sleb->lnum, snod->offs);
			dbg_dump_node(c, snod->node);
			return -EINVAL;
		}

		orph = snod->node;

		/* Check commit number (top bit is the "last node" flag) */
		cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX;
		/*
		 * The commit number on the master node may be less, because
		 * of a failed commit. If there are several failed commits in a
		 * row, the commit number written on orph nodes will continue to
		 * increase (because the commit number is adjusted here) even
		 * though the commit number on the master node stays the same
		 * because the master node has not been re-written.
		 */
		if (cmt_no > c->cmt_no)
			c->cmt_no = cmt_no;
		if (cmt_no < *last_cmt_no && *last_flagged) {
			/*
			 * The last orph node had a higher commit number and was
			 * flagged as the last written for that commit number.
			 * That makes this orph node, out of date.
			 */
			if (!first) {
				/* Stale nodes may only start a LEB */
				ubifs_err("out of order commit number %llu in "
					  "orphan node at %d:%d",
					  cmt_no, sleb->lnum, snod->offs);
				dbg_dump_node(c, snod->node);
				return -EINVAL;
			}
			dbg_rcvry("out of date LEB %d", sleb->lnum);
			*outofdate = 1;
			return 0;
		}

		if (first)
			first = 0;

		/* Each recorded inode number occupies 8 bytes (__le64) */
		n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
		for (i = 0; i < n; i++) {
			inum = le64_to_cpu(orph->inos[i]);
			dbg_rcvry("deleting orphaned inode %lu", inum);
			/* Purge all index entries of the orphaned inode */
			err = ubifs_tnc_remove_ino(c, inum);
			if (err)
				return err;
			/* Remember it until the next commit erases the area */
			err = insert_dead_orphan(c, inum);
			if (err)
				return err;
		}

		*last_cmt_no = cmt_no;
		if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) {
			dbg_rcvry("last orph node for commit %llu at %d:%d",
				  cmt_no, sleb->lnum, snod->offs);
			*last_flagged = 1;
		} else
			*last_flagged = 0;
	}

	return 0;
}
| 632 | |||
| 633 | /** | ||
| 634 | * kill_orphans - remove all orphan inodes from the index. | ||
| 635 | * @c: UBIFS file-system description object | ||
| 636 | * | ||
| 637 | * If recovery is required, then orphan inodes recorded during the previous | ||
| 638 | * session (which ended with an unclean unmount) must be deleted from the index. | ||
| 639 | * This is done by updating the TNC, but since the index is not updated until | ||
| 640 | * the next commit, the LEBs where the orphan information is recorded are not | ||
| 641 | * erased until the next commit. | ||
| 642 | */ | ||
static int kill_orphans(struct ubifs_info *c)
{
	unsigned long long last_cmt_no = 0;
	int lnum, err = 0, outofdate = 0, last_flagged = 0;

	c->ohead_lnum = c->orph_first;
	c->ohead_offs = 0;
	/* Check no-orphans flag and skip this if no orphans */
	if (c->no_orphs) {
		dbg_rcvry("no orphans");
		return 0;
	}
	/*
	 * Orph nodes always start at c->orph_first and are written to each
	 * successive LEB in turn. Generally unused LEBs will have been unmapped
	 * but may contain out of date orph nodes if the unmap didn't go
	 * through. In addition, the last orph node written for each commit is
	 * marked (top bit of orph->cmt_no is set to 1). It is possible that
	 * there are orph nodes from the next commit (i.e. the commit did not
	 * complete successfully). In that case, no orphans will have been lost
	 * due to the way that orphans are written, and any orphans added will
	 * be valid orphans anyway and so can be deleted.
	 */
	for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
		struct ubifs_scan_leb *sleb;

		dbg_rcvry("LEB %d", lnum);
		sleb = ubifs_scan(c, lnum, 0, c->sbuf);
		if (IS_ERR(sleb)) {
			/* Scan failed - attempt recovery of this LEB */
			sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
			if (IS_ERR(sleb)) {
				err = PTR_ERR(sleb);
				break;
			}
		}
		err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate,
				      &last_flagged);
		if (err || outofdate) {
			ubifs_scan_destroy(sleb);
			break;
		}
		if (sleb->endpt) {
			/* Next commit will continue writing from here */
			c->ohead_lnum = lnum;
			c->ohead_offs = sleb->endpt;
		}
		ubifs_scan_destroy(sleb);
	}
	return err;
}
| 692 | |||
| 693 | /** | ||
| 694 | * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. | ||
| 695 | * @c: UBIFS file-system description object | ||
| 696 | * @unclean: indicates recovery from unclean unmount | ||
| 697 | * @read_only: indicates read only mount | ||
| 698 | * | ||
| 699 | * This function is called when mounting to erase orphans from the previous | ||
| 700 | * session. If UBIFS was not unmounted cleanly, then the inodes recorded as | ||
| 701 | * orphans are deleted. | ||
| 702 | */ | ||
| 703 | int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) | ||
| 704 | { | ||
| 705 | int err = 0; | ||
| 706 | |||
| 707 | c->max_orphans = tot_avail_orphs(c); | ||
| 708 | |||
| 709 | if (!read_only) { | ||
| 710 | c->orph_buf = vmalloc(c->leb_size); | ||
| 711 | if (!c->orph_buf) | ||
| 712 | return -ENOMEM; | ||
| 713 | } | ||
| 714 | |||
| 715 | if (unclean) | ||
| 716 | err = kill_orphans(c); | ||
| 717 | else if (!read_only) | ||
| 718 | err = clear_orphans(c); | ||
| 719 | |||
| 720 | return err; | ||
| 721 | } | ||
| 722 | |||
| 723 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 724 | |||
/* An orphan inode number as read back from the orphan area on flash */
struct check_orphan {
	struct rb_node rb;	/* link in the check tree (keyed by inum) */
	ino_t inum;		/* orphan inode number */
};

/* State accumulated while walking the index in dbg_check_orphans() */
struct check_info {
	unsigned long last_ino;		/* last inode number seen */
	unsigned long tot_inos;		/* count of distinct inodes visited */
	unsigned long missing;		/* orphans that should be recorded but are not */
	unsigned long long leaf_cnt;	/* total leaf nodes visited */
	struct ubifs_ino_node *node;	/* buffer for reading inode nodes */
	struct rb_root root;		/* tree of orphans read from flash */
};
| 738 | |||
| 739 | static int dbg_find_orphan(struct ubifs_info *c, ino_t inum) | ||
| 740 | { | ||
| 741 | struct ubifs_orphan *o; | ||
| 742 | struct rb_node *p; | ||
| 743 | |||
| 744 | spin_lock(&c->orphan_lock); | ||
| 745 | p = c->orph_tree.rb_node; | ||
| 746 | while (p) { | ||
| 747 | o = rb_entry(p, struct ubifs_orphan, rb); | ||
| 748 | if (inum < o->inum) | ||
| 749 | p = p->rb_left; | ||
| 750 | else if (inum > o->inum) | ||
| 751 | p = p->rb_right; | ||
| 752 | else { | ||
| 753 | spin_unlock(&c->orphan_lock); | ||
| 754 | return 1; | ||
| 755 | } | ||
| 756 | } | ||
| 757 | spin_unlock(&c->orphan_lock); | ||
| 758 | return 0; | ||
| 759 | } | ||
| 760 | |||
/* Insert @inum into the check tree; duplicates are silently ignored */
static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum)
{
	struct check_orphan *orphan, *o;
	struct rb_node **p, *parent = NULL;

	orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS);
	if (!orphan)
		return -ENOMEM;
	orphan->inum = inum;

	/* Standard rb-tree insertion keyed by inode number */
	p = &root->rb_node;
	while (*p) {
		parent = *p;
		o = rb_entry(parent, struct check_orphan, rb);
		if (inum < o->inum)
			p = &(*p)->rb_left;
		else if (inum > o->inum)
			p = &(*p)->rb_right;
		else {
			/* Duplicate - keep the existing entry */
			kfree(orphan);
			return 0;
		}
	}
	rb_link_node(&orphan->rb, parent, p);
	rb_insert_color(&orphan->rb, root);
	return 0;
}
| 788 | |||
| 789 | static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) | ||
| 790 | { | ||
| 791 | struct check_orphan *o; | ||
| 792 | struct rb_node *p; | ||
| 793 | |||
| 794 | p = root->rb_node; | ||
| 795 | while (p) { | ||
| 796 | o = rb_entry(p, struct check_orphan, rb); | ||
| 797 | if (inum < o->inum) | ||
| 798 | p = p->rb_left; | ||
| 799 | else if (inum > o->inum) | ||
| 800 | p = p->rb_right; | ||
| 801 | else | ||
| 802 | return 1; | ||
| 803 | } | ||
| 804 | return 0; | ||
| 805 | } | ||
| 806 | |||
/* Free every node of the check tree */
static void dbg_free_check_tree(struct rb_root *root)
{
	struct rb_node *this = root->rb_node;
	struct check_orphan *o;

	/*
	 * Iterative post-order traversal: descend to a leaf, free it, and
	 * detach it from its parent so it is never visited again.
	 */
	while (this) {
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		o = rb_entry(this, struct check_orphan, rb);
		this = rb_parent(this);
		if (this) {
			/* Unlink the node being freed from its parent */
			if (this->rb_left == &o->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		kfree(o);
	}
}
| 831 | |||
/*
 * TNC leaf callback: verify that every inode with nlink 0 found in the
 * index is recorded as an orphan either in memory or on flash.
 */
static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
			    void *priv)
{
	struct check_info *ci = priv;
	ino_t inum;
	int err;

	inum = key_inum(c, &zbr->key);
	if (inum != ci->last_ino) {
		/* Lowest node type is the inode node, so it comes first */
		if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
			ubifs_err("found orphan node ino %lu, type %d", inum,
				  key_type(c, &zbr->key));
		ci->last_ino = inum;
		ci->tot_inos += 1;
		err = ubifs_tnc_read_node(c, zbr, ci->node);
		if (err) {
			ubifs_err("node read failed, error %d", err);
			return err;
		}
		if (ci->node->nlink == 0)
			/* Must be recorded as an orphan */
			if (!dbg_find_check_orphan(&ci->root, inum) &&
			    !dbg_find_orphan(c, inum)) {
				ubifs_err("missing orphan, ino %lu", inum);
				ci->missing += 1;
			}
	}
	ci->leaf_cnt += 1;
	return 0;
}
| 863 | |||
| 864 | static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) | ||
| 865 | { | ||
| 866 | struct ubifs_scan_node *snod; | ||
| 867 | struct ubifs_orph_node *orph; | ||
| 868 | ino_t inum; | ||
| 869 | int i, n, err; | ||
| 870 | |||
| 871 | list_for_each_entry(snod, &sleb->nodes, list) { | ||
| 872 | cond_resched(); | ||
| 873 | if (snod->type != UBIFS_ORPH_NODE) | ||
| 874 | continue; | ||
| 875 | orph = snod->node; | ||
| 876 | n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; | ||
| 877 | for (i = 0; i < n; i++) { | ||
| 878 | inum = le64_to_cpu(orph->inos[i]); | ||
| 879 | err = dbg_ins_check_orphan(&ci->root, inum); | ||
| 880 | if (err) | ||
| 881 | return err; | ||
| 882 | } | ||
| 883 | } | ||
| 884 | return 0; | ||
| 885 | } | ||
| 886 | |||
| 887 | static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) | ||
| 888 | { | ||
| 889 | int lnum, err = 0; | ||
| 890 | |||
| 891 | /* Check no-orphans flag and skip this if no orphans */ | ||
| 892 | if (c->no_orphs) | ||
| 893 | return 0; | ||
| 894 | |||
| 895 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { | ||
| 896 | struct ubifs_scan_leb *sleb; | ||
| 897 | |||
| 898 | sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); | ||
| 899 | if (IS_ERR(sleb)) { | ||
| 900 | err = PTR_ERR(sleb); | ||
| 901 | break; | ||
| 902 | } | ||
| 903 | |||
| 904 | err = dbg_read_orphans(ci, sleb); | ||
| 905 | ubifs_scan_destroy(sleb); | ||
| 906 | if (err) | ||
| 907 | break; | ||
| 908 | } | ||
| 909 | |||
| 910 | return err; | ||
| 911 | } | ||
| 912 | |||
| 913 | static int dbg_check_orphans(struct ubifs_info *c) | ||
| 914 | { | ||
| 915 | struct check_info ci; | ||
| 916 | int err; | ||
| 917 | |||
| 918 | if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) | ||
| 919 | return 0; | ||
| 920 | |||
| 921 | ci.last_ino = 0; | ||
| 922 | ci.tot_inos = 0; | ||
| 923 | ci.missing = 0; | ||
| 924 | ci.leaf_cnt = 0; | ||
| 925 | ci.root = RB_ROOT; | ||
| 926 | ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); | ||
| 927 | if (!ci.node) { | ||
| 928 | ubifs_err("out of memory"); | ||
| 929 | return -ENOMEM; | ||
| 930 | } | ||
| 931 | |||
| 932 | err = dbg_scan_orphans(c, &ci); | ||
| 933 | if (err) | ||
| 934 | goto out; | ||
| 935 | |||
| 936 | err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); | ||
| 937 | if (err) { | ||
| 938 | ubifs_err("cannot scan TNC, error %d", err); | ||
| 939 | goto out; | ||
| 940 | } | ||
| 941 | |||
| 942 | if (ci.missing) { | ||
| 943 | ubifs_err("%lu missing orphan(s)", ci.missing); | ||
| 944 | err = -EINVAL; | ||
| 945 | goto out; | ||
| 946 | } | ||
| 947 | |||
| 948 | dbg_cmt("last inode number is %lu", ci.last_ino); | ||
| 949 | dbg_cmt("total number of inodes is %lu", ci.tot_inos); | ||
| 950 | dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt); | ||
| 951 | |||
| 952 | out: | ||
| 953 | dbg_free_check_tree(&ci.root); | ||
| 954 | kfree(ci.node); | ||
| 955 | return err; | ||
| 956 | } | ||
| 957 | |||
| 958 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c new file mode 100644 index 000000000000..77d26c141cf6 --- /dev/null +++ b/fs/ubifs/recovery.c | |||
| @@ -0,0 +1,1519 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements functions needed to recover from unclean un-mounts. | ||
| 25 | * When UBIFS is mounted, it checks a flag on the master node to determine if | ||
 * an un-mount was completed successfully. If not, the process of mounting
 * incorporates additional checking and fixing of on-flash data structures.
| 28 | * UBIFS always cleans away all remnants of an unclean un-mount, so that | ||
| 29 | * errors do not accumulate. However UBIFS defers recovery if it is mounted | ||
| 30 | * read-only, and the flash is not modified in that case. | ||
| 31 | */ | ||
| 32 | |||
| 33 | #include <linux/crc32.h> | ||
| 34 | #include "ubifs.h" | ||
| 35 | |||
/**
 * is_empty - determine whether a buffer is empty (contains all 0xff).
 * @buf: buffer to check
 * @len: length of buffer
 *
 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
 * %0 is returned.
 */
static int is_empty(void *buf, int len)
{
	uint8_t *byte = buf;
	uint8_t *end = byte + len;

	while (byte < end)
		if (*byte++ != 0xff)
			return 0;
	return 1;
}
| 54 | |||
/**
 * get_master_node - get the last valid master node allowing for corruption.
 * @c: UBIFS file-system description object
 * @lnum: LEB number
 * @pbuf: buffer containing the LEB read, is returned here (caller must vfree)
 * @mst: master node, if found, is returned here (points into @pbuf)
 * @cor: corruption, if found, is returned here (points into @pbuf)
 *
 * This function allocates a buffer, reads the LEB into it, and finds and
 * returns the last valid master node allowing for one area of corruption.
 * The corrupt area, if there is one, must be consistent with the assumption
 * that it is the result of an unclean unmount while the master node was being
 * written. Under those circumstances, it is valid to use the previously written
 * master node.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
			   struct ubifs_mst_node **mst, void **cor)
{
	/* Master nodes are written at aligned multiples of 'sz' in the LEB */
	const int sz = c->mst_node_alsz;
	int err, offs, len;
	void *sbuf, *buf;

	sbuf = vmalloc(c->leb_size);
	if (!sbuf)
		return -ENOMEM;

	/*
	 * -EBADMSG (uncorrectable data) is tolerated here: corruption is
	 * exactly what this function is looking for, and is analyzed below.
	 */
	err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
	if (err && err != -EBADMSG)
		goto out_free;

	/* Find the first position that is definitely not a node */
	offs = 0;
	buf = sbuf;
	len = c->leb_size;
	while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
		struct ubifs_ch *ch = buf;

		if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
			break;
		offs += sz;
		buf += sz;
		len -= sz;
	}
	/* See if there was a valid master node before that */
	if (offs) {
		int ret;

		/* Step back to the last candidate that had a valid magic */
		offs -= sz;
		buf -= sz;
		len += sz;
		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
		if (ret != SCANNED_A_NODE && offs) {
			/* Could have been corruption so check one place back */
			offs -= sz;
			buf -= sz;
			len += sz;
			ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
			if (ret != SCANNED_A_NODE)
				/*
				 * We accept only one area of corruption because
				 * we are assuming that it was caused while
				 * trying to write a master node.
				 */
				goto out_err;
		}
		if (ret == SCANNED_A_NODE) {
			struct ubifs_ch *ch = buf;

			if (ch->node_type != UBIFS_MST_NODE)
				goto out_err;
			dbg_rcvry("found a master node at %d:%d", lnum, offs);
			*mst = buf;
			/* Move past the valid node to examine what follows */
			offs += sz;
			buf += sz;
			len -= sz;
		}
	}
	/* Check for corruption */
	if (offs < c->leb_size) {
		if (!is_empty(buf, min_t(int, len, sz))) {
			*cor = buf;
			dbg_rcvry("found corruption at %d:%d", lnum, offs);
		}
		offs += sz;
		buf += sz;
		len -= sz;
	}
	/* Check remaining empty space */
	if (offs < c->leb_size)
		if (!is_empty(buf, len))
			goto out_err;
	/* Hand the whole LEB buffer to the caller */
	*pbuf = sbuf;
	return 0;

out_err:
	err = -EINVAL;
out_free:
	vfree(sbuf);
	*mst = NULL;
	*cor = NULL;
	return err;
}
| 159 | |||
| 160 | /** | ||
| 161 | * write_rcvrd_mst_node - write recovered master node. | ||
| 162 | * @c: UBIFS file-system description object | ||
| 163 | * @mst: master node | ||
| 164 | * | ||
| 165 | * This function returns %0 on success and a negative error code on failure. | ||
| 166 | */ | ||
| 167 | static int write_rcvrd_mst_node(struct ubifs_info *c, | ||
| 168 | struct ubifs_mst_node *mst) | ||
| 169 | { | ||
| 170 | int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; | ||
| 171 | uint32_t save_flags; | ||
| 172 | |||
| 173 | dbg_rcvry("recovery"); | ||
| 174 | |||
| 175 | save_flags = mst->flags; | ||
| 176 | mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY); | ||
| 177 | |||
| 178 | ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); | ||
| 179 | err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); | ||
| 180 | if (err) | ||
| 181 | goto out; | ||
| 182 | err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); | ||
| 183 | if (err) | ||
| 184 | goto out; | ||
| 185 | out: | ||
| 186 | mst->flags = save_flags; | ||
| 187 | return err; | ||
| 188 | } | ||
| 189 | |||
/**
 * ubifs_recover_master_node - recover the master node.
 * @c: UBIFS file-system description object
 *
 * This function recovers the master node from corruption that may occur due to
 * an unclean unmount. Both master-node LEBs are examined, and the valid
 * node / corruption layout found in each is checked against the possible
 * states an interrupted master-node write could have left behind. The chosen
 * node is copied into 'c->mst_node'; in read-only mode a copy is also saved
 * in 'c->rcvrd_mst_node' for a later remount to read-write.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_recover_master_node(struct ubifs_info *c)
{
	void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
	struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
	const int sz = c->mst_node_alsz;
	int err, offs1, offs2;

	dbg_rcvry("recovery");

	/* Examine both copies of the master node area */
	err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
	if (err)
		goto out_free;

	err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
	if (err)
		goto out_free;

	if (mst1) {
		/* Offsets within the LEB tell which node was written last */
		offs1 = (void *)mst1 - buf1;
		if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
		    (offs1 == 0 && !cor1)) {
			/*
			 * mst1 was written by recovery at offset 0 with no
			 * corruption.
			 */
			dbg_rcvry("recovery recovery");
			mst = mst1;
		} else if (mst2) {
			offs2 = (void *)mst2 - buf2;
			if (offs1 == offs2) {
				/* Same offset, so must be the same */
				if (memcmp((void *)mst1 + UBIFS_CH_SZ,
					   (void *)mst2 + UBIFS_CH_SZ,
					   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
					goto out_err;
				mst = mst1;
			} else if (offs2 + sz == offs1) {
				/* 1st LEB was written, 2nd was not */
				if (cor1)
					goto out_err;
				mst = mst1;
			} else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
				/* 1st LEB was unmapped and written, 2nd not */
				if (cor1)
					goto out_err;
				mst = mst1;
			} else
				goto out_err;
		} else {
			/*
			 * 2nd LEB was unmapped and about to be written, so
			 * there must be only one master node in the first LEB
			 * and no corruption.
			 */
			if (offs1 != 0 || cor1)
				goto out_err;
			mst = mst1;
		}
	} else {
		if (!mst2)
			goto out_err;
		/*
		 * 1st LEB was unmapped and about to be written, so there must
		 * be no room left in 2nd LEB.
		 */
		offs2 = (void *)mst2 - buf2;
		if (offs2 + sz + sz <= c->leb_size)
			goto out_err;
		mst = mst2;
	}

	dbg_rcvry("recovered master node from LEB %d",
		  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));

	memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);

	if ((c->vfs_sb->s_flags & MS_RDONLY)) {
		/* Read-only mode. Keep a copy for switching to rw mode */
		c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
		if (!c->rcvrd_mst_node) {
			err = -ENOMEM;
			goto out_free;
		}
		memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
	} else {
		/* Write the recovered master node */
		c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
		err = write_rcvrd_mst_node(c, c->mst_node);
		if (err)
			goto out_free;
	}

	vfree(buf2);
	vfree(buf1);

	return 0;

out_err:
	err = -EINVAL;
out_free:
	ubifs_err("failed to recover master node");
	if (mst1) {
		dbg_err("dumping first master node");
		dbg_dump_node(c, mst1);
	}
	if (mst2) {
		dbg_err("dumping second master node");
		dbg_dump_node(c, mst2);
	}
	vfree(buf2);
	vfree(buf1);
	return err;
}
| 312 | |||
| 313 | /** | ||
| 314 | * ubifs_write_rcvrd_mst_node - write the recovered master node. | ||
| 315 | * @c: UBIFS file-system description object | ||
| 316 | * | ||
| 317 | * This function writes the master node that was recovered during mounting in | ||
| 318 | * read-only mode and must now be written because we are remounting rw. | ||
| 319 | * | ||
| 320 | * This function returns %0 on success and a negative error code on failure. | ||
| 321 | */ | ||
| 322 | int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) | ||
| 323 | { | ||
| 324 | int err; | ||
| 325 | |||
| 326 | if (!c->rcvrd_mst_node) | ||
| 327 | return 0; | ||
| 328 | c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); | ||
| 329 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); | ||
| 330 | err = write_rcvrd_mst_node(c, c->rcvrd_mst_node); | ||
| 331 | if (err) | ||
| 332 | return err; | ||
| 333 | kfree(c->rcvrd_mst_node); | ||
| 334 | c->rcvrd_mst_node = NULL; | ||
| 335 | return 0; | ||
| 336 | } | ||
| 337 | |||
| 338 | /** | ||
| 339 | * is_last_write - determine if an offset was in the last write to a LEB. | ||
| 340 | * @c: UBIFS file-system description object | ||
| 341 | * @buf: buffer to check | ||
| 342 | * @offs: offset to check | ||
| 343 | * | ||
| 344 | * This function returns %1 if @offs was in the last write to the LEB whose data | ||
| 345 | * is in @buf, otherwise %0 is returned. The determination is made by checking | ||
| 346 | * for subsequent empty space starting from the next min_io_size boundary (or a | ||
| 347 | * bit less than the common header size if min_io_size is one). | ||
| 348 | */ | ||
| 349 | static int is_last_write(const struct ubifs_info *c, void *buf, int offs) | ||
| 350 | { | ||
| 351 | int empty_offs; | ||
| 352 | int check_len; | ||
| 353 | uint8_t *p; | ||
| 354 | |||
| 355 | if (c->min_io_size == 1) { | ||
| 356 | check_len = c->leb_size - offs; | ||
| 357 | p = buf + check_len; | ||
| 358 | for (; check_len > 0; check_len--) | ||
| 359 | if (*--p != 0xff) | ||
| 360 | break; | ||
| 361 | /* | ||
| 362 | * 'check_len' is the size of the corruption which cannot be | ||
| 363 | * more than the size of 1 node if it was caused by an unclean | ||
| 364 | * unmount. | ||
| 365 | */ | ||
| 366 | if (check_len > UBIFS_MAX_NODE_SZ) | ||
| 367 | return 0; | ||
| 368 | return 1; | ||
| 369 | } | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Round up to the next c->min_io_size boundary i.e. 'offs' is in the | ||
| 373 | * last wbuf written. After that should be empty space. | ||
| 374 | */ | ||
| 375 | empty_offs = ALIGN(offs + 1, c->min_io_size); | ||
| 376 | check_len = c->leb_size - empty_offs; | ||
| 377 | p = buf + empty_offs - offs; | ||
| 378 | |||
| 379 | for (; check_len > 0; check_len--) | ||
| 380 | if (*p++ != 0xff) | ||
| 381 | return 0; | ||
| 382 | return 1; | ||
| 383 | } | ||
| 384 | |||
/**
 * clean_buf - clean the data from an LEB sitting in a buffer.
 * @c: UBIFS file-system description object
 * @buf: buffer to clean
 * @lnum: LEB number to clean
 * @offs: offset from which to clean
 * @len: length of buffer
 *
 * This function pads up to the next min_io_size boundary (if there is one) and
 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
 * min_io_size boundary (if there is one).
 */
static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
		      int *offs, int *len)
{
	int empty_offs, pad_len;

	/*
	 * NOTE(review): 'lnum' is only used for the debug message below;
	 * this self-assignment presumably keeps the parameter "used" when
	 * debug output is compiled out - confirm before removing.
	 */
	lnum = lnum;
	dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);

	if (c->min_io_size == 1) {
		/* No I/O alignment: simply erase the whole tail to 0xff */
		memset(*buf, 0xff, c->leb_size - *offs);
		return;
	}

	/* Nodes are 8-byte aligned, so the cleaning point must be too */
	ubifs_assert(!(*offs & 7));
	empty_offs = ALIGN(*offs, c->min_io_size);
	pad_len = empty_offs - *offs;
	/* Pad up to the min_io_size boundary... */
	ubifs_pad(c, *buf, pad_len);
	*offs += pad_len;
	*buf += pad_len;
	*len -= pad_len;
	/* ...and set everything after it to empty space (0xff) */
	memset(*buf, 0xff, c->leb_size - empty_offs);
}
| 419 | |||
| 420 | /** | ||
| 421 | * no_more_nodes - determine if there are no more nodes in a buffer. | ||
| 422 | * @c: UBIFS file-system description object | ||
| 423 | * @buf: buffer to check | ||
| 424 | * @len: length of buffer | ||
| 425 | * @lnum: LEB number of the LEB from which @buf was read | ||
| 426 | * @offs: offset from which @buf was read | ||
| 427 | * | ||
| 428 | * This function scans @buf for more nodes and returns %0 is a node is found and | ||
| 429 | * %1 if no more nodes are found. | ||
| 430 | */ | ||
| 431 | static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, | ||
| 432 | int lnum, int offs) | ||
| 433 | { | ||
| 434 | int skip, next_offs = 0; | ||
| 435 | |||
| 436 | if (len > UBIFS_DATA_NODE_SZ) { | ||
| 437 | struct ubifs_ch *ch = buf; | ||
| 438 | int dlen = le32_to_cpu(ch->len); | ||
| 439 | |||
| 440 | if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && | ||
| 441 | dlen <= UBIFS_MAX_DATA_NODE_SZ) | ||
| 442 | /* The corrupt node looks like a data node */ | ||
| 443 | next_offs = ALIGN(offs + dlen, 8); | ||
| 444 | } | ||
| 445 | |||
| 446 | if (c->min_io_size == 1) | ||
| 447 | skip = 8; | ||
| 448 | else | ||
| 449 | skip = ALIGN(offs + 1, c->min_io_size) - offs; | ||
| 450 | |||
| 451 | offs += skip; | ||
| 452 | buf += skip; | ||
| 453 | len -= skip; | ||
| 454 | while (len > 8) { | ||
| 455 | struct ubifs_ch *ch = buf; | ||
| 456 | uint32_t magic = le32_to_cpu(ch->magic); | ||
| 457 | int ret; | ||
| 458 | |||
| 459 | if (magic == UBIFS_NODE_MAGIC) { | ||
| 460 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); | ||
| 461 | if (ret == SCANNED_A_NODE || ret > 0) { | ||
| 462 | /* | ||
| 463 | * There is a small chance this is just data in | ||
| 464 | * a data node, so check that possibility. e.g. | ||
| 465 | * this is part of a file that itself contains | ||
| 466 | * a UBIFS image. | ||
| 467 | */ | ||
| 468 | if (next_offs && offs + le32_to_cpu(ch->len) <= | ||
| 469 | next_offs) | ||
| 470 | continue; | ||
| 471 | dbg_rcvry("unexpected node at %d:%d", lnum, | ||
| 472 | offs); | ||
| 473 | return 0; | ||
| 474 | } | ||
| 475 | } | ||
| 476 | offs += 8; | ||
| 477 | buf += 8; | ||
| 478 | len -= 8; | ||
| 479 | } | ||
| 480 | return 1; | ||
| 481 | } | ||
| 482 | |||
/**
 * fix_unclean_leb - fix an unclean LEB.
 * @c: UBIFS file-system description object
 * @sleb: scanned LEB information
 * @start: offset where scan started
 *
 * If mounted read-only, the LEB is only queued on 'c->unclean_leb_list' to be
 * fixed later; otherwise the cleaned data is written back to the flash
 * immediately. Returns %0 on success and a negative error code on failure.
 */
static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
			   int start)
{
	int lnum = sleb->lnum, endpt = start;

	/* Get the end offset of the last node we are keeping */
	if (!list_empty(&sleb->nodes)) {
		struct ubifs_scan_node *snod;

		snod = list_entry(sleb->nodes.prev,
				  struct ubifs_scan_node, list);
		endpt = snod->offs + snod->len;
	}

	if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
		/* Add to recovery list */
		struct ubifs_unclean_leb *ucleb;

		dbg_rcvry("need to fix LEB %d start %d endpt %d",
			  lnum, start, sleb->endpt);
		ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
		if (!ucleb)
			return -ENOMEM;
		ucleb->lnum = lnum;
		ucleb->endpt = endpt;
		list_add_tail(&ucleb->list, &c->unclean_leb_list);
	} else {
		/* Write the fixed LEB back to flash */
		int err;

		dbg_rcvry("fixing LEB %d start %d endpt %d",
			  lnum, start, sleb->endpt);
		if (endpt == 0) {
			/* Nothing to keep - just unmap the whole LEB */
			err = ubifs_leb_unmap(c, lnum);
			if (err)
				return err;
		} else {
			int len = ALIGN(endpt, c->min_io_size);

			/* Re-read the part before the scan start, which the
			 * scan buffer does not contain */
			if (start) {
				err = ubi_read(c->ubi, lnum, sleb->buf, 0,
					       start);
				if (err)
					return err;
			}
			/* Pad to min_io_size */
			if (len > endpt) {
				int pad_len = len - ALIGN(endpt, 8);

				if (pad_len > 0) {
					void *buf = sleb->buf + len - pad_len;

					ubifs_pad(c, buf, pad_len);
				}
			}
			err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
					     UBI_UNKNOWN);
			if (err)
				return err;
		}
	}
	return 0;
}
| 552 | |||
| 553 | /** | ||
| 554 | * drop_incomplete_group - drop nodes from an incomplete group. | ||
| 555 | * @sleb: scanned LEB information | ||
| 556 | * @offs: offset of dropped nodes is returned here | ||
| 557 | * | ||
| 558 | * This function returns %1 if nodes are dropped and %0 otherwise. | ||
| 559 | */ | ||
| 560 | static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | ||
| 561 | { | ||
| 562 | int dropped = 0; | ||
| 563 | |||
| 564 | while (!list_empty(&sleb->nodes)) { | ||
| 565 | struct ubifs_scan_node *snod; | ||
| 566 | struct ubifs_ch *ch; | ||
| 567 | |||
| 568 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, | ||
| 569 | list); | ||
| 570 | ch = snod->node; | ||
| 571 | if (ch->group_type != UBIFS_IN_NODE_GROUP) | ||
| 572 | return dropped; | ||
| 573 | dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); | ||
| 574 | *offs = snod->offs; | ||
| 575 | list_del(&snod->list); | ||
| 576 | kfree(snod); | ||
| 577 | sleb->nodes_cnt -= 1; | ||
| 578 | dropped = 1; | ||
| 579 | } | ||
| 580 | return dropped; | ||
| 581 | } | ||
| 582 | |||
/**
 * ubifs_recover_leb - scan and recover a LEB.
 * @c: UBIFS file-system description object
 * @lnum: LEB number
 * @offs: offset
 * @sbuf: LEB-sized buffer to use
 * @grouped: nodes may be grouped for recovery
 *
 * This function does a scan of a LEB, but caters for errors that might have
 * been caused by the unclean unmount from which we are attempting to recover.
 *
 * This function returns the scanned information on success and an error
 * pointer on failure (%-EUCLEAN if the LEB contains corruption that cannot
 * be attributed to the unclean unmount).
 */
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
					 int offs, void *sbuf, int grouped)
{
	int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
	int empty_chkd = 0, start = offs;
	struct ubifs_scan_leb *sleb;
	void *buf = sbuf + offs;

	dbg_rcvry("%d:%d", lnum, offs);

	sleb = ubifs_start_scan(c, lnum, offs, sbuf);
	if (IS_ERR(sleb))
		return sleb;

	/* A read with bit-flips means the LEB must be rewritten */
	if (sleb->ecc)
		need_clean = 1;

	while (len >= 8) {
		int ret;

		dbg_scan("look at LEB %d:%d (%d bytes left)",
			 lnum, offs, len);

		cond_resched();

		/*
		 * Scan quietly until there is an error from which we cannot
		 * recover
		 */
		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);

		if (ret == SCANNED_A_NODE) {
			/* A valid node, and not a padding node */
			struct ubifs_ch *ch = buf;
			int node_len;

			err = ubifs_add_snod(c, sleb, buf, offs);
			if (err)
				goto error;
			node_len = ALIGN(le32_to_cpu(ch->len), 8);
			offs += node_len;
			buf += node_len;
			len -= node_len;
			continue;
		}

		if (ret > 0) {
			/* Padding bytes or a valid padding node */
			offs += ret;
			buf += ret;
			len -= ret;
			continue;
		}

		if (ret == SCANNED_EMPTY_SPACE) {
			if (!is_empty(buf, len)) {
				/*
				 * Non-empty "empty space" is only acceptable
				 * if it belongs to the interrupted last write
				 */
				if (!is_last_write(c, buf, offs))
					break;
				clean_buf(c, &buf, lnum, &offs, &len);
				need_clean = 1;
			}
			empty_chkd = 1;
			break;
		}

		if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
			if (is_last_write(c, buf, offs)) {
				clean_buf(c, &buf, lnum, &offs, &len);
				need_clean = 1;
				empty_chkd = 1;
				break;
			}

		if (ret == SCANNED_A_CORRUPT_NODE)
			if (no_more_nodes(c, buf, len, lnum, offs)) {
				clean_buf(c, &buf, lnum, &offs, &len);
				need_clean = 1;
				empty_chkd = 1;
				break;
			}

		if (quiet) {
			/* Redo the last scan but noisily */
			quiet = 0;
			continue;
		}

		/* Unrecoverable problem - report it */
		switch (ret) {
		case SCANNED_GARBAGE:
			dbg_err("garbage");
			goto corrupted;
		case SCANNED_A_CORRUPT_NODE:
		case SCANNED_A_BAD_PAD_NODE:
			dbg_err("bad node");
			goto corrupted;
		default:
			dbg_err("unknown");
			goto corrupted;
		}
	}

	/* Anything left after the nodes must be empty or the last write */
	if (!empty_chkd && !is_empty(buf, len)) {
		if (is_last_write(c, buf, offs)) {
			clean_buf(c, &buf, lnum, &offs, &len);
			need_clean = 1;
		} else {
			ubifs_err("corrupt empty space at LEB %d:%d",
				  lnum, offs);
			goto corrupted;
		}
	}

	/* Drop nodes from incomplete group */
	if (grouped && drop_incomplete_group(sleb, &offs)) {
		buf = sbuf + offs;
		len = c->leb_size - offs;
		clean_buf(c, &buf, lnum, &offs, &len);
		need_clean = 1;
	}

	/* The end point must be I/O aligned before writing back */
	if (offs % c->min_io_size) {
		clean_buf(c, &buf, lnum, &offs, &len);
		need_clean = 1;
	}

	ubifs_end_scan(c, sleb, lnum, offs);

	if (need_clean) {
		err = fix_unclean_leb(c, sleb, start);
		if (err)
			goto error;
	}

	return sleb;

corrupted:
	ubifs_scanned_corruption(c, lnum, offs, buf);
	err = -EUCLEAN;
error:
	ubifs_err("LEB %d scanning failed", lnum);
	ubifs_scan_destroy(sleb);
	return ERR_PTR(err);
}
| 739 | |||
| 740 | /** | ||
| 741 | * get_cs_sqnum - get commit start sequence number. | ||
| 742 | * @c: UBIFS file-system description object | ||
| 743 | * @lnum: LEB number of commit start node | ||
| 744 | * @offs: offset of commit start node | ||
| 745 | * @cs_sqnum: commit start sequence number is returned here | ||
| 746 | * | ||
| 747 | * This function returns %0 on success and a negative error code on failure. | ||
| 748 | */ | ||
| 749 | static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, | ||
| 750 | unsigned long long *cs_sqnum) | ||
| 751 | { | ||
| 752 | struct ubifs_cs_node *cs_node = NULL; | ||
| 753 | int err, ret; | ||
| 754 | |||
| 755 | dbg_rcvry("at %d:%d", lnum, offs); | ||
| 756 | cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); | ||
| 757 | if (!cs_node) | ||
| 758 | return -ENOMEM; | ||
| 759 | if (c->leb_size - offs < UBIFS_CS_NODE_SZ) | ||
| 760 | goto out_err; | ||
| 761 | err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); | ||
| 762 | if (err && err != -EBADMSG) | ||
| 763 | goto out_free; | ||
| 764 | ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); | ||
| 765 | if (ret != SCANNED_A_NODE) { | ||
| 766 | dbg_err("Not a valid node"); | ||
| 767 | goto out_err; | ||
| 768 | } | ||
| 769 | if (cs_node->ch.node_type != UBIFS_CS_NODE) { | ||
| 770 | dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); | ||
| 771 | goto out_err; | ||
| 772 | } | ||
| 773 | if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { | ||
| 774 | dbg_err("CS node cmt_no %llu != current cmt_no %llu", | ||
| 775 | (unsigned long long)le64_to_cpu(cs_node->cmt_no), | ||
| 776 | c->cmt_no); | ||
| 777 | goto out_err; | ||
| 778 | } | ||
| 779 | *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); | ||
| 780 | dbg_rcvry("commit start sqnum %llu", *cs_sqnum); | ||
| 781 | kfree(cs_node); | ||
| 782 | return 0; | ||
| 783 | |||
| 784 | out_err: | ||
| 785 | err = -EINVAL; | ||
| 786 | out_free: | ||
| 787 | ubifs_err("failed to get CS sqnum"); | ||
| 788 | kfree(cs_node); | ||
| 789 | return err; | ||
| 790 | } | ||
| 791 | |||
/**
 * ubifs_recover_log_leb - scan and recover a log LEB.
 * @c: UBIFS file-system description object
 * @lnum: LEB number
 * @offs: offset
 * @sbuf: LEB-sized buffer to use
 *
 * This function does a scan of a LEB, but caters for errors that might have
 * been caused by the unclean unmount from which we are attempting to recover.
 *
 * This function returns the scanned information on success and the error code
 * wrapped in an ERR_PTR on failure (note: it does not return %0 - the return
 * value is a pointer).
 */
struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
					     int offs, void *sbuf)
{
	struct ubifs_scan_leb *sleb;
	int next_lnum;

	dbg_rcvry("LEB %d", lnum);
	/* The log LEBs form a circular buffer - wrap at the last log LEB */
	next_lnum = lnum + 1;
	if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
		next_lnum = UBIFS_LOG_LNUM;
	if (next_lnum != c->ltail_lnum) {
		/*
		 * We can only recover at the end of the log, so check that the
		 * next log LEB is empty or out of date.
		 */
		sleb = ubifs_scan(c, next_lnum, 0, sbuf);
		if (IS_ERR(sleb))
			return sleb;
		if (sleb->nodes_cnt) {
			struct ubifs_scan_node *snod;
			unsigned long long cs_sqnum = c->cs_sqnum;

			/* Oldest node in the next log LEB */
			snod = list_entry(sleb->nodes.next,
					  struct ubifs_scan_node, list);
			if (cs_sqnum == 0) {
				int err;

				/*
				 * Commit start sqnum not known yet - read it
				 * from the CS node at @lnum:@offs.
				 */
				err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
				if (err) {
					ubifs_scan_destroy(sleb);
					return ERR_PTR(err);
				}
			}
			if (snod->sqnum > cs_sqnum) {
				/*
				 * The next LEB holds nodes newer than the
				 * commit start, so @lnum is not the log tail
				 * after all - nothing we can safely fix.
				 */
				ubifs_err("unrecoverable log corruption "
					  "in LEB %d", lnum);
				ubifs_scan_destroy(sleb);
				return ERR_PTR(-EUCLEAN);
			}
		}
		ubifs_scan_destroy(sleb);
	}
	/* The LEB itself is recovered like any other unclean LEB */
	return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
}
| 848 | |||
| 849 | /** | ||
| 850 | * recover_head - recover a head. | ||
| 851 | * @c: UBIFS file-system description object | ||
| 852 | * @lnum: LEB number of head to recover | ||
| 853 | * @offs: offset of head to recover | ||
| 854 | * @sbuf: LEB-sized buffer to use | ||
| 855 | * | ||
| 856 | * This function ensures that there is no data on the flash at a head location. | ||
| 857 | * | ||
| 858 | * This function returns %0 on success and a negative error code on failure. | ||
| 859 | */ | ||
| 860 | static int recover_head(const struct ubifs_info *c, int lnum, int offs, | ||
| 861 | void *sbuf) | ||
| 862 | { | ||
| 863 | int len, err, need_clean = 0; | ||
| 864 | |||
| 865 | if (c->min_io_size > 1) | ||
| 866 | len = c->min_io_size; | ||
| 867 | else | ||
| 868 | len = 512; | ||
| 869 | if (offs + len > c->leb_size) | ||
| 870 | len = c->leb_size - offs; | ||
| 871 | |||
| 872 | if (!len) | ||
| 873 | return 0; | ||
| 874 | |||
| 875 | /* Read at the head location and check it is empty flash */ | ||
| 876 | err = ubi_read(c->ubi, lnum, sbuf, offs, len); | ||
| 877 | if (err) | ||
| 878 | need_clean = 1; | ||
| 879 | else { | ||
| 880 | uint8_t *p = sbuf; | ||
| 881 | |||
| 882 | while (len--) | ||
| 883 | if (*p++ != 0xff) { | ||
| 884 | need_clean = 1; | ||
| 885 | break; | ||
| 886 | } | ||
| 887 | } | ||
| 888 | |||
| 889 | if (need_clean) { | ||
| 890 | dbg_rcvry("cleaning head at %d:%d", lnum, offs); | ||
| 891 | if (offs == 0) | ||
| 892 | return ubifs_leb_unmap(c, lnum); | ||
| 893 | err = ubi_read(c->ubi, lnum, sbuf, 0, offs); | ||
| 894 | if (err) | ||
| 895 | return err; | ||
| 896 | return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); | ||
| 897 | } | ||
| 898 | |||
| 899 | return 0; | ||
| 900 | } | ||
| 901 | |||
| 902 | /** | ||
| 903 | * ubifs_recover_inl_heads - recover index and LPT heads. | ||
| 904 | * @c: UBIFS file-system description object | ||
| 905 | * @sbuf: LEB-sized buffer to use | ||
| 906 | * | ||
| 907 | * This function ensures that there is no data on the flash at the index and | ||
| 908 | * LPT head locations. | ||
| 909 | * | ||
| 910 | * This deals with the recovery of a half-completed journal commit. UBIFS is | ||
| 911 | * careful never to overwrite the last version of the index or the LPT. Because | ||
| 912 | * the index and LPT are wandering trees, data from a half-completed commit will | ||
| 913 | * not be referenced anywhere in UBIFS. The data will be either in LEBs that are | ||
| 914 | * assumed to be empty and will be unmapped anyway before use, or in the index | ||
| 915 | * and LPT heads. | ||
| 916 | * | ||
| 917 | * This function returns %0 on success and a negative error code on failure. | ||
| 918 | */ | ||
| 919 | int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) | ||
| 920 | { | ||
| 921 | int err; | ||
| 922 | |||
| 923 | ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); | ||
| 924 | |||
| 925 | dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); | ||
| 926 | err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); | ||
| 927 | if (err) | ||
| 928 | return err; | ||
| 929 | |||
| 930 | dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); | ||
| 931 | err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); | ||
| 932 | if (err) | ||
| 933 | return err; | ||
| 934 | |||
| 935 | return 0; | ||
| 936 | } | ||
| 937 | |||
/**
 * clean_an_unclean_leb - read and write a LEB to remove corruption.
 * @c: UBIFS file-system description object
 * @ucleb: unclean LEB information
 * @sbuf: LEB-sized buffer to use
 *
 * This function reads a LEB up to a point pre-determined by the mount recovery,
 * checks the nodes, and writes the result back to the flash, thereby cleaning
 * off any following corruption, or non-fatal ECC errors.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int clean_an_unclean_leb(const struct ubifs_info *c,
				struct ubifs_unclean_leb *ucleb, void *sbuf)
{
	int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
	void *buf = sbuf;

	dbg_rcvry("LEB %d len %d", lnum, len);

	if (len == 0) {
		/* Nothing to read, just unmap it */
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
		return 0;
	}

	/* Read the pre-determined valid region; tolerate correctable ECC */
	err = ubi_read(c->ubi, lnum, buf, offs, len);
	if (err && err != -EBADMSG)
		return err;

	/* Walk node by node; 8 bytes is the minimum node alignment */
	while (len >= 8) {
		int ret;

		cond_resched();

		/* Scan quietly until there is an error */
		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);

		if (ret == SCANNED_A_NODE) {
			/* A valid node, and not a padding node */
			struct ubifs_ch *ch = buf;
			int node_len;

			node_len = ALIGN(le32_to_cpu(ch->len), 8);
			offs += node_len;
			buf += node_len;
			len -= node_len;
			continue;
		}

		if (ret > 0) {
			/* Padding bytes or a valid padding node */
			offs += ret;
			buf += ret;
			len -= ret;
			continue;
		}

		if (ret == SCANNED_EMPTY_SPACE) {
			/* Empty space before the end point means corruption */
			ubifs_err("unexpected empty space at %d:%d",
				  lnum, offs);
			return -EUCLEAN;
		}

		if (quiet) {
			/* Redo the last scan but noisily */
			quiet = 0;
			continue;
		}

		ubifs_scanned_corruption(c, lnum, offs, buf);
		return -EUCLEAN;
	}

	/* Pad to min_io_size */
	len = ALIGN(ucleb->endpt, c->min_io_size);
	if (len > ucleb->endpt) {
		int pad_len = len - ALIGN(ucleb->endpt, 8);

		if (pad_len > 0) {
			/*
			 * NOTE(review): this addresses c->sbuf while the rest
			 * of the function works on the @sbuf parameter -
			 * presumably callers always pass c->sbuf; confirm.
			 */
			buf = c->sbuf + len - pad_len;
			ubifs_pad(c, buf, pad_len);
		}
	}

	/* Write back the LEB atomically */
	err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
	if (err)
		return err;

	dbg_rcvry("cleaned LEB %d", lnum);

	return 0;
}
| 1034 | |||
| 1035 | /** | ||
| 1036 | * ubifs_clean_lebs - clean LEBs recovered during read-only mount. | ||
| 1037 | * @c: UBIFS file-system description object | ||
| 1038 | * @sbuf: LEB-sized buffer to use | ||
| 1039 | * | ||
| 1040 | * This function cleans a LEB identified during recovery that needs to be | ||
| 1041 | * written but was not because UBIFS was mounted read-only. This happens when | ||
| 1042 | * remounting to read-write mode. | ||
| 1043 | * | ||
| 1044 | * This function returns %0 on success and a negative error code on failure. | ||
| 1045 | */ | ||
| 1046 | int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) | ||
| 1047 | { | ||
| 1048 | dbg_rcvry("recovery"); | ||
| 1049 | while (!list_empty(&c->unclean_leb_list)) { | ||
| 1050 | struct ubifs_unclean_leb *ucleb; | ||
| 1051 | int err; | ||
| 1052 | |||
| 1053 | ucleb = list_entry(c->unclean_leb_list.next, | ||
| 1054 | struct ubifs_unclean_leb, list); | ||
| 1055 | err = clean_an_unclean_leb(c, ucleb, sbuf); | ||
| 1056 | if (err) | ||
| 1057 | return err; | ||
| 1058 | list_del(&ucleb->list); | ||
| 1059 | kfree(ucleb); | ||
| 1060 | } | ||
| 1061 | return 0; | ||
| 1062 | } | ||
| 1063 | |||
/**
 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
 * @c: UBIFS file-system description object
 *
 * Out-of-place garbage collection requires always one empty LEB with which to
 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
 * written to the master node on unmounting. In the case of an unclean unmount
 * the value of gc_lnum recorded in the master node is out of date and cannot
 * be used. Instead, recovery must allocate an empty LEB for this purpose.
 * However, there may not be enough empty space, in which case it must be
 * possible to GC the dirtiest LEB into the GC head LEB.
 *
 * This function also runs the commit which causes the TNC updates from
 * size-recovery and orphans to be written to the flash. That is important to
 * ensure correct replay order for subsequent mounts.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_rcvry_gc_commit(struct ubifs_info *c)
{
	struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
	struct ubifs_lprops lp;
	int lnum, err;

	c->gc_lnum = -1;
	if (wbuf->lnum == -1) {
		dbg_rcvry("no GC head LEB");
		goto find_free;
	}
	/*
	 * See whether the used space in the dirtiest LEB fits in the GC head
	 * LEB.
	 */
	if (wbuf->offs == c->leb_size) {
		dbg_rcvry("no room in GC head LEB");
		goto find_free;
	}
	/* Find a dirty LEB whose used data would fit below wbuf->offs */
	err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
	if (err) {
		if (err == -ENOSPC)
			dbg_err("could not find a dirty LEB");
		return err;
	}
	ubifs_assert(!(lp.flags & LPROPS_INDEX));
	lnum = lp.lnum;
	if (lp.free + lp.dirty == c->leb_size) {
		/* An empty LEB was returned */
		if (lp.free != c->leb_size) {
			/* Mark the whole LEB free in lprops first */
			err = ubifs_change_one_lp(c, lnum, c->leb_size,
						  0, 0, 0, 0);
			if (err)
				return err;
		}
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
		c->gc_lnum = lnum;
		dbg_rcvry("allocated LEB %d for GC", lnum);
		/* Run the commit */
		dbg_rcvry("committing");
		return ubifs_run_commit(c);
	}
	/*
	 * There was no empty LEB so the used space in the dirtiest LEB must fit
	 * in the GC head LEB.
	 */
	if (lp.free + lp.dirty < wbuf->offs) {
		dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
			  lnum, wbuf->lnum, wbuf->offs);
		/* Give the LEB back to the find-dirty machinery */
		err = ubifs_return_leb(c, lnum);
		if (err)
			return err;
		goto find_free;
	}
	/*
	 * We run the commit before garbage collection otherwise subsequent
	 * mounts will see the GC and orphan deletion in a different order.
	 */
	dbg_rcvry("committing");
	err = ubifs_run_commit(c);
	if (err)
		return err;
	/*
	 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
	 * - use locking to keep 'ubifs_assert()' happy.
	 */
	dbg_rcvry("GC'ing LEB %d", lnum);
	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
	err = ubifs_garbage_collect_leb(c, &lp);
	if (err >= 0) {
		/* Flush the write-buffer so the GC'ed data is on flash */
		int err2 = ubifs_wbuf_sync_nolock(wbuf);

		if (err2)
			err = err2;
	}
	mutex_unlock(&wbuf->io_mutex);
	if (err < 0) {
		dbg_err("GC failed, error %d", err);
		if (err == -EAGAIN)
			err = -EINVAL;
		return err;
	}
	/* The GC'ed LEB must be entirely reclaimed (retained as gc_lnum) */
	if (err != LEB_RETAINED) {
		dbg_err("GC returned %d", err);
		return -EINVAL;
	}
	err = ubifs_leb_unmap(c, c->gc_lnum);
	if (err)
		return err;
	dbg_rcvry("allocated LEB %d for GC", lnum);
	return 0;

find_free:
	/*
	 * There is no GC head LEB or the free space in the GC head LEB is too
	 * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
	 * GC is not run.
	 */
	lnum = ubifs_find_free_leb_for_idx(c);
	if (lnum < 0) {
		dbg_err("could not find an empty LEB");
		return lnum;
	}
	/* And reset the index flag */
	err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
				  LPROPS_INDEX, 0);
	if (err)
		return err;
	c->gc_lnum = lnum;
	dbg_rcvry("allocated LEB %d for GC", lnum);
	/* Run the commit */
	dbg_rcvry("committing");
	return ubifs_run_commit(c);
}
| 1198 | |||
/**
 * struct size_entry - inode size information for recovery.
 * @rb: link in the RB-tree of sizes (c->size_tree, keyed by @inum)
 * @inum: inode number
 * @i_size: size on inode
 * @d_size: maximum size based on data nodes
 * @exists: indicates whether the inode exists
 * @inode: inode if pinned in memory awaiting rw mode to fix it
 */
struct size_entry {
	struct rb_node rb;
	ino_t inum;
	loff_t i_size;
	loff_t d_size;
	int exists;
	struct inode *inode;
};
| 1216 | |||
| 1217 | /** | ||
| 1218 | * add_ino - add an entry to the size tree. | ||
| 1219 | * @c: UBIFS file-system description object | ||
| 1220 | * @inum: inode number | ||
| 1221 | * @i_size: size on inode | ||
| 1222 | * @d_size: maximum size based on data nodes | ||
| 1223 | * @exists: indicates whether the inode exists | ||
| 1224 | */ | ||
| 1225 | static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, | ||
| 1226 | loff_t d_size, int exists) | ||
| 1227 | { | ||
| 1228 | struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; | ||
| 1229 | struct size_entry *e; | ||
| 1230 | |||
| 1231 | while (*p) { | ||
| 1232 | parent = *p; | ||
| 1233 | e = rb_entry(parent, struct size_entry, rb); | ||
| 1234 | if (inum < e->inum) | ||
| 1235 | p = &(*p)->rb_left; | ||
| 1236 | else | ||
| 1237 | p = &(*p)->rb_right; | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); | ||
| 1241 | if (!e) | ||
| 1242 | return -ENOMEM; | ||
| 1243 | |||
| 1244 | e->inum = inum; | ||
| 1245 | e->i_size = i_size; | ||
| 1246 | e->d_size = d_size; | ||
| 1247 | e->exists = exists; | ||
| 1248 | |||
| 1249 | rb_link_node(&e->rb, parent, p); | ||
| 1250 | rb_insert_color(&e->rb, &c->size_tree); | ||
| 1251 | |||
| 1252 | return 0; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | /** | ||
| 1256 | * find_ino - find an entry on the size tree. | ||
| 1257 | * @c: UBIFS file-system description object | ||
| 1258 | * @inum: inode number | ||
| 1259 | */ | ||
| 1260 | static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) | ||
| 1261 | { | ||
| 1262 | struct rb_node *p = c->size_tree.rb_node; | ||
| 1263 | struct size_entry *e; | ||
| 1264 | |||
| 1265 | while (p) { | ||
| 1266 | e = rb_entry(p, struct size_entry, rb); | ||
| 1267 | if (inum < e->inum) | ||
| 1268 | p = p->rb_left; | ||
| 1269 | else if (inum > e->inum) | ||
| 1270 | p = p->rb_right; | ||
| 1271 | else | ||
| 1272 | return e; | ||
| 1273 | } | ||
| 1274 | return NULL; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | /** | ||
| 1278 | * remove_ino - remove an entry from the size tree. | ||
| 1279 | * @c: UBIFS file-system description object | ||
| 1280 | * @inum: inode number | ||
| 1281 | */ | ||
| 1282 | static void remove_ino(struct ubifs_info *c, ino_t inum) | ||
| 1283 | { | ||
| 1284 | struct size_entry *e = find_ino(c, inum); | ||
| 1285 | |||
| 1286 | if (!e) | ||
| 1287 | return; | ||
| 1288 | rb_erase(&e->rb, &c->size_tree); | ||
| 1289 | kfree(e); | ||
| 1290 | } | ||
| 1291 | |||
/**
 * ubifs_destroy_size_tree - free resources related to the size tree.
 * @c: UBIFS file-system description object
 *
 * Frees every entry in the size tree, dropping the reference on any inode
 * pinned in memory, and resets the tree to empty.
 */
void ubifs_destroy_size_tree(struct ubifs_info *c)
{
	struct rb_node *this = c->size_tree.rb_node;
	struct size_entry *e;

	/*
	 * Iterative post-order destruction: always descend to a leaf, free
	 * it, then detach it from its parent so it is not visited again.
	 */
	while (this) {
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		e = rb_entry(this, struct size_entry, rb);
		if (e->inode)
			iput(e->inode);
		this = rb_parent(this);
		if (this) {
			/* Null out the parent's link to the freed child */
			if (this->rb_left == &e->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		kfree(e);
	}
	c->size_tree = RB_ROOT;
}
| 1323 | |||
/**
 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
 * @c: UBIFS file-system description object
 * @key: node key
 * @deletion: node is for a deletion
 * @new_size: inode size
 *
 * This function has two purposes:
 * 1) to ensure there are no data nodes that fall outside the inode size
 * 2) to ensure there are no data nodes for inodes that do not exist
 * To accomplish those purposes, a rb-tree is constructed containing an entry
 * for each inode number in the journal that has not been deleted, and recording
 * the size from the inode node, the maximum size of any data node (also altered
 * by truncations) and a flag indicating an inode number for which no inode node
 * was present in the journal.
 *
 * Note that there is still the possibility that there are data nodes that have
 * been committed that are beyond the inode size, however the only way to find
 * them would be to scan the entire index. Alternatively, some provision could
 * be made to record the size of inodes at the start of commit, which would seem
 * very cumbersome for a scenario that is quite unlikely and the only negative
 * consequence of which is wasted space.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
			     int deletion, loff_t new_size)
{
	ino_t inum = key_inum(c, key);
	struct size_entry *e;
	int err;

	switch (key_type(c, key)) {
	case UBIFS_INO_KEY:
		/* Inode node: record (or drop, on deletion) the on-inode size */
		if (deletion)
			remove_ino(c, inum);
		else {
			e = find_ino(c, inum);
			if (e) {
				e->i_size = new_size;
				e->exists = 1;
			} else {
				err = add_ino(c, inum, new_size, 0, 1);
				if (err)
					return err;
			}
		}
		break;
	case UBIFS_DATA_KEY:
		/* Data node: track the maximum size implied by data nodes */
		e = find_ino(c, inum);
		if (e) {
			if (new_size > e->d_size)
				e->d_size = new_size;
		} else {
			/* No inode node seen yet - exists flag stays 0 */
			err = add_ino(c, inum, 0, new_size, 0);
			if (err)
				return err;
		}
		break;
	case UBIFS_TRUN_KEY:
		/* Truncation: the data size is reset, not maximized */
		e = find_ino(c, inum);
		if (e)
			e->d_size = new_size;
		break;
	}
	return 0;
}
| 1391 | |||
/**
 * fix_size_in_place - fix inode size in place on flash.
 * @c: UBIFS file-system description object
 * @e: inode size information for recovery
 *
 * Locates the inode node of @e->inum on flash, rewrites its size field to
 * @e->d_size (recalculating the node CRC) and atomically writes the LEB back.
 *
 * Returns %0 on success and a negative error code on failure.
 */
static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
{
	struct ubifs_ino_node *ino = c->sbuf;
	unsigned char *p;
	union ubifs_key key;
	int err, lnum, offs, len;
	loff_t i_size;
	uint32_t crc;

	/* Locate the inode node LEB number and offset */
	ino_key_init(c, &key, e->inum);
	err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
	if (err)
		goto out;
	/*
	 * If the size recorded on the inode node is greater than the size that
	 * was calculated from nodes in the journal then don't change the inode.
	 */
	i_size = le64_to_cpu(ino->size);
	if (i_size >= e->d_size)
		return 0;
	/* Read the LEB */
	err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
	if (err)
		goto out;
	/* Change the size field and recalculate the CRC */
	ino = c->sbuf + offs;
	ino->size = cpu_to_le64(e->d_size);
	len = le32_to_cpu(ino->ch.len);
	/* CRC covers the node after the 8-byte common-header magic/crc words */
	crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
	ino->ch.crc = cpu_to_le32(crc);
	/* Work out where data in the LEB ends and free space begins */
	p = c->sbuf;
	len = c->leb_size - 1;
	/* Scan back over the empty (0xff) tail of the LEB */
	while (p[len] == 0xff)
		len -= 1;
	len = ALIGN(len + 1, c->min_io_size);
	/* Atomically write the fixed LEB back again */
	err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
	if (err)
		goto out;
	/*
	 * NOTE(review): e->inum is ino_t printed with %lu - presumably ino_t
	 * is unsigned long on all supported configs; a cast would be safer.
	 */
	dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
		  i_size, e->d_size);
	return 0;

out:
	ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
		   e->inum, e->i_size, e->d_size, err);
	return err;
}
| 1447 | |||
/**
 * ubifs_recover_size - recover inode size.
 * @c: UBIFS file-system description object
 *
 * This function attempts to fix inode size discrepancies identified by the
 * 'ubifs_recover_size_accum()' function.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_recover_size(struct ubifs_info *c)
{
	struct rb_node *this = rb_first(&c->size_tree);

	while (this) {
		struct size_entry *e;
		int err;

		e = rb_entry(this, struct size_entry, rb);
		if (!e->exists) {
			union ubifs_key key;

			/* No inode node was replayed - is it in the index? */
			ino_key_init(c, &key, e->inum);
			err = ubifs_tnc_lookup(c, &key, c->sbuf);
			if (err && err != -ENOENT)
				return err;
			if (err == -ENOENT) {
				/* Remove data nodes that have no inode */
				dbg_rcvry("removing ino %lu", e->inum);
				err = ubifs_tnc_remove_ino(c, e->inum);
				if (err)
					return err;
			} else {
				/* Inode found in the index: take its size */
				struct ubifs_ino_node *ino = c->sbuf;

				e->exists = 1;
				e->i_size = le64_to_cpu(ino->size);
			}
		}
		if (e->exists && e->i_size < e->d_size) {
			if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
				/* Fix the inode size and pin it in memory */
				struct inode *inode;

				inode = ubifs_iget(c->vfs_sb, e->inum);
				if (IS_ERR(inode))
					return PTR_ERR(inode);
				if (inode->i_size < e->d_size) {
					dbg_rcvry("ino %lu size %lld -> %lld",
						  e->inum, e->d_size,
						  inode->i_size);
					inode->i_size = e->d_size;
					ubifs_inode(inode)->ui_size = e->d_size;
					/*
					 * Keep the entry (and the inode ref)
					 * so it can be fixed on flash when we
					 * remount read-write.
					 */
					e->inode = inode;
					this = rb_next(this);
					continue;
				}
				iput(inode);
			} else {
				/* Fix the size in place */
				err = fix_size_in_place(c, e);
				if (err)
					return err;
				if (e->inode)
					iput(e->inode);
			}
		}
		/* Done with this entry: advance, then erase and free it */
		this = rb_next(this);
		rb_erase(&e->rb, &c->size_tree);
		kfree(e);
	}
	return 0;
}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c new file mode 100644 index 000000000000..7399692af859 --- /dev/null +++ b/fs/ubifs/replay.c | |||
| @@ -0,0 +1,1075 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
/*
 * This file contains the journal replay code. It runs when the file-system is
 * being mounted and requires no locking.
 *
 * The larger the journal, the longer it takes to scan it, so the longer it
 * takes to mount UBIFS. This is why the journal has a limited size, which may
 * be changed depending on the system requirements. But a larger journal gives
 * faster I/O speed because it writes the index less frequently. So this is a
 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
 * larger the journal, the more memory its index may consume.
 */
| 34 | |||
| 35 | #include "ubifs.h" | ||
| 36 | |||
/*
 * Replay flags (stored in the 'flags' field of struct replay_entry).
 *
 * REPLAY_DELETION: node was deleted
 * REPLAY_REF: node is a reference node
 */
enum {
	REPLAY_DELETION = 1,
	REPLAY_REF = 2,
};
| 47 | |||
/**
 * struct replay_entry - replay tree entry.
 * @lnum: logical eraseblock number of the node
 * @offs: node offset
 * @len: node length
 * @sqnum: node sequence number
 * @flags: replay flags (%REPLAY_DELETION, %REPLAY_REF)
 * @rb: links the replay tree
 * @key: node key
 * @nm: directory entry name
 * @old_size: truncation old size
 * @new_size: truncation new size
 * @free: amount of free space in a bud
 * @dirty: amount of dirty space in a bud from padding and deletion nodes
 *
 * UBIFS journal replay must compare node sequence numbers, which means it must
 * build a tree of node information to insert into the TNC.
 */
struct replay_entry {
	int lnum;
	int offs;
	int len;
	unsigned long long sqnum;
	int flags;
	struct rb_node rb;
	union ubifs_key key;
	/*
	 * Per-node-type payload: only one of the union members is meaningful,
	 * depending on what kind of node this entry describes.
	 */
	union {
		struct qstr nm;
		struct {
			loff_t old_size;
			loff_t new_size;
		};
		struct {
			int free;
			int dirty;
		};
	};
};
| 86 | |||
/**
 * struct bud_entry - entry in the list of buds to replay.
 * @list: next bud in the list (linked into @c->replay_buds)
 * @bud: bud description object
 * @free: free bytes in the bud
 * @sqnum: reference node sequence number
 */
struct bud_entry {
	struct list_head list;
	struct ubifs_bud *bud;
	int free;
	unsigned long long sqnum;
};
| 100 | |||
/**
 * set_bud_lprops - set free and dirty space used by a bud.
 * @c: UBIFS file-system description object
 * @r: replay entry of bud
 *
 * This function updates the LEB properties of the bud LEB described by @r
 * with the free/dirty figures collected during replay. Returns zero in case
 * of success and a negative error code in case of failure.
 */
static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
{
	const struct ubifs_lprops *lp;
	int err = 0, dirty;

	ubifs_get_lprops(c);

	lp = ubifs_lpt_lookup_dirty(c, r->lnum);
	if (IS_ERR(lp)) {
		err = PTR_ERR(lp);
		goto out;
	}

	dirty = lp->dirty;
	if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
		/*
		 * The LEB was added to the journal with a starting offset of
		 * zero which means the LEB must have been empty. The LEB
		 * property values should be lp->free == c->leb_size and
		 * lp->dirty == 0, but that is not the case. The reason is that
		 * the LEB was garbage collected. The garbage collector resets
		 * the free and dirty space without recording it anywhere except
		 * lprops, so if there is not a commit then lprops does not have
		 * that information next time the file system is mounted.
		 *
		 * We do not need to adjust free space because the scan has told
		 * us the exact value which is recorded in the replay entry as
		 * r->free.
		 *
		 * However we do need to subtract from the dirty space the
		 * amount of space that the garbage collector reclaimed, which
		 * is the whole LEB minus the amount of space that was free.
		 */
		dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
			lp->free, lp->dirty);
		dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
			lp->free, lp->dirty);
		dirty -= c->leb_size - lp->free;
		/*
		 * If the replay order was perfect the dirty space would now be
		 * zero. The order is not perfect because the journal heads
		 * race with each other. This is not a problem but it does mean
		 * that the dirty space may temporarily exceed c->leb_size
		 * during the replay.
		 */
		if (dirty != 0)
			dbg_msg("LEB %d lp: %d free %d dirty "
				"replay: %d free %d dirty", r->lnum, lp->free,
				lp->dirty, r->free, r->dirty);
	}
	lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
			     lp->flags | LPROPS_TAKEN, 0);
	if (IS_ERR(lp)) {
		err = PTR_ERR(lp);
		goto out;
	}
out:
	ubifs_release_lprops(c);
	return err;
}
| 166 | |||
| 167 | /** | ||
| 168 | * trun_remove_range - apply a replay entry for a truncation to the TNC. | ||
| 169 | * @c: UBIFS file-system description object | ||
| 170 | * @r: replay entry of truncation | ||
| 171 | */ | ||
| 172 | static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) | ||
| 173 | { | ||
| 174 | unsigned min_blk, max_blk; | ||
| 175 | union ubifs_key min_key, max_key; | ||
| 176 | ino_t ino; | ||
| 177 | |||
| 178 | min_blk = r->new_size / UBIFS_BLOCK_SIZE; | ||
| 179 | if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) | ||
| 180 | min_blk += 1; | ||
| 181 | |||
| 182 | max_blk = r->old_size / UBIFS_BLOCK_SIZE; | ||
| 183 | if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) | ||
| 184 | max_blk -= 1; | ||
| 185 | |||
| 186 | ino = key_inum(c, &r->key); | ||
| 187 | |||
| 188 | data_key_init(c, &min_key, ino, min_blk); | ||
| 189 | data_key_init(c, &max_key, ino, max_blk); | ||
| 190 | |||
| 191 | return ubifs_tnc_remove_range(c, &min_key, &max_key); | ||
| 192 | } | ||
| 193 | |||
/**
 * apply_replay_entry - apply a replay entry to the TNC.
 * @c: UBIFS file-system description object
 * @r: replay entry to apply
 *
 * Apply a replay entry to the TNC. Reference entries update bud LEB
 * properties, hash-keyed (directory/xattr) entries are added or removed by
 * name, and all other entries are added or removed by key. Returns zero in
 * case of success and a negative error code in case of failure.
 */
static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
	int err, deletion = ((r->flags & REPLAY_DELETION) != 0);

	dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
		r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));

	/* Set c->replay_sqnum to help deal with dangling branches. */
	c->replay_sqnum = r->sqnum;

	if (r->flags & REPLAY_REF)
		err = set_bud_lprops(c, r);
	else if (is_hash_key(c, &r->key)) {
		/* Directory/xattr entries are looked up by name */
		if (deletion)
			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
		else
			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
					       r->len, &r->nm);
	} else {
		if (deletion)
			switch (key_type(c, &r->key)) {
			case UBIFS_INO_KEY:
			{
				/* Inode deletion removes the whole inode */
				ino_t inum = key_inum(c, &r->key);

				err = ubifs_tnc_remove_ino(c, inum);
				break;
			}
			case UBIFS_TRUN_KEY:
				/* Truncation removes a range of data nodes */
				err = trun_remove_range(c, r);
				break;
			default:
				err = ubifs_tnc_remove(c, &r->key);
				break;
			}
		else
			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
					    r->len);
		if (err)
			return err;

		/* After an unclean unmount, accumulate inode size updates */
		if (c->need_recovery)
			err = ubifs_recover_size_accum(c, &r->key, deletion,
						       r->new_size);
	}

	return err;
}
| 249 | |||
/**
 * destroy_replay_tree - destroy the replay.
 * @c: UBIFS file-system description object
 *
 * Destroy the replay tree, freeing every entry. The tree is torn down
 * iteratively (no recursion): descend to a leaf, free it, detach it from its
 * parent so the parent eventually becomes a leaf itself.
 */
static void destroy_replay_tree(struct ubifs_info *c)
{
	struct rb_node *this = c->replay_tree.rb_node;
	struct replay_entry *r;

	while (this) {
		/* Descend until a node with no children is found */
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		r = rb_entry(this, struct replay_entry, rb);
		this = rb_parent(this);
		if (this) {
			/* Unhook the leaf we are about to free */
			if (this->rb_left == &r->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		/* Only hash-keyed entries own a separately allocated name */
		if (is_hash_key(c, &r->key))
			kfree(r->nm.name);
		kfree(r);
	}
	c->replay_tree = RB_ROOT;
}
| 283 | |||
| 284 | /** | ||
| 285 | * apply_replay_tree - apply the replay tree to the TNC. | ||
| 286 | * @c: UBIFS file-system description object | ||
| 287 | * | ||
| 288 | * Apply the replay tree. | ||
| 289 | * Returns zero in case of success and a negative error code in case of | ||
| 290 | * failure. | ||
| 291 | */ | ||
| 292 | static int apply_replay_tree(struct ubifs_info *c) | ||
| 293 | { | ||
| 294 | struct rb_node *this = rb_first(&c->replay_tree); | ||
| 295 | |||
| 296 | while (this) { | ||
| 297 | struct replay_entry *r; | ||
| 298 | int err; | ||
| 299 | |||
| 300 | cond_resched(); | ||
| 301 | |||
| 302 | r = rb_entry(this, struct replay_entry, rb); | ||
| 303 | err = apply_replay_entry(c, r); | ||
| 304 | if (err) | ||
| 305 | return err; | ||
| 306 | this = rb_next(this); | ||
| 307 | } | ||
| 308 | return 0; | ||
| 309 | } | ||
| 310 | |||
| 311 | /** | ||
| 312 | * insert_node - insert a node to the replay tree. | ||
| 313 | * @c: UBIFS file-system description object | ||
| 314 | * @lnum: node logical eraseblock number | ||
| 315 | * @offs: node offset | ||
| 316 | * @len: node length | ||
| 317 | * @key: node key | ||
| 318 | * @sqnum: sequence number | ||
| 319 | * @deletion: non-zero if this is a deletion | ||
| 320 | * @used: number of bytes in use in a LEB | ||
| 321 | * @old_size: truncation old size | ||
| 322 | * @new_size: truncation new size | ||
| 323 | * | ||
| 324 | * This function inserts a scanned non-direntry node to the replay tree. The | ||
| 325 | * replay tree is an RB-tree containing @struct replay_entry elements which are | ||
| 326 | * indexed by the sequence number. The replay tree is applied at the very end | ||
| 327 | * of the replay process. Since the tree is sorted in sequence number order, | ||
| 328 | * the older modifications are applied first. This function returns zero in | ||
| 329 | * case of success and a negative error code in case of failure. | ||
| 330 | */ | ||
| 331 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | ||
| 332 | union ubifs_key *key, unsigned long long sqnum, | ||
| 333 | int deletion, int *used, loff_t old_size, | ||
| 334 | loff_t new_size) | ||
| 335 | { | ||
| 336 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 337 | struct replay_entry *r; | ||
| 338 | |||
| 339 | if (key_inum(c, key) >= c->highest_inum) | ||
| 340 | c->highest_inum = key_inum(c, key); | ||
| 341 | |||
| 342 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 343 | while (*p) { | ||
| 344 | parent = *p; | ||
| 345 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 346 | if (sqnum < r->sqnum) { | ||
| 347 | p = &(*p)->rb_left; | ||
| 348 | continue; | ||
| 349 | } else if (sqnum > r->sqnum) { | ||
| 350 | p = &(*p)->rb_right; | ||
| 351 | continue; | ||
| 352 | } | ||
| 353 | ubifs_err("duplicate sqnum in replay"); | ||
| 354 | return -EINVAL; | ||
| 355 | } | ||
| 356 | |||
| 357 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
| 358 | if (!r) | ||
| 359 | return -ENOMEM; | ||
| 360 | |||
| 361 | if (!deletion) | ||
| 362 | *used += ALIGN(len, 8); | ||
| 363 | r->lnum = lnum; | ||
| 364 | r->offs = offs; | ||
| 365 | r->len = len; | ||
| 366 | r->sqnum = sqnum; | ||
| 367 | r->flags = (deletion ? REPLAY_DELETION : 0); | ||
| 368 | r->old_size = old_size; | ||
| 369 | r->new_size = new_size; | ||
| 370 | key_copy(c, key, &r->key); | ||
| 371 | |||
| 372 | rb_link_node(&r->rb, parent, p); | ||
| 373 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 374 | return 0; | ||
| 375 | } | ||
| 376 | |||
/**
 * insert_dent - insert a directory entry node into the replay tree.
 * @c: UBIFS file-system description object
 * @lnum: node logical eraseblock number
 * @offs: node offset
 * @len: node length
 * @key: node key
 * @name: directory entry name
 * @nlen: directory entry name length
 * @sqnum: sequence number
 * @deletion: non-zero if this is a deletion
 * @used: number of bytes in use in a LEB
 *
 * This function inserts a scanned directory entry node to the replay tree.
 * The name is copied into a freshly allocated, NUL-terminated buffer, so the
 * caller's buffer does not have to remain valid. Returns zero in case of
 * success and a negative error code in case of failure.
 *
 * This function is also used for extended attribute entries because they are
 * implemented as directory entry nodes.
 */
static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
		       union ubifs_key *key, const char *name, int nlen,
		       unsigned long long sqnum, int deletion, int *used)
{
	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
	struct replay_entry *r;
	char *nbuf;

	/* Keep track of the highest inode number seen during replay */
	if (key_inum(c, key) >= c->highest_inum)
		c->highest_inum = key_inum(c, key);

	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
	while (*p) {
		parent = *p;
		r = rb_entry(parent, struct replay_entry, rb);
		if (sqnum < r->sqnum) {
			p = &(*p)->rb_left;
			continue;
		}
		if (sqnum > r->sqnum) {
			p = &(*p)->rb_right;
			continue;
		}
		/* Sequence numbers must be unique */
		ubifs_err("duplicate sqnum in replay");
		return -EINVAL;
	}

	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
	if (!r)
		return -ENOMEM;
	nbuf = kmalloc(nlen + 1, GFP_KERNEL);
	if (!nbuf) {
		kfree(r);
		return -ENOMEM;
	}

	/* Deletion nodes do not occupy space in the bud */
	if (!deletion)
		*used += ALIGN(len, 8);
	r->lnum = lnum;
	r->offs = offs;
	r->len = len;
	r->sqnum = sqnum;
	r->nm.len = nlen;
	memcpy(nbuf, name, nlen);
	nbuf[nlen] = '\0';
	r->nm.name = nbuf;
	r->flags = (deletion ? REPLAY_DELETION : 0);
	key_copy(c, key, &r->key);

	ubifs_assert(!*p);
	rb_link_node(&r->rb, parent, p);
	rb_insert_color(&r->rb, &c->replay_tree);
	return 0;
}
| 451 | |||
| 452 | /** | ||
| 453 | * ubifs_validate_entry - validate directory or extended attribute entry node. | ||
| 454 | * @c: UBIFS file-system description object | ||
| 455 | * @dent: the node to validate | ||
| 456 | * | ||
| 457 | * This function validates directory or extended attribute entry node @dent. | ||
| 458 | * Returns zero if the node is all right and a %-EINVAL if not. | ||
| 459 | */ | ||
| 460 | int ubifs_validate_entry(struct ubifs_info *c, | ||
| 461 | const struct ubifs_dent_node *dent) | ||
| 462 | { | ||
| 463 | int key_type = key_type_flash(c, dent->key); | ||
| 464 | int nlen = le16_to_cpu(dent->nlen); | ||
| 465 | |||
| 466 | if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || | ||
| 467 | dent->type >= UBIFS_ITYPES_CNT || | ||
| 468 | nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || | ||
| 469 | strnlen(dent->name, nlen) != nlen || | ||
| 470 | le64_to_cpu(dent->inum) > MAX_INUM) { | ||
| 471 | ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? | ||
| 472 | "directory entry" : "extended attribute entry"); | ||
| 473 | return -EINVAL; | ||
| 474 | } | ||
| 475 | |||
| 476 | if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { | ||
| 477 | ubifs_err("bad key type %d", key_type); | ||
| 478 | return -EINVAL; | ||
| 479 | } | ||
| 480 | |||
| 481 | return 0; | ||
| 482 | } | ||
| 483 | |||
/**
 * replay_bud - replay a bud logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: bud logical eraseblock number to replay
 * @offs: bud start offset
 * @jhead: journal head to which this bud belongs
 * @free: amount of free space in the bud is returned here
 * @dirty: amount of dirty space from padding and deletion nodes is returned
 * here
 *
 * This function scans the bud LEB, inserts a replay entry for every node
 * found, and reports back the resulting free/dirty space figures. It returns
 * zero in case of success and a negative error code in case of failure.
 */
static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
		      int *free, int *dirty)
{
	int err = 0, used = 0;
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	struct ubifs_bud *bud;

	dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
	/* After an unclean unmount, let recovery fix up the bud LEB */
	if (c->need_recovery)
		sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
	else
		sleb = ubifs_scan(c, lnum, offs, c->sbuf);
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);

	/*
	 * The bud does not have to start from offset zero - the beginning of
	 * the 'lnum' LEB may contain previously committed data. One of the
	 * things we have to do in replay is to correctly update lprops with
	 * newer information about this LEB.
	 *
	 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
	 * bytes of free space because it only contain information about
	 * committed data.
	 *
	 * But we know that real amount of free space is 'c->leb_size -
	 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
	 * 'sleb->endpt' is used by bud data. We have to correctly calculate
	 * how much of these data are dirty and update lprops with this
	 * information.
	 *
	 * The dirt in that LEB region is comprised of padding nodes, deletion
	 * nodes, truncation nodes and nodes which are obsoleted by subsequent
	 * nodes in this LEB. So instead of calculating clean space, we
	 * calculate used space ('used' variable).
	 */

	list_for_each_entry(snod, &sleb->nodes, list) {
		int deletion = 0;

		cond_resched();

		/* Sequence numbers must stay below the watermark */
		if (snod->sqnum >= SQNUM_WATERMARK) {
			ubifs_err("file system's life ended");
			goto out_dump;
		}

		if (snod->sqnum > c->max_sqnum)
			c->max_sqnum = snod->sqnum;

		switch (snod->type) {
		case UBIFS_INO_NODE:
		{
			struct ubifs_ino_node *ino = snod->node;
			loff_t new_size = le64_to_cpu(ino->size);

			/* Zero link count means the inode was deleted */
			if (le32_to_cpu(ino->nlink) == 0)
				deletion = 1;
			err = insert_node(c, lnum, snod->offs, snod->len,
					  &snod->key, snod->sqnum, deletion,
					  &used, 0, new_size);
			break;
		}
		case UBIFS_DATA_NODE:
		{
			struct ubifs_data_node *dn = snod->node;
			/* Size the inode must have to hold this data node */
			loff_t new_size = le32_to_cpu(dn->size) +
					  key_block(c, &snod->key) *
					  UBIFS_BLOCK_SIZE;

			err = insert_node(c, lnum, snod->offs, snod->len,
					  &snod->key, snod->sqnum, deletion,
					  &used, 0, new_size);
			break;
		}
		case UBIFS_DENT_NODE:
		case UBIFS_XENT_NODE:
		{
			struct ubifs_dent_node *dent = snod->node;

			err = ubifs_validate_entry(c, dent);
			if (err)
				goto out_dump;

			/* Zero target inode number means entry deletion */
			err = insert_dent(c, lnum, snod->offs, snod->len,
					  &snod->key, dent->name,
					  le16_to_cpu(dent->nlen), snod->sqnum,
					  !le64_to_cpu(dent->inum), &used);
			break;
		}
		case UBIFS_TRUN_NODE:
		{
			struct ubifs_trun_node *trun = snod->node;
			loff_t old_size = le64_to_cpu(trun->old_size);
			loff_t new_size = le64_to_cpu(trun->new_size);
			union ubifs_key key;

			/* Validate truncation node */
			if (old_size < 0 || old_size > c->max_inode_sz ||
			    new_size < 0 || new_size > c->max_inode_sz ||
			    old_size <= new_size) {
				ubifs_err("bad truncation node");
				goto out_dump;
			}

			/*
			 * Create a fake truncation key just to use the same
			 * functions which expect nodes to have keys.
			 */
			trun_key_init(c, &key, le32_to_cpu(trun->inum));
			err = insert_node(c, lnum, snod->offs, snod->len,
					  &key, snod->sqnum, 1, &used,
					  old_size, new_size);
			break;
		}
		default:
			ubifs_err("unexpected node type %d in bud LEB %d:%d",
				  snod->type, lnum, snod->offs);
			err = -EINVAL;
			goto out_dump;
		}
		if (err)
			goto out;
	}

	/* The bud was registered by add_replay_bud(), so it must exist */
	bud = ubifs_search_bud(c, lnum);
	if (!bud)
		BUG();

	ubifs_assert(sleb->endpt - offs >= used);
	ubifs_assert(sleb->endpt % c->min_io_size == 0);

	/* Position the journal head write-buffer after the replayed data */
	if (sleb->endpt + c->min_io_size <= c->leb_size &&
	    !(c->vfs_sb->s_flags & MS_RDONLY))
		err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
					     sleb->endpt, UBI_SHORTTERM);

	*dirty = sleb->endpt - offs - used;
	*free = c->leb_size - sleb->endpt;

out:
	ubifs_scan_destroy(sleb);
	return err;

out_dump:
	ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
	dbg_dump_node(c, snod->node);
	ubifs_scan_destroy(sleb);
	return -EINVAL;
}
| 648 | |||
| 649 | /** | ||
| 650 | * insert_ref_node - insert a reference node to the replay tree. | ||
| 651 | * @c: UBIFS file-system description object | ||
| 652 | * @lnum: node logical eraseblock number | ||
| 653 | * @offs: node offset | ||
| 654 | * @sqnum: sequence number | ||
| 655 | * @free: amount of free space in bud | ||
| 656 | * @dirty: amount of dirty space from padding and deletion nodes | ||
| 657 | * | ||
| 658 | * This function inserts a reference node to the replay tree and returns zero | ||
| 659 | * in case of success ort a negative error code in case of failure. | ||
| 660 | */ | ||
| 661 | static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, | ||
| 662 | unsigned long long sqnum, int free, int dirty) | ||
| 663 | { | ||
| 664 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 665 | struct replay_entry *r; | ||
| 666 | |||
| 667 | dbg_mnt("add ref LEB %d:%d", lnum, offs); | ||
| 668 | while (*p) { | ||
| 669 | parent = *p; | ||
| 670 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 671 | if (sqnum < r->sqnum) { | ||
| 672 | p = &(*p)->rb_left; | ||
| 673 | continue; | ||
| 674 | } else if (sqnum > r->sqnum) { | ||
| 675 | p = &(*p)->rb_right; | ||
| 676 | continue; | ||
| 677 | } | ||
| 678 | ubifs_err("duplicate sqnum in replay tree"); | ||
| 679 | return -EINVAL; | ||
| 680 | } | ||
| 681 | |||
| 682 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
| 683 | if (!r) | ||
| 684 | return -ENOMEM; | ||
| 685 | |||
| 686 | r->lnum = lnum; | ||
| 687 | r->offs = offs; | ||
| 688 | r->sqnum = sqnum; | ||
| 689 | r->flags = REPLAY_REF; | ||
| 690 | r->free = free; | ||
| 691 | r->dirty = dirty; | ||
| 692 | |||
| 693 | rb_link_node(&r->rb, parent, p); | ||
| 694 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 695 | return 0; | ||
| 696 | } | ||
| 697 | |||
| 698 | /** | ||
| 699 | * replay_buds - replay all buds. | ||
| 700 | * @c: UBIFS file-system description object | ||
| 701 | * | ||
| 702 | * This function returns zero in case of success and a negative error code in | ||
| 703 | * case of failure. | ||
| 704 | */ | ||
| 705 | static int replay_buds(struct ubifs_info *c) | ||
| 706 | { | ||
| 707 | struct bud_entry *b; | ||
| 708 | int err, uninitialized_var(free), uninitialized_var(dirty); | ||
| 709 | |||
| 710 | list_for_each_entry(b, &c->replay_buds, list) { | ||
| 711 | err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, | ||
| 712 | &free, &dirty); | ||
| 713 | if (err) | ||
| 714 | return err; | ||
| 715 | err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, | ||
| 716 | free, dirty); | ||
| 717 | if (err) | ||
| 718 | return err; | ||
| 719 | } | ||
| 720 | |||
| 721 | return 0; | ||
| 722 | } | ||
| 723 | |||
| 724 | /** | ||
| 725 | * destroy_bud_list - destroy the list of buds to replay. | ||
| 726 | * @c: UBIFS file-system description object | ||
| 727 | */ | ||
| 728 | static void destroy_bud_list(struct ubifs_info *c) | ||
| 729 | { | ||
| 730 | struct bud_entry *b; | ||
| 731 | |||
| 732 | while (!list_empty(&c->replay_buds)) { | ||
| 733 | b = list_entry(c->replay_buds.next, struct bud_entry, list); | ||
| 734 | list_del(&b->list); | ||
| 735 | kfree(b); | ||
| 736 | } | ||
| 737 | } | ||
| 738 | |||
| 739 | /** | ||
| 740 | * add_replay_bud - add a bud to the list of buds to replay. | ||
| 741 | * @c: UBIFS file-system description object | ||
| 742 | * @lnum: bud logical eraseblock number to replay | ||
| 743 | * @offs: bud start offset | ||
| 744 | * @jhead: journal head to which this bud belongs | ||
| 745 | * @sqnum: reference node sequence number | ||
| 746 | * | ||
| 747 | * This function returns zero in case of success and a negative error code in | ||
| 748 | * case of failure. | ||
| 749 | */ | ||
| 750 | static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | ||
| 751 | unsigned long long sqnum) | ||
| 752 | { | ||
| 753 | struct ubifs_bud *bud; | ||
| 754 | struct bud_entry *b; | ||
| 755 | |||
| 756 | dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); | ||
| 757 | |||
| 758 | bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); | ||
| 759 | if (!bud) | ||
| 760 | return -ENOMEM; | ||
| 761 | |||
| 762 | b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); | ||
| 763 | if (!b) { | ||
| 764 | kfree(bud); | ||
| 765 | return -ENOMEM; | ||
| 766 | } | ||
| 767 | |||
| 768 | bud->lnum = lnum; | ||
| 769 | bud->start = offs; | ||
| 770 | bud->jhead = jhead; | ||
| 771 | ubifs_add_bud(c, bud); | ||
| 772 | |||
| 773 | b->bud = bud; | ||
| 774 | b->sqnum = sqnum; | ||
| 775 | list_add_tail(&b->list, &c->replay_buds); | ||
| 776 | |||
| 777 | return 0; | ||
| 778 | } | ||
| 779 | |||
/**
 * validate_ref - validate a reference node.
 * @c: UBIFS file-system description object
 * @ref: the reference node to validate
 *
 * This function returns %1 if a bud reference already exists for the LEB
 * described by @ref. %0 is returned if the reference node is new, otherwise
 * %-EINVAL is returned if validation failed.
 */
static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
{
	struct ubifs_bud *bud;
	int lnum = le32_to_cpu(ref->lnum);
	unsigned int offs = le32_to_cpu(ref->offs);
	unsigned int jhead = le32_to_cpu(ref->jhead);

	/*
	 * ref->offs may point to the end of LEB when the journal head points
	 * to the end of LEB and we write reference node for it during commit.
	 * This is why 'offs == c->leb_size' is allowed here and only
	 * 'offs > c->leb_size' is rejected.
	 */
	if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
	    lnum < c->main_first || offs > c->leb_size ||
	    offs & (c->min_io_size - 1))
		return -EINVAL;

	/* Make sure we have not already looked at this bud */
	bud = ubifs_search_bud(c, lnum);
	if (bud) {
		if (bud->jhead == jhead && bud->start <= offs)
			return 1;
		ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
		return -EINVAL;
	}

	return 0;
}
| 819 | |||
| 820 | /** | ||
| 821 | * replay_log_leb - replay a log logical eraseblock. | ||
| 822 | * @c: UBIFS file-system description object | ||
| 823 | * @lnum: log logical eraseblock to replay | ||
| 824 | * @offs: offset to start replaying from | ||
| 825 | * @sbuf: scan buffer | ||
| 826 | * | ||
| 827 | * This function replays a log LEB and returns zero in case of success, %1 if | ||
| 828 | * this is the last LEB in the log, and a negative error code in case of | ||
| 829 | * failure. | ||
| 830 | */ | ||
| 831 | static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | ||
| 832 | { | ||
| 833 | int err; | ||
| 834 | struct ubifs_scan_leb *sleb; | ||
| 835 | struct ubifs_scan_node *snod; | ||
| 836 | const struct ubifs_cs_node *node; | ||
| 837 | |||
| 838 | dbg_mnt("replay log LEB %d:%d", lnum, offs); | ||
| 839 | sleb = ubifs_scan(c, lnum, offs, sbuf); | ||
| 840 | if (IS_ERR(sleb)) { | ||
| 841 | if (c->need_recovery) | ||
| 842 | sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); | ||
| 843 | if (IS_ERR(sleb)) | ||
| 844 | return PTR_ERR(sleb); | ||
| 845 | } | ||
| 846 | |||
| 847 | if (sleb->nodes_cnt == 0) { | ||
| 848 | err = 1; | ||
| 849 | goto out; | ||
| 850 | } | ||
| 851 | |||
| 852 | node = sleb->buf; | ||
| 853 | |||
| 854 | snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); | ||
| 855 | if (c->cs_sqnum == 0) { | ||
| 856 | /* | ||
| 857 | * This is the first log LEB we are looking at, make sure that | ||
| 858 | * the first node is a commit start node. Also record its | ||
| 859 | * sequence number so that UBIFS can determine where the log | ||
| 860 | * ends, because all nodes which were have higher sequence | ||
| 861 | * numbers. | ||
| 862 | */ | ||
| 863 | if (snod->type != UBIFS_CS_NODE) { | ||
| 864 | dbg_err("first log node at LEB %d:%d is not CS node", | ||
| 865 | lnum, offs); | ||
| 866 | goto out_dump; | ||
| 867 | } | ||
| 868 | if (le64_to_cpu(node->cmt_no) != c->cmt_no) { | ||
| 869 | dbg_err("first CS node at LEB %d:%d has wrong " | ||
| 870 | "commit number %llu expected %llu", | ||
| 871 | lnum, offs, | ||
| 872 | (unsigned long long)le64_to_cpu(node->cmt_no), | ||
| 873 | c->cmt_no); | ||
| 874 | goto out_dump; | ||
| 875 | } | ||
| 876 | |||
| 877 | c->cs_sqnum = le64_to_cpu(node->ch.sqnum); | ||
| 878 | dbg_mnt("commit start sqnum %llu", c->cs_sqnum); | ||
| 879 | } | ||
| 880 | |||
| 881 | if (snod->sqnum < c->cs_sqnum) { | ||
| 882 | /* | ||
| 883 | * This means that we reached end of log and now | ||
| 884 | * look to the older log data, which was already | ||
| 885 | * committed but the eraseblock was not erased (UBIFS | ||
| 886 | * only unmaps it). So this basically means we have to | ||
| 887 | * exit with "end of log" code. | ||
| 888 | */ | ||
| 889 | err = 1; | ||
| 890 | goto out; | ||
| 891 | } | ||
| 892 | |||
| 893 | /* Make sure the first node sits at offset zero of the LEB */ | ||
| 894 | if (snod->offs != 0) { | ||
| 895 | dbg_err("first node is not at zero offset"); | ||
| 896 | goto out_dump; | ||
| 897 | } | ||
| 898 | |||
| 899 | list_for_each_entry(snod, &sleb->nodes, list) { | ||
| 900 | |||
| 901 | cond_resched(); | ||
| 902 | |||
| 903 | if (snod->sqnum >= SQNUM_WATERMARK) { | ||
| 904 | ubifs_err("file system's life ended"); | ||
| 905 | goto out_dump; | ||
| 906 | } | ||
| 907 | |||
| 908 | if (snod->sqnum < c->cs_sqnum) { | ||
| 909 | dbg_err("bad sqnum %llu, commit sqnum %llu", | ||
| 910 | snod->sqnum, c->cs_sqnum); | ||
| 911 | goto out_dump; | ||
| 912 | } | ||
| 913 | |||
| 914 | if (snod->sqnum > c->max_sqnum) | ||
| 915 | c->max_sqnum = snod->sqnum; | ||
| 916 | |||
| 917 | switch (snod->type) { | ||
| 918 | case UBIFS_REF_NODE: { | ||
| 919 | const struct ubifs_ref_node *ref = snod->node; | ||
| 920 | |||
| 921 | err = validate_ref(c, ref); | ||
| 922 | if (err == 1) | ||
| 923 | break; /* Already have this bud */ | ||
| 924 | if (err) | ||
| 925 | goto out_dump; | ||
| 926 | |||
| 927 | err = add_replay_bud(c, le32_to_cpu(ref->lnum), | ||
| 928 | le32_to_cpu(ref->offs), | ||
| 929 | le32_to_cpu(ref->jhead), | ||
| 930 | snod->sqnum); | ||
| 931 | if (err) | ||
| 932 | goto out; | ||
| 933 | |||
| 934 | break; | ||
| 935 | } | ||
| 936 | case UBIFS_CS_NODE: | ||
| 937 | /* Make sure it sits at the beginning of LEB */ | ||
| 938 | if (snod->offs != 0) { | ||
| 939 | ubifs_err("unexpected node in log"); | ||
| 940 | goto out_dump; | ||
| 941 | } | ||
| 942 | break; | ||
| 943 | default: | ||
| 944 | ubifs_err("unexpected node in log"); | ||
| 945 | goto out_dump; | ||
| 946 | } | ||
| 947 | } | ||
| 948 | |||
| 949 | if (sleb->endpt || c->lhead_offs >= c->leb_size) { | ||
| 950 | c->lhead_lnum = lnum; | ||
| 951 | c->lhead_offs = sleb->endpt; | ||
| 952 | } | ||
| 953 | |||
| 954 | err = !sleb->endpt; | ||
| 955 | out: | ||
| 956 | ubifs_scan_destroy(sleb); | ||
| 957 | return err; | ||
| 958 | |||
| 959 | out_dump: | ||
| 960 | ubifs_err("log error detected while replying the log at LEB %d:%d", | ||
| 961 | lnum, offs + snod->offs); | ||
| 962 | dbg_dump_node(c, snod->node); | ||
| 963 | ubifs_scan_destroy(sleb); | ||
| 964 | return -EINVAL; | ||
| 965 | } | ||
| 966 | |||
| 967 | /** | ||
| 968 | * take_ihead - update the status of the index head in lprops to 'taken'. | ||
| 969 | * @c: UBIFS file-system description object | ||
| 970 | * | ||
| 971 | * This function returns the amount of free space in the index head LEB or a | ||
| 972 | * negative error code. | ||
| 973 | */ | ||
| 974 | static int take_ihead(struct ubifs_info *c) | ||
| 975 | { | ||
| 976 | const struct ubifs_lprops *lp; | ||
| 977 | int err, free; | ||
| 978 | |||
| 979 | ubifs_get_lprops(c); | ||
| 980 | |||
| 981 | lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); | ||
| 982 | if (IS_ERR(lp)) { | ||
| 983 | err = PTR_ERR(lp); | ||
| 984 | goto out; | ||
| 985 | } | ||
| 986 | |||
| 987 | free = lp->free; | ||
| 988 | |||
| 989 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | ||
| 990 | lp->flags | LPROPS_TAKEN, 0); | ||
| 991 | if (IS_ERR(lp)) { | ||
| 992 | err = PTR_ERR(lp); | ||
| 993 | goto out; | ||
| 994 | } | ||
| 995 | |||
| 996 | err = free; | ||
| 997 | out: | ||
| 998 | ubifs_release_lprops(c); | ||
| 999 | return err; | ||
| 1000 | } | ||
| 1001 | |||
/**
 * ubifs_replay_journal - replay journal.
 * @c: UBIFS file-system description object
 *
 * This function scans the journal, replays and cleans it up. It makes sure all
 * memory data structures related to uncommitted journal are built (dirty TNC
 * tree, tree of buds, modified lprops, etc).
 *
 * Returns zero in case of success and a negative error code in case of
 * failure.
 */
int ubifs_replay_journal(struct ubifs_info *c)
{
	int err, i, lnum, offs, free;
	void *sbuf = NULL;

	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);

	/* Update the status of the index head in lprops to 'taken' */
	free = take_ihead(c);
	if (free < 0)
		return free; /* Error code */

	/* The index head must sit exactly 'free' bytes before the LEB end */
	if (c->ihead_offs != c->leb_size - free) {
		ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
			  c->ihead_offs);
		return -EINVAL;
	}

	/* Scratch buffer large enough to hold one whole LEB while scanning */
	sbuf = vmalloc(c->leb_size);
	if (!sbuf)
		return -ENOMEM;

	dbg_mnt("start replaying the journal");

	c->replaying = 1;

	/* Start scanning the log area at the current log head */
	lnum = c->ltail_lnum = c->lhead_lnum;
	offs = c->lhead_offs;

	for (i = 0; i < c->log_lebs; i++, lnum++) {
		if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
			/*
			 * The log is logically circular, we reached the last
			 * LEB, switch to the first one.
			 */
			lnum = UBIFS_LOG_LNUM;
			offs = 0;
		}
		err = replay_log_leb(c, lnum, offs, sbuf);
		if (err == 1)
			/* We hit the end of the log */
			break;
		if (err)
			goto out;
		/* Only the first scanned LEB may start at a non-zero offset */
		offs = 0;
	}

	/* Replay the buds the scanned log referred to ... */
	err = replay_buds(c);
	if (err)
		goto out;

	/* ... and apply the accumulated replay tree */
	err = apply_replay_tree(c);
	if (err)
		goto out;

	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
		c->highest_inum);
out:
	/* Replay-only data structures are freed whether we succeeded or not */
	destroy_replay_tree(c);
	destroy_bud_list(c);
	vfree(sbuf);
	c->replaying = 0;
	return err;
}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c new file mode 100644 index 000000000000..2bf753b38889 --- /dev/null +++ b/fs/ubifs/sb.c | |||
| @@ -0,0 +1,629 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements UBIFS superblock. The superblock is stored at the first | ||
| 25 | * LEB of the volume and is never changed by UBIFS. Only user-space tools may | ||
| 26 | * change it. The superblock node mostly contains geometry information. | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include "ubifs.h" | ||
| 30 | #include <linux/random.h> | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Default journal size in logical eraseblocks as a percent of total | ||
| 34 | * flash size. | ||
| 35 | */ | ||
| 36 | #define DEFAULT_JNL_PERCENT 5 | ||
| 37 | |||
| 38 | /* Default maximum journal size in bytes */ | ||
| 39 | #define DEFAULT_MAX_JNL (32*1024*1024) | ||
| 40 | |||
| 41 | /* Default indexing tree fanout */ | ||
| 42 | #define DEFAULT_FANOUT 8 | ||
| 43 | |||
| 44 | /* Default number of data journal heads */ | ||
| 45 | #define DEFAULT_JHEADS_CNT 1 | ||
| 46 | |||
| 47 | /* Default positions of different LEBs in the main area */ | ||
| 48 | #define DEFAULT_IDX_LEB 0 | ||
| 49 | #define DEFAULT_DATA_LEB 1 | ||
| 50 | #define DEFAULT_GC_LEB 2 | ||
| 51 | |||
| 52 | /* Default number of LEB numbers in LPT's save table */ | ||
| 53 | #define DEFAULT_LSAVE_CNT 256 | ||
| 54 | |||
| 55 | /* Default reserved pool size as a percent of maximum free space */ | ||
| 56 | #define DEFAULT_RP_PERCENT 5 | ||
| 57 | |||
| 58 | /* The default maximum size of reserved pool in bytes */ | ||
| 59 | #define DEFAULT_MAX_RP_SIZE (5*1024*1024) | ||
| 60 | |||
| 61 | /* Default time granularity in nanoseconds */ | ||
| 62 | #define DEFAULT_TIME_GRAN 1000000000 | ||
| 63 | |||
| 64 | /** | ||
| 65 | * create_default_filesystem - format empty UBI volume. | ||
| 66 | * @c: UBIFS file-system description object | ||
| 67 | * | ||
| 68 | * This function creates default empty file-system. Returns zero in case of | ||
| 69 | * success and a negative error code in case of failure. | ||
| 70 | */ | ||
| 71 | static int create_default_filesystem(struct ubifs_info *c) | ||
| 72 | { | ||
| 73 | struct ubifs_sb_node *sup; | ||
| 74 | struct ubifs_mst_node *mst; | ||
| 75 | struct ubifs_idx_node *idx; | ||
| 76 | struct ubifs_branch *br; | ||
| 77 | struct ubifs_ino_node *ino; | ||
| 78 | struct ubifs_cs_node *cs; | ||
| 79 | union ubifs_key key; | ||
| 80 | int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; | ||
| 81 | int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; | ||
| 82 | int min_leb_cnt = UBIFS_MIN_LEB_CNT; | ||
| 83 | uint64_t tmp64, main_bytes; | ||
| 84 | |||
| 85 | /* Some functions called from here depend on the @c->key_len filed */ | ||
| 86 | c->key_len = UBIFS_SK_LEN; | ||
| 87 | |||
| 88 | /* | ||
| 89 | * First of all, we have to calculate default file-system geometry - | ||
| 90 | * log size, journal size, etc. | ||
| 91 | */ | ||
| 92 | if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT) | ||
| 93 | /* We can first multiply then divide and have no overflow */ | ||
| 94 | jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100; | ||
| 95 | else | ||
| 96 | jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT; | ||
| 97 | |||
| 98 | if (jnl_lebs < UBIFS_MIN_JNL_LEBS) | ||
| 99 | jnl_lebs = UBIFS_MIN_JNL_LEBS; | ||
| 100 | if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL) | ||
| 101 | jnl_lebs = DEFAULT_MAX_JNL / c->leb_size; | ||
| 102 | |||
| 103 | /* | ||
| 104 | * The log should be large enough to fit reference nodes for all bud | ||
| 105 | * LEBs. Because buds do not have to start from the beginning of LEBs | ||
| 106 | * (half of the LEB may contain committed data), the log should | ||
| 107 | * generally be larger, make it twice as large. | ||
| 108 | */ | ||
| 109 | tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1; | ||
| 110 | log_lebs = tmp / c->leb_size; | ||
| 111 | /* Plus one LEB reserved for commit */ | ||
| 112 | log_lebs += 1; | ||
| 113 | if (c->leb_cnt - min_leb_cnt > 8) { | ||
| 114 | /* And some extra space to allow writes while committing */ | ||
| 115 | log_lebs += 1; | ||
| 116 | min_leb_cnt += 1; | ||
| 117 | } | ||
| 118 | |||
| 119 | max_buds = jnl_lebs - log_lebs; | ||
| 120 | if (max_buds < UBIFS_MIN_BUD_LEBS) | ||
| 121 | max_buds = UBIFS_MIN_BUD_LEBS; | ||
| 122 | |||
| 123 | /* | ||
| 124 | * Orphan nodes are stored in a separate area. One node can store a lot | ||
| 125 | * of orphan inode numbers, but when new orphan comes we just add a new | ||
| 126 | * orphan node. At some point the nodes are consolidated into one | ||
| 127 | * orphan node. | ||
| 128 | */ | ||
| 129 | orph_lebs = UBIFS_MIN_ORPH_LEBS; | ||
| 130 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 131 | if (c->leb_cnt - min_leb_cnt > 1) | ||
| 132 | /* | ||
| 133 | * For debugging purposes it is better to have at least 2 | ||
| 134 | * orphan LEBs, because the orphan subsystem would need to do | ||
| 135 | * consolidations and would be stressed more. | ||
| 136 | */ | ||
| 137 | orph_lebs += 1; | ||
| 138 | #endif | ||
| 139 | |||
| 140 | main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; | ||
| 141 | main_lebs -= orph_lebs; | ||
| 142 | |||
| 143 | lpt_first = UBIFS_LOG_LNUM + log_lebs; | ||
| 144 | c->lsave_cnt = DEFAULT_LSAVE_CNT; | ||
| 145 | c->max_leb_cnt = c->leb_cnt; | ||
| 146 | err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs, | ||
| 147 | &big_lpt); | ||
| 148 | if (err) | ||
| 149 | return err; | ||
| 150 | |||
| 151 | dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first, | ||
| 152 | lpt_first + lpt_lebs - 1); | ||
| 153 | |||
| 154 | main_first = c->leb_cnt - main_lebs; | ||
| 155 | |||
| 156 | /* Create default superblock */ | ||
| 157 | tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); | ||
| 158 | sup = kzalloc(tmp, GFP_KERNEL); | ||
| 159 | if (!sup) | ||
| 160 | return -ENOMEM; | ||
| 161 | |||
| 162 | tmp64 = (uint64_t)max_buds * c->leb_size; | ||
| 163 | if (big_lpt) | ||
| 164 | sup_flags |= UBIFS_FLG_BIGLPT; | ||
| 165 | |||
| 166 | sup->ch.node_type = UBIFS_SB_NODE; | ||
| 167 | sup->key_hash = UBIFS_KEY_HASH_R5; | ||
| 168 | sup->flags = cpu_to_le32(sup_flags); | ||
| 169 | sup->min_io_size = cpu_to_le32(c->min_io_size); | ||
| 170 | sup->leb_size = cpu_to_le32(c->leb_size); | ||
| 171 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); | ||
| 172 | sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt); | ||
| 173 | sup->max_bud_bytes = cpu_to_le64(tmp64); | ||
| 174 | sup->log_lebs = cpu_to_le32(log_lebs); | ||
| 175 | sup->lpt_lebs = cpu_to_le32(lpt_lebs); | ||
| 176 | sup->orph_lebs = cpu_to_le32(orph_lebs); | ||
| 177 | sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); | ||
| 178 | sup->fanout = cpu_to_le32(DEFAULT_FANOUT); | ||
| 179 | sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); | ||
| 180 | sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); | ||
| 181 | sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); | ||
| 182 | sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); | ||
| 183 | |||
| 184 | generate_random_uuid(sup->uuid); | ||
| 185 | |||
| 186 | main_bytes = (uint64_t)main_lebs * c->leb_size; | ||
| 187 | tmp64 = main_bytes * DEFAULT_RP_PERCENT; | ||
| 188 | do_div(tmp64, 100); | ||
| 189 | if (tmp64 > DEFAULT_MAX_RP_SIZE) | ||
| 190 | tmp64 = DEFAULT_MAX_RP_SIZE; | ||
| 191 | sup->rp_size = cpu_to_le64(tmp64); | ||
| 192 | |||
| 193 | err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); | ||
| 194 | kfree(sup); | ||
| 195 | if (err) | ||
| 196 | return err; | ||
| 197 | |||
| 198 | dbg_gen("default superblock created at LEB 0:0"); | ||
| 199 | |||
| 200 | /* Create default master node */ | ||
| 201 | mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); | ||
| 202 | if (!mst) | ||
| 203 | return -ENOMEM; | ||
| 204 | |||
| 205 | mst->ch.node_type = UBIFS_MST_NODE; | ||
| 206 | mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM); | ||
| 207 | mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO); | ||
| 208 | mst->cmt_no = 0; | ||
| 209 | mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); | ||
| 210 | mst->root_offs = 0; | ||
| 211 | tmp = ubifs_idx_node_sz(c, 1); | ||
| 212 | mst->root_len = cpu_to_le32(tmp); | ||
| 213 | mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB); | ||
| 214 | mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); | ||
| 215 | mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size)); | ||
| 216 | mst->index_size = cpu_to_le64(ALIGN(tmp, 8)); | ||
| 217 | mst->lpt_lnum = cpu_to_le32(c->lpt_lnum); | ||
| 218 | mst->lpt_offs = cpu_to_le32(c->lpt_offs); | ||
| 219 | mst->nhead_lnum = cpu_to_le32(c->nhead_lnum); | ||
| 220 | mst->nhead_offs = cpu_to_le32(c->nhead_offs); | ||
| 221 | mst->ltab_lnum = cpu_to_le32(c->ltab_lnum); | ||
| 222 | mst->ltab_offs = cpu_to_le32(c->ltab_offs); | ||
| 223 | mst->lsave_lnum = cpu_to_le32(c->lsave_lnum); | ||
| 224 | mst->lsave_offs = cpu_to_le32(c->lsave_offs); | ||
| 225 | mst->lscan_lnum = cpu_to_le32(main_first); | ||
| 226 | mst->empty_lebs = cpu_to_le32(main_lebs - 2); | ||
| 227 | mst->idx_lebs = cpu_to_le32(1); | ||
| 228 | mst->leb_cnt = cpu_to_le32(c->leb_cnt); | ||
| 229 | |||
| 230 | /* Calculate lprops statistics */ | ||
| 231 | tmp64 = main_bytes; | ||
| 232 | tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); | ||
| 233 | tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); | ||
| 234 | mst->total_free = cpu_to_le64(tmp64); | ||
| 235 | |||
| 236 | tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); | ||
| 237 | ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) - | ||
| 238 | UBIFS_INO_NODE_SZ; | ||
| 239 | tmp64 += ino_waste; | ||
| 240 | tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8); | ||
| 241 | mst->total_dirty = cpu_to_le64(tmp64); | ||
| 242 | |||
| 243 | /* The indexing LEB does not contribute to dark space */ | ||
| 244 | tmp64 = (c->main_lebs - 1) * c->dark_wm; | ||
| 245 | mst->total_dark = cpu_to_le64(tmp64); | ||
| 246 | |||
| 247 | mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); | ||
| 248 | |||
| 249 | err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0, | ||
| 250 | UBI_UNKNOWN); | ||
| 251 | if (err) { | ||
| 252 | kfree(mst); | ||
| 253 | return err; | ||
| 254 | } | ||
| 255 | err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0, | ||
| 256 | UBI_UNKNOWN); | ||
| 257 | kfree(mst); | ||
| 258 | if (err) | ||
| 259 | return err; | ||
| 260 | |||
| 261 | dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM); | ||
| 262 | |||
| 263 | /* Create the root indexing node */ | ||
| 264 | tmp = ubifs_idx_node_sz(c, 1); | ||
| 265 | idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); | ||
| 266 | if (!idx) | ||
| 267 | return -ENOMEM; | ||
| 268 | |||
| 269 | c->key_fmt = UBIFS_SIMPLE_KEY_FMT; | ||
| 270 | c->key_hash = key_r5_hash; | ||
| 271 | |||
| 272 | idx->ch.node_type = UBIFS_IDX_NODE; | ||
| 273 | idx->child_cnt = cpu_to_le16(1); | ||
| 274 | ino_key_init(c, &key, UBIFS_ROOT_INO); | ||
| 275 | br = ubifs_idx_branch(c, idx, 0); | ||
| 276 | key_write_idx(c, &key, &br->key); | ||
| 277 | br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); | ||
| 278 | br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); | ||
| 279 | err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0, | ||
| 280 | UBI_UNKNOWN); | ||
| 281 | kfree(idx); | ||
| 282 | if (err) | ||
| 283 | return err; | ||
| 284 | |||
| 285 | dbg_gen("default root indexing node created LEB %d:0", | ||
| 286 | main_first + DEFAULT_IDX_LEB); | ||
| 287 | |||
| 288 | /* Create default root inode */ | ||
| 289 | tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); | ||
| 290 | ino = kzalloc(tmp, GFP_KERNEL); | ||
| 291 | if (!ino) | ||
| 292 | return -ENOMEM; | ||
| 293 | |||
| 294 | ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO); | ||
| 295 | ino->ch.node_type = UBIFS_INO_NODE; | ||
| 296 | ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); | ||
| 297 | ino->nlink = cpu_to_le32(2); | ||
| 298 | tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); | ||
| 299 | ino->atime_sec = tmp; | ||
| 300 | ino->ctime_sec = tmp; | ||
| 301 | ino->mtime_sec = tmp; | ||
| 302 | ino->atime_nsec = 0; | ||
| 303 | ino->ctime_nsec = 0; | ||
| 304 | ino->mtime_nsec = 0; | ||
| 305 | ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); | ||
| 306 | ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); | ||
| 307 | |||
| 308 | /* Set compression enabled by default */ | ||
| 309 | ino->flags = cpu_to_le32(UBIFS_COMPR_FL); | ||
| 310 | |||
| 311 | err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, | ||
| 312 | main_first + DEFAULT_DATA_LEB, 0, | ||
| 313 | UBI_UNKNOWN); | ||
| 314 | kfree(ino); | ||
| 315 | if (err) | ||
| 316 | return err; | ||
| 317 | |||
| 318 | dbg_gen("root inode created at LEB %d:0", | ||
| 319 | main_first + DEFAULT_DATA_LEB); | ||
| 320 | |||
| 321 | /* | ||
| 322 | * The first node in the log has to be the commit start node. This is | ||
| 323 | * always the case during normal file-system operation. Write a fake | ||
| 324 | * commit start node to the log. | ||
| 325 | */ | ||
| 326 | tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size); | ||
| 327 | cs = kzalloc(tmp, GFP_KERNEL); | ||
| 328 | if (!cs) | ||
| 329 | return -ENOMEM; | ||
| 330 | |||
| 331 | cs->ch.node_type = UBIFS_CS_NODE; | ||
| 332 | err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, | ||
| 333 | 0, UBI_UNKNOWN); | ||
| 334 | kfree(cs); | ||
| 335 | |||
| 336 | ubifs_msg("default file-system created"); | ||
| 337 | return 0; | ||
| 338 | } | ||
| 339 | |||
/**
 * validate_sb - validate superblock node.
 * @c: UBIFS file-system description object
 * @sup: superblock node
 *
 * This function validates superblock node @sup. Since most of data was read
 * from the superblock and stored in @c, the function validates fields in @c
 * instead. Returns zero in case of success and %-EINVAL in case of validation
 * failure.
 */
static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
{
	long long max_bytes;
	int err = 1, min_leb_cnt;

	/*
	 * Each check either sets 'err' to a distinct number (reported by the
	 * "bad superblock, error %d" message at the 'failed' label) or prints
	 * its own detailed message before jumping there.
	 */
	if (!c->key_hash) {
		err = 2;
		goto failed;
	}

	if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
		err = 3;
		goto failed;
	}

	/* Geometry recorded in the superblock must match the real volume */
	if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
		ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
			  le32_to_cpu(sup->min_io_size), c->min_io_size);
		goto failed;
	}

	if (le32_to_cpu(sup->leb_size) != c->leb_size) {
		ubifs_err("LEB size mismatch: %d in superblock, %d real",
			  le32_to_cpu(sup->leb_size), c->leb_size);
		goto failed;
	}

	/* Each dedicated area must have at least its minimum LEB count */
	if (c->log_lebs < UBIFS_MIN_LOG_LEBS ||
	    c->lpt_lebs < UBIFS_MIN_LPT_LEBS ||
	    c->orph_lebs < UBIFS_MIN_ORPH_LEBS ||
	    c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
		err = 4;
		goto failed;
	}

	/*
	 * Calculate minimum allowed amount of main area LEBs. This is very
	 * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we
	 * have just read from the superblock.
	 */
	min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs;
	min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;

	if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
		ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
			  "%d minimum required", c->leb_cnt, c->vi.size,
			  min_leb_cnt);
		goto failed;
	}

	if (c->max_leb_cnt < c->leb_cnt) {
		ubifs_err("max. LEB count %d less than LEB count %d",
			  c->max_leb_cnt, c->leb_cnt);
		goto failed;
	}

	if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
		err = 7;
		goto failed;
	}

	/* Bud space must be sane with respect to the main area size */
	if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
	    c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
		err = 8;
		goto failed;
	}

	if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 ||
	    c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) {
		err = 9;
		goto failed;
	}

	/* An index node with the configured fanout must fit in one LEB */
	if (c->fanout < UBIFS_MIN_FANOUT ||
	    ubifs_idx_node_sz(c, c->fanout) > c->leb_size) {
		err = 10;
		goto failed;
	}

	if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT &&
	    c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS -
	    c->log_lebs - c->lpt_lebs - c->orph_lebs)) {
		err = 11;
		goto failed;
	}

	/* All areas together must account for every LEB of the volume */
	if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs +
	    c->orph_lebs + c->main_lebs != c->leb_cnt) {
		err = 12;
		goto failed;
	}

	if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
		err = 13;
		goto failed;
	}

	/* The reserved pool cannot exceed the whole main area */
	max_bytes = c->main_lebs * (long long)c->leb_size;
	if (c->rp_size < 0 || max_bytes < c->rp_size) {
		err = 14;
		goto failed;
	}

	/* Time granularity is bounded by 1ns..1s */
	if (le32_to_cpu(sup->time_gran) > 1000000000 ||
	    le32_to_cpu(sup->time_gran) < 1) {
		err = 15;
		goto failed;
	}

	return 0;

failed:
	ubifs_err("bad superblock, error %d", err);
	dbg_dump_node(c, sup);
	return -EINVAL;
}
| 466 | |||
| 467 | /** | ||
| 468 | * ubifs_read_sb_node - read superblock node. | ||
| 469 | * @c: UBIFS file-system description object | ||
| 470 | * | ||
| 471 | * This function returns a pointer to the superblock node or a negative error | ||
| 472 | * code. | ||
| 473 | */ | ||
| 474 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) | ||
| 475 | { | ||
| 476 | struct ubifs_sb_node *sup; | ||
| 477 | int err; | ||
| 478 | |||
| 479 | sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS); | ||
| 480 | if (!sup) | ||
| 481 | return ERR_PTR(-ENOMEM); | ||
| 482 | |||
| 483 | err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ, | ||
| 484 | UBIFS_SB_LNUM, 0); | ||
| 485 | if (err) { | ||
| 486 | kfree(sup); | ||
| 487 | return ERR_PTR(err); | ||
| 488 | } | ||
| 489 | |||
| 490 | return sup; | ||
| 491 | } | ||
| 492 | |||
| 493 | /** | ||
| 494 | * ubifs_write_sb_node - write superblock node. | ||
| 495 | * @c: UBIFS file-system description object | ||
| 496 | * @sup: superblock node read with 'ubifs_read_sb_node()' | ||
| 497 | * | ||
| 498 | * This function returns %0 on success and a negative error code on failure. | ||
| 499 | */ | ||
| 500 | int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) | ||
| 501 | { | ||
| 502 | int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); | ||
| 503 | |||
| 504 | ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); | ||
| 505 | return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM); | ||
| 506 | } | ||
| 507 | |||
| 508 | /** | ||
| 509 | * ubifs_read_superblock - read superblock. | ||
| 510 | * @c: UBIFS file-system description object | ||
| 511 | * | ||
| 512 | * This function finds, reads and checks the superblock. If an empty UBI volume | ||
| 513 | * is being mounted, this function creates default superblock. Returns zero in | ||
| 514 | * case of success, and a negative error code in case of failure. | ||
| 515 | */ | ||
| 516 | int ubifs_read_superblock(struct ubifs_info *c) | ||
| 517 | { | ||
| 518 | int err, sup_flags; | ||
| 519 | struct ubifs_sb_node *sup; | ||
| 520 | |||
| 521 | if (c->empty) { | ||
| 522 | err = create_default_filesystem(c); | ||
| 523 | if (err) | ||
| 524 | return err; | ||
| 525 | } | ||
| 526 | |||
| 527 | sup = ubifs_read_sb_node(c); | ||
| 528 | if (IS_ERR(sup)) | ||
| 529 | return PTR_ERR(sup); | ||
| 530 | |||
| 531 | /* | ||
| 532 | * The software supports all previous versions but not future versions, | ||
| 533 | * due to the unavailability of time-travelling equipment. | ||
| 534 | */ | ||
| 535 | c->fmt_version = le32_to_cpu(sup->fmt_version); | ||
| 536 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { | ||
| 537 | ubifs_err("on-flash format version is %d, but software only " | ||
| 538 | "supports up to version %d", c->fmt_version, | ||
| 539 | UBIFS_FORMAT_VERSION); | ||
| 540 | err = -EINVAL; | ||
| 541 | goto out; | ||
| 542 | } | ||
| 543 | |||
| 544 | if (c->fmt_version < 3) { | ||
| 545 | ubifs_err("on-flash format version %d is not supported", | ||
| 546 | c->fmt_version); | ||
| 547 | err = -EINVAL; | ||
| 548 | goto out; | ||
| 549 | } | ||
| 550 | |||
| 551 | switch (sup->key_hash) { | ||
| 552 | case UBIFS_KEY_HASH_R5: | ||
| 553 | c->key_hash = key_r5_hash; | ||
| 554 | c->key_hash_type = UBIFS_KEY_HASH_R5; | ||
| 555 | break; | ||
| 556 | |||
| 557 | case UBIFS_KEY_HASH_TEST: | ||
| 558 | c->key_hash = key_test_hash; | ||
| 559 | c->key_hash_type = UBIFS_KEY_HASH_TEST; | ||
| 560 | break; | ||
| 561 | }; | ||
| 562 | |||
| 563 | c->key_fmt = sup->key_fmt; | ||
| 564 | |||
| 565 | switch (c->key_fmt) { | ||
| 566 | case UBIFS_SIMPLE_KEY_FMT: | ||
| 567 | c->key_len = UBIFS_SK_LEN; | ||
| 568 | break; | ||
| 569 | default: | ||
| 570 | ubifs_err("unsupported key format"); | ||
| 571 | err = -EINVAL; | ||
| 572 | goto out; | ||
| 573 | } | ||
| 574 | |||
| 575 | c->leb_cnt = le32_to_cpu(sup->leb_cnt); | ||
| 576 | c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt); | ||
| 577 | c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes); | ||
| 578 | c->log_lebs = le32_to_cpu(sup->log_lebs); | ||
| 579 | c->lpt_lebs = le32_to_cpu(sup->lpt_lebs); | ||
| 580 | c->orph_lebs = le32_to_cpu(sup->orph_lebs); | ||
| 581 | c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; | ||
| 582 | c->fanout = le32_to_cpu(sup->fanout); | ||
| 583 | c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); | ||
| 584 | c->default_compr = le16_to_cpu(sup->default_compr); | ||
| 585 | c->rp_size = le64_to_cpu(sup->rp_size); | ||
| 586 | c->rp_uid = le32_to_cpu(sup->rp_uid); | ||
| 587 | c->rp_gid = le32_to_cpu(sup->rp_gid); | ||
| 588 | sup_flags = le32_to_cpu(sup->flags); | ||
| 589 | |||
| 590 | c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); | ||
| 591 | |||
| 592 | memcpy(&c->uuid, &sup->uuid, 16); | ||
| 593 | |||
| 594 | c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); | ||
| 595 | |||
| 596 | /* Automatically increase file system size to the maximum size */ | ||
| 597 | c->old_leb_cnt = c->leb_cnt; | ||
| 598 | if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { | ||
| 599 | c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); | ||
| 600 | if (c->vfs_sb->s_flags & MS_RDONLY) | ||
| 601 | dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", | ||
| 602 | c->old_leb_cnt, c->leb_cnt); | ||
| 603 | else { | ||
| 604 | dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", | ||
| 605 | c->old_leb_cnt, c->leb_cnt); | ||
| 606 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); | ||
| 607 | err = ubifs_write_sb_node(c, sup); | ||
| 608 | if (err) | ||
| 609 | goto out; | ||
| 610 | c->old_leb_cnt = c->leb_cnt; | ||
| 611 | } | ||
| 612 | } | ||
| 613 | |||
| 614 | c->log_bytes = (long long)c->log_lebs * c->leb_size; | ||
| 615 | c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1; | ||
| 616 | c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs; | ||
| 617 | c->lpt_last = c->lpt_first + c->lpt_lebs - 1; | ||
| 618 | c->orph_first = c->lpt_last + 1; | ||
| 619 | c->orph_last = c->orph_first + c->orph_lebs - 1; | ||
| 620 | c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; | ||
| 621 | c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; | ||
| 622 | c->main_first = c->leb_cnt - c->main_lebs; | ||
| 623 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | ||
| 624 | |||
| 625 | err = validate_sb(c, sup); | ||
| 626 | out: | ||
| 627 | kfree(sup); | ||
| 628 | return err; | ||
| 629 | } | ||
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c new file mode 100644 index 000000000000..acf5c5fffc60 --- /dev/null +++ b/fs/ubifs/scan.c | |||
| @@ -0,0 +1,362 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
/*
 * This file implements the scan which is a general-purpose function for
 * determining what nodes are in an eraseblock. The scan is used to replay the
 * journal, to do garbage collection, for the TNC in-the-gaps method, and by
 * debugging functions.
 */
| 29 | |||
| 30 | #include "ubifs.h" | ||
| 31 | |||
| 32 | /** | ||
| 33 | * scan_padding_bytes - scan for padding bytes. | ||
| 34 | * @buf: buffer to scan | ||
| 35 | * @len: length of buffer | ||
| 36 | * | ||
| 37 | * This function returns the number of padding bytes on success and | ||
| 38 | * %SCANNED_GARBAGE on failure. | ||
| 39 | */ | ||
| 40 | static int scan_padding_bytes(void *buf, int len) | ||
| 41 | { | ||
| 42 | int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len); | ||
| 43 | uint8_t *p = buf; | ||
| 44 | |||
| 45 | dbg_scan("not a node"); | ||
| 46 | |||
| 47 | while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) | ||
| 48 | pad_len += 1; | ||
| 49 | |||
| 50 | if (!pad_len || (pad_len & 7)) | ||
| 51 | return SCANNED_GARBAGE; | ||
| 52 | |||
| 53 | dbg_scan("%d padding bytes", pad_len); | ||
| 54 | |||
| 55 | return pad_len; | ||
| 56 | } | ||
| 57 | |||
/**
 * ubifs_scan_a_node - scan for a node or padding.
 * @c: UBIFS file-system description object
 * @buf: buffer to scan
 * @len: length of buffer
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @quiet: print no messages
 *
 * This function examines the beginning of @buf and returns a scanning code
 * describing what was found: %SCANNED_EMPTY_SPACE if the magic is all-0xFF
 * (erased flash), %SCANNED_A_NODE for a valid non-padding node, a positive
 * byte count for padding bytes or a valid padding node (the caller should
 * skip that many bytes), and %SCANNED_GARBAGE, %SCANNED_A_CORRUPT_NODE or
 * %SCANNED_A_BAD_PAD_NODE on the corresponding failures.
 */
int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
		      int offs, int quiet)
{
	struct ubifs_ch *ch = buf;
	uint32_t magic;

	magic = le32_to_cpu(ch->magic);

	if (magic == 0xFFFFFFFF) {
		/* All-0xFF magic: unwritten (erased) flash area */
		dbg_scan("hit empty space");
		return SCANNED_EMPTY_SPACE;
	}

	if (magic != UBIFS_NODE_MAGIC)
		/* Not a node - may be a run of raw padding bytes */
		return scan_padding_bytes(buf, len);

	if (len < UBIFS_CH_SZ)
		/* Too short to hold even the common node header */
		return SCANNED_GARBAGE;

	dbg_scan("scanning %s", dbg_ntype(ch->node_type));

	/* Verify CRC and header fields of the node */
	if (ubifs_check_node(c, buf, lnum, offs, quiet))
		return SCANNED_A_CORRUPT_NODE;

	if (ch->node_type == UBIFS_PAD_NODE) {
		struct ubifs_pad_node *pad = buf;
		int pad_len = le32_to_cpu(pad->pad_len);
		int node_len = le32_to_cpu(ch->len);

		/* Validate the padding node */
		if (pad_len < 0 ||
		    offs + node_len + pad_len > c->leb_size) {
			if (!quiet) {
				ubifs_err("bad pad node at LEB %d:%d",
					  lnum, offs);
				dbg_dump_node(c, pad);
			}
			return SCANNED_A_BAD_PAD_NODE;
		}

		/* Make the node pads to 8-byte boundary */
		if ((node_len + pad_len) & 7) {
			if (!quiet) {
				dbg_err("bad padding length %d - %d",
					offs, offs + node_len + pad_len);
			}
			return SCANNED_A_BAD_PAD_NODE;
		}

		dbg_scan("%d bytes padded, offset now %d",
			 pad_len, ALIGN(offs + node_len + pad_len, 8));

		/* Report the whole padded region so the caller skips it */
		return node_len + pad_len;
	}

	return SCANNED_A_NODE;
}
| 126 | |||
/**
 * ubifs_start_scan - create LEB scanning information at start of scan.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number
 * @offs: offset to start at (usually zero)
 * @sbuf: scan buffer (must be c->leb_size)
 *
 * This function reads the whole LEB contents from @offs into @sbuf and
 * allocates and initializes the scanning information structure. Returns the
 * scanning information on success and an error pointer (%-ENOMEM or a read
 * error code) on failure.
 */
struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
					int offs, void *sbuf)
{
	struct ubifs_scan_leb *sleb;
	int err;

	dbg_scan("scan LEB %d:%d", lnum, offs);

	sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS);
	if (!sleb)
		return ERR_PTR(-ENOMEM);

	sleb->lnum = lnum;
	INIT_LIST_HEAD(&sleb->nodes);
	sleb->buf = sbuf;

	err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
	if (err && err != -EBADMSG) {
		ubifs_err("cannot read %d bytes from LEB %d:%d,"
			  " error %d", c->leb_size - offs, lnum, offs, err);
		kfree(sleb);
		return ERR_PTR(err);
	}

	/*
	 * ECC errors (-EBADMSG) are tolerated here: record them in the
	 * scanning information and let the caller decide what to do.
	 */
	if (err == -EBADMSG)
		sleb->ecc = 1;

	return sleb;
}
| 165 | |||
| 166 | /** | ||
| 167 | * ubifs_end_scan - update LEB scanning information at end of scan. | ||
| 168 | * @c: UBIFS file-system description object | ||
| 169 | * @sleb: scanning information | ||
| 170 | * @lnum: logical eraseblock number | ||
| 171 | * @offs: offset to start at (usually zero) | ||
| 172 | * | ||
| 173 | * This function returns %0 on success and a negative error code on failure. | ||
| 174 | */ | ||
| 175 | void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, | ||
| 176 | int lnum, int offs) | ||
| 177 | { | ||
| 178 | lnum = lnum; | ||
| 179 | dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); | ||
| 180 | ubifs_assert(offs % c->min_io_size == 0); | ||
| 181 | |||
| 182 | sleb->endpt = ALIGN(offs, c->min_io_size); | ||
| 183 | } | ||
| 184 | |||
| 185 | /** | ||
| 186 | * ubifs_add_snod - add a scanned node to LEB scanning information. | ||
| 187 | * @c: UBIFS file-system description object | ||
| 188 | * @sleb: scanning information | ||
| 189 | * @buf: buffer containing node | ||
| 190 | * @offs: offset of node on flash | ||
| 191 | * | ||
| 192 | * This function returns %0 on success and a negative error code on failure. | ||
| 193 | */ | ||
| 194 | int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, | ||
| 195 | void *buf, int offs) | ||
| 196 | { | ||
| 197 | struct ubifs_ch *ch = buf; | ||
| 198 | struct ubifs_ino_node *ino = buf; | ||
| 199 | struct ubifs_scan_node *snod; | ||
| 200 | |||
| 201 | snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); | ||
| 202 | if (!snod) | ||
| 203 | return -ENOMEM; | ||
| 204 | |||
| 205 | snod->sqnum = le64_to_cpu(ch->sqnum); | ||
| 206 | snod->type = ch->node_type; | ||
| 207 | snod->offs = offs; | ||
| 208 | snod->len = le32_to_cpu(ch->len); | ||
| 209 | snod->node = buf; | ||
| 210 | |||
| 211 | switch (ch->node_type) { | ||
| 212 | case UBIFS_INO_NODE: | ||
| 213 | case UBIFS_DENT_NODE: | ||
| 214 | case UBIFS_XENT_NODE: | ||
| 215 | case UBIFS_DATA_NODE: | ||
| 216 | case UBIFS_TRUN_NODE: | ||
| 217 | /* | ||
| 218 | * The key is in the same place in all keyed | ||
| 219 | * nodes. | ||
| 220 | */ | ||
| 221 | key_read(c, &ino->key, &snod->key); | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | list_add_tail(&snod->list, &sleb->nodes); | ||
| 225 | sleb->nodes_cnt += 1; | ||
| 226 | return 0; | ||
| 227 | } | ||
| 228 | |||
| 229 | /** | ||
| 230 | * ubifs_scanned_corruption - print information after UBIFS scanned corruption. | ||
| 231 | * @c: UBIFS file-system description object | ||
| 232 | * @lnum: LEB number of corruption | ||
| 233 | * @offs: offset of corruption | ||
| 234 | * @buf: buffer containing corruption | ||
| 235 | */ | ||
| 236 | void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, | ||
| 237 | void *buf) | ||
| 238 | { | ||
| 239 | int len; | ||
| 240 | |||
| 241 | ubifs_err("corrupted data at LEB %d:%d", lnum, offs); | ||
| 242 | if (dbg_failure_mode) | ||
| 243 | return; | ||
| 244 | len = c->leb_size - offs; | ||
| 245 | if (len > 4096) | ||
| 246 | len = 4096; | ||
| 247 | dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); | ||
| 248 | print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); | ||
| 249 | } | ||
| 250 | |||
/**
 * ubifs_scan - scan a logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number
 * @offs: offset to start at (usually zero)
 * @sbuf: scan buffer (must be c->leb_size)
 *
 * This function scans LEB number @lnum and returns complete information about
 * its contents. Returns an error code in case of failure.
 */
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
				  int offs, void *sbuf)
{
	void *buf = sbuf + offs;
	int err, len = c->leb_size - offs;
	struct ubifs_scan_leb *sleb;

	/* Read the LEB into the scan buffer and set up the sleb structure */
	sleb = ubifs_start_scan(c, lnum, offs, sbuf);
	if (IS_ERR(sleb))
		return sleb;

	/* Walk the buffer node by node (nodes are 8-byte aligned) */
	while (len >= 8) {
		struct ubifs_ch *ch = buf;
		int node_len, ret;

		dbg_scan("look at LEB %d:%d (%d bytes left)",
			 lnum, offs, len);

		cond_resched();

		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);

		if (ret > 0) {
			/* Padding bytes or a valid padding node */
			offs += ret;
			buf += ret;
			len -= ret;
			continue;
		}

		if (ret == SCANNED_EMPTY_SPACE)
			/* Empty space is checked later */
			break;

		switch (ret) {
		case SCANNED_GARBAGE:
			dbg_err("garbage");
			goto corrupted;
		case SCANNED_A_NODE:
			break;
		case SCANNED_A_CORRUPT_NODE:
		case SCANNED_A_BAD_PAD_NODE:
			dbg_err("bad node");
			goto corrupted;
		default:
			dbg_err("unknown");
			goto corrupted;
		}

		/* A valid node - record it in the scanning information */
		err = ubifs_add_snod(c, sleb, buf, offs);
		if (err)
			goto error;

		node_len = ALIGN(le32_to_cpu(ch->len), 8);
		offs += node_len;
		buf += node_len;
		len -= node_len;
	}

	/* The used part of the LEB must end at a min. I/O unit boundary */
	if (offs % c->min_io_size)
		goto corrupted;

	ubifs_end_scan(c, sleb, lnum, offs);

	/*
	 * Verify the remainder of the LEB contains only 0xFF (erased flash):
	 * first word-wise for speed, then byte-wise for the tail.
	 */
	for (; len > 4; offs += 4, buf = buf + 4, len -= 4)
		if (*(uint32_t *)buf != 0xffffffff)
			break;
	for (; len; offs++, buf++, len--)
		if (*(uint8_t *)buf != 0xff) {
			ubifs_err("corrupt empty space at LEB %d:%d",
				  lnum, offs);
			goto corrupted;
		}

	return sleb;

corrupted:
	ubifs_scanned_corruption(c, lnum, offs, buf);
	err = -EUCLEAN;
error:
	ubifs_err("LEB %d scanning failed", lnum);
	ubifs_scan_destroy(sleb);
	return ERR_PTR(err);
}
| 345 | |||
| 346 | /** | ||
| 347 | * ubifs_scan_destroy - destroy LEB scanning information. | ||
| 348 | * @sleb: scanning information to free | ||
| 349 | */ | ||
| 350 | void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) | ||
| 351 | { | ||
| 352 | struct ubifs_scan_node *node; | ||
| 353 | struct list_head *head; | ||
| 354 | |||
| 355 | head = &sleb->nodes; | ||
| 356 | while (!list_empty(head)) { | ||
| 357 | node = list_entry(head->next, struct ubifs_scan_node, list); | ||
| 358 | list_del(&node->list); | ||
| 359 | kfree(node); | ||
| 360 | } | ||
| 361 | kfree(sleb); | ||
| 362 | } | ||
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c new file mode 100644 index 000000000000..f248533841a2 --- /dev/null +++ b/fs/ubifs/shrinker.c | |||
| @@ -0,0 +1,322 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements UBIFS shrinker which evicts clean znodes from the TNC | ||
| 25 | * tree when Linux VM needs more RAM. | ||
| 26 | * | ||
| 27 | * We do not implement any LRU lists to find oldest znodes to free because it | ||
| 28 | * would add additional overhead to the file system fast paths. So the shrinker | ||
| 29 | * just walks the TNC tree when searching for znodes to free. | ||
| 30 | * | ||
| 31 | * If the root of a TNC sub-tree is clean and old enough, then the children are | ||
| 32 | * also clean and old enough. So the shrinker walks the TNC in level order and | ||
| 33 | * dumps entire sub-trees. | ||
| 34 | * | ||
| 35 | * The age of znodes is just the time-stamp when they were last looked at. | ||
| 36 | * The current shrinker first tries to evict old znodes, then young ones. | ||
| 37 | * | ||
| 38 | * Since the shrinker is global, it has to protect against races with FS | ||
| 39 | * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. | ||
| 40 | */ | ||
| 41 | |||
| 42 | #include "ubifs.h" | ||
| 43 | |||
| 44 | /* List of all UBIFS file-system instances */ | ||
| 45 | LIST_HEAD(ubifs_infos); | ||
| 46 | |||
| 47 | /* | ||
| 48 | * We number each shrinker run and record the number on the ubifs_info structure | ||
| 49 | * so that we can easily work out which ubifs_info structures have already been | ||
| 50 | * done by the current run. | ||
| 51 | */ | ||
| 52 | static unsigned int shrinker_run_no; | ||
| 53 | |||
| 54 | /* Protects 'ubifs_infos' list */ | ||
| 55 | DEFINE_SPINLOCK(ubifs_infos_lock); | ||
| 56 | |||
| 57 | /* Global clean znode counter (for all mounted UBIFS instances) */ | ||
| 58 | atomic_long_t ubifs_clean_zn_cnt; | ||
| 59 | |||
/**
 * shrink_tnc - shrink TNC tree.
 * @c: UBIFS file-system description object
 * @nr: number of znodes to free
 * @age: the age of znodes to free
 * @contention: if any contention, this is set to %1
 *
 * This function traverses TNC tree and frees clean znodes. It does not free
 * clean znodes which are younger than @age. Returns the number of freed
 * znodes. Must be called with both 'c->umount_mutex' and 'c->tnc_mutex'
 * held.
 */
static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
{
	int total_freed = 0;
	struct ubifs_znode *znode, *zprev;
	int time = get_seconds();

	ubifs_assert(mutex_is_locked(&c->umount_mutex));
	ubifs_assert(mutex_is_locked(&c->tnc_mutex));

	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
		return 0;

	/*
	 * Traverse the TNC tree in levelorder manner, so that it is possible
	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
	 * children are older or of the same age.
	 *
	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
	 * changed only when the 'c->tnc_mutex' is held.
	 */
	zprev = NULL;
	znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
	while (znode && total_freed < nr &&
	       atomic_long_read(&c->clean_zn_cnt) > 0) {
		int freed;

		/*
		 * If the znode is clean, but it is in the 'c->cnext' list, this
		 * means that this znode has just been written to flash as a
		 * part of commit and was marked clean. They will be removed
		 * from the list at end commit. We cannot change the list,
		 * because it is not protected by any mutex (design decision to
		 * make commit really independent and parallel to main I/O). So
		 * we just skip these znodes.
		 *
		 * Note, the 'clean_zn_cnt' counters are not updated until
		 * after the commit, so the UBIFS shrinker does not report
		 * the znodes which are in the 'c->cnext' list as freeable.
		 *
		 * Also note, if the root of a sub-tree is not in 'c->cnext',
		 * then the whole sub-tree is not in 'c->cnext' as well, so it
		 * is safe to dump whole sub-tree.
		 */

		if (znode->cnext) {
			/*
			 * Very soon these znodes will be removed from the list
			 * and become freeable.
			 */
			*contention = 1;
		} else if (!ubifs_zn_dirty(znode) &&
			   abs(time - znode->time) >= age) {
			/* Detach the sub-tree rooted at this znode ... */
			if (znode->parent)
				znode->parent->zbranch[znode->iip].znode = NULL;
			else
				c->zroot.znode = NULL;

			/* ... and free it, updating global and per-FS counts */
			freed = ubifs_destroy_tnc_subtree(znode);
			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
			atomic_long_sub(freed, &c->clean_zn_cnt);
			ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
			total_freed += freed;
			/* Resume the levelorder walk from the previous znode */
			znode = zprev;
		}

		if (unlikely(!c->zroot.znode))
			break;

		zprev = znode;
		znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
		cond_resched();
	}

	return total_freed;
}
| 146 | |||
/**
 * shrink_tnc_trees - shrink UBIFS TNC trees.
 * @nr: number of znodes to free
 * @age: the age of znodes to free
 * @contention: if any contention, this is set to %1
 *
 * This function walks the list of mounted UBIFS file-systems and frees clean
 * znodes which are older than @age, until at least @nr znodes are freed.
 * Returns the number of freed znodes.
 */
static int shrink_tnc_trees(int nr, int age, int *contention)
{
	struct ubifs_info *c;
	struct list_head *p;
	unsigned int run_no;
	int freed = 0;

	spin_lock(&ubifs_infos_lock);
	/* Pick a non-zero run number (0 marks "not visited") */
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);
	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
	p = ubifs_infos.next;
	while (p != &ubifs_infos) {
		c = list_entry(p, struct ubifs_info, infos_list);
		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (c->shrinker_run_no == run_no)
			break;
		if (!mutex_trylock(&c->umount_mutex)) {
			/* Some un-mount is in progress, try next FS */
			*contention = 1;
			p = p->next;
			continue;
		}
		/*
		 * We're holding 'c->umount_mutex', so the file-system won't go
		 * away.
		 */
		if (!mutex_trylock(&c->tnc_mutex)) {
			mutex_unlock(&c->umount_mutex);
			*contention = 1;
			p = p->next;
			continue;
		}
		spin_unlock(&ubifs_infos_lock);
		/*
		 * OK, now we have TNC locked, the file-system cannot go away -
		 * it is safe to reap the cache.
		 */
		c->shrinker_run_no = run_no;
		freed += shrink_tnc(c, nr, age, contention);
		mutex_unlock(&c->tnc_mutex);
		spin_lock(&ubifs_infos_lock);
		/* Get the next list element before we move this one */
		p = p->next;
		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_del(&c->infos_list);
		list_add_tail(&c->infos_list, &ubifs_infos);
		mutex_unlock(&c->umount_mutex);
		if (freed >= nr)
			break;
	}
	spin_unlock(&ubifs_infos_lock);
	return freed;
}
| 218 | |||
/**
 * kick_a_thread - kick a background thread to start commit.
 *
 * This function kicks a background thread to start background commit. Returns
 * %-1 if a thread was kicked or there is another reason to assume the memory
 * will soon be freed or become freeable. If there are no dirty znodes, returns
 * %0.
 */
static int kick_a_thread(void)
{
	int i;
	struct ubifs_info *c;

	/*
	 * Iterate over all mounted UBIFS file-systems and find out if there is
	 * already an ongoing commit operation there. If no, then iterate for
	 * the second time and initiate background commit.
	 */
	spin_lock(&ubifs_infos_lock);
	for (i = 0; i < 2; i++) {
		/* Pass 0 only probes; pass 1 actually requests a commit */
		list_for_each_entry(c, &ubifs_infos, infos_list) {
			long dirty_zn_cnt;

			if (!mutex_trylock(&c->umount_mutex)) {
				/*
				 * Some un-mount is in progress, it will
				 * certainly free memory, so just return.
				 */
				spin_unlock(&ubifs_infos_lock);
				return -1;
			}

			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);

			/* Nothing to commit, or commit is impossible here */
			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
			    c->ro_media) {
				mutex_unlock(&c->umount_mutex);
				continue;
			}

			/* A commit is already running or queued on this FS */
			if (c->cmt_state != COMMIT_RESTING) {
				spin_unlock(&ubifs_infos_lock);
				mutex_unlock(&c->umount_mutex);
				return -1;
			}

			if (i == 1) {
				/* Rotate for fairness, then kick the commit */
				list_del(&c->infos_list);
				list_add_tail(&c->infos_list, &ubifs_infos);
				spin_unlock(&ubifs_infos_lock);

				ubifs_request_bg_commit(c);
				mutex_unlock(&c->umount_mutex);
				return -1;
			}
			mutex_unlock(&c->umount_mutex);
		}
	}
	spin_unlock(&ubifs_infos_lock);

	return 0;
}
| 281 | |||
| 282 | int ubifs_shrinker(int nr, gfp_t gfp_mask) | ||
| 283 | { | ||
| 284 | int freed, contention = 0; | ||
| 285 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | ||
| 286 | |||
| 287 | if (nr == 0) | ||
| 288 | return clean_zn_cnt; | ||
| 289 | |||
| 290 | if (!clean_zn_cnt) { | ||
| 291 | /* | ||
| 292 | * No clean znodes, nothing to reap. All we can do in this case | ||
| 293 | * is to kick background threads to start commit, which will | ||
| 294 | * probably make clean znodes which, in turn, will be freeable. | ||
| 295 | * And we return -1 which means will make VM call us again | ||
| 296 | * later. | ||
| 297 | */ | ||
| 298 | dbg_tnc("no clean znodes, kick a thread"); | ||
| 299 | return kick_a_thread(); | ||
| 300 | } | ||
| 301 | |||
| 302 | freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); | ||
| 303 | if (freed >= nr) | ||
| 304 | goto out; | ||
| 305 | |||
| 306 | dbg_tnc("not enough old znodes, try to free young ones"); | ||
| 307 | freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); | ||
| 308 | if (freed >= nr) | ||
| 309 | goto out; | ||
| 310 | |||
| 311 | dbg_tnc("not enough young znodes, free all"); | ||
| 312 | freed += shrink_tnc_trees(nr - freed, 0, &contention); | ||
| 313 | |||
| 314 | if (!freed && contention) { | ||
| 315 | dbg_tnc("freed nothing, but contention"); | ||
| 316 | return -1; | ||
| 317 | } | ||
| 318 | |||
| 319 | out: | ||
| 320 | dbg_tnc("%d znodes were freed, requested %d", freed, nr); | ||
| 321 | return freed; | ||
| 322 | } | ||
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c new file mode 100644 index 000000000000..00eb9c68ad03 --- /dev/null +++ b/fs/ubifs/super.c | |||
| @@ -0,0 +1,1951 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements UBIFS initialization and VFS superblock operations. Some | ||
| 25 | * initialization stuff which is rather large and complex is placed at | ||
| 26 | * corresponding subsystems, but most of it is here. | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <linux/init.h> | ||
| 30 | #include <linux/slab.h> | ||
| 31 | #include <linux/module.h> | ||
| 32 | #include <linux/ctype.h> | ||
| 33 | #include <linux/random.h> | ||
| 34 | #include <linux/kthread.h> | ||
| 35 | #include <linux/parser.h> | ||
| 36 | #include <linux/seq_file.h> | ||
| 37 | #include <linux/mount.h> | ||
| 38 | #include "ubifs.h" | ||
| 39 | |||
| 40 | /* Slab cache for UBIFS inodes */ | ||
| 41 | struct kmem_cache *ubifs_inode_slab; | ||
| 42 | |||
| 43 | /* UBIFS TNC shrinker description */ | ||
| 44 | static struct shrinker ubifs_shrinker_info = { | ||
| 45 | .shrink = ubifs_shrinker, | ||
| 46 | .seeks = DEFAULT_SEEKS, | ||
| 47 | }; | ||
| 48 | |||
/**
 * validate_inode - validate inode.
 * @c: UBIFS file-system description object
 * @inode: the inode to validate
 *
 * This is a helper function for 'ubifs_iget()' which validates various fields
 * of a newly built inode to make sure they contain sane values and prevent
 * possible vulnerabilities. Returns zero if the inode is all right and
 * a non-zero error code if not. The positive codes 1-5 identify which check
 * failed; other return values come from 'dbg_check_dir_size()'.
 */
static int validate_inode(struct ubifs_info *c, const struct inode *inode)
{
	int err;
	const struct ubifs_inode *ui = ubifs_inode(inode);

	/* 1: size exceeds what this file-system instance supports */
	if (inode->i_size > c->max_inode_sz) {
		ubifs_err("inode is too large (%lld)",
			  (long long)inode->i_size);
		return 1;
	}

	/* 2: compression type is out of the known range */
	if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
		ubifs_err("unknown compression type %d", ui->compr_type);
		return 2;
	}

	/* 3: extended attribute accounting is implausibly large */
	if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX)
		return 3;

	/* 4: inline data length is out of range */
	if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
		return 4;

	/* 5: only regular-file inodes may carry the xattr flag */
	if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG)
		return 5;

	/* Known type but compressor not built in: warn, do not reject */
	if (!ubifs_compr_present(ui->compr_type)) {
		ubifs_warn("inode %lu uses '%s' compression, but it was not "
			   "compiled in", inode->i_ino,
			   ubifs_compr_name(ui->compr_type));
	}

	err = dbg_check_dir_size(c, inode);
	return err;
}
| 93 | |||
| 94 | struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) | ||
| 95 | { | ||
| 96 | int err; | ||
| 97 | union ubifs_key key; | ||
| 98 | struct ubifs_ino_node *ino; | ||
| 99 | struct ubifs_info *c = sb->s_fs_info; | ||
| 100 | struct inode *inode; | ||
| 101 | struct ubifs_inode *ui; | ||
| 102 | |||
| 103 | dbg_gen("inode %lu", inum); | ||
| 104 | |||
| 105 | inode = iget_locked(sb, inum); | ||
| 106 | if (!inode) | ||
| 107 | return ERR_PTR(-ENOMEM); | ||
| 108 | if (!(inode->i_state & I_NEW)) | ||
| 109 | return inode; | ||
| 110 | ui = ubifs_inode(inode); | ||
| 111 | |||
| 112 | ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); | ||
| 113 | if (!ino) { | ||
| 114 | err = -ENOMEM; | ||
| 115 | goto out; | ||
| 116 | } | ||
| 117 | |||
| 118 | ino_key_init(c, &key, inode->i_ino); | ||
| 119 | |||
| 120 | err = ubifs_tnc_lookup(c, &key, ino); | ||
| 121 | if (err) | ||
| 122 | goto out_ino; | ||
| 123 | |||
| 124 | inode->i_flags |= (S_NOCMTIME | S_NOATIME); | ||
| 125 | inode->i_nlink = le32_to_cpu(ino->nlink); | ||
| 126 | inode->i_uid = le32_to_cpu(ino->uid); | ||
| 127 | inode->i_gid = le32_to_cpu(ino->gid); | ||
| 128 | inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); | ||
| 129 | inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); | ||
| 130 | inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); | ||
| 131 | inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); | ||
| 132 | inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); | ||
| 133 | inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); | ||
| 134 | inode->i_mode = le32_to_cpu(ino->mode); | ||
| 135 | inode->i_size = le64_to_cpu(ino->size); | ||
| 136 | |||
| 137 | ui->data_len = le32_to_cpu(ino->data_len); | ||
| 138 | ui->flags = le32_to_cpu(ino->flags); | ||
| 139 | ui->compr_type = le16_to_cpu(ino->compr_type); | ||
| 140 | ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); | ||
| 141 | ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt); | ||
| 142 | ui->xattr_size = le32_to_cpu(ino->xattr_size); | ||
| 143 | ui->xattr_names = le32_to_cpu(ino->xattr_names); | ||
| 144 | ui->synced_i_size = ui->ui_size = inode->i_size; | ||
| 145 | |||
| 146 | ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0; | ||
| 147 | |||
| 148 | err = validate_inode(c, inode); | ||
| 149 | if (err) | ||
| 150 | goto out_invalid; | ||
| 151 | |||
| 152 | /* Disable readahead */ | ||
| 153 | inode->i_mapping->backing_dev_info = &c->bdi; | ||
| 154 | |||
| 155 | switch (inode->i_mode & S_IFMT) { | ||
| 156 | case S_IFREG: | ||
| 157 | inode->i_mapping->a_ops = &ubifs_file_address_operations; | ||
| 158 | inode->i_op = &ubifs_file_inode_operations; | ||
| 159 | inode->i_fop = &ubifs_file_operations; | ||
| 160 | if (ui->xattr) { | ||
| 161 | ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); | ||
| 162 | if (!ui->data) { | ||
| 163 | err = -ENOMEM; | ||
| 164 | goto out_ino; | ||
| 165 | } | ||
| 166 | memcpy(ui->data, ino->data, ui->data_len); | ||
| 167 | ((char *)ui->data)[ui->data_len] = '\0'; | ||
| 168 | } else if (ui->data_len != 0) { | ||
| 169 | err = 10; | ||
| 170 | goto out_invalid; | ||
| 171 | } | ||
| 172 | break; | ||
| 173 | case S_IFDIR: | ||
| 174 | inode->i_op = &ubifs_dir_inode_operations; | ||
| 175 | inode->i_fop = &ubifs_dir_operations; | ||
| 176 | if (ui->data_len != 0) { | ||
| 177 | err = 11; | ||
| 178 | goto out_invalid; | ||
| 179 | } | ||
| 180 | break; | ||
| 181 | case S_IFLNK: | ||
| 182 | inode->i_op = &ubifs_symlink_inode_operations; | ||
| 183 | if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { | ||
| 184 | err = 12; | ||
| 185 | goto out_invalid; | ||
| 186 | } | ||
| 187 | ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); | ||
| 188 | if (!ui->data) { | ||
| 189 | err = -ENOMEM; | ||
| 190 | goto out_ino; | ||
| 191 | } | ||
| 192 | memcpy(ui->data, ino->data, ui->data_len); | ||
| 193 | ((char *)ui->data)[ui->data_len] = '\0'; | ||
| 194 | break; | ||
| 195 | case S_IFBLK: | ||
| 196 | case S_IFCHR: | ||
| 197 | { | ||
| 198 | dev_t rdev; | ||
| 199 | union ubifs_dev_desc *dev; | ||
| 200 | |||
| 201 | ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); | ||
| 202 | if (!ui->data) { | ||
| 203 | err = -ENOMEM; | ||
| 204 | goto out_ino; | ||
| 205 | } | ||
| 206 | |||
| 207 | dev = (union ubifs_dev_desc *)ino->data; | ||
| 208 | if (ui->data_len == sizeof(dev->new)) | ||
| 209 | rdev = new_decode_dev(le32_to_cpu(dev->new)); | ||
| 210 | else if (ui->data_len == sizeof(dev->huge)) | ||
| 211 | rdev = huge_decode_dev(le64_to_cpu(dev->huge)); | ||
| 212 | else { | ||
| 213 | err = 13; | ||
| 214 | goto out_invalid; | ||
| 215 | } | ||
| 216 | memcpy(ui->data, ino->data, ui->data_len); | ||
| 217 | inode->i_op = &ubifs_file_inode_operations; | ||
| 218 | init_special_inode(inode, inode->i_mode, rdev); | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | case S_IFSOCK: | ||
| 222 | case S_IFIFO: | ||
| 223 | inode->i_op = &ubifs_file_inode_operations; | ||
| 224 | init_special_inode(inode, inode->i_mode, 0); | ||
| 225 | if (ui->data_len != 0) { | ||
| 226 | err = 14; | ||
| 227 | goto out_invalid; | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | default: | ||
| 231 | err = 15; | ||
| 232 | goto out_invalid; | ||
| 233 | } | ||
| 234 | |||
| 235 | kfree(ino); | ||
| 236 | ubifs_set_inode_flags(inode); | ||
| 237 | unlock_new_inode(inode); | ||
| 238 | return inode; | ||
| 239 | |||
| 240 | out_invalid: | ||
| 241 | ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); | ||
| 242 | dbg_dump_node(c, ino); | ||
| 243 | dbg_dump_inode(c, inode); | ||
| 244 | err = -EINVAL; | ||
| 245 | out_ino: | ||
| 246 | kfree(ino); | ||
| 247 | out: | ||
| 248 | ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); | ||
| 249 | iget_failed(inode); | ||
| 250 | return ERR_PTR(err); | ||
| 251 | } | ||
| 252 | |||
| 253 | static struct inode *ubifs_alloc_inode(struct super_block *sb) | ||
| 254 | { | ||
| 255 | struct ubifs_inode *ui; | ||
| 256 | |||
| 257 | ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); | ||
| 258 | if (!ui) | ||
| 259 | return NULL; | ||
| 260 | |||
| 261 | memset((void *)ui + sizeof(struct inode), 0, | ||
| 262 | sizeof(struct ubifs_inode) - sizeof(struct inode)); | ||
| 263 | mutex_init(&ui->ui_mutex); | ||
| 264 | spin_lock_init(&ui->ui_lock); | ||
| 265 | return &ui->vfs_inode; | ||
| 266 | }; | ||
| 267 | |||
| 268 | static void ubifs_destroy_inode(struct inode *inode) | ||
| 269 | { | ||
| 270 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 271 | |||
| 272 | kfree(ui->data); | ||
| 273 | kmem_cache_free(ubifs_inode_slab, inode); | ||
| 274 | } | ||
| 275 | |||
/*
 * Write a dirty inode to the journal.
 *
 * Note, Linux write-back code calls this without 'i_mutex'.
 * Returns zero on success and a negative error code on failure.
 */
static int ubifs_write_inode(struct inode *inode, int wait)
{
	int err;
	struct ubifs_info *c = inode->i_sb->s_fs_info;
	struct ubifs_inode *ui = ubifs_inode(inode);

	/* Extended attribute inodes must never reach this path */
	ubifs_assert(!ui->xattr);
	if (is_bad_inode(inode))
		return 0;

	mutex_lock(&ui->ui_mutex);
	/*
	 * Due to races between write-back forced by budgeting
	 * (see 'sync_some_inodes()') and pdflush write-back, the inode may
	 * have already been synchronized, do not do this again. This might
	 * also happen if it was synchronized in an VFS operation, e.g.
	 * 'ubifs_link()'.
	 */
	if (!ui->dirty) {
		mutex_unlock(&ui->ui_mutex);
		return 0;
	}

	dbg_gen("inode %lu", inode->i_ino);
	err = ubifs_jnl_write_inode(c, inode, 0);
	if (err)
		ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);

	/*
	 * Note, the dirty flag is cleared and the inode budget released
	 * even if the journal write failed.
	 */
	ui->dirty = 0;
	mutex_unlock(&ui->ui_mutex);
	ubifs_release_dirty_inode_budget(c, ui);
	return err;
}
| 312 | |||
/*
 * Delete an inode whose last link and last reference are gone (the asserts
 * below enforce i_count == 0 and i_nlink == 0): write the deletion inode to
 * the journal, then clear the VFS inode.
 */
static void ubifs_delete_inode(struct inode *inode)
{
	int err;
	struct ubifs_info *c = inode->i_sb->s_fs_info;

	if (ubifs_inode(inode)->xattr)
		/*
		 * Extended attribute inode deletions are fully handled in
		 * 'ubifs_removexattr()'. These inodes are special and have
		 * limited usage, so there is nothing to do here.
		 */
		goto out;

	dbg_gen("inode %lu", inode->i_ino);
	ubifs_assert(!atomic_read(&inode->i_count));
	ubifs_assert(inode->i_nlink == 0);

	/* Drop all cached pages before touching the media */
	truncate_inode_pages(&inode->i_data, 0);
	if (is_bad_inode(inode))
		goto out;

	/* Size zero plus the 'deletion' flag tells the journal to delete */
	ubifs_inode(inode)->ui_size = inode->i_size = 0;
	err = ubifs_jnl_write_inode(c, inode, 1);
	if (err)
		/*
		 * Worst case we have a lost orphan inode wasting space, so a
		 * simple error message is ok here.
		 */
		ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
out:
	clear_inode(inode);
}
| 345 | |||
| 346 | static void ubifs_dirty_inode(struct inode *inode) | ||
| 347 | { | ||
| 348 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 349 | |||
| 350 | ubifs_assert(mutex_is_locked(&ui->ui_mutex)); | ||
| 351 | if (!ui->dirty) { | ||
| 352 | ui->dirty = 1; | ||
| 353 | dbg_gen("inode %lu", inode->i_ino); | ||
| 354 | } | ||
| 355 | } | ||
| 356 | |||
| 357 | static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
| 358 | { | ||
| 359 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | ||
| 360 | unsigned long long free; | ||
| 361 | |||
| 362 | free = ubifs_budg_get_free_space(c); | ||
| 363 | dbg_gen("free space %lld bytes (%lld blocks)", | ||
| 364 | free, free >> UBIFS_BLOCK_SHIFT); | ||
| 365 | |||
| 366 | buf->f_type = UBIFS_SUPER_MAGIC; | ||
| 367 | buf->f_bsize = UBIFS_BLOCK_SIZE; | ||
| 368 | buf->f_blocks = c->block_cnt; | ||
| 369 | buf->f_bfree = free >> UBIFS_BLOCK_SHIFT; | ||
| 370 | if (free > c->report_rp_size) | ||
| 371 | buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT; | ||
| 372 | else | ||
| 373 | buf->f_bavail = 0; | ||
| 374 | buf->f_files = 0; | ||
| 375 | buf->f_ffree = 0; | ||
| 376 | buf->f_namelen = UBIFS_MAX_NLEN; | ||
| 377 | |||
| 378 | return 0; | ||
| 379 | } | ||
| 380 | |||
| 381 | static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) | ||
| 382 | { | ||
| 383 | struct ubifs_info *c = mnt->mnt_sb->s_fs_info; | ||
| 384 | |||
| 385 | if (c->mount_opts.unmount_mode == 2) | ||
| 386 | seq_printf(s, ",fast_unmount"); | ||
| 387 | else if (c->mount_opts.unmount_mode == 1) | ||
| 388 | seq_printf(s, ",norm_unmount"); | ||
| 389 | |||
| 390 | return 0; | ||
| 391 | } | ||
| 392 | |||
| 393 | static int ubifs_sync_fs(struct super_block *sb, int wait) | ||
| 394 | { | ||
| 395 | struct ubifs_info *c = sb->s_fs_info; | ||
| 396 | int i, ret = 0, err; | ||
| 397 | |||
| 398 | if (c->jheads) | ||
| 399 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 400 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); | ||
| 401 | if (err && !ret) | ||
| 402 | ret = err; | ||
| 403 | } | ||
| 404 | /* | ||
| 405 | * We ought to call sync for c->ubi but it does not have one. If it had | ||
| 406 | * it would in turn call mtd->sync, however mtd operations are | ||
| 407 | * synchronous anyway, so we don't lose any sleep here. | ||
| 408 | */ | ||
| 409 | return ret; | ||
| 410 | } | ||
| 411 | |||
/**
 * init_constants_early - initialize UBIFS constants.
 * @c: UBIFS file-system description object
 *
 * This function initialize UBIFS constants which do not need the superblock to
 * be read. It also checks that the UBI volume satisfies basic UBIFS
 * requirements. Returns zero in case of success and a negative error code in
 * case of failure.
 */
static int init_constants_early(struct ubifs_info *c)
{
	/* Any of the following conditions forces read-only operation */
	if (c->vi.corrupted) {
		ubifs_warn("UBI volume is corrupted - read-only mode");
		c->ro_media = 1;
	}

	if (c->di.ro_mode) {
		ubifs_msg("read-only UBI device");
		c->ro_media = 1;
	}

	if (c->vi.vol_type == UBI_STATIC_VOLUME) {
		ubifs_msg("static UBI volume - read-only mode");
		c->ro_media = 1;
	}

	c->leb_cnt = c->vi.size;
	c->leb_size = c->vi.usable_leb_size;
	c->half_leb_size = c->leb_size / 2;
	c->min_io_size = c->di.min_io_size;
	/* fls(x) - 1 == log2(x) for powers of 2 (validated below) */
	c->min_io_shift = fls(c->min_io_size) - 1;

	if (c->leb_size < UBIFS_MIN_LEB_SZ) {
		ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
			  c->leb_size, UBIFS_MIN_LEB_SZ);
		return -EINVAL;
	}

	if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
		ubifs_err("too few LEBs (%d), min. is %d",
			  c->leb_cnt, UBIFS_MIN_LEB_CNT);
		return -EINVAL;
	}

	if (!is_power_of_2(c->min_io_size)) {
		ubifs_err("bad min. I/O size %d", c->min_io_size);
		return -EINVAL;
	}

	/*
	 * UBIFS aligns all node to 8-byte boundary, so to make function in
	 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
	 * less than 8.
	 */
	if (c->min_io_size < 8) {
		c->min_io_size = 8;
		c->min_io_shift = 3;
	}

	c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
	c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size);

	/*
	 * Initialize node length ranges which are mostly needed for node
	 * length validation. Fixed-size node types get an exact '.len';
	 * variable-size types get '.min_len'/'.max_len' bounds.
	 */
	c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ;
	c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ;
	c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ;
	c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ;
	c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
	c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ;

	c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ;
	c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ;
	/* An orphan node carries at least one 64-bit inode number */
	c->ranges[UBIFS_ORPH_NODE].min_len =
				UBIFS_ORPH_NODE_SZ + sizeof(__le64);
	c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size;
	c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ;
	c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ;
	c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ;
	c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ;
	c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ;
	c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ;
	/*
	 * Minimum indexing node size is amended later when superblock is
	 * read and the key length is known.
	 */
	c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ;
	/*
	 * Maximum indexing node size is amended later when superblock is
	 * read and the fanout is known.
	 */
	c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;

	/*
	 * Initialize dead and dark LEB space watermarks.
	 *
	 * Dead space is the space which cannot be used. Its watermark is
	 * equivalent to min. I/O unit or minimum node size if it is greater
	 * then min. I/O unit.
	 *
	 * Dark space is the space which might be used, or might not, depending
	 * on which node should be written to the LEB. Its watermark is
	 * equivalent to maximum UBIFS node size.
	 */
	c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
	c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);

	return 0;
}
| 523 | |||
/**
 * bud_wbuf_callback - bud LEB write-buffer synchronization call-back.
 * @c: UBIFS file-system description object
 * @lnum: LEB the write-buffer was synchronized to
 * @free: how many free bytes left in this LEB
 * @pad: how many bytes were padded
 *
 * The I/O unit invokes this call-back whenever a write-buffer has been
 * synchronized, so that the space accounting in bud logical eraseblocks
 * stays correct. Returns zero in case of success and a negative error code
 * in case of failure.
 *
 * Logically this function belongs to the journal, but it lives here so
 * that it can remain static.
 */
static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
{
	int err;

	err = ubifs_update_one_lp(c, lnum, free, pad, 0, 0);
	return err;
}
| 543 | |||
/**
 * init_constants_late - initialize UBIFS constants.
 * @c: UBIFS file-system description object
 *
 * This is a helper function which initializes various UBIFS constants after
 * the superblock has been read. It also checks various UBIFS parameters and
 * makes sure they are all right. Returns zero in case of success and a
 * negative error code in case of failure.
 */
static int init_constants_late(struct ubifs_info *c)
{
	int tmp, err;
	uint64_t tmp64;

	c->main_bytes = (long long)c->main_lebs * c->leb_size;
	c->max_znode_sz = sizeof(struct ubifs_znode) +
				c->fanout * sizeof(struct ubifs_zbranch);

	/* Smallest index node: one branch */
	tmp = ubifs_idx_node_sz(c, 1);
	c->ranges[UBIFS_IDX_NODE].min_len = tmp;
	c->min_idx_node_sz = ALIGN(tmp, 8);

	/* Largest index node: full fanout */
	tmp = ubifs_idx_node_sz(c, c->fanout);
	c->ranges[UBIFS_IDX_NODE].max_len = tmp;
	c->max_idx_node_sz = ALIGN(tmp, 8);

	/* Make sure LEB size is large enough to fit full commit */
	tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
	tmp = ALIGN(tmp, c->min_io_size);
	if (tmp > c->leb_size) {
		dbg_err("too small LEB size %d, at least %d needed",
			c->leb_size, tmp);
		return -EINVAL;
	}

	/*
	 * Make sure that the log is large enough to fit reference nodes for
	 * all buds plus one reserved LEB.
	 */
	/* max_bud_cnt = ceil(max_bud_bytes / leb_size) */
	tmp64 = c->max_bud_bytes;
	tmp = do_div(tmp64, c->leb_size);
	c->max_bud_cnt = tmp64 + !!tmp;
	/* LEBs needed for the reference nodes, rounded up, plus one spare */
	tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
	tmp /= c->leb_size;
	tmp += 1;
	if (c->log_lebs < tmp) {
		dbg_err("too small log %d LEBs, required min. %d LEBs",
			c->log_lebs, tmp);
		return -EINVAL;
	}

	/*
	 * When budgeting we assume worst-case scenarios when the pages are not
	 * be compressed and direntries are of the maximum size.
	 *
	 * Note, data, which may be stored in inodes is budgeted separately, so
	 * it is not included into 'c->inode_budget'.
	 */
	c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
	c->inode_budget = UBIFS_INO_NODE_SZ;
	c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;

	/*
	 * When the amount of flash space used by buds becomes
	 * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit.
	 * The writers are unblocked when the commit is finished. To avoid
	 * writers to be blocked UBIFS initiates background commit in advance,
	 * when number of bud bytes becomes above the limit defined below
	 * (13/16, i.e. ~81% of 'c->max_bud_bytes').
	 */
	c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4;

	/*
	 * Ensure minimum journal size. All the bytes in the journal heads are
	 * considered to be used, when calculating the current journal usage.
	 * Consequently, if the journal is too small, UBIFS will treat it as
	 * always full.
	 */
	tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1;
	if (c->bg_bud_bytes < tmp64)
		c->bg_bud_bytes = tmp64;
	if (c->max_bud_bytes < tmp64 + c->leb_size)
		c->max_bud_bytes = tmp64 + c->leb_size;

	err = ubifs_calc_lpt_geom(c);
	if (err)
		return err;

	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);

	/*
	 * Calculate total amount of FS blocks. This number is not used
	 * internally because it does not make much sense for UBIFS, but it is
	 * necessary to report something for the 'statfs()' call.
	 *
	 * Subtract the LEB reserved for GC and the LEB which is reserved for
	 * deletions.
	 *
	 * Review 'ubifs_calc_available()' if changing this calculation.
	 */
	tmp64 = c->main_lebs - 2;
	tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
	tmp64 = ubifs_reported_space(c, tmp64);
	c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;

	return 0;
}
| 650 | |||
| 651 | /** | ||
| 652 | * take_gc_lnum - reserve GC LEB. | ||
| 653 | * @c: UBIFS file-system description object | ||
| 654 | * | ||
| 655 | * This function ensures that the LEB reserved for garbage collection is | ||
| 656 | * unmapped and is marked as "taken" in lprops. We also have to set free space | ||
| 657 | * to LEB size and dirty space to zero, because lprops may contain out-of-date | ||
| 658 | * information if the file-system was un-mounted before it has been committed. | ||
| 659 | * This function returns zero in case of success and a negative error code in | ||
| 660 | * case of failure. | ||
| 661 | */ | ||
| 662 | static int take_gc_lnum(struct ubifs_info *c) | ||
| 663 | { | ||
| 664 | int err; | ||
| 665 | |||
| 666 | if (c->gc_lnum == -1) { | ||
| 667 | ubifs_err("no LEB for GC"); | ||
| 668 | return -EINVAL; | ||
| 669 | } | ||
| 670 | |||
| 671 | err = ubifs_leb_unmap(c, c->gc_lnum); | ||
| 672 | if (err) | ||
| 673 | return err; | ||
| 674 | |||
| 675 | /* And we have to tell lprops that this LEB is taken */ | ||
| 676 | err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, | ||
| 677 | LPROPS_TAKEN, 0, 0); | ||
| 678 | return err; | ||
| 679 | } | ||
| 680 | |||
| 681 | /** | ||
| 682 | * alloc_wbufs - allocate write-buffers. | ||
| 683 | * @c: UBIFS file-system description object | ||
| 684 | * | ||
| 685 | * This helper function allocates and initializes UBIFS write-buffers. Returns | ||
| 686 | * zero in case of success and %-ENOMEM in case of failure. | ||
| 687 | */ | ||
| 688 | static int alloc_wbufs(struct ubifs_info *c) | ||
| 689 | { | ||
| 690 | int i, err; | ||
| 691 | |||
| 692 | c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), | ||
| 693 | GFP_KERNEL); | ||
| 694 | if (!c->jheads) | ||
| 695 | return -ENOMEM; | ||
| 696 | |||
| 697 | /* Initialize journal heads */ | ||
| 698 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 699 | INIT_LIST_HEAD(&c->jheads[i].buds_list); | ||
| 700 | err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); | ||
| 701 | if (err) | ||
| 702 | return err; | ||
| 703 | |||
| 704 | c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; | ||
| 705 | c->jheads[i].wbuf.jhead = i; | ||
| 706 | } | ||
| 707 | |||
| 708 | c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; | ||
| 709 | /* | ||
| 710 | * Garbage Collector head likely contains long-term data and | ||
| 711 | * does not need to be synchronized by timer. | ||
| 712 | */ | ||
| 713 | c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; | ||
| 714 | c->jheads[GCHD].wbuf.timeout = 0; | ||
| 715 | |||
| 716 | return 0; | ||
| 717 | } | ||
| 718 | |||
| 719 | /** | ||
| 720 | * free_wbufs - free write-buffers. | ||
| 721 | * @c: UBIFS file-system description object | ||
| 722 | */ | ||
| 723 | static void free_wbufs(struct ubifs_info *c) | ||
| 724 | { | ||
| 725 | int i; | ||
| 726 | |||
| 727 | if (c->jheads) { | ||
| 728 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 729 | kfree(c->jheads[i].wbuf.buf); | ||
| 730 | kfree(c->jheads[i].wbuf.inodes); | ||
| 731 | } | ||
| 732 | kfree(c->jheads); | ||
| 733 | c->jheads = NULL; | ||
| 734 | } | ||
| 735 | } | ||
| 736 | |||
| 737 | /** | ||
| 738 | * free_orphans - free orphans. | ||
| 739 | * @c: UBIFS file-system description object | ||
| 740 | */ | ||
| 741 | static void free_orphans(struct ubifs_info *c) | ||
| 742 | { | ||
| 743 | struct ubifs_orphan *orph; | ||
| 744 | |||
| 745 | while (c->orph_dnext) { | ||
| 746 | orph = c->orph_dnext; | ||
| 747 | c->orph_dnext = orph->dnext; | ||
| 748 | list_del(&orph->list); | ||
| 749 | kfree(orph); | ||
| 750 | } | ||
| 751 | |||
| 752 | while (!list_empty(&c->orph_list)) { | ||
| 753 | orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); | ||
| 754 | list_del(&orph->list); | ||
| 755 | kfree(orph); | ||
| 756 | dbg_err("orphan list not empty at unmount"); | ||
| 757 | } | ||
| 758 | |||
| 759 | vfree(c->orph_buf); | ||
| 760 | c->orph_buf = NULL; | ||
| 761 | } | ||
| 762 | |||
/**
 * free_buds - free per-bud objects.
 * @c: UBIFS file-system description object
 *
 * Frees every 'struct ubifs_bud' in the @c->buds red-black tree using an
 * iterative post-order walk: descend to a childless node, free it, and
 * detach it from its parent so the walk does not revisit freed memory.
 */
static void free_buds(struct ubifs_info *c)
{
	struct rb_node *this = c->buds.rb_node;
	struct ubifs_bud *bud;

	while (this) {
		/* Descend until a node with no children is reached */
		if (this->rb_left)
			this = this->rb_left;
		else if (this->rb_right)
			this = this->rb_right;
		else {
			bud = rb_entry(this, struct ubifs_bud, rb);
			/* Step back to the parent before freeing the leaf */
			this = rb_parent(this);
			if (this) {
				/* Unlink the freed child from its parent */
				if (this->rb_left == &bud->rb)
					this->rb_left = NULL;
				else
					this->rb_right = NULL;
			}
			kfree(bud);
		}
	}
}
| 790 | |||
| 791 | /** | ||
| 792 | * check_volume_empty - check if the UBI volume is empty. | ||
| 793 | * @c: UBIFS file-system description object | ||
| 794 | * | ||
| 795 | * This function checks if the UBIFS volume is empty by looking if its LEBs are | ||
| 796 | * mapped or not. The result of checking is stored in the @c->empty variable. | ||
| 797 | * Returns zero in case of success and a negative error code in case of | ||
| 798 | * failure. | ||
| 799 | */ | ||
| 800 | static int check_volume_empty(struct ubifs_info *c) | ||
| 801 | { | ||
| 802 | int lnum, err; | ||
| 803 | |||
| 804 | c->empty = 1; | ||
| 805 | for (lnum = 0; lnum < c->leb_cnt; lnum++) { | ||
| 806 | err = ubi_is_mapped(c->ubi, lnum); | ||
| 807 | if (unlikely(err < 0)) | ||
| 808 | return err; | ||
| 809 | if (err == 1) { | ||
| 810 | c->empty = 0; | ||
| 811 | break; | ||
| 812 | } | ||
| 813 | |||
| 814 | cond_resched(); | ||
| 815 | } | ||
| 816 | |||
| 817 | return 0; | ||
| 818 | } | ||
| 819 | |||
/*
 * UBIFS mount options.
 *
 * Opt_fast_unmount: do not run a journal commit before un-mounting
 * Opt_norm_unmount: run a journal commit before un-mounting
 * Opt_err: just end of array marker
 */
enum {
	Opt_fast_unmount,
	Opt_norm_unmount,
	Opt_err,
};

/* Maps mount option strings to the tokens above, for 'match_token()' */
static match_table_t tokens = {
	{Opt_fast_unmount, "fast_unmount"},
	{Opt_norm_unmount, "norm_unmount"},
	{Opt_err, NULL},
};
| 838 | |||
| 839 | /** | ||
| 840 | * ubifs_parse_options - parse mount parameters. | ||
| 841 | * @c: UBIFS file-system description object | ||
| 842 | * @options: parameters to parse | ||
| 843 | * @is_remount: non-zero if this is FS re-mount | ||
| 844 | * | ||
| 845 | * This function parses UBIFS mount options and returns zero in case success | ||
| 846 | * and a negative error code in case of failure. | ||
| 847 | */ | ||
| 848 | static int ubifs_parse_options(struct ubifs_info *c, char *options, | ||
| 849 | int is_remount) | ||
| 850 | { | ||
| 851 | char *p; | ||
| 852 | substring_t args[MAX_OPT_ARGS]; | ||
| 853 | |||
| 854 | if (!options) | ||
| 855 | return 0; | ||
| 856 | |||
| 857 | while ((p = strsep(&options, ","))) { | ||
| 858 | int token; | ||
| 859 | |||
| 860 | if (!*p) | ||
| 861 | continue; | ||
| 862 | |||
| 863 | token = match_token(p, tokens, args); | ||
| 864 | switch (token) { | ||
| 865 | case Opt_fast_unmount: | ||
| 866 | c->mount_opts.unmount_mode = 2; | ||
| 867 | c->fast_unmount = 1; | ||
| 868 | break; | ||
| 869 | case Opt_norm_unmount: | ||
| 870 | c->mount_opts.unmount_mode = 1; | ||
| 871 | c->fast_unmount = 0; | ||
| 872 | break; | ||
| 873 | default: | ||
| 874 | ubifs_err("unrecognized mount option \"%s\" " | ||
| 875 | "or missing value", p); | ||
| 876 | return -EINVAL; | ||
| 877 | } | ||
| 878 | } | ||
| 879 | |||
| 880 | return 0; | ||
| 881 | } | ||
| 882 | |||
| 883 | /** | ||
| 884 | * destroy_journal - destroy journal data structures. | ||
| 885 | * @c: UBIFS file-system description object | ||
| 886 | * | ||
| 887 | * This function destroys journal data structures including those that may have | ||
| 888 | * been created by recovery functions. | ||
| 889 | */ | ||
| 890 | static void destroy_journal(struct ubifs_info *c) | ||
| 891 | { | ||
| 892 | while (!list_empty(&c->unclean_leb_list)) { | ||
| 893 | struct ubifs_unclean_leb *ucleb; | ||
| 894 | |||
| 895 | ucleb = list_entry(c->unclean_leb_list.next, | ||
| 896 | struct ubifs_unclean_leb, list); | ||
| 897 | list_del(&ucleb->list); | ||
| 898 | kfree(ucleb); | ||
| 899 | } | ||
| 900 | while (!list_empty(&c->old_buds)) { | ||
| 901 | struct ubifs_bud *bud; | ||
| 902 | |||
| 903 | bud = list_entry(c->old_buds.next, struct ubifs_bud, list); | ||
| 904 | list_del(&bud->list); | ||
| 905 | kfree(bud); | ||
| 906 | } | ||
| 907 | ubifs_destroy_idx_gc(c); | ||
| 908 | ubifs_destroy_size_tree(c); | ||
| 909 | ubifs_tnc_close(c); | ||
| 910 | free_buds(c); | ||
| 911 | } | ||
| 912 | |||
| 913 | /** | ||
| 914 | * mount_ubifs - mount UBIFS file-system. | ||
| 915 | * @c: UBIFS file-system description object | ||
| 916 | * | ||
| 917 | * This function mounts UBIFS file system. Returns zero in case of success and | ||
| 918 | * a negative error code in case of failure. | ||
| 919 | * | ||
| 920 | * Note, the function does not de-allocate resources it it fails half way | ||
| 921 | * through, and the caller has to do this instead. | ||
| 922 | */ | ||
| 923 | static int mount_ubifs(struct ubifs_info *c) | ||
| 924 | { | ||
| 925 | struct super_block *sb = c->vfs_sb; | ||
| 926 | int err, mounted_read_only = (sb->s_flags & MS_RDONLY); | ||
| 927 | long long x; | ||
| 928 | size_t sz; | ||
| 929 | |||
| 930 | err = init_constants_early(c); | ||
| 931 | if (err) | ||
| 932 | return err; | ||
| 933 | |||
| 934 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 935 | c->dbg_buf = vmalloc(c->leb_size); | ||
| 936 | if (!c->dbg_buf) | ||
| 937 | return -ENOMEM; | ||
| 938 | #endif | ||
| 939 | |||
| 940 | err = check_volume_empty(c); | ||
| 941 | if (err) | ||
| 942 | goto out_free; | ||
| 943 | |||
| 944 | if (c->empty && (mounted_read_only || c->ro_media)) { | ||
| 945 | /* | ||
| 946 | * This UBI volume is empty, and read-only, or the file system | ||
| 947 | * is mounted read-only - we cannot format it. | ||
| 948 | */ | ||
| 949 | ubifs_err("can't format empty UBI volume: read-only %s", | ||
| 950 | c->ro_media ? "UBI volume" : "mount"); | ||
| 951 | err = -EROFS; | ||
| 952 | goto out_free; | ||
| 953 | } | ||
| 954 | |||
| 955 | if (c->ro_media && !mounted_read_only) { | ||
| 956 | ubifs_err("cannot mount read-write - read-only media"); | ||
| 957 | err = -EROFS; | ||
| 958 | goto out_free; | ||
| 959 | } | ||
| 960 | |||
| 961 | /* | ||
| 962 | * The requirement for the buffer is that it should fit indexing B-tree | ||
| 963 | * height amount of integers. We assume the height if the TNC tree will | ||
| 964 | * never exceed 64. | ||
| 965 | */ | ||
| 966 | err = -ENOMEM; | ||
| 967 | c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); | ||
| 968 | if (!c->bottom_up_buf) | ||
| 969 | goto out_free; | ||
| 970 | |||
| 971 | c->sbuf = vmalloc(c->leb_size); | ||
| 972 | if (!c->sbuf) | ||
| 973 | goto out_free; | ||
| 974 | |||
| 975 | if (!mounted_read_only) { | ||
| 976 | c->ileb_buf = vmalloc(c->leb_size); | ||
| 977 | if (!c->ileb_buf) | ||
| 978 | goto out_free; | ||
| 979 | } | ||
| 980 | |||
| 981 | err = ubifs_read_superblock(c); | ||
| 982 | if (err) | ||
| 983 | goto out_free; | ||
| 984 | |||
| 985 | /* | ||
| 986 | * Make sure the compressor which is set as the default on in the | ||
| 987 | * superblock was actually compiled in. | ||
| 988 | */ | ||
| 989 | if (!ubifs_compr_present(c->default_compr)) { | ||
| 990 | ubifs_warn("'%s' compressor is set by superblock, but not " | ||
| 991 | "compiled in", ubifs_compr_name(c->default_compr)); | ||
| 992 | c->default_compr = UBIFS_COMPR_NONE; | ||
| 993 | } | ||
| 994 | |||
| 995 | dbg_failure_mode_registration(c); | ||
| 996 | |||
| 997 | err = init_constants_late(c); | ||
| 998 | if (err) | ||
| 999 | goto out_dereg; | ||
| 1000 | |||
| 1001 | sz = ALIGN(c->max_idx_node_sz, c->min_io_size); | ||
| 1002 | sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); | ||
| 1003 | c->cbuf = kmalloc(sz, GFP_NOFS); | ||
| 1004 | if (!c->cbuf) { | ||
| 1005 | err = -ENOMEM; | ||
| 1006 | goto out_dereg; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | if (!mounted_read_only) { | ||
| 1010 | err = alloc_wbufs(c); | ||
| 1011 | if (err) | ||
| 1012 | goto out_cbuf; | ||
| 1013 | |||
| 1014 | /* Create background thread */ | ||
| 1015 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, | ||
| 1016 | c->vi.vol_id); | ||
| 1017 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | ||
| 1018 | if (!c->bgt) | ||
| 1019 | c->bgt = ERR_PTR(-EINVAL); | ||
| 1020 | if (IS_ERR(c->bgt)) { | ||
| 1021 | err = PTR_ERR(c->bgt); | ||
| 1022 | c->bgt = NULL; | ||
| 1023 | ubifs_err("cannot spawn \"%s\", error %d", | ||
| 1024 | c->bgt_name, err); | ||
| 1025 | goto out_wbufs; | ||
| 1026 | } | ||
| 1027 | wake_up_process(c->bgt); | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | err = ubifs_read_master(c); | ||
| 1031 | if (err) | ||
| 1032 | goto out_master; | ||
| 1033 | |||
| 1034 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { | ||
| 1035 | ubifs_msg("recovery needed"); | ||
| 1036 | c->need_recovery = 1; | ||
| 1037 | if (!mounted_read_only) { | ||
| 1038 | err = ubifs_recover_inl_heads(c, c->sbuf); | ||
| 1039 | if (err) | ||
| 1040 | goto out_master; | ||
| 1041 | } | ||
| 1042 | } else if (!mounted_read_only) { | ||
| 1043 | /* | ||
| 1044 | * Set the "dirty" flag so that if we reboot uncleanly we | ||
| 1045 | * will notice this immediately on the next mount. | ||
| 1046 | */ | ||
| 1047 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); | ||
| 1048 | err = ubifs_write_master(c); | ||
| 1049 | if (err) | ||
| 1050 | goto out_master; | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | err = ubifs_lpt_init(c, 1, !mounted_read_only); | ||
| 1054 | if (err) | ||
| 1055 | goto out_lpt; | ||
| 1056 | |||
| 1057 | err = dbg_check_idx_size(c, c->old_idx_sz); | ||
| 1058 | if (err) | ||
| 1059 | goto out_lpt; | ||
| 1060 | |||
| 1061 | err = ubifs_replay_journal(c); | ||
| 1062 | if (err) | ||
| 1063 | goto out_journal; | ||
| 1064 | |||
| 1065 | err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); | ||
| 1066 | if (err) | ||
| 1067 | goto out_orphans; | ||
| 1068 | |||
| 1069 | if (!mounted_read_only) { | ||
| 1070 | int lnum; | ||
| 1071 | |||
| 1072 | /* Check for enough free space */ | ||
| 1073 | if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { | ||
| 1074 | ubifs_err("insufficient available space"); | ||
| 1075 | err = -EINVAL; | ||
| 1076 | goto out_orphans; | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | /* Check for enough log space */ | ||
| 1080 | lnum = c->lhead_lnum + 1; | ||
| 1081 | if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) | ||
| 1082 | lnum = UBIFS_LOG_LNUM; | ||
| 1083 | if (lnum == c->ltail_lnum) { | ||
| 1084 | err = ubifs_consolidate_log(c); | ||
| 1085 | if (err) | ||
| 1086 | goto out_orphans; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | if (c->need_recovery) { | ||
| 1090 | err = ubifs_recover_size(c); | ||
| 1091 | if (err) | ||
| 1092 | goto out_orphans; | ||
| 1093 | err = ubifs_rcvry_gc_commit(c); | ||
| 1094 | } else | ||
| 1095 | err = take_gc_lnum(c); | ||
| 1096 | if (err) | ||
| 1097 | goto out_orphans; | ||
| 1098 | |||
| 1099 | err = dbg_check_lprops(c); | ||
| 1100 | if (err) | ||
| 1101 | goto out_orphans; | ||
| 1102 | } else if (c->need_recovery) { | ||
| 1103 | err = ubifs_recover_size(c); | ||
| 1104 | if (err) | ||
| 1105 | goto out_orphans; | ||
| 1106 | } | ||
| 1107 | |||
| 1108 | spin_lock(&ubifs_infos_lock); | ||
| 1109 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
| 1110 | spin_unlock(&ubifs_infos_lock); | ||
| 1111 | |||
| 1112 | if (c->need_recovery) { | ||
| 1113 | if (mounted_read_only) | ||
| 1114 | ubifs_msg("recovery deferred"); | ||
| 1115 | else { | ||
| 1116 | c->need_recovery = 0; | ||
| 1117 | ubifs_msg("recovery completed"); | ||
| 1118 | } | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | err = dbg_check_filesystem(c); | ||
| 1122 | if (err) | ||
| 1123 | goto out_infos; | ||
| 1124 | |||
| 1125 | ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, | ||
| 1126 | c->vi.vol_id); | ||
| 1127 | if (mounted_read_only) | ||
| 1128 | ubifs_msg("mounted read-only"); | ||
| 1129 | x = (long long)c->main_lebs * c->leb_size; | ||
| 1130 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", | ||
| 1131 | x, x >> 10, x >> 20, c->main_lebs); | ||
| 1132 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | ||
| 1133 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", | ||
| 1134 | x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); | ||
| 1135 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | ||
| 1136 | ubifs_msg("media format %d, latest format %d", | ||
| 1137 | c->fmt_version, UBIFS_FORMAT_VERSION); | ||
| 1138 | |||
| 1139 | dbg_msg("compiled on: " __DATE__ " at " __TIME__); | ||
| 1140 | dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); | ||
| 1141 | dbg_msg("LEB size: %d bytes (%d KiB)", | ||
| 1142 | c->leb_size, c->leb_size / 1024); | ||
| 1143 | dbg_msg("data journal heads: %d", | ||
| 1144 | c->jhead_cnt - NONDATA_JHEADS_CNT); | ||
| 1145 | dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" | ||
| 1146 | "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", | ||
| 1147 | c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], | ||
| 1148 | c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], | ||
| 1149 | c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], | ||
| 1150 | c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); | ||
| 1151 | dbg_msg("fast unmount: %d", c->fast_unmount); | ||
| 1152 | dbg_msg("big_lpt %d", c->big_lpt); | ||
| 1153 | dbg_msg("log LEBs: %d (%d - %d)", | ||
| 1154 | c->log_lebs, UBIFS_LOG_LNUM, c->log_last); | ||
| 1155 | dbg_msg("LPT area LEBs: %d (%d - %d)", | ||
| 1156 | c->lpt_lebs, c->lpt_first, c->lpt_last); | ||
| 1157 | dbg_msg("orphan area LEBs: %d (%d - %d)", | ||
| 1158 | c->orph_lebs, c->orph_first, c->orph_last); | ||
| 1159 | dbg_msg("main area LEBs: %d (%d - %d)", | ||
| 1160 | c->main_lebs, c->main_first, c->leb_cnt - 1); | ||
| 1161 | dbg_msg("index LEBs: %d", c->lst.idx_lebs); | ||
| 1162 | dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", | ||
| 1163 | c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); | ||
| 1164 | dbg_msg("key hash type: %d", c->key_hash_type); | ||
| 1165 | dbg_msg("tree fanout: %d", c->fanout); | ||
| 1166 | dbg_msg("reserved GC LEB: %d", c->gc_lnum); | ||
| 1167 | dbg_msg("first main LEB: %d", c->main_first); | ||
| 1168 | dbg_msg("dead watermark: %d", c->dead_wm); | ||
| 1169 | dbg_msg("dark watermark: %d", c->dark_wm); | ||
| 1170 | x = (long long)c->main_lebs * c->dark_wm; | ||
| 1171 | dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", | ||
| 1172 | x, x >> 10, x >> 20); | ||
| 1173 | dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", | ||
| 1174 | c->max_bud_bytes, c->max_bud_bytes >> 10, | ||
| 1175 | c->max_bud_bytes >> 20); | ||
| 1176 | dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", | ||
| 1177 | c->bg_bud_bytes, c->bg_bud_bytes >> 10, | ||
| 1178 | c->bg_bud_bytes >> 20); | ||
| 1179 | dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", | ||
| 1180 | c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); | ||
| 1181 | dbg_msg("max. seq. number: %llu", c->max_sqnum); | ||
| 1182 | dbg_msg("commit number: %llu", c->cmt_no); | ||
| 1183 | |||
| 1184 | return 0; | ||
| 1185 | |||
| 1186 | out_infos: | ||
| 1187 | spin_lock(&ubifs_infos_lock); | ||
| 1188 | list_del(&c->infos_list); | ||
| 1189 | spin_unlock(&ubifs_infos_lock); | ||
| 1190 | out_orphans: | ||
| 1191 | free_orphans(c); | ||
| 1192 | out_journal: | ||
| 1193 | destroy_journal(c); | ||
| 1194 | out_lpt: | ||
| 1195 | ubifs_lpt_free(c, 0); | ||
| 1196 | out_master: | ||
| 1197 | kfree(c->mst_node); | ||
| 1198 | kfree(c->rcvrd_mst_node); | ||
| 1199 | if (c->bgt) | ||
| 1200 | kthread_stop(c->bgt); | ||
| 1201 | out_wbufs: | ||
| 1202 | free_wbufs(c); | ||
| 1203 | out_cbuf: | ||
| 1204 | kfree(c->cbuf); | ||
| 1205 | out_dereg: | ||
| 1206 | dbg_failure_mode_deregistration(c); | ||
| 1207 | out_free: | ||
| 1208 | vfree(c->ileb_buf); | ||
| 1209 | vfree(c->sbuf); | ||
| 1210 | kfree(c->bottom_up_buf); | ||
| 1211 | UBIFS_DBG(vfree(c->dbg_buf)); | ||
| 1212 | return err; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | /** | ||
| 1216 | * ubifs_umount - un-mount UBIFS file-system. | ||
| 1217 | * @c: UBIFS file-system description object | ||
| 1218 | * | ||
| 1219 | * Note, this function is called to free allocated resourced when un-mounting, | ||
| 1220 | * as well as free resources when an error occurred while we were half way | ||
| 1221 | * through mounting (error path cleanup function). So it has to make sure the | ||
| 1222 | * resource was actually allocated before freeing it. | ||
| 1223 | */ | ||
| 1224 | static void ubifs_umount(struct ubifs_info *c) | ||
| 1225 | { | ||
| 1226 | dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, | ||
| 1227 | c->vi.vol_id); | ||
| 1228 | |||
| 1229 | spin_lock(&ubifs_infos_lock); | ||
| 1230 | list_del(&c->infos_list); | ||
| 1231 | spin_unlock(&ubifs_infos_lock); | ||
| 1232 | |||
| 1233 | if (c->bgt) | ||
| 1234 | kthread_stop(c->bgt); | ||
| 1235 | |||
| 1236 | destroy_journal(c); | ||
| 1237 | free_wbufs(c); | ||
| 1238 | free_orphans(c); | ||
| 1239 | ubifs_lpt_free(c, 0); | ||
| 1240 | |||
| 1241 | kfree(c->cbuf); | ||
| 1242 | kfree(c->rcvrd_mst_node); | ||
| 1243 | kfree(c->mst_node); | ||
| 1244 | vfree(c->sbuf); | ||
| 1245 | kfree(c->bottom_up_buf); | ||
| 1246 | UBIFS_DBG(vfree(c->dbg_buf)); | ||
| 1247 | vfree(c->ileb_buf); | ||
| 1248 | dbg_failure_mode_deregistration(c); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | /** | ||
| 1252 | * ubifs_remount_rw - re-mount in read-write mode. | ||
| 1253 | * @c: UBIFS file-system description object | ||
| 1254 | * | ||
| 1255 | * UBIFS avoids allocating many unnecessary resources when mounted in read-only | ||
| 1256 | * mode. This function allocates the needed resources and re-mounts UBIFS in | ||
| 1257 | * read-write mode. | ||
| 1258 | */ | ||
| 1259 | static int ubifs_remount_rw(struct ubifs_info *c) | ||
| 1260 | { | ||
| 1261 | int err, lnum; | ||
| 1262 | |||
| 1263 | if (c->ro_media) | ||
| 1264 | return -EINVAL; | ||
| 1265 | |||
| 1266 | mutex_lock(&c->umount_mutex); | ||
| 1267 | c->remounting_rw = 1; | ||
| 1268 | |||
| 1269 | /* Check for enough free space */ | ||
| 1270 | if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { | ||
| 1271 | ubifs_err("insufficient available space"); | ||
| 1272 | err = -EINVAL; | ||
| 1273 | goto out; | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | if (c->old_leb_cnt != c->leb_cnt) { | ||
| 1277 | struct ubifs_sb_node *sup; | ||
| 1278 | |||
| 1279 | sup = ubifs_read_sb_node(c); | ||
| 1280 | if (IS_ERR(sup)) { | ||
| 1281 | err = PTR_ERR(sup); | ||
| 1282 | goto out; | ||
| 1283 | } | ||
| 1284 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); | ||
| 1285 | err = ubifs_write_sb_node(c, sup); | ||
| 1286 | if (err) | ||
| 1287 | goto out; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | if (c->need_recovery) { | ||
| 1291 | ubifs_msg("completing deferred recovery"); | ||
| 1292 | err = ubifs_write_rcvrd_mst_node(c); | ||
| 1293 | if (err) | ||
| 1294 | goto out; | ||
| 1295 | err = ubifs_recover_size(c); | ||
| 1296 | if (err) | ||
| 1297 | goto out; | ||
| 1298 | err = ubifs_clean_lebs(c, c->sbuf); | ||
| 1299 | if (err) | ||
| 1300 | goto out; | ||
| 1301 | err = ubifs_recover_inl_heads(c, c->sbuf); | ||
| 1302 | if (err) | ||
| 1303 | goto out; | ||
| 1304 | } | ||
| 1305 | |||
| 1306 | if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { | ||
| 1307 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); | ||
| 1308 | err = ubifs_write_master(c); | ||
| 1309 | if (err) | ||
| 1310 | goto out; | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | c->ileb_buf = vmalloc(c->leb_size); | ||
| 1314 | if (!c->ileb_buf) { | ||
| 1315 | err = -ENOMEM; | ||
| 1316 | goto out; | ||
| 1317 | } | ||
| 1318 | |||
| 1319 | err = ubifs_lpt_init(c, 0, 1); | ||
| 1320 | if (err) | ||
| 1321 | goto out; | ||
| 1322 | |||
| 1323 | err = alloc_wbufs(c); | ||
| 1324 | if (err) | ||
| 1325 | goto out; | ||
| 1326 | |||
| 1327 | ubifs_create_buds_lists(c); | ||
| 1328 | |||
| 1329 | /* Create background thread */ | ||
| 1330 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | ||
| 1331 | if (!c->bgt) | ||
| 1332 | c->bgt = ERR_PTR(-EINVAL); | ||
| 1333 | if (IS_ERR(c->bgt)) { | ||
| 1334 | err = PTR_ERR(c->bgt); | ||
| 1335 | c->bgt = NULL; | ||
| 1336 | ubifs_err("cannot spawn \"%s\", error %d", | ||
| 1337 | c->bgt_name, err); | ||
| 1338 | return err; | ||
| 1339 | } | ||
| 1340 | wake_up_process(c->bgt); | ||
| 1341 | |||
| 1342 | c->orph_buf = vmalloc(c->leb_size); | ||
| 1343 | if (!c->orph_buf) | ||
| 1344 | return -ENOMEM; | ||
| 1345 | |||
| 1346 | /* Check for enough log space */ | ||
| 1347 | lnum = c->lhead_lnum + 1; | ||
| 1348 | if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) | ||
| 1349 | lnum = UBIFS_LOG_LNUM; | ||
| 1350 | if (lnum == c->ltail_lnum) { | ||
| 1351 | err = ubifs_consolidate_log(c); | ||
| 1352 | if (err) | ||
| 1353 | goto out; | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | if (c->need_recovery) | ||
| 1357 | err = ubifs_rcvry_gc_commit(c); | ||
| 1358 | else | ||
| 1359 | err = take_gc_lnum(c); | ||
| 1360 | if (err) | ||
| 1361 | goto out; | ||
| 1362 | |||
| 1363 | if (c->need_recovery) { | ||
| 1364 | c->need_recovery = 0; | ||
| 1365 | ubifs_msg("deferred recovery completed"); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | dbg_gen("re-mounted read-write"); | ||
| 1369 | c->vfs_sb->s_flags &= ~MS_RDONLY; | ||
| 1370 | c->remounting_rw = 0; | ||
| 1371 | mutex_unlock(&c->umount_mutex); | ||
| 1372 | return 0; | ||
| 1373 | |||
| 1374 | out: | ||
| 1375 | vfree(c->orph_buf); | ||
| 1376 | c->orph_buf = NULL; | ||
| 1377 | if (c->bgt) { | ||
| 1378 | kthread_stop(c->bgt); | ||
| 1379 | c->bgt = NULL; | ||
| 1380 | } | ||
| 1381 | free_wbufs(c); | ||
| 1382 | vfree(c->ileb_buf); | ||
| 1383 | c->ileb_buf = NULL; | ||
| 1384 | ubifs_lpt_free(c, 1); | ||
| 1385 | c->remounting_rw = 0; | ||
| 1386 | mutex_unlock(&c->umount_mutex); | ||
| 1387 | return err; | ||
| 1388 | } | ||
| 1389 | |||
| 1390 | /** | ||
| 1391 | * commit_on_unmount - commit the journal when un-mounting. | ||
| 1392 | * @c: UBIFS file-system description object | ||
| 1393 | * | ||
| 1394 | * This function is called during un-mounting and it commits the journal unless | ||
| 1395 | * the "fast unmount" mode is enabled. It also avoids committing the journal if | ||
| 1396 | * it contains too few data. | ||
| 1397 | * | ||
| 1398 | * Sometimes recovery requires the journal to be committed at least once, and | ||
| 1399 | * this function takes care about this. | ||
| 1400 | */ | ||
| 1401 | static void commit_on_unmount(struct ubifs_info *c) | ||
| 1402 | { | ||
| 1403 | if (!c->fast_unmount) { | ||
| 1404 | long long bud_bytes; | ||
| 1405 | |||
| 1406 | spin_lock(&c->buds_lock); | ||
| 1407 | bud_bytes = c->bud_bytes; | ||
| 1408 | spin_unlock(&c->buds_lock); | ||
| 1409 | if (bud_bytes > c->leb_size) | ||
| 1410 | ubifs_run_commit(c); | ||
| 1411 | } | ||
| 1412 | } | ||
| 1413 | |||
| 1414 | /** | ||
| 1415 | * ubifs_remount_ro - re-mount in read-only mode. | ||
| 1416 | * @c: UBIFS file-system description object | ||
| 1417 | * | ||
| 1418 | * We rely on VFS to have stopped writing. Possibly the background thread could | ||
| 1419 | * be running a commit, however kthread_stop will wait in that case. | ||
| 1420 | */ | ||
| 1421 | static void ubifs_remount_ro(struct ubifs_info *c) | ||
| 1422 | { | ||
| 1423 | int i, err; | ||
| 1424 | |||
| 1425 | ubifs_assert(!c->need_recovery); | ||
| 1426 | commit_on_unmount(c); | ||
| 1427 | |||
| 1428 | mutex_lock(&c->umount_mutex); | ||
| 1429 | if (c->bgt) { | ||
| 1430 | kthread_stop(c->bgt); | ||
| 1431 | c->bgt = NULL; | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | for (i = 0; i < c->jhead_cnt; i++) { | ||
| 1435 | ubifs_wbuf_sync(&c->jheads[i].wbuf); | ||
| 1436 | del_timer_sync(&c->jheads[i].wbuf.timer); | ||
| 1437 | } | ||
| 1438 | |||
| 1439 | if (!c->ro_media) { | ||
| 1440 | c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); | ||
| 1441 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); | ||
| 1442 | c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); | ||
| 1443 | err = ubifs_write_master(c); | ||
| 1444 | if (err) | ||
| 1445 | ubifs_ro_mode(c, err); | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | ubifs_destroy_idx_gc(c); | ||
| 1449 | free_wbufs(c); | ||
| 1450 | vfree(c->orph_buf); | ||
| 1451 | c->orph_buf = NULL; | ||
| 1452 | vfree(c->ileb_buf); | ||
| 1453 | c->ileb_buf = NULL; | ||
| 1454 | ubifs_lpt_free(c, 1); | ||
| 1455 | mutex_unlock(&c->umount_mutex); | ||
| 1456 | } | ||
| 1457 | |||
/*
 * ubifs_put_super - VFS '->put_super()' operation: finish un-mounting.
 * Writes a clean master node (if possible), frees all UBIFS resources and
 * releases the UBI volume.
 */
static void ubifs_put_super(struct super_block *sb)
{
	int i;
	struct ubifs_info *c = sb->s_fs_info;

	ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
		  c->vi.vol_id);
	/*
	 * The following asserts are only valid if there has not been a failure
	 * of the media. For example, there will be dirty inodes if we failed
	 * to write them back because of I/O errors.
	 */
	ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
	ubifs_assert(c->budg_idx_growth == 0);
	ubifs_assert(c->budg_data_growth == 0);

	/*
	 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
	 * and file system un-mount. Namely, it prevents the shrinker from
	 * picking this superblock for shrinking - it will be just skipped if
	 * the mutex is locked.
	 */
	mutex_lock(&c->umount_mutex);
	if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
		/*
		 * First of all kill the background thread to make sure it does
		 * not interfere with un-mounting and freeing resources.
		 */
		if (c->bgt) {
			kthread_stop(c->bgt);
			c->bgt = NULL;
		}

		/* Synchronize write-buffers */
		if (c->jheads)
			for (i = 0; i < c->jhead_cnt; i++) {
				ubifs_wbuf_sync(&c->jheads[i].wbuf);
				del_timer_sync(&c->jheads[i].wbuf.timer);
			}

		/*
		 * On fatal errors c->ro_media is set to 1, in which case we do
		 * not write the master node.
		 */
		if (!c->ro_media) {
			/*
			 * We are being cleanly unmounted which means the
			 * orphans were killed - indicate this in the master
			 * node. Also save the reserved GC LEB number.
			 */
			int err;

			c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
			c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
			c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
			err = ubifs_write_master(c);
			if (err)
				/*
				 * Recovery will attempt to fix the master area
				 * next mount, so we just print a message and
				 * continue to unmount normally.
				 */
				ubifs_err("failed to write master node, "
					  "error %d", err);
		}
	}

	/* Free everything, release the UBI volume and the description object */
	ubifs_umount(c);
	bdi_destroy(&c->bdi);
	ubi_close_volume(c->ubi);
	mutex_unlock(&c->umount_mutex);
	kfree(c);
}
| 1531 | |||
| 1532 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | ||
| 1533 | { | ||
| 1534 | int err; | ||
| 1535 | struct ubifs_info *c = sb->s_fs_info; | ||
| 1536 | |||
| 1537 | dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); | ||
| 1538 | |||
| 1539 | err = ubifs_parse_options(c, data, 1); | ||
| 1540 | if (err) { | ||
| 1541 | ubifs_err("invalid or unknown remount parameter"); | ||
| 1542 | return err; | ||
| 1543 | } | ||
| 1544 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | ||
| 1545 | err = ubifs_remount_rw(c); | ||
| 1546 | if (err) | ||
| 1547 | return err; | ||
| 1548 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) | ||
| 1549 | ubifs_remount_ro(c); | ||
| 1550 | |||
| 1551 | return 0; | ||
| 1552 | } | ||
| 1553 | |||
/* Super-block operations UBIFS provides to the VFS */
struct super_operations ubifs_super_operations = {
	.alloc_inode    = ubifs_alloc_inode,
	.destroy_inode  = ubifs_destroy_inode,
	.put_super      = ubifs_put_super,
	.write_inode    = ubifs_write_inode,
	.delete_inode   = ubifs_delete_inode,
	.statfs         = ubifs_statfs,
	.dirty_inode    = ubifs_dirty_inode,
	.remount_fs     = ubifs_remount_fs,
	.show_options   = ubifs_show_options,
	.sync_fs        = ubifs_sync_fs,
};
| 1566 | |||
| 1567 | /** | ||
| 1568 | * open_ubi - parse UBI device name string and open the UBI device. | ||
| 1569 | * @name: UBI volume name | ||
| 1570 | * @mode: UBI volume open mode | ||
| 1571 | * | ||
| 1572 | * There are several ways to specify UBI volumes when mounting UBIFS: | ||
| 1573 | * o ubiX_Y - UBI device number X, volume Y; | ||
| 1574 | * o ubiY - UBI device number 0, volume Y; | ||
| 1575 | * o ubiX:NAME - mount UBI device X, volume with name NAME; | ||
| 1576 | * o ubi:NAME - mount UBI device 0, volume with name NAME. | ||
| 1577 | * | ||
| 1578 | * Alternative '!' separator may be used instead of ':' (because some shells | ||
| 1579 | * like busybox may interpret ':' as an NFS host name separator). This function | ||
| 1580 | * returns ubi volume object in case of success and a negative error code in | ||
| 1581 | * case of failure. | ||
| 1582 | */ | ||
| 1583 | static struct ubi_volume_desc *open_ubi(const char *name, int mode) | ||
| 1584 | { | ||
| 1585 | int dev, vol; | ||
| 1586 | char *endptr; | ||
| 1587 | |||
| 1588 | if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') | ||
| 1589 | return ERR_PTR(-EINVAL); | ||
| 1590 | |||
| 1591 | /* ubi:NAME method */ | ||
| 1592 | if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') | ||
| 1593 | return ubi_open_volume_nm(0, name + 4, mode); | ||
| 1594 | |||
| 1595 | if (!isdigit(name[3])) | ||
| 1596 | return ERR_PTR(-EINVAL); | ||
| 1597 | |||
| 1598 | dev = simple_strtoul(name + 3, &endptr, 0); | ||
| 1599 | |||
| 1600 | /* ubiY method */ | ||
| 1601 | if (*endptr == '\0') | ||
| 1602 | return ubi_open_volume(0, dev, mode); | ||
| 1603 | |||
| 1604 | /* ubiX_Y method */ | ||
| 1605 | if (*endptr == '_' && isdigit(endptr[1])) { | ||
| 1606 | vol = simple_strtoul(endptr + 1, &endptr, 0); | ||
| 1607 | if (*endptr != '\0') | ||
| 1608 | return ERR_PTR(-EINVAL); | ||
| 1609 | return ubi_open_volume(dev, vol, mode); | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | /* ubiX:NAME method */ | ||
| 1613 | if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') | ||
| 1614 | return ubi_open_volume_nm(dev, ++endptr, mode); | ||
| 1615 | |||
| 1616 | return ERR_PTR(-EINVAL); | ||
| 1617 | } | ||
| 1618 | |||
/*
 * ubifs_fill_super - fill the VFS super-block for a new mount.
 * Allocates and initializes the UBIFS description object, re-opens the UBI
 * volume read-write, mounts the file-system and reads the root inode.
 * Returns zero on success and a negative error code on failure.
 */
static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
{
	/* 'sb->s_fs_info' carries the UBI volume handle from ubifs_get_sb() */
	struct ubi_volume_desc *ubi = sb->s_fs_info;
	struct ubifs_info *c;
	struct inode *root;
	int err;

	c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
	if (!c)
		return -ENOMEM;

	/* Initialize locks, trees and lists of the description object */
	spin_lock_init(&c->cnt_lock);
	spin_lock_init(&c->cs_lock);
	spin_lock_init(&c->buds_lock);
	spin_lock_init(&c->space_lock);
	spin_lock_init(&c->orphan_lock);
	init_rwsem(&c->commit_sem);
	mutex_init(&c->lp_mutex);
	mutex_init(&c->tnc_mutex);
	mutex_init(&c->log_mutex);
	mutex_init(&c->mst_mutex);
	mutex_init(&c->umount_mutex);
	init_waitqueue_head(&c->cmt_wq);
	c->buds = RB_ROOT;
	c->old_idx = RB_ROOT;
	c->size_tree = RB_ROOT;
	c->orph_tree = RB_ROOT;
	INIT_LIST_HEAD(&c->infos_list);
	INIT_LIST_HEAD(&c->idx_gc);
	INIT_LIST_HEAD(&c->replay_list);
	INIT_LIST_HEAD(&c->replay_buds);
	INIT_LIST_HEAD(&c->uncat_list);
	INIT_LIST_HEAD(&c->empty_list);
	INIT_LIST_HEAD(&c->freeable_list);
	INIT_LIST_HEAD(&c->frdi_idx_list);
	INIT_LIST_HEAD(&c->unclean_leb_list);
	INIT_LIST_HEAD(&c->old_buds);
	INIT_LIST_HEAD(&c->orph_list);
	INIT_LIST_HEAD(&c->orph_new);

	c->highest_inum = UBIFS_FIRST_INO;
	get_random_bytes(&c->vfs_gen, sizeof(int));
	c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;

	ubi_get_volume_info(ubi, &c->vi);
	ubi_get_device_info(c->vi.ubi_num, &c->di);

	/* Re-open the UBI device in read-write mode */
	c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
	if (IS_ERR(c->ubi)) {
		err = PTR_ERR(c->ubi);
		goto out_free;
	}

	/*
	 * UBIFS provides 'backing_dev_info' in order to disable readahead. For
	 * UBIFS, I/O is not deferred, it is done immediately in readpage,
	 * which means the user would have to wait not just for their own I/O
	 * but the readahead I/O as well i.e. completely pointless.
	 *
	 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
	 */
	c->bdi.capabilities = BDI_CAP_MAP_COPY;
	c->bdi.unplug_io_fn = default_unplug_io_fn;
	err = bdi_init(&c->bdi);
	if (err)
		goto out_close;

	err = ubifs_parse_options(c, data, 0);
	if (err)
		goto out_bdi;

	c->vfs_sb = sb;

	/* From here on 'sb->s_fs_info' points at 'c', not at 'ubi' */
	sb->s_fs_info = c;
	sb->s_magic = UBIFS_SUPER_MAGIC;
	sb->s_blocksize = UBIFS_BLOCK_SIZE;
	sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
	sb->s_dev = c->vi.cdev;
	/* Cap the maximum inode size to what the VFS can handle */
	sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
	if (c->max_inode_sz > MAX_LFS_FILESIZE)
		sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
	sb->s_op = &ubifs_super_operations;

	mutex_lock(&c->umount_mutex);
	err = mount_ubifs(c);
	if (err) {
		ubifs_assert(err < 0);
		goto out_unlock;
	}

	/* Read the root inode */
	root = ubifs_iget(sb, UBIFS_ROOT_INO);
	if (IS_ERR(root)) {
		err = PTR_ERR(root);
		goto out_umount;
	}

	sb->s_root = d_alloc_root(root);
	if (!sb->s_root)
		goto out_iput;

	mutex_unlock(&c->umount_mutex);

	return 0;

out_iput:
	iput(root);
out_umount:
	ubifs_umount(c);
out_unlock:
	mutex_unlock(&c->umount_mutex);
out_bdi:
	bdi_destroy(&c->bdi);
out_close:
	ubi_close_volume(c->ubi);
out_free:
	kfree(c);
	return err;
}
| 1739 | |||
| 1740 | static int sb_test(struct super_block *sb, void *data) | ||
| 1741 | { | ||
| 1742 | dev_t *dev = data; | ||
| 1743 | |||
| 1744 | return sb->s_dev == *dev; | ||
| 1745 | } | ||
| 1746 | |||
| 1747 | static int sb_set(struct super_block *sb, void *data) | ||
| 1748 | { | ||
| 1749 | dev_t *dev = data; | ||
| 1750 | |||
| 1751 | sb->s_dev = *dev; | ||
| 1752 | return 0; | ||
| 1753 | } | ||
| 1754 | |||
/*
 * ubifs_get_sb - VFS '->get_sb()' operation: mount a UBIFS file-system.
 * Parses @name into a UBI volume, finds or creates the corresponding VFS
 * super-block and attaches it to @mnt. Returns zero on success and a
 * negative error code on failure.
 */
static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
			const char *name, void *data, struct vfsmount *mnt)
{
	struct ubi_volume_desc *ubi;
	struct ubi_volume_info vi;
	struct super_block *sb;
	int err;

	dbg_gen("name %s, flags %#x", name, flags);

	/*
	 * Get UBI device number and volume ID. Mount it read-only so far
	 * because this might be a new mount point, and UBI allows only one
	 * read-write user at a time.
	 */
	ubi = open_ubi(name, UBI_READONLY);
	if (IS_ERR(ubi)) {
		ubifs_err("cannot open \"%s\", error %d",
			  name, (int)PTR_ERR(ubi));
		return PTR_ERR(ubi);
	}
	ubi_get_volume_info(ubi, &vi);

	dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);

	/* Look up an existing super-block for this volume, or allocate one */
	sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
	if (IS_ERR(sb)) {
		err = PTR_ERR(sb);
		goto out_close;
	}

	if (sb->s_root) {
		/* A new mount point for already mounted UBIFS */
		dbg_gen("this ubi volume is already mounted");
		/* R/O and R/W mounts of the same volume cannot coexist */
		if ((flags ^ sb->s_flags) & MS_RDONLY) {
			err = -EBUSY;
			goto out_deact;
		}
	} else {
		sb->s_flags = flags;
		/*
		 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
		 * replaced by 'c'.
		 */
		sb->s_fs_info = ubi;
		err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
		if (err)
			goto out_deact;
		/* We do not support atime */
		sb->s_flags |= MS_ACTIVE | MS_NOATIME;
	}

	/* 'fill_super()' opens ubi again so we must close it here */
	ubi_close_volume(ubi);

	return simple_set_mnt(mnt, sb);

out_deact:
	up_write(&sb->s_umount);
	deactivate_super(sb);
out_close:
	ubi_close_volume(ubi);
	return err;
}
| 1819 | |||
| 1820 | static void ubifs_kill_sb(struct super_block *sb) | ||
| 1821 | { | ||
| 1822 | struct ubifs_info *c = sb->s_fs_info; | ||
| 1823 | |||
| 1824 | /* | ||
| 1825 | * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' | ||
| 1826 | * in order to be outside BKL. | ||
| 1827 | */ | ||
| 1828 | if (sb->s_root && !(sb->s_flags & MS_RDONLY)) | ||
| 1829 | commit_on_unmount(c); | ||
| 1830 | /* The un-mount routine is actually done in put_super() */ | ||
| 1831 | generic_shutdown_super(sb); | ||
| 1832 | } | ||
| 1833 | |||
/* UBIFS file-system type registered with the VFS */
static struct file_system_type ubifs_fs_type = {
	.name    = "ubifs",
	.owner   = THIS_MODULE,
	.get_sb  = ubifs_get_sb,
	.kill_sb = ubifs_kill_sb
};
| 1840 | |||
| 1841 | /* | ||
| 1842 | * Inode slab cache constructor. | ||
| 1843 | */ | ||
| 1844 | static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) | ||
| 1845 | { | ||
| 1846 | struct ubifs_inode *ui = obj; | ||
| 1847 | inode_init_once(&ui->vfs_inode); | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | static int __init ubifs_init(void) | ||
| 1851 | { | ||
| 1852 | int err; | ||
| 1853 | |||
| 1854 | BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); | ||
| 1855 | |||
| 1856 | /* Make sure node sizes are 8-byte aligned */ | ||
| 1857 | BUILD_BUG_ON(UBIFS_CH_SZ & 7); | ||
| 1858 | BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7); | ||
| 1859 | BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); | ||
| 1860 | BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); | ||
| 1861 | BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); | ||
| 1862 | BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); | ||
| 1863 | BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7); | ||
| 1864 | BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7); | ||
| 1865 | BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7); | ||
| 1866 | BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7); | ||
| 1867 | BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); | ||
| 1868 | |||
| 1869 | BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); | ||
| 1870 | BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); | ||
| 1871 | BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); | ||
| 1872 | BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7); | ||
| 1873 | BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7); | ||
| 1874 | BUILD_BUG_ON(MIN_WRITE_SZ & 7); | ||
| 1875 | |||
| 1876 | /* Check min. node size */ | ||
| 1877 | BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ); | ||
| 1878 | BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); | ||
| 1879 | BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); | ||
| 1880 | BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); | ||
| 1881 | |||
| 1882 | BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); | ||
| 1883 | BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); | ||
| 1884 | BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); | ||
| 1885 | BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ); | ||
| 1886 | |||
| 1887 | /* Defined node sizes */ | ||
| 1888 | BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096); | ||
| 1889 | BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); | ||
| 1890 | BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); | ||
| 1891 | BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); | ||
| 1892 | |||
| 1893 | /* | ||
| 1894 | * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to | ||
| 1895 | * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. | ||
| 1896 | */ | ||
| 1897 | if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { | ||
| 1898 | ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" | ||
| 1899 | " at least 4096 bytes", | ||
| 1900 | (unsigned int)PAGE_CACHE_SIZE); | ||
| 1901 | return -EINVAL; | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | err = register_filesystem(&ubifs_fs_type); | ||
| 1905 | if (err) { | ||
| 1906 | ubifs_err("cannot register file system, error %d", err); | ||
| 1907 | return err; | ||
| 1908 | } | ||
| 1909 | |||
| 1910 | err = -ENOMEM; | ||
| 1911 | ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", | ||
| 1912 | sizeof(struct ubifs_inode), 0, | ||
| 1913 | SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, | ||
| 1914 | &inode_slab_ctor); | ||
| 1915 | if (!ubifs_inode_slab) | ||
| 1916 | goto out_reg; | ||
| 1917 | |||
| 1918 | register_shrinker(&ubifs_shrinker_info); | ||
| 1919 | |||
| 1920 | err = ubifs_compressors_init(); | ||
| 1921 | if (err) | ||
| 1922 | goto out_compr; | ||
| 1923 | |||
| 1924 | return 0; | ||
| 1925 | |||
| 1926 | out_compr: | ||
| 1927 | unregister_shrinker(&ubifs_shrinker_info); | ||
| 1928 | kmem_cache_destroy(ubifs_inode_slab); | ||
| 1929 | out_reg: | ||
| 1930 | unregister_filesystem(&ubifs_fs_type); | ||
| 1931 | return err; | ||
| 1932 | } | ||
| 1933 | /* late_initcall to let compressors initialize first */ | ||
| 1934 | late_initcall(ubifs_init); | ||
| 1935 | |||
| 1936 | static void __exit ubifs_exit(void) | ||
| 1937 | { | ||
| 1938 | ubifs_assert(list_empty(&ubifs_infos)); | ||
| 1939 | ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); | ||
| 1940 | |||
| 1941 | ubifs_compressors_exit(); | ||
| 1942 | unregister_shrinker(&ubifs_shrinker_info); | ||
| 1943 | kmem_cache_destroy(ubifs_inode_slab); | ||
| 1944 | unregister_filesystem(&ubifs_fs_type); | ||
| 1945 | } | ||
| 1946 | module_exit(ubifs_exit); | ||
| 1947 | |||
/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_VERSION(__stringify(UBIFS_VERSION));
MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter");
MODULE_DESCRIPTION("UBIFS - UBI File System");
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c new file mode 100644 index 000000000000..e909f4a96443 --- /dev/null +++ b/fs/ubifs/tnc.c | |||
| @@ -0,0 +1,2956 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements TNC (Tree Node Cache) which caches indexing nodes of | ||
| 25 | * the UBIFS B-tree. | ||
| 26 | * | ||
| 27 | * At the moment the locking rules of the TNC tree are quite simple and | ||
| 28 | * straightforward. We just have a mutex and lock it when we traverse the | ||
| 29 | * tree. If a znode is not in memory, we read it from flash while still having | ||
| 30 | * the mutex locked. | ||
| 31 | */ | ||
| 32 | |||
| 33 | #include <linux/crc32.h> | ||
| 34 | #include "ubifs.h" | ||
| 35 | |||
/*
 * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
 * @NAME_LESS: name corresponding to the first argument is less than second
 * @NAME_MATCHES: names match
 * @NAME_GREATER: name corresponding to the first argument is greater than
 *                the second
 * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media
 *
 * These constants were introduced to improve readability.
 */
enum {
	NAME_LESS = 0,
	NAME_MATCHES = 1,
	NAME_GREATER = 2,
	NOT_ON_MEDIA = 3,
};
| 52 | |||
| 53 | /** | ||
| 54 | * insert_old_idx - record an index node obsoleted since the last commit start. | ||
| 55 | * @c: UBIFS file-system description object | ||
| 56 | * @lnum: LEB number of obsoleted index node | ||
| 57 | * @offs: offset of obsoleted index node | ||
| 58 | * | ||
| 59 | * Returns %0 on success, and a negative error code on failure. | ||
| 60 | * | ||
| 61 | * For recovery, there must always be a complete intact version of the index on | ||
| 62 | * flash at all times. That is called the "old index". It is the index as at the | ||
| 63 | * time of the last successful commit. Many of the index nodes in the old index | ||
| 64 | * may be dirty, but they must not be erased until the next successful commit | ||
| 65 | * (at which point that index becomes the old index). | ||
| 66 | * | ||
| 67 | * That means that the garbage collection and the in-the-gaps method of | ||
| 68 | * committing must be able to determine if an index node is in the old index. | ||
| 69 | * Most of the old index nodes can be found by looking up the TNC using the | ||
| 70 | * 'lookup_znode()' function. However, some of the old index nodes may have | ||
| 71 | * been deleted from the current index or may have been changed so much that | ||
| 72 | * they cannot be easily found. In those cases, an entry is added to an RB-tree. | ||
| 73 | * That is what this function does. The RB-tree is ordered by LEB number and | ||
| 74 | * offset because they uniquely identify the old index node. | ||
| 75 | */ | ||
| 76 | static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) | ||
| 77 | { | ||
| 78 | struct ubifs_old_idx *old_idx, *o; | ||
| 79 | struct rb_node **p, *parent = NULL; | ||
| 80 | |||
| 81 | old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); | ||
| 82 | if (unlikely(!old_idx)) | ||
| 83 | return -ENOMEM; | ||
| 84 | old_idx->lnum = lnum; | ||
| 85 | old_idx->offs = offs; | ||
| 86 | |||
| 87 | p = &c->old_idx.rb_node; | ||
| 88 | while (*p) { | ||
| 89 | parent = *p; | ||
| 90 | o = rb_entry(parent, struct ubifs_old_idx, rb); | ||
| 91 | if (lnum < o->lnum) | ||
| 92 | p = &(*p)->rb_left; | ||
| 93 | else if (lnum > o->lnum) | ||
| 94 | p = &(*p)->rb_right; | ||
| 95 | else if (offs < o->offs) | ||
| 96 | p = &(*p)->rb_left; | ||
| 97 | else if (offs > o->offs) | ||
| 98 | p = &(*p)->rb_right; | ||
| 99 | else { | ||
| 100 | ubifs_err("old idx added twice!"); | ||
| 101 | kfree(old_idx); | ||
| 102 | return 0; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | rb_link_node(&old_idx->rb, parent, p); | ||
| 106 | rb_insert_color(&old_idx->rb, &c->old_idx); | ||
| 107 | return 0; | ||
| 108 | } | ||
| 109 | |||
| 110 | /** | ||
| 111 | * insert_old_idx_znode - record a znode obsoleted since last commit start. | ||
| 112 | * @c: UBIFS file-system description object | ||
| 113 | * @znode: znode of obsoleted index node | ||
| 114 | * | ||
| 115 | * Returns %0 on success, and a negative error code on failure. | ||
| 116 | */ | ||
| 117 | int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) | ||
| 118 | { | ||
| 119 | if (znode->parent) { | ||
| 120 | struct ubifs_zbranch *zbr; | ||
| 121 | |||
| 122 | zbr = &znode->parent->zbranch[znode->iip]; | ||
| 123 | if (zbr->len) | ||
| 124 | return insert_old_idx(c, zbr->lnum, zbr->offs); | ||
| 125 | } else | ||
| 126 | if (c->zroot.len) | ||
| 127 | return insert_old_idx(c, c->zroot.lnum, | ||
| 128 | c->zroot.offs); | ||
| 129 | return 0; | ||
| 130 | } | ||
| 131 | |||
| 132 | /** | ||
| 133 | * ins_clr_old_idx_znode - record a znode obsoleted since last commit start. | ||
| 134 | * @c: UBIFS file-system description object | ||
| 135 | * @znode: znode of obsoleted index node | ||
| 136 | * | ||
| 137 | * Returns %0 on success, and a negative error code on failure. | ||
| 138 | */ | ||
| 139 | static int ins_clr_old_idx_znode(struct ubifs_info *c, | ||
| 140 | struct ubifs_znode *znode) | ||
| 141 | { | ||
| 142 | int err; | ||
| 143 | |||
| 144 | if (znode->parent) { | ||
| 145 | struct ubifs_zbranch *zbr; | ||
| 146 | |||
| 147 | zbr = &znode->parent->zbranch[znode->iip]; | ||
| 148 | if (zbr->len) { | ||
| 149 | err = insert_old_idx(c, zbr->lnum, zbr->offs); | ||
| 150 | if (err) | ||
| 151 | return err; | ||
| 152 | zbr->lnum = 0; | ||
| 153 | zbr->offs = 0; | ||
| 154 | zbr->len = 0; | ||
| 155 | } | ||
| 156 | } else | ||
| 157 | if (c->zroot.len) { | ||
| 158 | err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); | ||
| 159 | if (err) | ||
| 160 | return err; | ||
| 161 | c->zroot.lnum = 0; | ||
| 162 | c->zroot.offs = 0; | ||
| 163 | c->zroot.len = 0; | ||
| 164 | } | ||
| 165 | return 0; | ||
| 166 | } | ||
| 167 | |||
/**
 * destroy_old_idx - destroy the old_idx RB-tree.
 * @c: UBIFS file-system description object
 *
 * During start commit, the old_idx RB-tree is used to avoid overwriting index
 * nodes that were in the index last commit but have since been deleted. This
 * is necessary for recovery i.e. the old index must be kept intact until the
 * new index is successfully written. The old-idx RB-tree is used for the
 * in-the-gaps method of writing index nodes and is destroyed every commit.
 */
void destroy_old_idx(struct ubifs_info *c)
{
	struct rb_node *this = c->old_idx.rb_node;
	struct ubifs_old_idx *old_idx;

	/*
	 * Manual post-order traversal: descend to a leaf, free it, then
	 * detach it from its parent so it is never visited again. This
	 * avoids recursion and never touches a node after it is freed.
	 */
	while (this) {
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		old_idx = rb_entry(this, struct ubifs_old_idx, rb);
		this = rb_parent(this);
		if (this) {
			/* Unlink the child we are about to free */
			if (this->rb_left == &old_idx->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		kfree(old_idx);
	}
	c->old_idx = RB_ROOT;
}
| 203 | |||
| 204 | /** | ||
| 205 | * copy_znode - copy a dirty znode. | ||
| 206 | * @c: UBIFS file-system description object | ||
| 207 | * @znode: znode to copy | ||
| 208 | * | ||
| 209 | * A dirty znode being committed may not be changed, so it is copied. | ||
| 210 | */ | ||
| 211 | static struct ubifs_znode *copy_znode(struct ubifs_info *c, | ||
| 212 | struct ubifs_znode *znode) | ||
| 213 | { | ||
| 214 | struct ubifs_znode *zn; | ||
| 215 | |||
| 216 | zn = kmalloc(c->max_znode_sz, GFP_NOFS); | ||
| 217 | if (unlikely(!zn)) | ||
| 218 | return ERR_PTR(-ENOMEM); | ||
| 219 | |||
| 220 | memcpy(zn, znode, c->max_znode_sz); | ||
| 221 | zn->cnext = NULL; | ||
| 222 | __set_bit(DIRTY_ZNODE, &zn->flags); | ||
| 223 | __clear_bit(COW_ZNODE, &zn->flags); | ||
| 224 | |||
| 225 | ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); | ||
| 226 | __set_bit(OBSOLETE_ZNODE, &znode->flags); | ||
| 227 | |||
| 228 | if (znode->level != 0) { | ||
| 229 | int i; | ||
| 230 | const int n = zn->child_cnt; | ||
| 231 | |||
| 232 | /* The children now have new parent */ | ||
| 233 | for (i = 0; i < n; i++) { | ||
| 234 | struct ubifs_zbranch *zbr = &zn->zbranch[i]; | ||
| 235 | |||
| 236 | if (zbr->znode) | ||
| 237 | zbr->znode->parent = zn; | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | atomic_long_inc(&c->dirty_zn_cnt); | ||
| 242 | return zn; | ||
| 243 | } | ||
| 244 | |||
/**
 * add_idx_dirt - add dirt due to a dirty znode.
 * @c: UBIFS file-system description object
 * @lnum: LEB number of index node
 * @dirt: size of index node
 *
 * This function updates lprops dirty space and the new size of the index.
 */
static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
{
	/* Index nodes are 8-byte aligned on flash, so shrink the index size
	 * estimate by the aligned amount */
	c->calc_idx_sz -= ALIGN(dirt, 8);
	return ubifs_add_dirt(c, lnum, dirt);
}
| 258 | |||
/**
 * dirty_cow_znode - ensure a znode is not being committed.
 * @c: UBIFS file-system description object
 * @zbr: branch of znode to check
 *
 * Returns dirtied znode on success or negative error code on failure.
 */
static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
					   struct ubifs_zbranch *zbr)
{
	struct ubifs_znode *znode = zbr->znode;
	struct ubifs_znode *zn;
	int err;

	if (!test_bit(COW_ZNODE, &znode->flags)) {
		/* znode is not being committed */
		if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
			/*
			 * Clean -> dirty transition: update the znode
			 * counters and account the old on-flash copy as
			 * dirty space.
			 */
			atomic_long_inc(&c->dirty_zn_cnt);
			atomic_long_dec(&c->clean_zn_cnt);
			atomic_long_dec(&ubifs_clean_zn_cnt);
			err = add_idx_dirt(c, zbr->lnum, zbr->len);
			if (unlikely(err))
				return ERR_PTR(err);
		}
		return znode;
	}

	/* The znode is part of a commit - copy-on-write it instead */
	zn = copy_znode(c, znode);
	if (unlikely(IS_ERR(zn)))
		return zn;

	if (zbr->len) {
		/*
		 * Record the old on-flash location so the old index stays
		 * recognizable for recovery, then account it as dirt.
		 */
		err = insert_old_idx(c, zbr->lnum, zbr->offs);
		if (unlikely(err))
			return ERR_PTR(err);
		err = add_idx_dirt(c, zbr->lnum, zbr->len);
	} else
		err = 0;

	/*
	 * Re-point the branch at the copy and clear its flash position even
	 * if 'add_idx_dirt()' failed above - the copy has already been made
	 * and must stay reachable; the error is reported afterwards.
	 */
	zbr->znode = zn;
	zbr->lnum = 0;
	zbr->offs = 0;
	zbr->len = 0;

	if (unlikely(err))
		return ERR_PTR(err);
	return zn;
}
| 307 | |||
| 308 | /** | ||
| 309 | * lnc_add - add a leaf node to the leaf node cache. | ||
| 310 | * @c: UBIFS file-system description object | ||
| 311 | * @zbr: zbranch of leaf node | ||
| 312 | * @node: leaf node | ||
| 313 | * | ||
| 314 | * Leaf nodes are non-index nodes directory entry nodes or data nodes. The | ||
| 315 | * purpose of the leaf node cache is to save re-reading the same leaf node over | ||
| 316 | * and over again. Most things are cached by VFS, however the file system must | ||
| 317 | * cache directory entries for readdir and for resolving hash collisions. The | ||
| 318 | * present implementation of the leaf node cache is extremely simple, and | ||
| 319 | * allows for error returns that are not used but that may be needed if a more | ||
| 320 | * complex implementation is created. | ||
| 321 | * | ||
| 322 | * Note, this function does not add the @node object to LNC directly, but | ||
| 323 | * allocates a copy of the object and adds the copy to LNC. The reason for this | ||
| 324 | * is that @node has been allocated outside of the TNC subsystem and will be | ||
| 325 | * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC | ||
| 326 | * may be changed at any time, e.g. freed by the shrinker. | ||
| 327 | */ | ||
| 328 | static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, | ||
| 329 | const void *node) | ||
| 330 | { | ||
| 331 | int err; | ||
| 332 | void *lnc_node; | ||
| 333 | const struct ubifs_dent_node *dent = node; | ||
| 334 | |||
| 335 | ubifs_assert(!zbr->leaf); | ||
| 336 | ubifs_assert(zbr->len != 0); | ||
| 337 | ubifs_assert(is_hash_key(c, &zbr->key)); | ||
| 338 | |||
| 339 | err = ubifs_validate_entry(c, dent); | ||
| 340 | if (err) { | ||
| 341 | dbg_dump_stack(); | ||
| 342 | dbg_dump_node(c, dent); | ||
| 343 | return err; | ||
| 344 | } | ||
| 345 | |||
| 346 | lnc_node = kmalloc(zbr->len, GFP_NOFS); | ||
| 347 | if (!lnc_node) | ||
| 348 | /* We don't have to have the cache, so no error */ | ||
| 349 | return 0; | ||
| 350 | |||
| 351 | memcpy(lnc_node, node, zbr->len); | ||
| 352 | zbr->leaf = lnc_node; | ||
| 353 | return 0; | ||
| 354 | } | ||
| 355 | |||
| 356 | /** | ||
| 357 | * lnc_add_directly - add a leaf node to the leaf-node-cache. | ||
| 358 | * @c: UBIFS file-system description object | ||
| 359 | * @zbr: zbranch of leaf node | ||
| 360 | * @node: leaf node | ||
| 361 | * | ||
| 362 | * This function is similar to 'lnc_add()', but it does not create a copy of | ||
| 363 | * @node but inserts @node to TNC directly. | ||
| 364 | */ | ||
| 365 | static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, | ||
| 366 | void *node) | ||
| 367 | { | ||
| 368 | int err; | ||
| 369 | |||
| 370 | ubifs_assert(!zbr->leaf); | ||
| 371 | ubifs_assert(zbr->len != 0); | ||
| 372 | |||
| 373 | err = ubifs_validate_entry(c, node); | ||
| 374 | if (err) { | ||
| 375 | dbg_dump_stack(); | ||
| 376 | dbg_dump_node(c, node); | ||
| 377 | return err; | ||
| 378 | } | ||
| 379 | |||
| 380 | zbr->leaf = node; | ||
| 381 | return 0; | ||
| 382 | } | ||
| 383 | |||
| 384 | /** | ||
| 385 | * lnc_free - remove a leaf node from the leaf node cache. | ||
| 386 | * @zbr: zbranch of leaf node | ||
| 387 | * @node: leaf node | ||
| 388 | */ | ||
| 389 | static void lnc_free(struct ubifs_zbranch *zbr) | ||
| 390 | { | ||
| 391 | if (!zbr->leaf) | ||
| 392 | return; | ||
| 393 | kfree(zbr->leaf); | ||
| 394 | zbr->leaf = NULL; | ||
| 395 | } | ||
| 396 | |||
| 397 | /** | ||
| 398 | * tnc_read_node_nm - read a "hashed" leaf node. | ||
| 399 | * @c: UBIFS file-system description object | ||
| 400 | * @zbr: key and position of the node | ||
| 401 | * @node: node is returned here | ||
| 402 | * | ||
| 403 | * This function reads a "hashed" node defined by @zbr from the leaf node cache | ||
| 404 | * (in it is there) or from the hash media, in which case the node is also | ||
| 405 | * added to LNC. Returns zero in case of success or a negative negative error | ||
| 406 | * code in case of failure. | ||
| 407 | */ | ||
| 408 | static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, | ||
| 409 | void *node) | ||
| 410 | { | ||
| 411 | int err; | ||
| 412 | |||
| 413 | ubifs_assert(is_hash_key(c, &zbr->key)); | ||
| 414 | |||
| 415 | if (zbr->leaf) { | ||
| 416 | /* Read from the leaf node cache */ | ||
| 417 | ubifs_assert(zbr->len != 0); | ||
| 418 | memcpy(node, zbr->leaf, zbr->len); | ||
| 419 | return 0; | ||
| 420 | } | ||
| 421 | |||
| 422 | err = ubifs_tnc_read_node(c, zbr, node); | ||
| 423 | if (err) | ||
| 424 | return err; | ||
| 425 | |||
| 426 | /* Add the node to the leaf node cache */ | ||
| 427 | err = lnc_add(c, zbr, node); | ||
| 428 | return err; | ||
| 429 | } | ||
| 430 | |||
| 431 | /** | ||
| 432 | * try_read_node - read a node if it is a node. | ||
| 433 | * @c: UBIFS file-system description object | ||
| 434 | * @buf: buffer to read to | ||
| 435 | * @type: node type | ||
| 436 | * @len: node length (not aligned) | ||
| 437 | * @lnum: LEB number of node to read | ||
| 438 | * @offs: offset of node to read | ||
| 439 | * | ||
| 440 | * This function tries to read a node of known type and length, checks it and | ||
| 441 | * stores it in @buf. This function returns %1 if a node is present and %0 if | ||
| 442 | * a node is not present. A negative error code is returned for I/O errors. | ||
| 443 | * This function performs that same function as ubifs_read_node except that | ||
| 444 | * it does not require that there is actually a node present and instead | ||
| 445 | * the return code indicates if a node was read. | ||
| 446 | */ | ||
| 447 | static int try_read_node(const struct ubifs_info *c, void *buf, int type, | ||
| 448 | int len, int lnum, int offs) | ||
| 449 | { | ||
| 450 | int err, node_len; | ||
| 451 | struct ubifs_ch *ch = buf; | ||
| 452 | uint32_t crc, node_crc; | ||
| 453 | |||
| 454 | dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); | ||
| 455 | |||
| 456 | err = ubi_read(c->ubi, lnum, buf, offs, len); | ||
| 457 | if (err) { | ||
| 458 | ubifs_err("cannot read node type %d from LEB %d:%d, error %d", | ||
| 459 | type, lnum, offs, err); | ||
| 460 | return err; | ||
| 461 | } | ||
| 462 | |||
| 463 | if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) | ||
| 464 | return 0; | ||
| 465 | |||
| 466 | if (ch->node_type != type) | ||
| 467 | return 0; | ||
| 468 | |||
| 469 | node_len = le32_to_cpu(ch->len); | ||
| 470 | if (node_len != len) | ||
| 471 | return 0; | ||
| 472 | |||
| 473 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); | ||
| 474 | node_crc = le32_to_cpu(ch->crc); | ||
| 475 | if (crc != node_crc) | ||
| 476 | return 0; | ||
| 477 | |||
| 478 | return 1; | ||
| 479 | } | ||
| 480 | |||
| 481 | /** | ||
| 482 | * fallible_read_node - try to read a leaf node. | ||
| 483 | * @c: UBIFS file-system description object | ||
| 484 | * @key: key of node to read | ||
| 485 | * @zbr: position of node | ||
| 486 | * @node: node returned | ||
| 487 | * | ||
| 488 | * This function tries to read a node and returns %1 if the node is read, %0 | ||
| 489 | * if the node is not present, and a negative error code in the case of error. | ||
| 490 | */ | ||
| 491 | static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | ||
| 492 | struct ubifs_zbranch *zbr, void *node) | ||
| 493 | { | ||
| 494 | int ret; | ||
| 495 | |||
| 496 | dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); | ||
| 497 | |||
| 498 | ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, | ||
| 499 | zbr->offs); | ||
| 500 | if (ret == 1) { | ||
| 501 | union ubifs_key node_key; | ||
| 502 | struct ubifs_dent_node *dent = node; | ||
| 503 | |||
| 504 | /* All nodes have key in the same place */ | ||
| 505 | key_read(c, &dent->key, &node_key); | ||
| 506 | if (keys_cmp(c, key, &node_key) != 0) | ||
| 507 | ret = 0; | ||
| 508 | } | ||
| 509 | if (ret == 0) | ||
| 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | ||
| 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | ||
| 512 | return ret; | ||
| 513 | } | ||
| 514 | |||
/**
 * matches_name - determine if a direntry or xattr entry matches a given name.
 * @c: UBIFS file-system description object
 * @zbr: zbranch of dent
 * @nm: name to match
 *
 * This function checks if xentry/direntry referred by zbranch @zbr matches name
 * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by
 * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case
 * of failure, a negative error code is returned.
 */
static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr,
			const struct qstr *nm)
{
	struct ubifs_dent_node *dent;
	int nlen, err;

	/* If possible, match against the dent in the leaf node cache */
	if (!zbr->leaf) {
		dent = kmalloc(zbr->len, GFP_NOFS);
		if (!dent)
			return -ENOMEM;

		err = ubifs_tnc_read_node(c, zbr, dent);
		if (err)
			goto out_free;

		/* Add the node to the leaf node cache */
		err = lnc_add_directly(c, zbr, dent);
		if (err)
			goto out_free;
		/* On success the LNC owns @dent, so it must not be freed */
	} else
		dent = zbr->leaf;

	/* Compare the common prefix; ties are broken by name length */
	nlen = le16_to_cpu(dent->nlen);
	err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
	if (err == 0) {
		if (nlen == nm->len)
			return NAME_MATCHES;
		else if (nlen < nm->len)
			return NAME_LESS;
		else
			return NAME_GREATER;
	} else if (err < 0)
		return NAME_LESS;
	else
		return NAME_GREATER;

out_free:
	kfree(dent);
	return err;
}
| 567 | |||
| 568 | /** | ||
| 569 | * get_znode - get a TNC znode that may not be loaded yet. | ||
| 570 | * @c: UBIFS file-system description object | ||
| 571 | * @znode: parent znode | ||
| 572 | * @n: znode branch slot number | ||
| 573 | * | ||
| 574 | * This function returns the znode or a negative error code. | ||
| 575 | */ | ||
| 576 | static struct ubifs_znode *get_znode(struct ubifs_info *c, | ||
| 577 | struct ubifs_znode *znode, int n) | ||
| 578 | { | ||
| 579 | struct ubifs_zbranch *zbr; | ||
| 580 | |||
| 581 | zbr = &znode->zbranch[n]; | ||
| 582 | if (zbr->znode) | ||
| 583 | znode = zbr->znode; | ||
| 584 | else | ||
| 585 | znode = ubifs_load_znode(c, zbr, znode, n); | ||
| 586 | return znode; | ||
| 587 | } | ||
| 588 | |||
/**
 * tnc_next - find next TNC entry.
 * @c: UBIFS file-system description object
 * @zn: znode is passed and returned here
 * @n: znode branch slot number is passed and returned here
 *
 * This function returns %0 if the next TNC entry is found, %-ENOENT if there is
 * no next entry, or a negative error code otherwise.
 */
static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
{
	struct ubifs_znode *znode = *zn;
	int nn = *n;

	/* Fast path: next branch is in the same znode */
	nn += 1;
	if (nn < znode->child_cnt) {
		*n = nn;
		return 0;
	}
	/*
	 * Climb up until an ancestor has a branch to the right of the one we
	 * came from, then descend along the leftmost path of that subtree
	 * down to level 0.
	 */
	while (1) {
		struct ubifs_znode *zp;

		zp = znode->parent;
		if (!zp)
			return -ENOENT;
		nn = znode->iip + 1;
		znode = zp;
		if (nn < znode->child_cnt) {
			znode = get_znode(c, znode, nn);
			if (IS_ERR(znode))
				return PTR_ERR(znode);
			while (znode->level != 0) {
				znode = get_znode(c, znode, 0);
				if (IS_ERR(znode))
					return PTR_ERR(znode);
			}
			nn = 0;
			break;
		}
	}
	*zn = znode;
	*n = nn;
	return 0;
}
| 633 | |||
/**
 * tnc_prev - find previous TNC entry.
 * @c: UBIFS file-system description object
 * @zn: znode is returned here
 * @n: znode branch slot number is passed and returned here
 *
 * This function returns %0 if the previous TNC entry is found, %-ENOENT if
 * there is no previous entry, or a negative error code otherwise.
 */
static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
{
	struct ubifs_znode *znode = *zn;
	int nn = *n;

	/* Fast path: previous branch is in the same znode */
	if (nn > 0) {
		*n = nn - 1;
		return 0;
	}
	/*
	 * Climb up until an ancestor has a branch to the left of the one we
	 * came from, then descend along the rightmost path of that subtree
	 * down to level 0.
	 */
	while (1) {
		struct ubifs_znode *zp;

		zp = znode->parent;
		if (!zp)
			return -ENOENT;
		nn = znode->iip - 1;
		znode = zp;
		if (nn >= 0) {
			znode = get_znode(c, znode, nn);
			if (IS_ERR(znode))
				return PTR_ERR(znode);
			while (znode->level != 0) {
				nn = znode->child_cnt - 1;
				znode = get_znode(c, znode, nn);
				if (IS_ERR(znode))
					return PTR_ERR(znode);
			}
			nn = znode->child_cnt - 1;
			break;
		}
	}
	*zn = znode;
	*n = nn;
	return 0;
}
| 678 | |||
/**
 * resolve_collision - resolve a collision.
 * @c: UBIFS file-system description object
 * @key: key of a directory or extended attribute entry
 * @zn: znode is returned here
 * @n: zbranch number is passed and returned here
 * @nm: name of the entry
 *
 * This function is called for "hashed" keys to make sure that the found key
 * really corresponds to the looked up node (directory or extended attribute
 * entry). It returns %1 and sets @zn and @n if the collision is resolved.
 * %0 is returned if @nm is not found and @zn and @n are set to the previous
 * entry, i.e. to the entry after which @nm could follow if it were in TNC.
 * This means that @n may be set to %-1 if the leftmost key in @zn is the
 * previous one. A negative error code is returned on failures.
 */
static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
			     struct ubifs_znode **zn, int *n,
			     const struct qstr *nm)
{
	int err;

	err = matches_name(c, &(*zn)->zbranch[*n], nm);
	if (unlikely(err < 0))
		return err;
	if (err == NAME_MATCHES)
		return 1;

	if (err == NAME_GREATER) {
		/* Look left */
		while (1) {
			err = tnc_prev(c, zn, n);
			if (err == -ENOENT) {
				/* Walked off the left edge of the tree */
				ubifs_assert(*n == 0);
				*n = -1;
				return 0;
			}
			if (err < 0)
				return err;
			/* A different key means we left the collision run */
			if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
				/*
				 * We have found the branch after which we would
				 * like to insert, but inserting in this znode
				 * may still be wrong. Consider the following 3
				 * znodes, in the case where we are resolving a
				 * collision with Key2.
				 *
				 *                  znode zp
				 *            ----------------------
				 * level 1     |  Key0  |  Key1  |
				 *            -----------------------
				 *                 |            |
				 *       znode za  |            |  znode zb
				 *          ------------      ------------
				 * level 0  |  Key0  |        |  Key2  |
				 *          ------------      ------------
				 *
				 * The lookup finds Key2 in znode zb. Lets say
				 * there is no match and the name is greater so
				 * we look left. When we find Key0, we end up
				 * here. If we return now, we will insert into
				 * znode za at slot n = 1. But that is invalid
				 * according to the parent's keys. Key2 must
				 * be inserted into znode zb.
				 *
				 * Note, this problem is not relevant for the
				 * case when we go right, because
				 * 'tnc_insert()' would correct the parent key.
				 */
				if (*n == (*zn)->child_cnt - 1) {
					err = tnc_next(c, zn, n);
					if (err) {
						/* Should be impossible */
						ubifs_assert(0);
						if (err == -ENOENT)
							err = -EINVAL;
						return err;
					}
					ubifs_assert(*n == 0);
					*n = -1;
				}
				return 0;
			}
			err = matches_name(c, &(*zn)->zbranch[*n], nm);
			if (err < 0)
				return err;
			if (err == NAME_LESS)
				return 0;
			if (err == NAME_MATCHES)
				return 1;
			ubifs_assert(err == NAME_GREATER);
		}
	} else {
		int nn = *n;
		struct ubifs_znode *znode = *zn;

		/* Look right */
		while (1) {
			err = tnc_next(c, &znode, &nn);
			if (err == -ENOENT)
				return 0;
			if (err < 0)
				return err;
			if (keys_cmp(c, &znode->zbranch[nn].key, key))
				return 0;
			err = matches_name(c, &znode->zbranch[nn], nm);
			if (err < 0)
				return err;
			if (err == NAME_GREATER)
				return 0;
			/* Still in the run: advance the out position */
			*zn = znode;
			*n = nn;
			if (err == NAME_MATCHES)
				return 1;
			ubifs_assert(err == NAME_LESS);
		}
	}
}
| 797 | |||
| 798 | /** | ||
| 799 | * fallible_matches_name - determine if a dent matches a given name. | ||
| 800 | * @c: UBIFS file-system description object | ||
| 801 | * @zbr: zbranch of dent | ||
| 802 | * @nm: name to match | ||
| 803 | * | ||
| 804 | * This is a "fallible" version of 'matches_name()' function which does not | ||
| 805 | * panic if the direntry/xentry referred by @zbr does not exist on the media. | ||
| 806 | * | ||
| 807 | * This function checks if xentry/direntry referred by zbranch @zbr matches name | ||
| 808 | * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr | ||
| 809 | * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA | ||
| 810 | * if xentry/direntry referred by @zbr does not exist on the media. A negative | ||
| 811 | * error code is returned in case of failure. | ||
| 812 | */ | ||
| 813 | static int fallible_matches_name(struct ubifs_info *c, | ||
| 814 | struct ubifs_zbranch *zbr, | ||
| 815 | const struct qstr *nm) | ||
| 816 | { | ||
| 817 | struct ubifs_dent_node *dent; | ||
| 818 | int nlen, err; | ||
| 819 | |||
| 820 | /* If possible, match against the dent in the leaf node cache */ | ||
| 821 | if (!zbr->leaf) { | ||
| 822 | dent = kmalloc(zbr->len, GFP_NOFS); | ||
| 823 | if (!dent) | ||
| 824 | return -ENOMEM; | ||
| 825 | |||
| 826 | err = fallible_read_node(c, &zbr->key, zbr, dent); | ||
| 827 | if (err < 0) | ||
| 828 | goto out_free; | ||
| 829 | if (err == 0) { | ||
| 830 | /* The node was not present */ | ||
| 831 | err = NOT_ON_MEDIA; | ||
| 832 | goto out_free; | ||
| 833 | } | ||
| 834 | ubifs_assert(err == 1); | ||
| 835 | |||
| 836 | err = lnc_add_directly(c, zbr, dent); | ||
| 837 | if (err) | ||
| 838 | goto out_free; | ||
| 839 | } else | ||
| 840 | dent = zbr->leaf; | ||
| 841 | |||
| 842 | nlen = le16_to_cpu(dent->nlen); | ||
| 843 | err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); | ||
| 844 | if (err == 0) { | ||
| 845 | if (nlen == nm->len) | ||
| 846 | return NAME_MATCHES; | ||
| 847 | else if (nlen < nm->len) | ||
| 848 | return NAME_LESS; | ||
| 849 | else | ||
| 850 | return NAME_GREATER; | ||
| 851 | } else if (err < 0) | ||
| 852 | return NAME_LESS; | ||
| 853 | else | ||
| 854 | return NAME_GREATER; | ||
| 855 | |||
| 856 | out_free: | ||
| 857 | kfree(dent); | ||
| 858 | return err; | ||
| 859 | } | ||
| 860 | |||
| 861 | /** | ||
| 862 | * fallible_resolve_collision - resolve a collision even if nodes are missing. | ||
| 863 | * @c: UBIFS file-system description object | ||
| 864 | * @key: key | ||
| 865 | * @zn: znode is returned here | ||
| 866 | * @n: branch number is passed and returned here | ||
| 867 | * @nm: name of directory entry | ||
| 868 | * @adding: indicates caller is adding a key to the TNC | ||
| 869 | * | ||
| 870 | * This is a "fallible" version of the 'resolve_collision()' function which | ||
| 871 | * does not panic if one of the nodes referred to by TNC does not exist on the | ||
| 872 | * media. This may happen when replaying the journal if a deleted node was | ||
| 873 | * Garbage-collected and the commit was not done. A branch that refers to a node | ||
| 874 | * that is not present is called a dangling branch. The following are the return | ||
| 875 | * codes for this function: | ||
| 876 | * o if @nm was found, %1 is returned and @zn and @n are set to the found | ||
| 877 | * branch; | ||
| 878 | * o if we are @adding and @nm was not found, %0 is returned; | ||
| 879 | * o if we are not @adding and @nm was not found, but a dangling branch was | ||
| 880 | * found, then %1 is returned and @zn and @n are set to the dangling branch; | ||
| 881 | * o a negative error code is returned in case of failure. | ||
| 882 | */ | ||
| 883 | static int fallible_resolve_collision(struct ubifs_info *c, | ||
| 884 | const union ubifs_key *key, | ||
| 885 | struct ubifs_znode **zn, int *n, | ||
| 886 | const struct qstr *nm, int adding) | ||
| 887 | { | ||
| 888 | struct ubifs_znode *o_znode = NULL, *znode = *zn; | ||
| 889 | int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; | ||
| 890 | |||
| 891 | cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); | ||
| 892 | if (unlikely(cmp < 0)) | ||
| 893 | return cmp; | ||
| 894 | if (cmp == NAME_MATCHES) | ||
| 895 | return 1; | ||
| 896 | if (cmp == NOT_ON_MEDIA) { | ||
| 897 | o_znode = znode; | ||
| 898 | o_n = nn; | ||
| 899 | /* | ||
| 900 | * We are unlucky and hit a dangling branch straight away. | ||
| 901 | * Now we do not really know where to go to find the needed | ||
| 902 | * branch - to the left or to the right. Well, let's try left. | ||
| 903 | */ | ||
| 904 | unsure = 1; | ||
| 905 | } else if (!adding) | ||
| 906 | unsure = 1; /* Remove a dangling branch wherever it is */ | ||
| 907 | |||
| 908 | if (cmp == NAME_GREATER || unsure) { | ||
| 909 | /* Look left */ | ||
| 910 | while (1) { | ||
| 911 | err = tnc_prev(c, zn, n); | ||
| 912 | if (err == -ENOENT) { | ||
| 913 | ubifs_assert(*n == 0); | ||
| 914 | *n = -1; | ||
| 915 | break; | ||
| 916 | } | ||
| 917 | if (err < 0) | ||
| 918 | return err; | ||
| 919 | if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { | ||
| 920 | /* See comments in 'resolve_collision()' */ | ||
| 921 | if (*n == (*zn)->child_cnt - 1) { | ||
| 922 | err = tnc_next(c, zn, n); | ||
| 923 | if (err) { | ||
| 924 | /* Should be impossible */ | ||
| 925 | ubifs_assert(0); | ||
| 926 | if (err == -ENOENT) | ||
| 927 | err = -EINVAL; | ||
| 928 | return err; | ||
| 929 | } | ||
| 930 | ubifs_assert(*n == 0); | ||
| 931 | *n = -1; | ||
| 932 | } | ||
| 933 | break; | ||
| 934 | } | ||
| 935 | err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); | ||
| 936 | if (err < 0) | ||
| 937 | return err; | ||
| 938 | if (err == NAME_MATCHES) | ||
| 939 | return 1; | ||
| 940 | if (err == NOT_ON_MEDIA) { | ||
| 941 | o_znode = *zn; | ||
| 942 | o_n = *n; | ||
| 943 | continue; | ||
| 944 | } | ||
| 945 | if (!adding) | ||
| 946 | continue; | ||
| 947 | if (err == NAME_LESS) | ||
| 948 | break; | ||
| 949 | else | ||
| 950 | unsure = 0; | ||
| 951 | } | ||
| 952 | } | ||
| 953 | |||
| 954 | if (cmp == NAME_LESS || unsure) { | ||
| 955 | /* Look right */ | ||
| 956 | *zn = znode; | ||
| 957 | *n = nn; | ||
| 958 | while (1) { | ||
| 959 | err = tnc_next(c, &znode, &nn); | ||
| 960 | if (err == -ENOENT) | ||
| 961 | break; | ||
| 962 | if (err < 0) | ||
| 963 | return err; | ||
| 964 | if (keys_cmp(c, &znode->zbranch[nn].key, key)) | ||
| 965 | break; | ||
| 966 | err = fallible_matches_name(c, &znode->zbranch[nn], nm); | ||
| 967 | if (err < 0) | ||
| 968 | return err; | ||
| 969 | if (err == NAME_GREATER) | ||
| 970 | break; | ||
| 971 | *zn = znode; | ||
| 972 | *n = nn; | ||
| 973 | if (err == NAME_MATCHES) | ||
| 974 | return 1; | ||
| 975 | if (err == NOT_ON_MEDIA) { | ||
| 976 | o_znode = znode; | ||
| 977 | o_n = nn; | ||
| 978 | } | ||
| 979 | } | ||
| 980 | } | ||
| 981 | |||
| 982 | /* Never match a dangling branch when adding */ | ||
| 983 | if (adding || !o_znode) | ||
| 984 | return 0; | ||
| 985 | |||
| 986 | dbg_mnt("dangling match LEB %d:%d len %d %s", | ||
| 987 | o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, | ||
| 988 | o_znode->zbranch[o_n].len, DBGKEY(key)); | ||
| 989 | *zn = o_znode; | ||
| 990 | *n = o_n; | ||
| 991 | return 1; | ||
| 992 | } | ||
| 993 | |||
| 994 | /** | ||
| 995 | * matches_position - determine if a zbranch matches a given position. | ||
| 996 | * @zbr: zbranch of dent | ||
| 997 | * @lnum: LEB number of dent to match | ||
| 998 | * @offs: offset of dent to match | ||
| 999 | * | ||
| 1000 | * This function returns %1 if @lnum:@offs matches, and %0 otherwise. | ||
| 1001 | */ | ||
| 1002 | static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) | ||
| 1003 | { | ||
| 1004 | if (zbr->lnum == lnum && zbr->offs == offs) | ||
| 1005 | return 1; | ||
| 1006 | else | ||
| 1007 | return 0; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | /** | ||
| 1011 | * resolve_collision_directly - resolve a collision directly. | ||
| 1012 | * @c: UBIFS file-system description object | ||
| 1013 | * @key: key of directory entry | ||
| 1014 | * @zn: znode is passed and returned here | ||
| 1015 | * @n: zbranch number is passed and returned here | ||
| 1016 | * @lnum: LEB number of dent node to match | ||
| 1017 | * @offs: offset of dent node to match | ||
| 1018 | * | ||
| 1019 | * This function is used for "hashed" keys to make sure the found directory or | ||
| 1020 | * extended attribute entry node is what was looked for. It is used when the | ||
| 1021 | * flash address of the right node is known (@lnum:@offs) which makes it much | ||
| 1022 | * easier to resolve collisions (no need to read entries and match full | ||
| 1023 | * names). This function returns %1 and sets @zn and @n if the collision is | ||
| 1024 | * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the | ||
| 1025 | * previous directory entry. Otherwise a negative error code is returned. | ||
| 1026 | */ | ||
| 1027 | static int resolve_collision_directly(struct ubifs_info *c, | ||
| 1028 | const union ubifs_key *key, | ||
| 1029 | struct ubifs_znode **zn, int *n, | ||
| 1030 | int lnum, int offs) | ||
| 1031 | { | ||
| 1032 | struct ubifs_znode *znode; | ||
| 1033 | int nn, err; | ||
| 1034 | |||
| 1035 | znode = *zn; | ||
| 1036 | nn = *n; | ||
| 1037 | if (matches_position(&znode->zbranch[nn], lnum, offs)) | ||
| 1038 | return 1; | ||
| 1039 | |||
| 1040 | /* Look left */ | ||
| 1041 | while (1) { | ||
| 1042 | err = tnc_prev(c, &znode, &nn); | ||
| 1043 | if (err == -ENOENT) | ||
| 1044 | break; | ||
| 1045 | if (err < 0) | ||
| 1046 | return err; | ||
| 1047 | if (keys_cmp(c, &znode->zbranch[nn].key, key)) | ||
| 1048 | break; | ||
| 1049 | if (matches_position(&znode->zbranch[nn], lnum, offs)) { | ||
| 1050 | *zn = znode; | ||
| 1051 | *n = nn; | ||
| 1052 | return 1; | ||
| 1053 | } | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | /* Look right */ | ||
| 1057 | znode = *zn; | ||
| 1058 | nn = *n; | ||
| 1059 | while (1) { | ||
| 1060 | err = tnc_next(c, &znode, &nn); | ||
| 1061 | if (err == -ENOENT) | ||
| 1062 | return 0; | ||
| 1063 | if (err < 0) | ||
| 1064 | return err; | ||
| 1065 | if (keys_cmp(c, &znode->zbranch[nn].key, key)) | ||
| 1066 | return 0; | ||
| 1067 | *zn = znode; | ||
| 1068 | *n = nn; | ||
| 1069 | if (matches_position(&znode->zbranch[nn], lnum, offs)) | ||
| 1070 | return 1; | ||
| 1071 | } | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | /** | ||
| 1075 | * dirty_cow_bottom_up - dirty a znode and its ancestors. | ||
| 1076 | * @c: UBIFS file-system description object | ||
| 1077 | * @znode: znode to dirty | ||
| 1078 | * | ||
| 1079 | * If we do not have a unique key that resides in a znode, then we cannot | ||
| 1080 | * dirty that znode from the top down (i.e. by using lookup_level0_dirty) | ||
| 1081 | * This function records the path back to the last dirty ancestor, and then | ||
| 1082 | * dirties the znodes on that path. | ||
| 1083 | */ | ||
| 1084 | static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, | ||
| 1085 | struct ubifs_znode *znode) | ||
| 1086 | { | ||
| 1087 | struct ubifs_znode *zp; | ||
| 1088 | int *path = c->bottom_up_buf, p = 0; | ||
| 1089 | |||
| 1090 | ubifs_assert(c->zroot.znode); | ||
| 1091 | ubifs_assert(znode); | ||
| 1092 | if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { | ||
| 1093 | kfree(c->bottom_up_buf); | ||
| 1094 | c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), | ||
| 1095 | GFP_NOFS); | ||
| 1096 | if (!c->bottom_up_buf) | ||
| 1097 | return ERR_PTR(-ENOMEM); | ||
| 1098 | path = c->bottom_up_buf; | ||
| 1099 | } | ||
| 1100 | if (c->zroot.znode->level) { | ||
| 1101 | /* Go up until parent is dirty */ | ||
| 1102 | while (1) { | ||
| 1103 | int n; | ||
| 1104 | |||
| 1105 | zp = znode->parent; | ||
| 1106 | if (!zp) | ||
| 1107 | break; | ||
| 1108 | n = znode->iip; | ||
| 1109 | ubifs_assert(p < c->zroot.znode->level); | ||
| 1110 | path[p++] = n; | ||
| 1111 | if (!zp->cnext && ubifs_zn_dirty(znode)) | ||
| 1112 | break; | ||
| 1113 | znode = zp; | ||
| 1114 | } | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | /* Come back down, dirtying as we go */ | ||
| 1118 | while (1) { | ||
| 1119 | struct ubifs_zbranch *zbr; | ||
| 1120 | |||
| 1121 | zp = znode->parent; | ||
| 1122 | if (zp) { | ||
| 1123 | ubifs_assert(path[p - 1] >= 0); | ||
| 1124 | ubifs_assert(path[p - 1] < zp->child_cnt); | ||
| 1125 | zbr = &zp->zbranch[path[--p]]; | ||
| 1126 | znode = dirty_cow_znode(c, zbr); | ||
| 1127 | } else { | ||
| 1128 | ubifs_assert(znode == c->zroot.znode); | ||
| 1129 | znode = dirty_cow_znode(c, &c->zroot); | ||
| 1130 | } | ||
| 1131 | if (unlikely(IS_ERR(znode)) || !p) | ||
| 1132 | break; | ||
| 1133 | ubifs_assert(path[p - 1] >= 0); | ||
| 1134 | ubifs_assert(path[p - 1] < znode->child_cnt); | ||
| 1135 | znode = znode->zbranch[path[p - 1]].znode; | ||
| 1136 | } | ||
| 1137 | |||
| 1138 | return znode; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | /** | ||
| 1142 | * ubifs_lookup_level0 - search for zero-level znode. | ||
| 1143 | * @c: UBIFS file-system description object | ||
| 1144 | * @key: key to lookup | ||
| 1145 | * @zn: znode is returned here | ||
| 1146 | * @n: znode branch slot number is returned here | ||
| 1147 | * | ||
| 1148 | * This function looks up the TNC tree and search for zero-level znode which | ||
| 1149 | * refers key @key. The found zero-level znode is returned in @zn. There are 3 | ||
| 1150 | * cases: | ||
| 1151 | * o exact match, i.e. the found zero-level znode contains key @key, then %1 | ||
| 1152 | * is returned and slot number of the matched branch is stored in @n; | ||
| 1153 | * o not exact match, which means that zero-level znode does not contain | ||
| 1154 | * @key, then %0 is returned and slot number of the closed branch is stored | ||
| 1155 | * in @n; | ||
| 1156 | * o @key is so small that it is even less than the lowest key of the | ||
| 1157 | * leftmost zero-level node, then %0 is returned and %0 is stored in @n. | ||
| 1158 | * | ||
| 1159 | * Note, when the TNC tree is traversed, some znodes may be absent, then this | ||
| 1160 | * function reads corresponding indexing nodes and inserts them to TNC. In | ||
| 1161 | * case of failure, a negative error code is returned. | ||
| 1162 | */ | ||
| 1163 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1164 | struct ubifs_znode **zn, int *n) | ||
| 1165 | { | ||
| 1166 | int err, exact; | ||
| 1167 | struct ubifs_znode *znode; | ||
| 1168 | unsigned long time = get_seconds(); | ||
| 1169 | |||
| 1170 | dbg_tnc("search key %s", DBGKEY(key)); | ||
| 1171 | |||
| 1172 | znode = c->zroot.znode; | ||
| 1173 | if (unlikely(!znode)) { | ||
| 1174 | znode = ubifs_load_znode(c, &c->zroot, NULL, 0); | ||
| 1175 | if (IS_ERR(znode)) | ||
| 1176 | return PTR_ERR(znode); | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | znode->time = time; | ||
| 1180 | |||
| 1181 | while (1) { | ||
| 1182 | struct ubifs_zbranch *zbr; | ||
| 1183 | |||
| 1184 | exact = ubifs_search_zbranch(c, znode, key, n); | ||
| 1185 | |||
| 1186 | if (znode->level == 0) | ||
| 1187 | break; | ||
| 1188 | |||
| 1189 | if (*n < 0) | ||
| 1190 | *n = 0; | ||
| 1191 | zbr = &znode->zbranch[*n]; | ||
| 1192 | |||
| 1193 | if (zbr->znode) { | ||
| 1194 | znode->time = time; | ||
| 1195 | znode = zbr->znode; | ||
| 1196 | continue; | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | /* znode is not in TNC cache, load it from the media */ | ||
| 1200 | znode = ubifs_load_znode(c, zbr, znode, *n); | ||
| 1201 | if (IS_ERR(znode)) | ||
| 1202 | return PTR_ERR(znode); | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | *zn = znode; | ||
| 1206 | if (exact || !is_hash_key(c, key) || *n != -1) { | ||
| 1207 | dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); | ||
| 1208 | return exact; | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | /* | ||
| 1212 | * Here is a tricky place. We have not found the key and this is a | ||
| 1213 | * "hashed" key, which may collide. The rest of the code deals with | ||
| 1214 | * situations like this: | ||
| 1215 | * | ||
| 1216 | * | 3 | 5 | | ||
| 1217 | * / \ | ||
| 1218 | * | 3 | 5 | | 6 | 7 | (x) | ||
| 1219 | * | ||
| 1220 | * Or more a complex example: | ||
| 1221 | * | ||
| 1222 | * | 1 | 5 | | ||
| 1223 | * / \ | ||
| 1224 | * | 1 | 3 | | 5 | 8 | | ||
| 1225 | * \ / | ||
| 1226 | * | 5 | 5 | | 6 | 7 | (x) | ||
| 1227 | * | ||
| 1228 | * In the examples, if we are looking for key "5", we may reach nodes | ||
| 1229 | * marked with "(x)". In this case what we have do is to look at the | ||
| 1230 | * left and see if there is "5" key there. If there is, we have to | ||
| 1231 | * return it. | ||
| 1232 | * | ||
| 1233 | * Note, this whole situation is possible because we allow to have | ||
| 1234 | * elements which are equivalent to the next key in the parent in the | ||
| 1235 | * children of current znode. For example, this happens if we split a | ||
| 1236 | * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something | ||
| 1237 | * like this: | ||
| 1238 | * | 3 | 5 | | ||
| 1239 | * / \ | ||
| 1240 | * | 3 | 5 | | 5 | 6 | 7 | | ||
| 1241 | * ^ | ||
| 1242 | * And this becomes what is at the first "picture" after key "5" marked | ||
| 1243 | * with "^" is removed. What could be done is we could prohibit | ||
| 1244 | * splitting in the middle of the colliding sequence. Also, when | ||
| 1245 | * removing the leftmost key, we would have to correct the key of the | ||
| 1246 | * parent node, which would introduce additional complications. Namely, | ||
| 1247 | * if we changed the the leftmost key of the parent znode, the garbage | ||
| 1248 | * collector would be unable to find it (GC is doing this when GC'ing | ||
| 1249 | * indexing LEBs). Although we already have an additional RB-tree where | ||
| 1250 | * we save such changed znodes (see 'ins_clr_old_idx_znode()') until | ||
| 1251 | * after the commit. But anyway, this does not look easy to implement | ||
| 1252 | * so we did not try this. | ||
| 1253 | */ | ||
| 1254 | err = tnc_prev(c, &znode, n); | ||
| 1255 | if (err == -ENOENT) { | ||
| 1256 | dbg_tnc("found 0, lvl %d, n -1", znode->level); | ||
| 1257 | *n = -1; | ||
| 1258 | return 0; | ||
| 1259 | } | ||
| 1260 | if (unlikely(err < 0)) | ||
| 1261 | return err; | ||
| 1262 | if (keys_cmp(c, key, &znode->zbranch[*n].key)) { | ||
| 1263 | dbg_tnc("found 0, lvl %d, n -1", znode->level); | ||
| 1264 | *n = -1; | ||
| 1265 | return 0; | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); | ||
| 1269 | *zn = znode; | ||
| 1270 | return 1; | ||
| 1271 | } | ||
| 1272 | |||
| 1273 | /** | ||
| 1274 | * lookup_level0_dirty - search for zero-level znode dirtying. | ||
| 1275 | * @c: UBIFS file-system description object | ||
| 1276 | * @key: key to lookup | ||
| 1277 | * @zn: znode is returned here | ||
| 1278 | * @n: znode branch slot number is returned here | ||
| 1279 | * | ||
| 1280 | * This function looks up the TNC tree and search for zero-level znode which | ||
| 1281 | * refers key @key. The found zero-level znode is returned in @zn. There are 3 | ||
| 1282 | * cases: | ||
| 1283 | * o exact match, i.e. the found zero-level znode contains key @key, then %1 | ||
| 1284 | * is returned and slot number of the matched branch is stored in @n; | ||
| 1285 | * o not exact match, which means that zero-level znode does not contain @key | ||
| 1286 | * then %0 is returned and slot number of the closed branch is stored in | ||
| 1287 | * @n; | ||
| 1288 | * o @key is so small that it is even less than the lowest key of the | ||
| 1289 | * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. | ||
| 1290 | * | ||
| 1291 | * Additionally all znodes in the path from the root to the located zero-level | ||
| 1292 | * znode are marked as dirty. | ||
| 1293 | * | ||
| 1294 | * Note, when the TNC tree is traversed, some znodes may be absent, then this | ||
| 1295 | * function reads corresponding indexing nodes and inserts them to TNC. In | ||
| 1296 | * case of failure, a negative error code is returned. | ||
| 1297 | */ | ||
| 1298 | static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1299 | struct ubifs_znode **zn, int *n) | ||
| 1300 | { | ||
| 1301 | int err, exact; | ||
| 1302 | struct ubifs_znode *znode; | ||
| 1303 | unsigned long time = get_seconds(); | ||
| 1304 | |||
| 1305 | dbg_tnc("search and dirty key %s", DBGKEY(key)); | ||
| 1306 | |||
| 1307 | znode = c->zroot.znode; | ||
| 1308 | if (unlikely(!znode)) { | ||
| 1309 | znode = ubifs_load_znode(c, &c->zroot, NULL, 0); | ||
| 1310 | if (IS_ERR(znode)) | ||
| 1311 | return PTR_ERR(znode); | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | znode = dirty_cow_znode(c, &c->zroot); | ||
| 1315 | if (IS_ERR(znode)) | ||
| 1316 | return PTR_ERR(znode); | ||
| 1317 | |||
| 1318 | znode->time = time; | ||
| 1319 | |||
| 1320 | while (1) { | ||
| 1321 | struct ubifs_zbranch *zbr; | ||
| 1322 | |||
| 1323 | exact = ubifs_search_zbranch(c, znode, key, n); | ||
| 1324 | |||
| 1325 | if (znode->level == 0) | ||
| 1326 | break; | ||
| 1327 | |||
| 1328 | if (*n < 0) | ||
| 1329 | *n = 0; | ||
| 1330 | zbr = &znode->zbranch[*n]; | ||
| 1331 | |||
| 1332 | if (zbr->znode) { | ||
| 1333 | znode->time = time; | ||
| 1334 | znode = dirty_cow_znode(c, zbr); | ||
| 1335 | if (IS_ERR(znode)) | ||
| 1336 | return PTR_ERR(znode); | ||
| 1337 | continue; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | /* znode is not in TNC cache, load it from the media */ | ||
| 1341 | znode = ubifs_load_znode(c, zbr, znode, *n); | ||
| 1342 | if (IS_ERR(znode)) | ||
| 1343 | return PTR_ERR(znode); | ||
| 1344 | znode = dirty_cow_znode(c, zbr); | ||
| 1345 | if (IS_ERR(znode)) | ||
| 1346 | return PTR_ERR(znode); | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | *zn = znode; | ||
| 1350 | if (exact || !is_hash_key(c, key) || *n != -1) { | ||
| 1351 | dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); | ||
| 1352 | return exact; | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | /* | ||
| 1356 | * See huge comment at 'lookup_level0_dirty()' what is the rest of the | ||
| 1357 | * code. | ||
| 1358 | */ | ||
| 1359 | err = tnc_prev(c, &znode, n); | ||
| 1360 | if (err == -ENOENT) { | ||
| 1361 | *n = -1; | ||
| 1362 | dbg_tnc("found 0, lvl %d, n -1", znode->level); | ||
| 1363 | return 0; | ||
| 1364 | } | ||
| 1365 | if (unlikely(err < 0)) | ||
| 1366 | return err; | ||
| 1367 | if (keys_cmp(c, key, &znode->zbranch[*n].key)) { | ||
| 1368 | *n = -1; | ||
| 1369 | dbg_tnc("found 0, lvl %d, n -1", znode->level); | ||
| 1370 | return 0; | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | ||
| 1374 | znode = dirty_cow_bottom_up(c, znode); | ||
| 1375 | if (IS_ERR(znode)) | ||
| 1376 | return PTR_ERR(znode); | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); | ||
| 1380 | *zn = znode; | ||
| 1381 | return 1; | ||
| 1382 | } | ||
| 1383 | |||
| 1384 | /** | ||
| 1385 | * ubifs_tnc_lookup - look up a file-system node. | ||
| 1386 | * @c: UBIFS file-system description object | ||
| 1387 | * @key: node key to lookup | ||
| 1388 | * @node: the node is returned here | ||
| 1389 | * | ||
| 1390 | * This function look up and reads node with key @key. The caller has to make | ||
| 1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
| 1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
| 1393 | * case of failure. | ||
| 1394 | */ | ||
| 1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1396 | void *node) | ||
| 1397 | { | ||
| 1398 | int found, n, err; | ||
| 1399 | struct ubifs_znode *znode; | ||
| 1400 | struct ubifs_zbranch zbr, *zt; | ||
| 1401 | |||
| 1402 | mutex_lock(&c->tnc_mutex); | ||
| 1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | ||
| 1404 | if (!found) { | ||
| 1405 | err = -ENOENT; | ||
| 1406 | goto out; | ||
| 1407 | } else if (found < 0) { | ||
| 1408 | err = found; | ||
| 1409 | goto out; | ||
| 1410 | } | ||
| 1411 | zt = &znode->zbranch[n]; | ||
| 1412 | if (is_hash_key(c, key)) { | ||
| 1413 | /* | ||
| 1414 | * In this case the leaf node cache gets used, so we pass the | ||
| 1415 | * address of the zbranch and keep the mutex locked | ||
| 1416 | */ | ||
| 1417 | err = tnc_read_node_nm(c, zt, node); | ||
| 1418 | goto out; | ||
| 1419 | } | ||
| 1420 | zbr = znode->zbranch[n]; | ||
| 1421 | mutex_unlock(&c->tnc_mutex); | ||
| 1422 | |||
| 1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
| 1424 | return err; | ||
| 1425 | |||
| 1426 | out: | ||
| 1427 | mutex_unlock(&c->tnc_mutex); | ||
| 1428 | return err; | ||
| 1429 | } | ||
| 1430 | |||
| 1431 | /** | ||
| 1432 | * ubifs_tnc_locate - look up a file-system node and return it and its location. | ||
| 1433 | * @c: UBIFS file-system description object | ||
| 1434 | * @key: node key to lookup | ||
| 1435 | * @node: the node is returned here | ||
| 1436 | * @lnum: LEB number is returned here | ||
| 1437 | * @offs: offset is returned here | ||
| 1438 | * | ||
| 1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | ||
| 1440 | * location also. See 'ubifs_tnc_lookup()'. | ||
| 1441 | */ | ||
| 1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1443 | void *node, int *lnum, int *offs) | ||
| 1444 | { | ||
| 1445 | int found, n, err; | ||
| 1446 | struct ubifs_znode *znode; | ||
| 1447 | struct ubifs_zbranch zbr, *zt; | ||
| 1448 | |||
| 1449 | mutex_lock(&c->tnc_mutex); | ||
| 1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | ||
| 1451 | if (!found) { | ||
| 1452 | err = -ENOENT; | ||
| 1453 | goto out; | ||
| 1454 | } else if (found < 0) { | ||
| 1455 | err = found; | ||
| 1456 | goto out; | ||
| 1457 | } | ||
| 1458 | zt = &znode->zbranch[n]; | ||
| 1459 | if (is_hash_key(c, key)) { | ||
| 1460 | /* | ||
| 1461 | * In this case the leaf node cache gets used, so we pass the | ||
| 1462 | * address of the zbranch and keep the mutex locked | ||
| 1463 | */ | ||
| 1464 | *lnum = zt->lnum; | ||
| 1465 | *offs = zt->offs; | ||
| 1466 | err = tnc_read_node_nm(c, zt, node); | ||
| 1467 | goto out; | ||
| 1468 | } | ||
| 1469 | zbr = znode->zbranch[n]; | ||
| 1470 | mutex_unlock(&c->tnc_mutex); | ||
| 1471 | |||
| 1472 | *lnum = zbr.lnum; | ||
| 1473 | *offs = zbr.offs; | ||
| 1474 | |||
| 1475 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
| 1476 | return err; | ||
| 1477 | |||
| 1478 | out: | ||
| 1479 | mutex_unlock(&c->tnc_mutex); | ||
| 1480 | return err; | ||
| 1481 | } | ||
| 1482 | |||
| 1483 | /** | ||
| 1484 | * do_lookup_nm- look up a "hashed" node. | ||
| 1485 | * @c: UBIFS file-system description object | ||
| 1486 | * @key: node key to lookup | ||
| 1487 | * @node: the node is returned here | ||
| 1488 | * @nm: node name | ||
| 1489 | * | ||
| 1490 | * This function look up and reads a node which contains name hash in the key. | ||
| 1491 | * Since the hash may have collisions, there may be many nodes with the same | ||
| 1492 | * key, so we have to sequentially look to all of them until the needed one is | ||
| 1493 | * found. This function returns zero in case of success, %-ENOENT if the node | ||
| 1494 | * was not found, and a negative error code in case of failure. | ||
| 1495 | */ | ||
| 1496 | static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1497 | void *node, const struct qstr *nm) | ||
| 1498 | { | ||
| 1499 | int found, n, err; | ||
| 1500 | struct ubifs_znode *znode; | ||
| 1501 | struct ubifs_zbranch zbr; | ||
| 1502 | |||
| 1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | ||
| 1504 | mutex_lock(&c->tnc_mutex); | ||
| 1505 | found = ubifs_lookup_level0(c, key, &znode, &n); | ||
| 1506 | if (!found) { | ||
| 1507 | err = -ENOENT; | ||
| 1508 | goto out_unlock; | ||
| 1509 | } else if (found < 0) { | ||
| 1510 | err = found; | ||
| 1511 | goto out_unlock; | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | ubifs_assert(n >= 0); | ||
| 1515 | |||
| 1516 | err = resolve_collision(c, key, &znode, &n, nm); | ||
| 1517 | dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); | ||
| 1518 | if (unlikely(err < 0)) | ||
| 1519 | goto out_unlock; | ||
| 1520 | if (err == 0) { | ||
| 1521 | err = -ENOENT; | ||
| 1522 | goto out_unlock; | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | zbr = znode->zbranch[n]; | ||
| 1526 | mutex_unlock(&c->tnc_mutex); | ||
| 1527 | |||
| 1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
| 1529 | return err; | ||
| 1530 | |||
| 1531 | out_unlock: | ||
| 1532 | mutex_unlock(&c->tnc_mutex); | ||
| 1533 | return err; | ||
| 1534 | } | ||
| 1535 | |||
| 1536 | /** | ||
| 1537 | * ubifs_tnc_lookup_nm - look up a "hashed" node. | ||
| 1538 | * @c: UBIFS file-system description object | ||
| 1539 | * @key: node key to lookup | ||
| 1540 | * @node: the node is returned here | ||
| 1541 | * @nm: node name | ||
| 1542 | * | ||
| 1543 | * This function look up and reads a node which contains name hash in the key. | ||
| 1544 | * Since the hash may have collisions, there may be many nodes with the same | ||
| 1545 | * key, so we have to sequentially look to all of them until the needed one is | ||
| 1546 | * found. This function returns zero in case of success, %-ENOENT if the node | ||
| 1547 | * was not found, and a negative error code in case of failure. | ||
| 1548 | */ | ||
| 1549 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1550 | void *node, const struct qstr *nm) | ||
| 1551 | { | ||
| 1552 | int err, len; | ||
| 1553 | const struct ubifs_dent_node *dent = node; | ||
| 1554 | |||
| 1555 | /* | ||
| 1556 | * We assume that in most of the cases there are no name collisions and | ||
| 1557 | * 'ubifs_tnc_lookup()' returns us the right direntry. | ||
| 1558 | */ | ||
| 1559 | err = ubifs_tnc_lookup(c, key, node); | ||
| 1560 | if (err) | ||
| 1561 | return err; | ||
| 1562 | |||
| 1563 | len = le16_to_cpu(dent->nlen); | ||
| 1564 | if (nm->len == len && !memcmp(dent->name, nm->name, len)) | ||
| 1565 | return 0; | ||
| 1566 | |||
| 1567 | /* | ||
| 1568 | * Unluckily, there are hash collisions and we have to iterate over | ||
| 1569 | * them look at each direntry with colliding name hash sequentially. | ||
| 1570 | */ | ||
| 1571 | return do_lookup_nm(c, key, node, nm); | ||
| 1572 | } | ||
| 1573 | |||
| 1574 | /** | ||
| 1575 | * correct_parent_keys - correct parent znodes' keys. | ||
| 1576 | * @c: UBIFS file-system description object | ||
| 1577 | * @znode: znode to correct parent znodes for | ||
| 1578 | * | ||
| 1579 | * This is a helper function for 'tnc_insert()'. When the key of the leftmost | ||
| 1580 | * zbranch changes, keys of parent znodes have to be corrected. This helper | ||
| 1581 | * function is called in such situations and corrects the keys if needed. | ||
| 1582 | */ | ||
| 1583 | static void correct_parent_keys(const struct ubifs_info *c, | ||
| 1584 | struct ubifs_znode *znode) | ||
| 1585 | { | ||
| 1586 | union ubifs_key *key, *key1; | ||
| 1587 | |||
| 1588 | ubifs_assert(znode->parent); | ||
| 1589 | ubifs_assert(znode->iip == 0); | ||
| 1590 | |||
| 1591 | key = &znode->zbranch[0].key; | ||
| 1592 | key1 = &znode->parent->zbranch[0].key; | ||
| 1593 | |||
| 1594 | while (keys_cmp(c, key, key1) < 0) { | ||
| 1595 | key_copy(c, key, key1); | ||
| 1596 | znode = znode->parent; | ||
| 1597 | znode->alt = 1; | ||
| 1598 | if (!znode->parent || znode->iip) | ||
| 1599 | break; | ||
| 1600 | key1 = &znode->parent->zbranch[0].key; | ||
| 1601 | } | ||
| 1602 | } | ||
| 1603 | |||
| 1604 | /** | ||
| 1605 | * insert_zbranch - insert a zbranch into a znode. | ||
| 1606 | * @znode: znode into which to insert | ||
| 1607 | * @zbr: zbranch to insert | ||
| 1608 | * @n: slot number to insert to | ||
| 1609 | * | ||
| 1610 | * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in | ||
| 1611 | * znode's array of zbranches and keeps zbranches consolidated, so when a new | ||
| 1612 | * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th | ||
| 1613 | * slot, zbranches starting from @n have to be moved right. | ||
| 1614 | */ | ||
| 1615 | static void insert_zbranch(struct ubifs_znode *znode, | ||
| 1616 | const struct ubifs_zbranch *zbr, int n) | ||
| 1617 | { | ||
| 1618 | int i; | ||
| 1619 | |||
| 1620 | ubifs_assert(ubifs_zn_dirty(znode)); | ||
| 1621 | |||
| 1622 | if (znode->level) { | ||
| 1623 | for (i = znode->child_cnt; i > n; i--) { | ||
| 1624 | znode->zbranch[i] = znode->zbranch[i - 1]; | ||
| 1625 | if (znode->zbranch[i].znode) | ||
| 1626 | znode->zbranch[i].znode->iip = i; | ||
| 1627 | } | ||
| 1628 | if (zbr->znode) | ||
| 1629 | zbr->znode->iip = n; | ||
| 1630 | } else | ||
| 1631 | for (i = znode->child_cnt; i > n; i--) | ||
| 1632 | znode->zbranch[i] = znode->zbranch[i - 1]; | ||
| 1633 | |||
| 1634 | znode->zbranch[n] = *zbr; | ||
| 1635 | znode->child_cnt += 1; | ||
| 1636 | |||
| 1637 | /* | ||
| 1638 | * After inserting at slot zero, the lower bound of the key range of | ||
| 1639 | * this znode may have changed. If this znode is subsequently split | ||
| 1640 | * then the upper bound of the key range may change, and furthermore | ||
| 1641 | * it could change to be lower than the original lower bound. If that | ||
| 1642 | * happens, then it will no longer be possible to find this znode in the | ||
| 1643 | * TNC using the key from the index node on flash. That is bad because | ||
| 1644 | * if it is not found, we will assume it is obsolete and may overwrite | ||
| 1645 | * it. Then if there is an unclean unmount, we will start using the | ||
| 1646 | * old index which will be broken. | ||
| 1647 | * | ||
| 1648 | * So we first mark znodes that have insertions at slot zero, and then | ||
| 1649 | * if they are split we add their lnum/offs to the old_idx tree. | ||
| 1650 | */ | ||
| 1651 | if (n == 0) | ||
| 1652 | znode->alt = 1; | ||
| 1653 | } | ||
| 1654 | |||
| 1655 | /** | ||
| 1656 | * tnc_insert - insert a node into TNC. | ||
| 1657 | * @c: UBIFS file-system description object | ||
| 1658 | * @znode: znode to insert into | ||
| 1659 | * @zbr: branch to insert | ||
| 1660 | * @n: slot number to insert new zbranch to | ||
| 1661 | * | ||
| 1662 | * This function inserts a new node described by @zbr into znode @znode. If | ||
| 1663 | * znode does not have a free slot for new zbranch, it is split. Parent znodes | ||
| 1664 | * are splat as well if needed. Returns zero in case of success or a negative | ||
| 1665 | * error code in case of failure. | ||
| 1666 | */ | ||
| 1667 | static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, | ||
| 1668 | struct ubifs_zbranch *zbr, int n) | ||
| 1669 | { | ||
| 1670 | struct ubifs_znode *zn, *zi, *zp; | ||
| 1671 | int i, keep, move, appending = 0; | ||
| 1672 | union ubifs_key *key = &zbr->key; | ||
| 1673 | |||
| 1674 | ubifs_assert(n >= 0 && n <= c->fanout); | ||
| 1675 | |||
| 1676 | /* Implement naive insert for now */ | ||
| 1677 | again: | ||
| 1678 | zp = znode->parent; | ||
| 1679 | if (znode->child_cnt < c->fanout) { | ||
| 1680 | ubifs_assert(n != c->fanout); | ||
| 1681 | dbg_tnc("inserted at %d level %d, key %s", n, znode->level, | ||
| 1682 | DBGKEY(key)); | ||
| 1683 | |||
| 1684 | insert_zbranch(znode, zbr, n); | ||
| 1685 | |||
| 1686 | /* Ensure parent's key is correct */ | ||
| 1687 | if (n == 0 && zp && znode->iip == 0) | ||
| 1688 | correct_parent_keys(c, znode); | ||
| 1689 | |||
| 1690 | return 0; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | /* | ||
| 1694 | * Unfortunately, @znode does not have more empty slots and we have to | ||
| 1695 | * split it. | ||
| 1696 | */ | ||
| 1697 | dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); | ||
| 1698 | |||
| 1699 | if (znode->alt) | ||
| 1700 | /* | ||
| 1701 | * We can no longer be sure of finding this znode by key, so we | ||
| 1702 | * record it in the old_idx tree. | ||
| 1703 | */ | ||
| 1704 | ins_clr_old_idx_znode(c, znode); | ||
| 1705 | |||
| 1706 | zn = kzalloc(c->max_znode_sz, GFP_NOFS); | ||
| 1707 | if (!zn) | ||
| 1708 | return -ENOMEM; | ||
| 1709 | zn->parent = zp; | ||
| 1710 | zn->level = znode->level; | ||
| 1711 | |||
| 1712 | /* Decide where to split */ | ||
| 1713 | if (znode->level == 0 && n == c->fanout && | ||
| 1714 | key_type(c, key) == UBIFS_DATA_KEY) { | ||
| 1715 | union ubifs_key *key1; | ||
| 1716 | |||
| 1717 | /* | ||
| 1718 | * If this is an inode which is being appended - do not split | ||
| 1719 | * it because no other zbranches can be inserted between | ||
| 1720 | * zbranches of consecutive data nodes anyway. | ||
| 1721 | */ | ||
| 1722 | key1 = &znode->zbranch[n - 1].key; | ||
| 1723 | if (key_inum(c, key1) == key_inum(c, key) && | ||
| 1724 | key_type(c, key1) == UBIFS_DATA_KEY && | ||
| 1725 | key_block(c, key1) == key_block(c, key) - 1) | ||
| 1726 | appending = 1; | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | if (appending) { | ||
| 1730 | keep = c->fanout; | ||
| 1731 | move = 0; | ||
| 1732 | } else { | ||
| 1733 | keep = (c->fanout + 1) / 2; | ||
| 1734 | move = c->fanout - keep; | ||
| 1735 | } | ||
| 1736 | |||
| 1737 | /* | ||
| 1738 | * Although we don't at present, we could look at the neighbors and see | ||
| 1739 | * if we can move some zbranches there. | ||
| 1740 | */ | ||
| 1741 | |||
| 1742 | if (n < keep) { | ||
| 1743 | /* Insert into existing znode */ | ||
| 1744 | zi = znode; | ||
| 1745 | move += 1; | ||
| 1746 | keep -= 1; | ||
| 1747 | } else { | ||
| 1748 | /* Insert into new znode */ | ||
| 1749 | zi = zn; | ||
| 1750 | n -= keep; | ||
| 1751 | /* Re-parent */ | ||
| 1752 | if (zn->level != 0) | ||
| 1753 | zbr->znode->parent = zn; | ||
| 1754 | } | ||
| 1755 | |||
| 1756 | __set_bit(DIRTY_ZNODE, &zn->flags); | ||
| 1757 | atomic_long_inc(&c->dirty_zn_cnt); | ||
| 1758 | |||
| 1759 | zn->child_cnt = move; | ||
| 1760 | znode->child_cnt = keep; | ||
| 1761 | |||
| 1762 | dbg_tnc("moving %d, keeping %d", move, keep); | ||
| 1763 | |||
| 1764 | /* Move zbranch */ | ||
| 1765 | for (i = 0; i < move; i++) { | ||
| 1766 | zn->zbranch[i] = znode->zbranch[keep + i]; | ||
| 1767 | /* Re-parent */ | ||
| 1768 | if (zn->level != 0) | ||
| 1769 | if (zn->zbranch[i].znode) { | ||
| 1770 | zn->zbranch[i].znode->parent = zn; | ||
| 1771 | zn->zbranch[i].znode->iip = i; | ||
| 1772 | } | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | /* Insert new key and branch */ | ||
| 1776 | dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); | ||
| 1777 | |||
| 1778 | insert_zbranch(zi, zbr, n); | ||
| 1779 | |||
| 1780 | /* Insert new znode (produced by spitting) into the parent */ | ||
| 1781 | if (zp) { | ||
| 1782 | i = n; | ||
| 1783 | /* Locate insertion point */ | ||
| 1784 | n = znode->iip + 1; | ||
| 1785 | if (appending && n != c->fanout) | ||
| 1786 | appending = 0; | ||
| 1787 | |||
| 1788 | if (i == 0 && zi == znode && znode->iip == 0) | ||
| 1789 | correct_parent_keys(c, znode); | ||
| 1790 | |||
| 1791 | /* Tail recursion */ | ||
| 1792 | zbr->key = zn->zbranch[0].key; | ||
| 1793 | zbr->znode = zn; | ||
| 1794 | zbr->lnum = 0; | ||
| 1795 | zbr->offs = 0; | ||
| 1796 | zbr->len = 0; | ||
| 1797 | znode = zp; | ||
| 1798 | |||
| 1799 | goto again; | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | /* We have to split root znode */ | ||
| 1803 | dbg_tnc("creating new zroot at level %d", znode->level + 1); | ||
| 1804 | |||
| 1805 | zi = kzalloc(c->max_znode_sz, GFP_NOFS); | ||
| 1806 | if (!zi) | ||
| 1807 | return -ENOMEM; | ||
| 1808 | |||
| 1809 | zi->child_cnt = 2; | ||
| 1810 | zi->level = znode->level + 1; | ||
| 1811 | |||
| 1812 | __set_bit(DIRTY_ZNODE, &zi->flags); | ||
| 1813 | atomic_long_inc(&c->dirty_zn_cnt); | ||
| 1814 | |||
| 1815 | zi->zbranch[0].key = znode->zbranch[0].key; | ||
| 1816 | zi->zbranch[0].znode = znode; | ||
| 1817 | zi->zbranch[0].lnum = c->zroot.lnum; | ||
| 1818 | zi->zbranch[0].offs = c->zroot.offs; | ||
| 1819 | zi->zbranch[0].len = c->zroot.len; | ||
| 1820 | zi->zbranch[1].key = zn->zbranch[0].key; | ||
| 1821 | zi->zbranch[1].znode = zn; | ||
| 1822 | |||
| 1823 | c->zroot.lnum = 0; | ||
| 1824 | c->zroot.offs = 0; | ||
| 1825 | c->zroot.len = 0; | ||
| 1826 | c->zroot.znode = zi; | ||
| 1827 | |||
| 1828 | zn->parent = zi; | ||
| 1829 | zn->iip = 1; | ||
| 1830 | znode->parent = zi; | ||
| 1831 | znode->iip = 0; | ||
| 1832 | |||
| 1833 | return 0; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | /** | ||
| 1837 | * ubifs_tnc_add - add a node to TNC. | ||
| 1838 | * @c: UBIFS file-system description object | ||
| 1839 | * @key: key to add | ||
| 1840 | * @lnum: LEB number of node | ||
| 1841 | * @offs: node offset | ||
| 1842 | * @len: node length | ||
| 1843 | * | ||
| 1844 | * This function adds a node with key @key to TNC. The node may be new or it may | ||
| 1845 | * obsolete some existing one. Returns %0 on success or negative error code on | ||
| 1846 | * failure. | ||
| 1847 | */ | ||
| 1848 | int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, | ||
| 1849 | int offs, int len) | ||
| 1850 | { | ||
| 1851 | int found, n, err = 0; | ||
| 1852 | struct ubifs_znode *znode; | ||
| 1853 | |||
| 1854 | mutex_lock(&c->tnc_mutex); | ||
| 1855 | dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); | ||
| 1856 | found = lookup_level0_dirty(c, key, &znode, &n); | ||
| 1857 | if (!found) { | ||
| 1858 | struct ubifs_zbranch zbr; | ||
| 1859 | |||
| 1860 | zbr.znode = NULL; | ||
| 1861 | zbr.lnum = lnum; | ||
| 1862 | zbr.offs = offs; | ||
| 1863 | zbr.len = len; | ||
| 1864 | key_copy(c, key, &zbr.key); | ||
| 1865 | err = tnc_insert(c, znode, &zbr, n + 1); | ||
| 1866 | } else if (found == 1) { | ||
| 1867 | struct ubifs_zbranch *zbr = &znode->zbranch[n]; | ||
| 1868 | |||
| 1869 | lnc_free(zbr); | ||
| 1870 | err = ubifs_add_dirt(c, zbr->lnum, zbr->len); | ||
| 1871 | zbr->lnum = lnum; | ||
| 1872 | zbr->offs = offs; | ||
| 1873 | zbr->len = len; | ||
| 1874 | } else | ||
| 1875 | err = found; | ||
| 1876 | if (!err) | ||
| 1877 | err = dbg_check_tnc(c, 0); | ||
| 1878 | mutex_unlock(&c->tnc_mutex); | ||
| 1879 | |||
| 1880 | return err; | ||
| 1881 | } | ||
| 1882 | |||
| 1883 | /** | ||
| 1884 | * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. | ||
| 1885 | * @c: UBIFS file-system description object | ||
| 1886 | * @key: key to add | ||
| 1887 | * @old_lnum: LEB number of old node | ||
| 1888 | * @old_offs: old node offset | ||
| 1889 | * @lnum: LEB number of node | ||
| 1890 | * @offs: node offset | ||
| 1891 | * @len: node length | ||
| 1892 | * | ||
| 1893 | * This function replaces a node with key @key in the TNC only if the old node | ||
| 1894 | * is found. This function is called by garbage collection when node are moved. | ||
| 1895 | * Returns %0 on success or negative error code on failure. | ||
| 1896 | */ | ||
| 1897 | int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1898 | int old_lnum, int old_offs, int lnum, int offs, int len) | ||
| 1899 | { | ||
| 1900 | int found, n, err = 0; | ||
| 1901 | struct ubifs_znode *znode; | ||
| 1902 | |||
| 1903 | mutex_lock(&c->tnc_mutex); | ||
| 1904 | dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, | ||
| 1905 | old_offs, lnum, offs, len, DBGKEY(key)); | ||
| 1906 | found = lookup_level0_dirty(c, key, &znode, &n); | ||
| 1907 | if (found < 0) { | ||
| 1908 | err = found; | ||
| 1909 | goto out_unlock; | ||
| 1910 | } | ||
| 1911 | |||
| 1912 | if (found == 1) { | ||
| 1913 | struct ubifs_zbranch *zbr = &znode->zbranch[n]; | ||
| 1914 | |||
| 1915 | found = 0; | ||
| 1916 | if (zbr->lnum == old_lnum && zbr->offs == old_offs) { | ||
| 1917 | lnc_free(zbr); | ||
| 1918 | err = ubifs_add_dirt(c, zbr->lnum, zbr->len); | ||
| 1919 | if (err) | ||
| 1920 | goto out_unlock; | ||
| 1921 | zbr->lnum = lnum; | ||
| 1922 | zbr->offs = offs; | ||
| 1923 | zbr->len = len; | ||
| 1924 | found = 1; | ||
| 1925 | } else if (is_hash_key(c, key)) { | ||
| 1926 | found = resolve_collision_directly(c, key, &znode, &n, | ||
| 1927 | old_lnum, old_offs); | ||
| 1928 | dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", | ||
| 1929 | found, znode, n, old_lnum, old_offs); | ||
| 1930 | if (found < 0) { | ||
| 1931 | err = found; | ||
| 1932 | goto out_unlock; | ||
| 1933 | } | ||
| 1934 | |||
| 1935 | if (found) { | ||
| 1936 | /* Ensure the znode is dirtied */ | ||
| 1937 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | ||
| 1938 | znode = dirty_cow_bottom_up(c, | ||
| 1939 | znode); | ||
| 1940 | if (IS_ERR(znode)) { | ||
| 1941 | err = PTR_ERR(znode); | ||
| 1942 | goto out_unlock; | ||
| 1943 | } | ||
| 1944 | } | ||
| 1945 | zbr = &znode->zbranch[n]; | ||
| 1946 | lnc_free(zbr); | ||
| 1947 | err = ubifs_add_dirt(c, zbr->lnum, | ||
| 1948 | zbr->len); | ||
| 1949 | if (err) | ||
| 1950 | goto out_unlock; | ||
| 1951 | zbr->lnum = lnum; | ||
| 1952 | zbr->offs = offs; | ||
| 1953 | zbr->len = len; | ||
| 1954 | } | ||
| 1955 | } | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | if (!found) | ||
| 1959 | err = ubifs_add_dirt(c, lnum, len); | ||
| 1960 | |||
| 1961 | if (!err) | ||
| 1962 | err = dbg_check_tnc(c, 0); | ||
| 1963 | |||
| 1964 | out_unlock: | ||
| 1965 | mutex_unlock(&c->tnc_mutex); | ||
| 1966 | return err; | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | /** | ||
| 1970 | * ubifs_tnc_add_nm - add a "hashed" node to TNC. | ||
| 1971 | * @c: UBIFS file-system description object | ||
| 1972 | * @key: key to add | ||
| 1973 | * @lnum: LEB number of node | ||
| 1974 | * @offs: node offset | ||
| 1975 | * @len: node length | ||
| 1976 | * @nm: node name | ||
| 1977 | * | ||
| 1978 | * This is the same as 'ubifs_tnc_add()' but it should be used with keys which | ||
| 1979 | * may have collisions, like directory entry keys. | ||
| 1980 | */ | ||
| 1981 | int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1982 | int lnum, int offs, int len, const struct qstr *nm) | ||
| 1983 | { | ||
| 1984 | int found, n, err = 0; | ||
| 1985 | struct ubifs_znode *znode; | ||
| 1986 | |||
| 1987 | mutex_lock(&c->tnc_mutex); | ||
| 1988 | dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, | ||
| 1989 | DBGKEY(key)); | ||
| 1990 | found = lookup_level0_dirty(c, key, &znode, &n); | ||
| 1991 | if (found < 0) { | ||
| 1992 | err = found; | ||
| 1993 | goto out_unlock; | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | if (found == 1) { | ||
| 1997 | if (c->replaying) | ||
| 1998 | found = fallible_resolve_collision(c, key, &znode, &n, | ||
| 1999 | nm, 1); | ||
| 2000 | else | ||
| 2001 | found = resolve_collision(c, key, &znode, &n, nm); | ||
| 2002 | dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n); | ||
| 2003 | if (found < 0) { | ||
| 2004 | err = found; | ||
| 2005 | goto out_unlock; | ||
| 2006 | } | ||
| 2007 | |||
| 2008 | /* Ensure the znode is dirtied */ | ||
| 2009 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | ||
| 2010 | znode = dirty_cow_bottom_up(c, znode); | ||
| 2011 | if (IS_ERR(znode)) { | ||
| 2012 | err = PTR_ERR(znode); | ||
| 2013 | goto out_unlock; | ||
| 2014 | } | ||
| 2015 | } | ||
| 2016 | |||
| 2017 | if (found == 1) { | ||
| 2018 | struct ubifs_zbranch *zbr = &znode->zbranch[n]; | ||
| 2019 | |||
| 2020 | lnc_free(zbr); | ||
| 2021 | err = ubifs_add_dirt(c, zbr->lnum, zbr->len); | ||
| 2022 | zbr->lnum = lnum; | ||
| 2023 | zbr->offs = offs; | ||
| 2024 | zbr->len = len; | ||
| 2025 | goto out_unlock; | ||
| 2026 | } | ||
| 2027 | } | ||
| 2028 | |||
| 2029 | if (!found) { | ||
| 2030 | struct ubifs_zbranch zbr; | ||
| 2031 | |||
| 2032 | zbr.znode = NULL; | ||
| 2033 | zbr.lnum = lnum; | ||
| 2034 | zbr.offs = offs; | ||
| 2035 | zbr.len = len; | ||
| 2036 | key_copy(c, key, &zbr.key); | ||
| 2037 | err = tnc_insert(c, znode, &zbr, n + 1); | ||
| 2038 | if (err) | ||
| 2039 | goto out_unlock; | ||
| 2040 | if (c->replaying) { | ||
| 2041 | /* | ||
| 2042 | * We did not find it in the index so there may be a | ||
| 2043 | * dangling branch still in the index. So we remove it | ||
| 2044 | * by passing 'ubifs_tnc_remove_nm()' the same key but | ||
| 2045 | * an unmatchable name. | ||
| 2046 | */ | ||
| 2047 | struct qstr noname = { .len = 0, .name = "" }; | ||
| 2048 | |||
| 2049 | err = dbg_check_tnc(c, 0); | ||
| 2050 | mutex_unlock(&c->tnc_mutex); | ||
| 2051 | if (err) | ||
| 2052 | return err; | ||
| 2053 | return ubifs_tnc_remove_nm(c, key, &noname); | ||
| 2054 | } | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | out_unlock: | ||
| 2058 | if (!err) | ||
| 2059 | err = dbg_check_tnc(c, 0); | ||
| 2060 | mutex_unlock(&c->tnc_mutex); | ||
| 2061 | return err; | ||
| 2062 | } | ||
| 2063 | |||
| 2064 | /** | ||
| 2065 | * tnc_delete - delete a znode form TNC. | ||
| 2066 | * @c: UBIFS file-system description object | ||
| 2067 | * @znode: znode to delete from | ||
| 2068 | * @n: zbranch slot number to delete | ||
| 2069 | * | ||
| 2070 | * This function deletes a leaf node from @n-th slot of @znode. Returns zero in | ||
| 2071 | * case of success and a negative error code in case of failure. | ||
| 2072 | */ | ||
| 2073 | static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) | ||
| 2074 | { | ||
| 2075 | struct ubifs_zbranch *zbr; | ||
| 2076 | struct ubifs_znode *zp; | ||
| 2077 | int i, err; | ||
| 2078 | |||
| 2079 | /* Delete without merge for now */ | ||
| 2080 | ubifs_assert(znode->level == 0); | ||
| 2081 | ubifs_assert(n >= 0 && n < c->fanout); | ||
| 2082 | dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); | ||
| 2083 | |||
| 2084 | zbr = &znode->zbranch[n]; | ||
| 2085 | lnc_free(zbr); | ||
| 2086 | |||
| 2087 | err = ubifs_add_dirt(c, zbr->lnum, zbr->len); | ||
| 2088 | if (err) { | ||
| 2089 | dbg_dump_znode(c, znode); | ||
| 2090 | return err; | ||
| 2091 | } | ||
| 2092 | |||
| 2093 | /* We do not "gap" zbranch slots */ | ||
| 2094 | for (i = n; i < znode->child_cnt - 1; i++) | ||
| 2095 | znode->zbranch[i] = znode->zbranch[i + 1]; | ||
| 2096 | znode->child_cnt -= 1; | ||
| 2097 | |||
| 2098 | if (znode->child_cnt > 0) | ||
| 2099 | return 0; | ||
| 2100 | |||
| 2101 | /* | ||
| 2102 | * This was the last zbranch, we have to delete this znode from the | ||
| 2103 | * parent. | ||
| 2104 | */ | ||
| 2105 | |||
| 2106 | do { | ||
| 2107 | ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); | ||
| 2108 | ubifs_assert(ubifs_zn_dirty(znode)); | ||
| 2109 | |||
| 2110 | zp = znode->parent; | ||
| 2111 | n = znode->iip; | ||
| 2112 | |||
| 2113 | atomic_long_dec(&c->dirty_zn_cnt); | ||
| 2114 | |||
| 2115 | err = insert_old_idx_znode(c, znode); | ||
| 2116 | if (err) | ||
| 2117 | return err; | ||
| 2118 | |||
| 2119 | if (znode->cnext) { | ||
| 2120 | __set_bit(OBSOLETE_ZNODE, &znode->flags); | ||
| 2121 | atomic_long_inc(&c->clean_zn_cnt); | ||
| 2122 | atomic_long_inc(&ubifs_clean_zn_cnt); | ||
| 2123 | } else | ||
| 2124 | kfree(znode); | ||
| 2125 | znode = zp; | ||
| 2126 | } while (znode->child_cnt == 1); /* while removing last child */ | ||
| 2127 | |||
| 2128 | /* Remove from znode, entry n - 1 */ | ||
| 2129 | znode->child_cnt -= 1; | ||
| 2130 | ubifs_assert(znode->level != 0); | ||
| 2131 | for (i = n; i < znode->child_cnt; i++) { | ||
| 2132 | znode->zbranch[i] = znode->zbranch[i + 1]; | ||
| 2133 | if (znode->zbranch[i].znode) | ||
| 2134 | znode->zbranch[i].znode->iip = i; | ||
| 2135 | } | ||
| 2136 | |||
| 2137 | /* | ||
| 2138 | * If this is the root and it has only 1 child then | ||
| 2139 | * collapse the tree. | ||
| 2140 | */ | ||
| 2141 | if (!znode->parent) { | ||
| 2142 | while (znode->child_cnt == 1 && znode->level != 0) { | ||
| 2143 | zp = znode; | ||
| 2144 | zbr = &znode->zbranch[0]; | ||
| 2145 | znode = get_znode(c, znode, 0); | ||
| 2146 | if (IS_ERR(znode)) | ||
| 2147 | return PTR_ERR(znode); | ||
| 2148 | znode = dirty_cow_znode(c, zbr); | ||
| 2149 | if (IS_ERR(znode)) | ||
| 2150 | return PTR_ERR(znode); | ||
| 2151 | znode->parent = NULL; | ||
| 2152 | znode->iip = 0; | ||
| 2153 | if (c->zroot.len) { | ||
| 2154 | err = insert_old_idx(c, c->zroot.lnum, | ||
| 2155 | c->zroot.offs); | ||
| 2156 | if (err) | ||
| 2157 | return err; | ||
| 2158 | } | ||
| 2159 | c->zroot.lnum = zbr->lnum; | ||
| 2160 | c->zroot.offs = zbr->offs; | ||
| 2161 | c->zroot.len = zbr->len; | ||
| 2162 | c->zroot.znode = znode; | ||
| 2163 | ubifs_assert(!test_bit(OBSOLETE_ZNODE, | ||
| 2164 | &zp->flags)); | ||
| 2165 | ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); | ||
| 2166 | atomic_long_dec(&c->dirty_zn_cnt); | ||
| 2167 | |||
| 2168 | if (zp->cnext) { | ||
| 2169 | __set_bit(OBSOLETE_ZNODE, &zp->flags); | ||
| 2170 | atomic_long_inc(&c->clean_zn_cnt); | ||
| 2171 | atomic_long_inc(&ubifs_clean_zn_cnt); | ||
| 2172 | } else | ||
| 2173 | kfree(zp); | ||
| 2174 | } | ||
| 2175 | } | ||
| 2176 | |||
| 2177 | return 0; | ||
| 2178 | } | ||
| 2179 | |||
| 2180 | /** | ||
| 2181 | * ubifs_tnc_remove - remove an index entry of a node. | ||
| 2182 | * @c: UBIFS file-system description object | ||
| 2183 | * @key: key of node | ||
| 2184 | * | ||
| 2185 | * Returns %0 on success or negative error code on failure. | ||
| 2186 | */ | ||
| 2187 | int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) | ||
| 2188 | { | ||
| 2189 | int found, n, err = 0; | ||
| 2190 | struct ubifs_znode *znode; | ||
| 2191 | |||
| 2192 | mutex_lock(&c->tnc_mutex); | ||
| 2193 | dbg_tnc("key %s", DBGKEY(key)); | ||
| 2194 | found = lookup_level0_dirty(c, key, &znode, &n); | ||
| 2195 | if (found < 0) { | ||
| 2196 | err = found; | ||
| 2197 | goto out_unlock; | ||
| 2198 | } | ||
| 2199 | if (found == 1) | ||
| 2200 | err = tnc_delete(c, znode, n); | ||
| 2201 | if (!err) | ||
| 2202 | err = dbg_check_tnc(c, 0); | ||
| 2203 | |||
| 2204 | out_unlock: | ||
| 2205 | mutex_unlock(&c->tnc_mutex); | ||
| 2206 | return err; | ||
| 2207 | } | ||
| 2208 | |||
| 2209 | /** | ||
| 2210 | * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. | ||
| 2211 | * @c: UBIFS file-system description object | ||
| 2212 | * @key: key of node | ||
| 2213 | * @nm: directory entry name | ||
| 2214 | * | ||
| 2215 | * Returns %0 on success or negative error code on failure. | ||
| 2216 | */ | ||
| 2217 | int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 2218 | const struct qstr *nm) | ||
| 2219 | { | ||
| 2220 | int n, err; | ||
| 2221 | struct ubifs_znode *znode; | ||
| 2222 | |||
| 2223 | mutex_lock(&c->tnc_mutex); | ||
| 2224 | dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); | ||
| 2225 | err = lookup_level0_dirty(c, key, &znode, &n); | ||
| 2226 | if (err < 0) | ||
| 2227 | goto out_unlock; | ||
| 2228 | |||
| 2229 | if (err) { | ||
| 2230 | if (c->replaying) | ||
| 2231 | err = fallible_resolve_collision(c, key, &znode, &n, | ||
| 2232 | nm, 0); | ||
| 2233 | else | ||
| 2234 | err = resolve_collision(c, key, &znode, &n, nm); | ||
| 2235 | dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); | ||
| 2236 | if (err < 0) | ||
| 2237 | goto out_unlock; | ||
| 2238 | if (err) { | ||
| 2239 | /* Ensure the znode is dirtied */ | ||
| 2240 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | ||
| 2241 | znode = dirty_cow_bottom_up(c, znode); | ||
| 2242 | if (IS_ERR(znode)) { | ||
| 2243 | err = PTR_ERR(znode); | ||
| 2244 | goto out_unlock; | ||
| 2245 | } | ||
| 2246 | } | ||
| 2247 | err = tnc_delete(c, znode, n); | ||
| 2248 | } | ||
| 2249 | } | ||
| 2250 | |||
| 2251 | out_unlock: | ||
| 2252 | if (!err) | ||
| 2253 | err = dbg_check_tnc(c, 0); | ||
| 2254 | mutex_unlock(&c->tnc_mutex); | ||
| 2255 | return err; | ||
| 2256 | } | ||
| 2257 | |||
| 2258 | /** | ||
| 2259 | * key_in_range - determine if a key falls within a range of keys. | ||
| 2260 | * @c: UBIFS file-system description object | ||
| 2261 | * @key: key to check | ||
| 2262 | * @from_key: lowest key in range | ||
| 2263 | * @to_key: highest key in range | ||
| 2264 | * | ||
| 2265 | * This function returns %1 if the key is in range and %0 otherwise. | ||
| 2266 | */ | ||
| 2267 | static int key_in_range(struct ubifs_info *c, union ubifs_key *key, | ||
| 2268 | union ubifs_key *from_key, union ubifs_key *to_key) | ||
| 2269 | { | ||
| 2270 | if (keys_cmp(c, key, from_key) < 0) | ||
| 2271 | return 0; | ||
| 2272 | if (keys_cmp(c, key, to_key) > 0) | ||
| 2273 | return 0; | ||
| 2274 | return 1; | ||
| 2275 | } | ||
| 2276 | |||
| 2277 | /** | ||
| 2278 | * ubifs_tnc_remove_range - remove index entries in range. | ||
| 2279 | * @c: UBIFS file-system description object | ||
| 2280 | * @from_key: lowest key to remove | ||
| 2281 | * @to_key: highest key to remove | ||
| 2282 | * | ||
| 2283 | * This function removes index entries starting at @from_key and ending at | ||
| 2284 | * @to_key. This function returns zero in case of success and a negative error | ||
| 2285 | * code in case of failure. | ||
| 2286 | */ | ||
| 2287 | int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, | ||
| 2288 | union ubifs_key *to_key) | ||
| 2289 | { | ||
| 2290 | int i, n, k, err = 0; | ||
| 2291 | struct ubifs_znode *znode; | ||
| 2292 | union ubifs_key *key; | ||
| 2293 | |||
| 2294 | mutex_lock(&c->tnc_mutex); | ||
| 2295 | while (1) { | ||
| 2296 | /* Find first level 0 znode that contains keys to remove */ | ||
| 2297 | err = ubifs_lookup_level0(c, from_key, &znode, &n); | ||
| 2298 | if (err < 0) | ||
| 2299 | goto out_unlock; | ||
| 2300 | |||
| 2301 | if (err) | ||
| 2302 | key = from_key; | ||
| 2303 | else { | ||
| 2304 | err = tnc_next(c, &znode, &n); | ||
| 2305 | if (err == -ENOENT) { | ||
| 2306 | err = 0; | ||
| 2307 | goto out_unlock; | ||
| 2308 | } | ||
| 2309 | if (err < 0) | ||
| 2310 | goto out_unlock; | ||
| 2311 | key = &znode->zbranch[n].key; | ||
| 2312 | if (!key_in_range(c, key, from_key, to_key)) { | ||
| 2313 | err = 0; | ||
| 2314 | goto out_unlock; | ||
| 2315 | } | ||
| 2316 | } | ||
| 2317 | |||
| 2318 | /* Ensure the znode is dirtied */ | ||
| 2319 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | ||
| 2320 | znode = dirty_cow_bottom_up(c, znode); | ||
| 2321 | if (IS_ERR(znode)) { | ||
| 2322 | err = PTR_ERR(znode); | ||
| 2323 | goto out_unlock; | ||
| 2324 | } | ||
| 2325 | } | ||
| 2326 | |||
| 2327 | /* Remove all keys in range except the first */ | ||
| 2328 | for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { | ||
| 2329 | key = &znode->zbranch[i].key; | ||
| 2330 | if (!key_in_range(c, key, from_key, to_key)) | ||
| 2331 | break; | ||
| 2332 | lnc_free(&znode->zbranch[i]); | ||
| 2333 | err = ubifs_add_dirt(c, znode->zbranch[i].lnum, | ||
| 2334 | znode->zbranch[i].len); | ||
| 2335 | if (err) { | ||
| 2336 | dbg_dump_znode(c, znode); | ||
| 2337 | goto out_unlock; | ||
| 2338 | } | ||
| 2339 | dbg_tnc("removing %s", DBGKEY(key)); | ||
| 2340 | } | ||
| 2341 | if (k) { | ||
| 2342 | for (i = n + 1 + k; i < znode->child_cnt; i++) | ||
| 2343 | znode->zbranch[i - k] = znode->zbranch[i]; | ||
| 2344 | znode->child_cnt -= k; | ||
| 2345 | } | ||
| 2346 | |||
| 2347 | /* Now delete the first */ | ||
| 2348 | err = tnc_delete(c, znode, n); | ||
| 2349 | if (err) | ||
| 2350 | goto out_unlock; | ||
| 2351 | } | ||
| 2352 | |||
| 2353 | out_unlock: | ||
| 2354 | if (!err) | ||
| 2355 | err = dbg_check_tnc(c, 0); | ||
| 2356 | mutex_unlock(&c->tnc_mutex); | ||
| 2357 | return err; | ||
| 2358 | } | ||
| 2359 | |||
| 2360 | /** | ||
| 2361 | * ubifs_tnc_remove_ino - remove an inode from TNC. | ||
| 2362 | * @c: UBIFS file-system description object | ||
| 2363 | * @inum: inode number to remove | ||
| 2364 | * | ||
| 2365 | * This function remove inode @inum and all the extended attributes associated | ||
| 2366 | * with the anode from TNC and returns zero in case of success or a negative | ||
| 2367 | * error code in case of failure. | ||
| 2368 | */ | ||
| 2369 | int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) | ||
| 2370 | { | ||
| 2371 | union ubifs_key key1, key2; | ||
| 2372 | struct ubifs_dent_node *xent, *pxent = NULL; | ||
| 2373 | struct qstr nm = { .name = NULL }; | ||
| 2374 | |||
| 2375 | dbg_tnc("ino %lu", inum); | ||
| 2376 | |||
| 2377 | /* | ||
| 2378 | * Walk all extended attribute entries and remove them together with | ||
| 2379 | * corresponding extended attribute inodes. | ||
| 2380 | */ | ||
| 2381 | lowest_xent_key(c, &key1, inum); | ||
| 2382 | while (1) { | ||
| 2383 | ino_t xattr_inum; | ||
| 2384 | int err; | ||
| 2385 | |||
| 2386 | xent = ubifs_tnc_next_ent(c, &key1, &nm); | ||
| 2387 | if (IS_ERR(xent)) { | ||
| 2388 | err = PTR_ERR(xent); | ||
| 2389 | if (err == -ENOENT) | ||
| 2390 | break; | ||
| 2391 | return err; | ||
| 2392 | } | ||
| 2393 | |||
| 2394 | xattr_inum = le64_to_cpu(xent->inum); | ||
| 2395 | dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum); | ||
| 2396 | |||
| 2397 | nm.name = xent->name; | ||
| 2398 | nm.len = le16_to_cpu(xent->nlen); | ||
| 2399 | err = ubifs_tnc_remove_nm(c, &key1, &nm); | ||
| 2400 | if (err) { | ||
| 2401 | kfree(xent); | ||
| 2402 | return err; | ||
| 2403 | } | ||
| 2404 | |||
| 2405 | lowest_ino_key(c, &key1, xattr_inum); | ||
| 2406 | highest_ino_key(c, &key2, xattr_inum); | ||
| 2407 | err = ubifs_tnc_remove_range(c, &key1, &key2); | ||
| 2408 | if (err) { | ||
| 2409 | kfree(xent); | ||
| 2410 | return err; | ||
| 2411 | } | ||
| 2412 | |||
| 2413 | kfree(pxent); | ||
| 2414 | pxent = xent; | ||
| 2415 | key_read(c, &xent->key, &key1); | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | kfree(pxent); | ||
| 2419 | lowest_ino_key(c, &key1, inum); | ||
| 2420 | highest_ino_key(c, &key2, inum); | ||
| 2421 | |||
| 2422 | return ubifs_tnc_remove_range(c, &key1, &key2); | ||
| 2423 | } | ||
| 2424 | |||
/**
 * ubifs_tnc_next_ent - walk directory or extended attribute entries.
 * @c: UBIFS file-system description object
 * @key: key of last entry
 * @nm: name of last entry found or %NULL
 *
 * This function finds and reads the next directory or extended attribute entry
 * after the given key (@key) if there is one. @nm is used to resolve
 * collisions.
 *
 * If the name of the current entry is not known and only the key is known,
 * @nm->name has to be %NULL. In this case the semantics of this function is a
 * little bit different and it returns the entry corresponding to this key, not
 * the next one. If the key was not found, the closest "right" entry is
 * returned.
 *
 * If the first entry has to be found, @key has to contain the lowest possible
 * key value for this inode and @nm->name has to be %NULL.
 *
 * This function returns the found directory or extended attribute entry node
 * in case of success, %-ENOENT is returned if no entry was found, and a
 * negative error code is returned in case of failure.
 */
struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
					   union ubifs_key *key,
					   const struct qstr *nm)
{
	int n, err, type = key_type(c, key);
	struct ubifs_znode *znode;
	struct ubifs_dent_node *dent;
	struct ubifs_zbranch *zbr;
	union ubifs_key *dkey;

	dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
	/* Directory/xattr entry keys are hashed, hence may collide */
	ubifs_assert(is_hash_key(c, key));

	mutex_lock(&c->tnc_mutex);
	err = ubifs_lookup_level0(c, key, &znode, &n);
	if (unlikely(err < 0))
		goto out_unlock;

	if (nm->name) {
		if (err) {
			/* Handle collisions - find the exact matching entry */
			err = resolve_collision(c, key, &znode, &n, nm);
			dbg_tnc("rc returned %d, znode %p, n %d",
				err, znode, n);
			if (unlikely(err < 0))
				goto out_unlock;
		}

		/* Now find next entry */
		err = tnc_next(c, &znode, &n);
		if (unlikely(err))
			goto out_unlock;
	} else {
		/*
		 * The full name of the entry was not given, in which case the
		 * behavior of this function is a little different and it
		 * returns current entry, not the next one.
		 */
		if (!err) {
			/*
			 * However, the given key does not exist in the TNC
			 * tree and @znode/@n variables contain the closest
			 * "preceding" element. Switch to the next one.
			 */
			err = tnc_next(c, &znode, &n);
			if (err)
				goto out_unlock;
		}
	}

	zbr = &znode->zbranch[n];
	dent = kmalloc(zbr->len, GFP_NOFS);
	if (unlikely(!dent)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	/*
	 * The above 'tnc_next()' call could lead us to the next inode, check
	 * this.
	 */
	dkey = &zbr->key;
	if (key_inum(c, dkey) != key_inum(c, key) ||
	    key_type(c, dkey) != type) {
		/* Walked past the last entry of this inode */
		err = -ENOENT;
		goto out_free;
	}

	err = tnc_read_node_nm(c, zbr, dent);
	if (unlikely(err))
		goto out_free;

	mutex_unlock(&c->tnc_mutex);
	return dent;

out_free:
	kfree(dent);
out_unlock:
	mutex_unlock(&c->tnc_mutex);
	return ERR_PTR(err);
}
| 2529 | |||
| 2530 | /** | ||
| 2531 | * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit. | ||
| 2532 | * @c: UBIFS file-system description object | ||
| 2533 | * | ||
| 2534 | * Destroy left-over obsolete znodes from a failed commit. | ||
| 2535 | */ | ||
| 2536 | static void tnc_destroy_cnext(struct ubifs_info *c) | ||
| 2537 | { | ||
| 2538 | struct ubifs_znode *cnext; | ||
| 2539 | |||
| 2540 | if (!c->cnext) | ||
| 2541 | return; | ||
| 2542 | ubifs_assert(c->cmt_state == COMMIT_BROKEN); | ||
| 2543 | cnext = c->cnext; | ||
| 2544 | do { | ||
| 2545 | struct ubifs_znode *znode = cnext; | ||
| 2546 | |||
| 2547 | cnext = cnext->cnext; | ||
| 2548 | if (test_bit(OBSOLETE_ZNODE, &znode->flags)) | ||
| 2549 | kfree(znode); | ||
| 2550 | } while (cnext && cnext != c->cnext); | ||
| 2551 | } | ||
| 2552 | |||
| 2553 | /** | ||
| 2554 | * ubifs_tnc_close - close TNC subsystem and free all related resources. | ||
| 2555 | * @c: UBIFS file-system description object | ||
| 2556 | */ | ||
| 2557 | void ubifs_tnc_close(struct ubifs_info *c) | ||
| 2558 | { | ||
| 2559 | long clean_freed; | ||
| 2560 | |||
| 2561 | tnc_destroy_cnext(c); | ||
| 2562 | if (c->zroot.znode) { | ||
| 2563 | clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); | ||
| 2564 | atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); | ||
| 2565 | } | ||
| 2566 | kfree(c->gap_lebs); | ||
| 2567 | kfree(c->ilebs); | ||
| 2568 | destroy_old_idx(c); | ||
| 2569 | } | ||
| 2570 | |||
| 2571 | /** | ||
| 2572 | * left_znode - get the znode to the left. | ||
| 2573 | * @c: UBIFS file-system description object | ||
| 2574 | * @znode: znode | ||
| 2575 | * | ||
| 2576 | * This function returns a pointer to the znode to the left of @znode or NULL if | ||
| 2577 | * there is not one. A negative error code is returned on failure. | ||
| 2578 | */ | ||
| 2579 | static struct ubifs_znode *left_znode(struct ubifs_info *c, | ||
| 2580 | struct ubifs_znode *znode) | ||
| 2581 | { | ||
| 2582 | int level = znode->level; | ||
| 2583 | |||
| 2584 | while (1) { | ||
| 2585 | int n = znode->iip - 1; | ||
| 2586 | |||
| 2587 | /* Go up until we can go left */ | ||
| 2588 | znode = znode->parent; | ||
| 2589 | if (!znode) | ||
| 2590 | return NULL; | ||
| 2591 | if (n >= 0) { | ||
| 2592 | /* Now go down the rightmost branch to 'level' */ | ||
| 2593 | znode = get_znode(c, znode, n); | ||
| 2594 | if (IS_ERR(znode)) | ||
| 2595 | return znode; | ||
| 2596 | while (znode->level != level) { | ||
| 2597 | n = znode->child_cnt - 1; | ||
| 2598 | znode = get_znode(c, znode, n); | ||
| 2599 | if (IS_ERR(znode)) | ||
| 2600 | return znode; | ||
| 2601 | } | ||
| 2602 | break; | ||
| 2603 | } | ||
| 2604 | } | ||
| 2605 | return znode; | ||
| 2606 | } | ||
| 2607 | |||
| 2608 | /** | ||
| 2609 | * right_znode - get the znode to the right. | ||
| 2610 | * @c: UBIFS file-system description object | ||
| 2611 | * @znode: znode | ||
| 2612 | * | ||
| 2613 | * This function returns a pointer to the znode to the right of @znode or NULL | ||
| 2614 | * if there is not one. A negative error code is returned on failure. | ||
| 2615 | */ | ||
| 2616 | static struct ubifs_znode *right_znode(struct ubifs_info *c, | ||
| 2617 | struct ubifs_znode *znode) | ||
| 2618 | { | ||
| 2619 | int level = znode->level; | ||
| 2620 | |||
| 2621 | while (1) { | ||
| 2622 | int n = znode->iip + 1; | ||
| 2623 | |||
| 2624 | /* Go up until we can go right */ | ||
| 2625 | znode = znode->parent; | ||
| 2626 | if (!znode) | ||
| 2627 | return NULL; | ||
| 2628 | if (n < znode->child_cnt) { | ||
| 2629 | /* Now go down the leftmost branch to 'level' */ | ||
| 2630 | znode = get_znode(c, znode, n); | ||
| 2631 | if (IS_ERR(znode)) | ||
| 2632 | return znode; | ||
| 2633 | while (znode->level != level) { | ||
| 2634 | znode = get_znode(c, znode, 0); | ||
| 2635 | if (IS_ERR(znode)) | ||
| 2636 | return znode; | ||
| 2637 | } | ||
| 2638 | break; | ||
| 2639 | } | ||
| 2640 | } | ||
| 2641 | return znode; | ||
| 2642 | } | ||
| 2643 | |||
/**
 * lookup_znode - find a particular indexing node from TNC.
 * @c: UBIFS file-system description object
 * @key: index node key to lookup
 * @level: index node level
 * @lnum: index node LEB number
 * @offs: index node offset
 *
 * This function searches an indexing node by its first key @key and its
 * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
 * nodes it traverses to TNC. This function is called for indexing nodes which
 * were found on the media by scanning, for example when garbage-collecting or
 * when doing in-the-gaps commit. This means that the indexing node which is
 * looked for does not have to have exactly the same leftmost key @key, because
 * the leftmost key may have been changed, in which case TNC will contain a
 * dirty znode which still refers the same @lnum:@offs. This function is clever
 * enough to recognize such indexing nodes.
 *
 * Note, if a znode was deleted or changed too much, then this function will
 * not find it. For situations like this UBIFS has the old index RB-tree
 * (indexed by @lnum:@offs).
 *
 * This function returns a pointer to the znode found or %NULL if it is not
 * found. A negative error code is returned on failure.
 */
static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
					union ubifs_key *key, int level,
					int lnum, int offs)
{
	struct ubifs_znode *znode, *zn;
	int n, nn;

	/*
	 * The arguments have probably been read off flash, so don't assume
	 * they are valid.
	 */
	if (level < 0)
		return ERR_PTR(-EINVAL);

	/* Get the root znode */
	znode = c->zroot.znode;
	if (!znode) {
		znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
		if (IS_ERR(znode))
			return znode;
	}
	/* Check if it is the one we are looking for */
	if (c->zroot.lnum == lnum && c->zroot.offs == offs)
		return znode;
	/* Descend to the parent level i.e. (level + 1) */
	if (level >= znode->level)
		return NULL;
	while (1) {
		ubifs_search_zbranch(c, znode, key, &n);
		if (n < 0) {
			/*
			 * We reached a znode where the leftmost key is greater
			 * than the key we are searching for. This is the same
			 * situation as the one described in a huge comment at
			 * the end of the 'ubifs_lookup_level0()' function. And
			 * for exactly the same reasons we have to try to look
			 * left before giving up.
			 */
			znode = left_znode(c, znode);
			if (!znode)
				return NULL;
			if (IS_ERR(znode))
				return znode;
			ubifs_search_zbranch(c, znode, key, &n);
			ubifs_assert(n >= 0);
		}
		if (znode->level == level + 1)
			break;
		znode = get_znode(c, znode, n);
		if (IS_ERR(znode))
			return znode;
	}
	/* Check if the child is the one we are looking for */
	if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs)
		return get_znode(c, znode, n);
	/* If the key is unique, there is nowhere else to look */
	if (!is_hash_key(c, key))
		return NULL;
	/*
	 * The key is not unique and so may be also in the znodes to either
	 * side.
	 */
	zn = znode;
	nn = n;
	/* Look left */
	while (1) {
		/* Move one branch to the left */
		if (n)
			n -= 1;
		else {
			znode = left_znode(c, znode);
			if (!znode)
				break;
			if (IS_ERR(znode))
				return znode;
			n = znode->child_cnt - 1;
		}
		/* Check it */
		if (znode->zbranch[n].lnum == lnum &&
		    znode->zbranch[n].offs == offs)
			return get_znode(c, znode, n);
		/* Stop if the key is less than the one we are looking for */
		if (keys_cmp(c, &znode->zbranch[n].key, key) < 0)
			break;
	}
	/* Back to the middle */
	znode = zn;
	n = nn;
	/* Look right */
	while (1) {
		/* Move one branch to the right */
		if (++n >= znode->child_cnt) {
			znode = right_znode(c, znode);
			if (!znode)
				break;
			if (IS_ERR(znode))
				return znode;
			n = 0;
		}
		/* Check it */
		if (znode->zbranch[n].lnum == lnum &&
		    znode->zbranch[n].offs == offs)
			return get_znode(c, znode, n);
		/* Stop if the key is greater than the one we are looking for */
		if (keys_cmp(c, &znode->zbranch[n].key, key) > 0)
			break;
	}
	return NULL;
}
| 2778 | |||
/**
 * is_idx_node_in_tnc - determine if an index node is in the TNC.
 * @c: UBIFS file-system description object
 * @key: key of index node
 * @level: index node level
 * @lnum: LEB number of index node
 * @offs: offset of index node
 *
 * This function returns %0 if the index node is not referred to in the TNC, %1
 * if the index node is referred to in the TNC and the corresponding znode is
 * dirty, %2 if an index node is referred to in the TNC and the corresponding
 * znode is clean, and a negative error code in case of failure.
 *
 * Note, the @key argument has to be the key of the first child. Also note,
 * this function relies on the fact that 0:0 is never a valid LEB number and
 * offset for a main-area node.
 */
int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
		       int lnum, int offs)
{
	struct ubifs_znode *znode = lookup_znode(c, key, level, lnum, offs);

	if (IS_ERR(znode))
		return PTR_ERR(znode);
	if (!znode)
		return 0;
	if (ubifs_zn_dirty(znode))
		return 1;
	return 2;
}
| 2809 | |||
/**
 * is_leaf_node_in_tnc - determine if a non-indexing node is in the TNC.
 * @c: UBIFS file-system description object
 * @key: node key
 * @lnum: node LEB number
 * @offs: node offset
 *
 * This function returns %1 if the node is referred to in the TNC, %0 if it is
 * not, and a negative error code in case of failure.
 *
 * Note, this function relies on the fact that 0:0 is never a valid LEB number
 * and offset for a main-area node.
 */
static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key,
			       int lnum, int offs)
{
	struct ubifs_zbranch *zbr;
	struct ubifs_znode *znode, *zn;
	int n, found, err, nn;
	const int unique = !is_hash_key(c, key);

	found = ubifs_lookup_level0(c, key, &znode, &n);
	if (found < 0)
		return found; /* Error code */
	if (!found)
		return 0;
	zbr = &znode->zbranch[n];
	if (lnum == zbr->lnum && offs == zbr->offs)
		return 1; /* Found it */
	if (unique)
		return 0;
	/*
	 * Because the key is not unique, we have to look left
	 * and right as well
	 */
	zn = znode;
	nn = n;
	/* Look left */
	while (1) {
		err = tnc_prev(c, &znode, &n);
		if (err == -ENOENT)
			break;
		if (err)
			return err;
		/* Stop once the key no longer collides with @key */
		if (keys_cmp(c, key, &znode->zbranch[n].key))
			break;
		zbr = &znode->zbranch[n];
		if (lnum == zbr->lnum && offs == zbr->offs)
			return 1; /* Found it */
	}
	/* Look right */
	znode = zn;
	n = nn;
	while (1) {
		err = tnc_next(c, &znode, &n);
		if (err) {
			if (err == -ENOENT)
				return 0;
			return err;
		}
		/* Stop once the key no longer collides with @key */
		if (keys_cmp(c, key, &znode->zbranch[n].key))
			break;
		zbr = &znode->zbranch[n];
		if (lnum == zbr->lnum && offs == zbr->offs)
			return 1; /* Found it */
	}
	return 0;
}
| 2878 | |||
| 2879 | /** | ||
| 2880 | * ubifs_tnc_has_node - determine whether a node is in the TNC. | ||
| 2881 | * @c: UBIFS file-system description object | ||
| 2882 | * @key: node key | ||
| 2883 | * @level: index node level (if it is an index node) | ||
| 2884 | * @lnum: node LEB number | ||
| 2885 | * @offs: node offset | ||
| 2886 | * @is_idx: non-zero if the node is an index node | ||
| 2887 | * | ||
| 2888 | * This function returns %1 if the node is in the TNC, %0 if it is not, and a | ||
| 2889 | * negative error code in case of failure. For index nodes, @key has to be the | ||
| 2890 | * key of the first child. An index node is considered to be in the TNC only if | ||
| 2891 | * the corresponding znode is clean or has not been loaded. | ||
| 2892 | */ | ||
| 2893 | int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, | ||
| 2894 | int lnum, int offs, int is_idx) | ||
| 2895 | { | ||
| 2896 | int err; | ||
| 2897 | |||
| 2898 | mutex_lock(&c->tnc_mutex); | ||
| 2899 | if (is_idx) { | ||
| 2900 | err = is_idx_node_in_tnc(c, key, level, lnum, offs); | ||
| 2901 | if (err < 0) | ||
| 2902 | goto out_unlock; | ||
| 2903 | if (err == 1) | ||
| 2904 | /* The index node was found but it was dirty */ | ||
| 2905 | err = 0; | ||
| 2906 | else if (err == 2) | ||
| 2907 | /* The index node was found and it was clean */ | ||
| 2908 | err = 1; | ||
| 2909 | else | ||
| 2910 | BUG_ON(err != 0); | ||
| 2911 | } else | ||
| 2912 | err = is_leaf_node_in_tnc(c, key, lnum, offs); | ||
| 2913 | |||
| 2914 | out_unlock: | ||
| 2915 | mutex_unlock(&c->tnc_mutex); | ||
| 2916 | return err; | ||
| 2917 | } | ||
| 2918 | |||
| 2919 | /** | ||
| 2920 | * ubifs_dirty_idx_node - dirty an index node. | ||
| 2921 | * @c: UBIFS file-system description object | ||
| 2922 | * @key: index node key | ||
| 2923 | * @level: index node level | ||
| 2924 | * @lnum: index node LEB number | ||
| 2925 | * @offs: index node offset | ||
| 2926 | * | ||
| 2927 | * This function loads and dirties an index node so that it can be garbage | ||
| 2928 | * collected. The @key argument has to be the key of the first child. This | ||
| 2929 | * function relies on the fact that 0:0 is never a valid LEB number and offset | ||
| 2930 | * for a main-area node. Returns %0 on success and a negative error code on | ||
| 2931 | * failure. | ||
| 2932 | */ | ||
| 2933 | int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, | ||
| 2934 | int lnum, int offs) | ||
| 2935 | { | ||
| 2936 | struct ubifs_znode *znode; | ||
| 2937 | int err = 0; | ||
| 2938 | |||
| 2939 | mutex_lock(&c->tnc_mutex); | ||
| 2940 | znode = lookup_znode(c, key, level, lnum, offs); | ||
| 2941 | if (!znode) | ||
| 2942 | goto out_unlock; | ||
| 2943 | if (IS_ERR(znode)) { | ||
| 2944 | err = PTR_ERR(znode); | ||
| 2945 | goto out_unlock; | ||
| 2946 | } | ||
| 2947 | znode = dirty_cow_bottom_up(c, znode); | ||
| 2948 | if (IS_ERR(znode)) { | ||
| 2949 | err = PTR_ERR(znode); | ||
| 2950 | goto out_unlock; | ||
| 2951 | } | ||
| 2952 | |||
| 2953 | out_unlock: | ||
| 2954 | mutex_unlock(&c->tnc_mutex); | ||
| 2955 | return err; | ||
| 2956 | } | ||
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c new file mode 100644 index 000000000000..8117e65ba2e9 --- /dev/null +++ b/fs/ubifs/tnc_commit.c | |||
| @@ -0,0 +1,1103 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* This file implements TNC functions for committing */ | ||
| 24 | |||
| 25 | #include "ubifs.h" | ||
| 26 | |||
/**
 * make_idx_node - make an index node for fill-the-gaps method of TNC commit.
 * @c: UBIFS file-system description object
 * @idx: buffer in which to place new index node
 * @znode: znode from which to make new index node
 * @lnum: LEB number where new index node will be written
 * @offs: offset where new index node will be written
 * @len: length of new index node
 *
 * This function serializes @znode into @idx, records the new on-flash
 * position in the parent zbranch (or the root), and clears the znode's
 * dirty/COW state. Returns %0 on success and a negative error code on
 * failure.
 */
static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
			 struct ubifs_znode *znode, int lnum, int offs, int len)
{
	struct ubifs_znode *zp;
	int i, err;

	/* Make index node */
	idx->ch.node_type = UBIFS_IDX_NODE;
	idx->child_cnt = cpu_to_le16(znode->child_cnt);
	idx->level = cpu_to_le16(znode->level);
	for (i = 0; i < znode->child_cnt; i++) {
		struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
		struct ubifs_zbranch *zbr = &znode->zbranch[i];

		key_write_idx(c, &zbr->key, &br->key);
		br->lnum = cpu_to_le32(zbr->lnum);
		br->offs = cpu_to_le32(zbr->offs);
		br->len = cpu_to_le32(zbr->len);
		/* 0:0 is never valid for a main-area node - complain loudly */
		if (!zbr->lnum || !zbr->len) {
			ubifs_err("bad ref in znode");
			dbg_dump_znode(c, znode);
			if (zbr->znode)
				dbg_dump_znode(c, zbr->znode);
		}
	}
	ubifs_prepare_node(c, idx, len, 0);

#ifdef CONFIG_UBIFS_FS_DEBUG
	znode->lnum = lnum;
	znode->offs = offs;
	znode->len = len;
#endif

	/* The old position of this znode becomes obsolete from now on */
	err = insert_old_idx_znode(c, znode);

	/* Update the parent */
	zp = znode->parent;
	if (zp) {
		struct ubifs_zbranch *zbr;

		zbr = &zp->zbranch[znode->iip];
		zbr->lnum = lnum;
		zbr->offs = offs;
		zbr->len = len;
	} else {
		c->zroot.lnum = lnum;
		c->zroot.offs = offs;
		c->zroot.len = len;
	}
	/* Account the new index node in the calculated index size */
	c->calc_idx_sz += ALIGN(len, 8);

	atomic_long_dec(&c->dirty_zn_cnt);

	ubifs_assert(ubifs_zn_dirty(znode));
	ubifs_assert(test_bit(COW_ZNODE, &znode->flags));

	/* The znode is now committed - clean and writable again */
	__clear_bit(DIRTY_ZNODE, &znode->flags);
	__clear_bit(COW_ZNODE, &znode->flags);

	return err;
}
| 97 | |||
/**
 * fill_gap - make index nodes in gaps in dirty index LEBs.
 * @c: UBIFS file-system description object
 * @lnum: LEB number that gap appears in
 * @gap_start: offset of start of gap
 * @gap_end: offset of end of gap
 * @dirt: adds dirty space to this
 *
 * This function packs as many of the pending commit znodes (the @c->enext
 * list) as will fit into the gap, padding whatever remains. It returns the
 * number of index nodes written into the gap, or a negative error code in
 * case of failure.
 */
static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
		    int *dirt)
{
	int len, gap_remains, gap_pos, written, pad_len;

	/* Gaps are 8-byte aligned, like all index node positions */
	ubifs_assert((gap_start & 7) == 0);
	ubifs_assert((gap_end & 7) == 0);
	ubifs_assert(gap_end >= gap_start);

	gap_remains = gap_end - gap_start;
	if (!gap_remains)
		return 0;
	gap_pos = gap_start;
	written = 0;
	while (c->enext) {
		len = ubifs_idx_node_sz(c, c->enext->child_cnt);
		if (len < gap_remains) {
			struct ubifs_znode *znode = c->enext;
			const int alen = ALIGN(len, 8);
			int err;

			ubifs_assert(alen <= gap_remains);
			err = make_idx_node(c, c->ileb_buf + gap_pos, znode,
					    lnum, gap_pos, len);
			if (err)
				return err;
			gap_remains -= alen;
			gap_pos += alen;
			/* Advance to the next znode awaiting commit */
			c->enext = znode->cnext;
			if (c->enext == c->cnext)
				c->enext = NULL;
			written += 1;
		} else
			break;
	}
	if (gap_end == c->leb_size) {
		c->ileb_len = ALIGN(gap_pos, c->min_io_size);
		/* Pad to end of min_io_size */
		pad_len = c->ileb_len - gap_pos;
	} else
		/* Pad to end of gap */
		pad_len = gap_remains;
	dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d",
	       lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len);
	ubifs_pad(c, c->ileb_buf + gap_pos, pad_len);
	*dirt += pad_len;
	return written;
}
| 156 | |||
| 157 | /** | ||
| 158 | * find_old_idx - find an index node obsoleted since the last commit start. | ||
| 159 | * @c: UBIFS file-system description object | ||
| 160 | * @lnum: LEB number of obsoleted index node | ||
| 161 | * @offs: offset of obsoleted index node | ||
| 162 | * | ||
| 163 | * Returns %1 if found and %0 otherwise. | ||
| 164 | */ | ||
| 165 | static int find_old_idx(struct ubifs_info *c, int lnum, int offs) | ||
| 166 | { | ||
| 167 | struct ubifs_old_idx *o; | ||
| 168 | struct rb_node *p; | ||
| 169 | |||
| 170 | p = c->old_idx.rb_node; | ||
| 171 | while (p) { | ||
| 172 | o = rb_entry(p, struct ubifs_old_idx, rb); | ||
| 173 | if (lnum < o->lnum) | ||
| 174 | p = p->rb_left; | ||
| 175 | else if (lnum > o->lnum) | ||
| 176 | p = p->rb_right; | ||
| 177 | else if (offs < o->offs) | ||
| 178 | p = p->rb_left; | ||
| 179 | else if (offs > o->offs) | ||
| 180 | p = p->rb_right; | ||
| 181 | else | ||
| 182 | return 1; | ||
| 183 | } | ||
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
/**
 * is_idx_node_in_use - determine if an index node can be overwritten.
 * @c: UBIFS file-system description object
 * @key: key of index node
 * @level: index node level
 * @lnum: LEB number of index node
 * @offs: offset of index node
 *
 * If @key / @lnum / @offs identify an index node that was not part of the old
 * index, then this function returns %0 (obsolete). Else if the index node was
 * part of the old index but is now dirty %1 is returned, else if it is clean %2
 * is returned. A negative error code is returned on failure.
 */
static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
			      int level, int lnum, int offs)
{
	int ret = is_idx_node_in_tnc(c, key, level, lnum, offs);

	if (ret < 0)
		return ret; /* Error code */
	/* Not in TNC, but may still belong to the pre-commit index */
	if (ret == 0 && find_old_idx(c, lnum, offs))
		return 1;
	return ret;
}
| 213 | |||
/**
 * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
 * @c: UBIFS file-system description object
 * @p: return LEB number here
 *
 * This function lays out new index nodes for dirty znodes using in-the-gaps
 * method of TNC commit.
 * This function merely puts the next znode into the next gap, making no attempt
 * to try to maximise the number of znodes that fit.
 * This function returns the number of index nodes written into the gaps, or a
 * negative error code on failure.
 */
static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written;

	tot_written = 0;
	/* Get an index LEB with lots of obsolete index nodes */
	lnum = ubifs_find_dirty_idx_leb(c);
	if (lnum < 0)
		/*
		 * There also may be dirt in the index head that could be
		 * filled, however we do not check there at present.
		 */
		return lnum; /* Error code */
	*p = lnum;
	dbg_gc("LEB %d", lnum);
	/*
	 * Scan the index LEB. We use the generic scan for this even though
	 * it is more comprehensive and less efficient than is needed for this
	 * purpose.
	 */
	sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
	c->ileb_len = 0;
	if (IS_ERR(sleb))
		return PTR_ERR(sleb);
	gap_start = 0;
	list_for_each_entry(snod, &sleb->nodes, list) {
		struct ubifs_idx_node *idx;
		int in_use, level;

		/* Only index nodes are expected on an index LEB */
		ubifs_assert(snod->type == UBIFS_IDX_NODE);
		idx = snod->node;
		key_read(c, ubifs_idx_key(c, idx), &snod->key);
		level = le16_to_cpu(idx->level);
		/* Determine if the index node is in use (not obsolete) */
		in_use = is_idx_node_in_use(c, &snod->key, level, lnum,
					    snod->offs);
		if (in_use < 0) {
			ubifs_scan_destroy(sleb);
			return in_use; /* Error code */
		}
		if (in_use) {
			/* In use and dirty - account the dirty space */
			if (in_use == 1)
				dirt += ALIGN(snod->len, 8);
			/*
			 * The obsolete index nodes form gaps that can be
			 * overwritten. This gap has ended because we have
			 * found an index node that is still in use
			 * i.e. not obsolete
			 */
			gap_end = snod->offs;
			/* Try to fill gap */
			written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
			if (written < 0) {
				ubifs_scan_destroy(sleb);
				return written; /* Error code */
			}
			tot_written += written;
			/* The next gap starts just past this live node */
			gap_start = ALIGN(snod->offs + snod->len, 8);
		}
	}
	ubifs_scan_destroy(sleb);
	/* Fill the trailing gap up to the end of the LEB */
	c->ileb_len = c->leb_size;
	gap_end = c->leb_size;
	/* Try to fill gap */
	written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
	if (written < 0)
		return written; /* Error code */
	tot_written += written;
	if (tot_written == 0) {
		struct ubifs_lprops lp;

		dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
		err = ubifs_read_one_lp(c, lnum, &lp);
		if (err)
			return err;
		if (lp.free == c->leb_size) {
			/*
			 * We must have snatched this LEB from the idx_gc list
			 * so we need to correct the free and dirty space.
			 */
			err = ubifs_change_one_lp(c, lnum,
						  c->leb_size - c->ileb_len,
						  dirt, 0, 0, 0);
			if (err)
				return err;
		}
		return 0;
	}
	err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt,
				  0, 0, 0);
	if (err)
		return err;
	/* Atomically replace the LEB contents with the refilled buffer */
	err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len,
			       UBI_SHORTTERM);
	if (err)
		return err;
	dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
	return tot_written;
}
| 327 | |||
| 328 | /** | ||
| 329 | * get_leb_cnt - calculate the number of empty LEBs needed to commit. | ||
| 330 | * @c: UBIFS file-system description object | ||
| 331 | * @cnt: number of znodes to commit | ||
| 332 | * | ||
| 333 | * This function returns the number of empty LEBs needed to commit @cnt znodes | ||
| 334 | * to the current index head. The number is not exact and may be more than | ||
| 335 | * needed. | ||
| 336 | */ | ||
| 337 | static int get_leb_cnt(struct ubifs_info *c, int cnt) | ||
| 338 | { | ||
| 339 | int d; | ||
| 340 | |||
| 341 | /* Assume maximum index node size (i.e. overestimate space needed) */ | ||
| 342 | cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz; | ||
| 343 | if (cnt < 0) | ||
| 344 | cnt = 0; | ||
| 345 | d = c->leb_size / c->max_idx_node_sz; | ||
| 346 | return DIV_ROUND_UP(cnt, d); | ||
| 347 | } | ||
| 348 | |||
| 349 | /** | ||
| 350 | * layout_in_gaps - in-the-gaps method of committing TNC. | ||
| 351 | * @c: UBIFS file-system description object | ||
| 352 | * @cnt: number of dirty znodes to commit. | ||
| 353 | * | ||
| 354 | * This function lays out new index nodes for dirty znodes using in-the-gaps | ||
| 355 | * method of TNC commit. | ||
| 356 | * | ||
| 357 | * This function returns %0 on success and a negative error code on failure. | ||
| 358 | */ | ||
| 359 | static int layout_in_gaps(struct ubifs_info *c, int cnt) | ||
| 360 | { | ||
| 361 | int err, leb_needed_cnt, written, *p; | ||
| 362 | |||
| 363 | dbg_gc("%d znodes to write", cnt); | ||
| 364 | |||
| 365 | c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS); | ||
| 366 | if (!c->gap_lebs) | ||
| 367 | return -ENOMEM; | ||
| 368 | |||
| 369 | p = c->gap_lebs; | ||
| 370 | do { | ||
| 371 | ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); | ||
| 372 | written = layout_leb_in_gaps(c, p); | ||
| 373 | if (written < 0) { | ||
| 374 | err = written; | ||
| 375 | if (err == -ENOSPC) { | ||
| 376 | if (!dbg_force_in_the_gaps_enabled) { | ||
| 377 | /* | ||
| 378 | * Do not print scary warnings if the | ||
| 379 | * debugging option which forces | ||
| 380 | * in-the-gaps is enabled. | ||
| 381 | */ | ||
| 382 | ubifs_err("out of space"); | ||
| 383 | spin_lock(&c->space_lock); | ||
| 384 | dbg_dump_budg(c); | ||
| 385 | spin_unlock(&c->space_lock); | ||
| 386 | dbg_dump_lprops(c); | ||
| 387 | } | ||
| 388 | /* Try to commit anyway */ | ||
| 389 | err = 0; | ||
| 390 | break; | ||
| 391 | } | ||
| 392 | kfree(c->gap_lebs); | ||
| 393 | c->gap_lebs = NULL; | ||
| 394 | return err; | ||
| 395 | } | ||
| 396 | p++; | ||
| 397 | cnt -= written; | ||
| 398 | leb_needed_cnt = get_leb_cnt(c, cnt); | ||
| 399 | dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, | ||
| 400 | leb_needed_cnt, c->ileb_cnt); | ||
| 401 | } while (leb_needed_cnt > c->ileb_cnt); | ||
| 402 | |||
| 403 | *p = -1; | ||
| 404 | return 0; | ||
| 405 | } | ||
| 406 | |||
| 407 | /** | ||
| 408 | * layout_in_empty_space - layout index nodes in empty space. | ||
| 409 | * @c: UBIFS file-system description object | ||
| 410 | * | ||
| 411 | * This function lays out new index nodes for dirty znodes using empty LEBs. | ||
| 412 | * | ||
| 413 | * This function returns %0 on success and a negative error code on failure. | ||
| 414 | */ | ||
| 415 | static int layout_in_empty_space(struct ubifs_info *c) | ||
| 416 | { | ||
| 417 | struct ubifs_znode *znode, *cnext, *zp; | ||
| 418 | int lnum, offs, len, next_len, buf_len, buf_offs, used, avail; | ||
| 419 | int wlen, blen, err; | ||
| 420 | |||
| 421 | cnext = c->enext; | ||
| 422 | if (!cnext) | ||
| 423 | return 0; | ||
| 424 | |||
| 425 | lnum = c->ihead_lnum; | ||
| 426 | buf_offs = c->ihead_offs; | ||
| 427 | |||
| 428 | buf_len = ubifs_idx_node_sz(c, c->fanout); | ||
| 429 | buf_len = ALIGN(buf_len, c->min_io_size); | ||
| 430 | used = 0; | ||
| 431 | avail = buf_len; | ||
| 432 | |||
| 433 | /* Ensure there is enough room for first write */ | ||
| 434 | next_len = ubifs_idx_node_sz(c, cnext->child_cnt); | ||
| 435 | if (buf_offs + next_len > c->leb_size) | ||
| 436 | lnum = -1; | ||
| 437 | |||
| 438 | while (1) { | ||
| 439 | znode = cnext; | ||
| 440 | |||
| 441 | len = ubifs_idx_node_sz(c, znode->child_cnt); | ||
| 442 | |||
| 443 | /* Determine the index node position */ | ||
| 444 | if (lnum == -1) { | ||
| 445 | if (c->ileb_nxt >= c->ileb_cnt) { | ||
| 446 | ubifs_err("out of space"); | ||
| 447 | return -ENOSPC; | ||
| 448 | } | ||
| 449 | lnum = c->ilebs[c->ileb_nxt++]; | ||
| 450 | buf_offs = 0; | ||
| 451 | used = 0; | ||
| 452 | avail = buf_len; | ||
| 453 | } | ||
| 454 | |||
| 455 | offs = buf_offs + used; | ||
| 456 | |||
| 457 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 458 | znode->lnum = lnum; | ||
| 459 | znode->offs = offs; | ||
| 460 | znode->len = len; | ||
| 461 | #endif | ||
| 462 | |||
| 463 | /* Update the parent */ | ||
| 464 | zp = znode->parent; | ||
| 465 | if (zp) { | ||
| 466 | struct ubifs_zbranch *zbr; | ||
| 467 | int i; | ||
| 468 | |||
| 469 | i = znode->iip; | ||
| 470 | zbr = &zp->zbranch[i]; | ||
| 471 | zbr->lnum = lnum; | ||
| 472 | zbr->offs = offs; | ||
| 473 | zbr->len = len; | ||
| 474 | } else { | ||
| 475 | c->zroot.lnum = lnum; | ||
| 476 | c->zroot.offs = offs; | ||
| 477 | c->zroot.len = len; | ||
| 478 | } | ||
| 479 | c->calc_idx_sz += ALIGN(len, 8); | ||
| 480 | |||
| 481 | /* | ||
| 482 | * Once lprops is updated, we can decrease the dirty znode count | ||
| 483 | * but it is easier to just do it here. | ||
| 484 | */ | ||
| 485 | atomic_long_dec(&c->dirty_zn_cnt); | ||
| 486 | |||
| 487 | /* | ||
| 488 | * Calculate the next index node length to see if there is | ||
| 489 | * enough room for it | ||
| 490 | */ | ||
| 491 | cnext = znode->cnext; | ||
| 492 | if (cnext == c->cnext) | ||
| 493 | next_len = 0; | ||
| 494 | else | ||
| 495 | next_len = ubifs_idx_node_sz(c, cnext->child_cnt); | ||
| 496 | |||
| 497 | if (c->min_io_size == 1) { | ||
| 498 | buf_offs += ALIGN(len, 8); | ||
| 499 | if (next_len) { | ||
| 500 | if (buf_offs + next_len <= c->leb_size) | ||
| 501 | continue; | ||
| 502 | err = ubifs_update_one_lp(c, lnum, 0, | ||
| 503 | c->leb_size - buf_offs, 0, 0); | ||
| 504 | if (err) | ||
| 505 | return err; | ||
| 506 | lnum = -1; | ||
| 507 | continue; | ||
| 508 | } | ||
| 509 | err = ubifs_update_one_lp(c, lnum, | ||
| 510 | c->leb_size - buf_offs, 0, 0, 0); | ||
| 511 | if (err) | ||
| 512 | return err; | ||
| 513 | break; | ||
| 514 | } | ||
| 515 | |||
| 516 | /* Update buffer positions */ | ||
| 517 | wlen = used + len; | ||
| 518 | used += ALIGN(len, 8); | ||
| 519 | avail -= ALIGN(len, 8); | ||
| 520 | |||
| 521 | if (next_len != 0 && | ||
| 522 | buf_offs + used + next_len <= c->leb_size && | ||
| 523 | avail > 0) | ||
| 524 | continue; | ||
| 525 | |||
| 526 | if (avail <= 0 && next_len && | ||
| 527 | buf_offs + used + next_len <= c->leb_size) | ||
| 528 | blen = buf_len; | ||
| 529 | else | ||
| 530 | blen = ALIGN(wlen, c->min_io_size); | ||
| 531 | |||
| 532 | /* The buffer is full or there are no more znodes to do */ | ||
| 533 | buf_offs += blen; | ||
| 534 | if (next_len) { | ||
| 535 | if (buf_offs + next_len > c->leb_size) { | ||
| 536 | err = ubifs_update_one_lp(c, lnum, | ||
| 537 | c->leb_size - buf_offs, blen - used, | ||
| 538 | 0, 0); | ||
| 539 | if (err) | ||
| 540 | return err; | ||
| 541 | lnum = -1; | ||
| 542 | } | ||
| 543 | used -= blen; | ||
| 544 | if (used < 0) | ||
| 545 | used = 0; | ||
| 546 | avail = buf_len - used; | ||
| 547 | continue; | ||
| 548 | } | ||
| 549 | err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs, | ||
| 550 | blen - used, 0, 0); | ||
| 551 | if (err) | ||
| 552 | return err; | ||
| 553 | break; | ||
| 554 | } | ||
| 555 | |||
| 556 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 557 | c->new_ihead_lnum = lnum; | ||
| 558 | c->new_ihead_offs = buf_offs; | ||
| 559 | #endif | ||
| 560 | |||
| 561 | return 0; | ||
| 562 | } | ||
| 563 | |||
| 564 | /** | ||
| 565 | * layout_commit - determine positions of index nodes to commit. | ||
| 566 | * @c: UBIFS file-system description object | ||
| 567 | * @no_space: indicates that insufficient empty LEBs were allocated | ||
| 568 | * @cnt: number of znodes to commit | ||
| 569 | * | ||
| 570 | * Calculate and update the positions of index nodes to commit. If there were | ||
| 571 | * an insufficient number of empty LEBs allocated, then index nodes are placed | ||
| 572 | * into the gaps created by obsolete index nodes in non-empty index LEBs. For | ||
| 573 | * this purpose, an obsolete index node is one that was not in the index as at | ||
| 574 | * the end of the last commit. To write "in-the-gaps" requires that those index | ||
| 575 | * LEBs are updated atomically in-place. | ||
| 576 | */ | ||
| 577 | static int layout_commit(struct ubifs_info *c, int no_space, int cnt) | ||
| 578 | { | ||
| 579 | int err; | ||
| 580 | |||
| 581 | if (no_space) { | ||
| 582 | err = layout_in_gaps(c, cnt); | ||
| 583 | if (err) | ||
| 584 | return err; | ||
| 585 | } | ||
| 586 | err = layout_in_empty_space(c); | ||
| 587 | return err; | ||
| 588 | } | ||
| 589 | |||
| 590 | /** | ||
| 591 | * find_first_dirty - find first dirty znode. | ||
| 592 | * @znode: znode to begin searching from | ||
| 593 | */ | ||
| 594 | static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode) | ||
| 595 | { | ||
| 596 | int i, cont; | ||
| 597 | |||
| 598 | if (!znode) | ||
| 599 | return NULL; | ||
| 600 | |||
| 601 | while (1) { | ||
| 602 | if (znode->level == 0) { | ||
| 603 | if (ubifs_zn_dirty(znode)) | ||
| 604 | return znode; | ||
| 605 | return NULL; | ||
| 606 | } | ||
| 607 | cont = 0; | ||
| 608 | for (i = 0; i < znode->child_cnt; i++) { | ||
| 609 | struct ubifs_zbranch *zbr = &znode->zbranch[i]; | ||
| 610 | |||
| 611 | if (zbr->znode && ubifs_zn_dirty(zbr->znode)) { | ||
| 612 | znode = zbr->znode; | ||
| 613 | cont = 1; | ||
| 614 | break; | ||
| 615 | } | ||
| 616 | } | ||
| 617 | if (!cont) { | ||
| 618 | if (ubifs_zn_dirty(znode)) | ||
| 619 | return znode; | ||
| 620 | return NULL; | ||
| 621 | } | ||
| 622 | } | ||
| 623 | } | ||
| 624 | |||
| 625 | /** | ||
| 626 | * find_next_dirty - find next dirty znode. | ||
| 627 | * @znode: znode to begin searching from | ||
| 628 | */ | ||
| 629 | static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode) | ||
| 630 | { | ||
| 631 | int n = znode->iip + 1; | ||
| 632 | |||
| 633 | znode = znode->parent; | ||
| 634 | if (!znode) | ||
| 635 | return NULL; | ||
| 636 | for (; n < znode->child_cnt; n++) { | ||
| 637 | struct ubifs_zbranch *zbr = &znode->zbranch[n]; | ||
| 638 | |||
| 639 | if (zbr->znode && ubifs_zn_dirty(zbr->znode)) | ||
| 640 | return find_first_dirty(zbr->znode); | ||
| 641 | } | ||
| 642 | return znode; | ||
| 643 | } | ||
| 644 | |||
| 645 | /** | ||
| 646 | * get_znodes_to_commit - create list of dirty znodes to commit. | ||
| 647 | * @c: UBIFS file-system description object | ||
| 648 | * | ||
| 649 | * This function returns the number of znodes to commit. | ||
| 650 | */ | ||
| 651 | static int get_znodes_to_commit(struct ubifs_info *c) | ||
| 652 | { | ||
| 653 | struct ubifs_znode *znode, *cnext; | ||
| 654 | int cnt = 0; | ||
| 655 | |||
| 656 | c->cnext = find_first_dirty(c->zroot.znode); | ||
| 657 | znode = c->enext = c->cnext; | ||
| 658 | if (!znode) { | ||
| 659 | dbg_cmt("no znodes to commit"); | ||
| 660 | return 0; | ||
| 661 | } | ||
| 662 | cnt += 1; | ||
| 663 | while (1) { | ||
| 664 | ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); | ||
| 665 | __set_bit(COW_ZNODE, &znode->flags); | ||
| 666 | znode->alt = 0; | ||
| 667 | cnext = find_next_dirty(znode); | ||
| 668 | if (!cnext) { | ||
| 669 | znode->cnext = c->cnext; | ||
| 670 | break; | ||
| 671 | } | ||
| 672 | znode->cnext = cnext; | ||
| 673 | znode = cnext; | ||
| 674 | cnt += 1; | ||
| 675 | } | ||
| 676 | dbg_cmt("committing %d znodes", cnt); | ||
| 677 | ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt)); | ||
| 678 | return cnt; | ||
| 679 | } | ||
| 680 | |||
| 681 | /** | ||
| 682 | * alloc_idx_lebs - allocate empty LEBs to be used to commit. | ||
| 683 | * @c: UBIFS file-system description object | ||
| 684 | * @cnt: number of znodes to commit | ||
| 685 | * | ||
| 686 | * This function returns %-ENOSPC if it cannot allocate a sufficient number of | ||
| 687 | * empty LEBs. %0 is returned on success, otherwise a negative error code | ||
| 688 | * is returned. | ||
| 689 | */ | ||
| 690 | static int alloc_idx_lebs(struct ubifs_info *c, int cnt) | ||
| 691 | { | ||
| 692 | int i, leb_cnt, lnum; | ||
| 693 | |||
| 694 | c->ileb_cnt = 0; | ||
| 695 | c->ileb_nxt = 0; | ||
| 696 | leb_cnt = get_leb_cnt(c, cnt); | ||
| 697 | dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt); | ||
| 698 | if (!leb_cnt) | ||
| 699 | return 0; | ||
| 700 | c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS); | ||
| 701 | if (!c->ilebs) | ||
| 702 | return -ENOMEM; | ||
| 703 | for (i = 0; i < leb_cnt; i++) { | ||
| 704 | lnum = ubifs_find_free_leb_for_idx(c); | ||
| 705 | if (lnum < 0) | ||
| 706 | return lnum; | ||
| 707 | c->ilebs[c->ileb_cnt++] = lnum; | ||
| 708 | dbg_cmt("LEB %d", lnum); | ||
| 709 | } | ||
| 710 | if (dbg_force_in_the_gaps()) | ||
| 711 | return -ENOSPC; | ||
| 712 | return 0; | ||
| 713 | } | ||
| 714 | |||
| 715 | /** | ||
| 716 | * free_unused_idx_lebs - free unused LEBs that were allocated for the commit. | ||
| 717 | * @c: UBIFS file-system description object | ||
| 718 | * | ||
| 719 | * It is possible that we allocate more empty LEBs for the commit than we need. | ||
| 720 | * This functions frees the surplus. | ||
| 721 | * | ||
| 722 | * This function returns %0 on success and a negative error code on failure. | ||
| 723 | */ | ||
| 724 | static int free_unused_idx_lebs(struct ubifs_info *c) | ||
| 725 | { | ||
| 726 | int i, err = 0, lnum, er; | ||
| 727 | |||
| 728 | for (i = c->ileb_nxt; i < c->ileb_cnt; i++) { | ||
| 729 | lnum = c->ilebs[i]; | ||
| 730 | dbg_cmt("LEB %d", lnum); | ||
| 731 | er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 732 | LPROPS_INDEX | LPROPS_TAKEN, 0); | ||
| 733 | if (!err) | ||
| 734 | err = er; | ||
| 735 | } | ||
| 736 | return err; | ||
| 737 | } | ||
| 738 | |||
| 739 | /** | ||
| 740 | * free_idx_lebs - free unused LEBs after commit end. | ||
| 741 | * @c: UBIFS file-system description object | ||
| 742 | * | ||
| 743 | * This function returns %0 on success and a negative error code on failure. | ||
| 744 | */ | ||
| 745 | static int free_idx_lebs(struct ubifs_info *c) | ||
| 746 | { | ||
| 747 | int err; | ||
| 748 | |||
| 749 | err = free_unused_idx_lebs(c); | ||
| 750 | kfree(c->ilebs); | ||
| 751 | c->ilebs = NULL; | ||
| 752 | return err; | ||
| 753 | } | ||
| 754 | |||
| 755 | /** | ||
| 756 | * ubifs_tnc_start_commit - start TNC commit. | ||
| 757 | * @c: UBIFS file-system description object | ||
| 758 | * @zroot: new index root position is returned here | ||
| 759 | * | ||
| 760 | * This function prepares the list of indexing nodes to commit and lays out | ||
| 761 | * their positions on flash. If there is not enough free space it uses the | ||
| 762 | * in-gap commit method. Returns zero in case of success and a negative error | ||
| 763 | * code in case of failure. | ||
| 764 | */ | ||
| 765 | int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) | ||
| 766 | { | ||
| 767 | int err = 0, cnt; | ||
| 768 | |||
| 769 | mutex_lock(&c->tnc_mutex); | ||
| 770 | err = dbg_check_tnc(c, 1); | ||
| 771 | if (err) | ||
| 772 | goto out; | ||
| 773 | cnt = get_znodes_to_commit(c); | ||
| 774 | if (cnt != 0) { | ||
| 775 | int no_space = 0; | ||
| 776 | |||
| 777 | err = alloc_idx_lebs(c, cnt); | ||
| 778 | if (err == -ENOSPC) | ||
| 779 | no_space = 1; | ||
| 780 | else if (err) | ||
| 781 | goto out_free; | ||
| 782 | err = layout_commit(c, no_space, cnt); | ||
| 783 | if (err) | ||
| 784 | goto out_free; | ||
| 785 | ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); | ||
| 786 | err = free_unused_idx_lebs(c); | ||
| 787 | if (err) | ||
| 788 | goto out; | ||
| 789 | } | ||
| 790 | destroy_old_idx(c); | ||
| 791 | memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch)); | ||
| 792 | |||
| 793 | err = ubifs_save_dirty_idx_lnums(c); | ||
| 794 | if (err) | ||
| 795 | goto out; | ||
| 796 | |||
| 797 | spin_lock(&c->space_lock); | ||
| 798 | /* | ||
| 799 | * Although we have not finished committing yet, update size of the | ||
| 800 | * committed index ('c->old_idx_sz') and zero out the index growth | ||
| 801 | * budget. It is OK to do this now, because we've reserved all the | ||
| 802 | * space which is needed to commit the index, and it is save for the | ||
| 803 | * budgeting subsystem to assume the index is already committed, | ||
| 804 | * even though it is not. | ||
| 805 | */ | ||
| 806 | c->old_idx_sz = c->calc_idx_sz; | ||
| 807 | c->budg_uncommitted_idx = 0; | ||
| 808 | spin_unlock(&c->space_lock); | ||
| 809 | mutex_unlock(&c->tnc_mutex); | ||
| 810 | |||
| 811 | dbg_cmt("number of index LEBs %d", c->lst.idx_lebs); | ||
| 812 | dbg_cmt("size of index %llu", c->calc_idx_sz); | ||
| 813 | return err; | ||
| 814 | |||
| 815 | out_free: | ||
| 816 | free_idx_lebs(c); | ||
| 817 | out: | ||
| 818 | mutex_unlock(&c->tnc_mutex); | ||
| 819 | return err; | ||
| 820 | } | ||
| 821 | |||
| 822 | /** | ||
| 823 | * write_index - write index nodes. | ||
| 824 | * @c: UBIFS file-system description object | ||
| 825 | * | ||
| 826 | * This function writes the index nodes whose positions were laid out in the | ||
| 827 | * layout_in_empty_space function. | ||
| 828 | */ | ||
| 829 | static int write_index(struct ubifs_info *c) | ||
| 830 | { | ||
| 831 | struct ubifs_idx_node *idx; | ||
| 832 | struct ubifs_znode *znode, *cnext; | ||
| 833 | int i, lnum, offs, len, next_len, buf_len, buf_offs, used; | ||
| 834 | int avail, wlen, err, lnum_pos = 0; | ||
| 835 | |||
| 836 | cnext = c->enext; | ||
| 837 | if (!cnext) | ||
| 838 | return 0; | ||
| 839 | |||
| 840 | /* | ||
| 841 | * Always write index nodes to the index head so that index nodes and | ||
| 842 | * other types of nodes are never mixed in the same erase block. | ||
| 843 | */ | ||
| 844 | lnum = c->ihead_lnum; | ||
| 845 | buf_offs = c->ihead_offs; | ||
| 846 | |||
| 847 | /* Allocate commit buffer */ | ||
| 848 | buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size); | ||
| 849 | used = 0; | ||
| 850 | avail = buf_len; | ||
| 851 | |||
| 852 | /* Ensure there is enough room for first write */ | ||
| 853 | next_len = ubifs_idx_node_sz(c, cnext->child_cnt); | ||
| 854 | if (buf_offs + next_len > c->leb_size) { | ||
| 855 | err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0, | ||
| 856 | LPROPS_TAKEN); | ||
| 857 | if (err) | ||
| 858 | return err; | ||
| 859 | lnum = -1; | ||
| 860 | } | ||
| 861 | |||
| 862 | while (1) { | ||
| 863 | cond_resched(); | ||
| 864 | |||
| 865 | znode = cnext; | ||
| 866 | idx = c->cbuf + used; | ||
| 867 | |||
| 868 | /* Make index node */ | ||
| 869 | idx->ch.node_type = UBIFS_IDX_NODE; | ||
| 870 | idx->child_cnt = cpu_to_le16(znode->child_cnt); | ||
| 871 | idx->level = cpu_to_le16(znode->level); | ||
| 872 | for (i = 0; i < znode->child_cnt; i++) { | ||
| 873 | struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); | ||
| 874 | struct ubifs_zbranch *zbr = &znode->zbranch[i]; | ||
| 875 | |||
| 876 | key_write_idx(c, &zbr->key, &br->key); | ||
| 877 | br->lnum = cpu_to_le32(zbr->lnum); | ||
| 878 | br->offs = cpu_to_le32(zbr->offs); | ||
| 879 | br->len = cpu_to_le32(zbr->len); | ||
| 880 | if (!zbr->lnum || !zbr->len) { | ||
| 881 | ubifs_err("bad ref in znode"); | ||
| 882 | dbg_dump_znode(c, znode); | ||
| 883 | if (zbr->znode) | ||
| 884 | dbg_dump_znode(c, zbr->znode); | ||
| 885 | } | ||
| 886 | } | ||
| 887 | len = ubifs_idx_node_sz(c, znode->child_cnt); | ||
| 888 | ubifs_prepare_node(c, idx, len, 0); | ||
| 889 | |||
| 890 | /* Determine the index node position */ | ||
| 891 | if (lnum == -1) { | ||
| 892 | lnum = c->ilebs[lnum_pos++]; | ||
| 893 | buf_offs = 0; | ||
| 894 | used = 0; | ||
| 895 | avail = buf_len; | ||
| 896 | } | ||
| 897 | offs = buf_offs + used; | ||
| 898 | |||
| 899 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 900 | if (lnum != znode->lnum || offs != znode->offs || | ||
| 901 | len != znode->len) { | ||
| 902 | ubifs_err("inconsistent znode posn"); | ||
| 903 | return -EINVAL; | ||
| 904 | } | ||
| 905 | #endif | ||
| 906 | |||
| 907 | /* Grab some stuff from znode while we still can */ | ||
| 908 | cnext = znode->cnext; | ||
| 909 | |||
| 910 | ubifs_assert(ubifs_zn_dirty(znode)); | ||
| 911 | ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); | ||
| 912 | |||
| 913 | /* | ||
| 914 | * It is important that other threads should see %DIRTY_ZNODE | ||
| 915 | * flag cleared before %COW_ZNODE. Specifically, it matters in | ||
| 916 | * the 'dirty_cow_znode()' function. This is the reason for the | ||
| 917 | * first barrier. Also, we want the bit changes to be seen to | ||
| 918 | * other threads ASAP, to avoid unnecesarry copying, which is | ||
| 919 | * the reason for the second barrier. | ||
| 920 | */ | ||
| 921 | clear_bit(DIRTY_ZNODE, &znode->flags); | ||
| 922 | smp_mb__before_clear_bit(); | ||
| 923 | clear_bit(COW_ZNODE, &znode->flags); | ||
| 924 | smp_mb__after_clear_bit(); | ||
| 925 | |||
| 926 | /* Do not access znode from this point on */ | ||
| 927 | |||
| 928 | /* Update buffer positions */ | ||
| 929 | wlen = used + len; | ||
| 930 | used += ALIGN(len, 8); | ||
| 931 | avail -= ALIGN(len, 8); | ||
| 932 | |||
| 933 | /* | ||
| 934 | * Calculate the next index node length to see if there is | ||
| 935 | * enough room for it | ||
| 936 | */ | ||
| 937 | if (cnext == c->cnext) | ||
| 938 | next_len = 0; | ||
| 939 | else | ||
| 940 | next_len = ubifs_idx_node_sz(c, cnext->child_cnt); | ||
| 941 | |||
| 942 | if (c->min_io_size == 1) { | ||
| 943 | /* | ||
| 944 | * Write the prepared index node immediately if there is | ||
| 945 | * no minimum IO size | ||
| 946 | */ | ||
| 947 | err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, | ||
| 948 | wlen, UBI_SHORTTERM); | ||
| 949 | if (err) | ||
| 950 | return err; | ||
| 951 | buf_offs += ALIGN(wlen, 8); | ||
| 952 | if (next_len) { | ||
| 953 | used = 0; | ||
| 954 | avail = buf_len; | ||
| 955 | if (buf_offs + next_len > c->leb_size) { | ||
| 956 | err = ubifs_update_one_lp(c, lnum, | ||
| 957 | LPROPS_NC, 0, 0, LPROPS_TAKEN); | ||
| 958 | if (err) | ||
| 959 | return err; | ||
| 960 | lnum = -1; | ||
| 961 | } | ||
| 962 | continue; | ||
| 963 | } | ||
| 964 | } else { | ||
| 965 | int blen, nxt_offs = buf_offs + used + next_len; | ||
| 966 | |||
| 967 | if (next_len && nxt_offs <= c->leb_size) { | ||
| 968 | if (avail > 0) | ||
| 969 | continue; | ||
| 970 | else | ||
| 971 | blen = buf_len; | ||
| 972 | } else { | ||
| 973 | wlen = ALIGN(wlen, 8); | ||
| 974 | blen = ALIGN(wlen, c->min_io_size); | ||
| 975 | ubifs_pad(c, c->cbuf + wlen, blen - wlen); | ||
| 976 | } | ||
| 977 | /* | ||
| 978 | * The buffer is full or there are no more znodes | ||
| 979 | * to do | ||
| 980 | */ | ||
| 981 | err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, | ||
| 982 | blen, UBI_SHORTTERM); | ||
| 983 | if (err) | ||
| 984 | return err; | ||
| 985 | buf_offs += blen; | ||
| 986 | if (next_len) { | ||
| 987 | if (nxt_offs > c->leb_size) { | ||
| 988 | err = ubifs_update_one_lp(c, lnum, | ||
| 989 | LPROPS_NC, 0, 0, LPROPS_TAKEN); | ||
| 990 | if (err) | ||
| 991 | return err; | ||
| 992 | lnum = -1; | ||
| 993 | } | ||
| 994 | used -= blen; | ||
| 995 | if (used < 0) | ||
| 996 | used = 0; | ||
| 997 | avail = buf_len - used; | ||
| 998 | memmove(c->cbuf, c->cbuf + blen, used); | ||
| 999 | continue; | ||
| 1000 | } | ||
| 1001 | } | ||
| 1002 | break; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 1006 | if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { | ||
| 1007 | ubifs_err("inconsistent ihead"); | ||
| 1008 | return -EINVAL; | ||
| 1009 | } | ||
| 1010 | #endif | ||
| 1011 | |||
| 1012 | c->ihead_lnum = lnum; | ||
| 1013 | c->ihead_offs = buf_offs; | ||
| 1014 | |||
| 1015 | return 0; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | /** | ||
| 1019 | * free_obsolete_znodes - free obsolete znodes. | ||
| 1020 | * @c: UBIFS file-system description object | ||
| 1021 | * | ||
| 1022 | * At the end of commit end, obsolete znodes are freed. | ||
| 1023 | */ | ||
| 1024 | static void free_obsolete_znodes(struct ubifs_info *c) | ||
| 1025 | { | ||
| 1026 | struct ubifs_znode *znode, *cnext; | ||
| 1027 | |||
| 1028 | cnext = c->cnext; | ||
| 1029 | do { | ||
| 1030 | znode = cnext; | ||
| 1031 | cnext = znode->cnext; | ||
| 1032 | if (test_bit(OBSOLETE_ZNODE, &znode->flags)) | ||
| 1033 | kfree(znode); | ||
| 1034 | else { | ||
| 1035 | znode->cnext = NULL; | ||
| 1036 | atomic_long_inc(&c->clean_zn_cnt); | ||
| 1037 | atomic_long_inc(&ubifs_clean_zn_cnt); | ||
| 1038 | } | ||
| 1039 | } while (cnext != c->cnext); | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | /** | ||
| 1043 | * return_gap_lebs - return LEBs used by the in-gap commit method. | ||
| 1044 | * @c: UBIFS file-system description object | ||
| 1045 | * | ||
| 1046 | * This function clears the "taken" flag for the LEBs which were used by the | ||
| 1047 | * "commit in-the-gaps" method. | ||
| 1048 | */ | ||
| 1049 | static int return_gap_lebs(struct ubifs_info *c) | ||
| 1050 | { | ||
| 1051 | int *p, err; | ||
| 1052 | |||
| 1053 | if (!c->gap_lebs) | ||
| 1054 | return 0; | ||
| 1055 | |||
| 1056 | dbg_cmt(""); | ||
| 1057 | for (p = c->gap_lebs; *p != -1; p++) { | ||
| 1058 | err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0, | ||
| 1059 | LPROPS_TAKEN, 0); | ||
| 1060 | if (err) | ||
| 1061 | return err; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | kfree(c->gap_lebs); | ||
| 1065 | c->gap_lebs = NULL; | ||
| 1066 | return 0; | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | /** | ||
| 1070 | * ubifs_tnc_end_commit - update the TNC for commit end. | ||
| 1071 | * @c: UBIFS file-system description object | ||
| 1072 | * | ||
| 1073 | * Write the dirty znodes. | ||
| 1074 | */ | ||
| 1075 | int ubifs_tnc_end_commit(struct ubifs_info *c) | ||
| 1076 | { | ||
| 1077 | int err; | ||
| 1078 | |||
| 1079 | if (!c->cnext) | ||
| 1080 | return 0; | ||
| 1081 | |||
| 1082 | err = return_gap_lebs(c); | ||
| 1083 | if (err) | ||
| 1084 | return err; | ||
| 1085 | |||
| 1086 | err = write_index(c); | ||
| 1087 | if (err) | ||
| 1088 | return err; | ||
| 1089 | |||
| 1090 | mutex_lock(&c->tnc_mutex); | ||
| 1091 | |||
| 1092 | dbg_cmt("TNC height is %d", c->zroot.znode->level + 1); | ||
| 1093 | |||
| 1094 | free_obsolete_znodes(c); | ||
| 1095 | |||
| 1096 | c->cnext = NULL; | ||
| 1097 | kfree(c->ilebs); | ||
| 1098 | c->ilebs = NULL; | ||
| 1099 | |||
| 1100 | mutex_unlock(&c->tnc_mutex); | ||
| 1101 | |||
| 1102 | return 0; | ||
| 1103 | } | ||
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c new file mode 100644 index 000000000000..a25c1cc1f8d9 --- /dev/null +++ b/fs/ubifs/tnc_misc.c | |||
| @@ -0,0 +1,494 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Adrian Hunter | ||
| 20 | * Artem Bityutskiy (Битюцкий Артём) | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
 * This file contains miscellaneous TNC-related functions shared between
 * different files. This file does not form any logically separate TNC
| 26 | * sub-system. The file was created because there is a lot of TNC code and | ||
| 27 | * putting it all in one file would make that file too big and unreadable. | ||
| 28 | */ | ||
| 29 | |||
| 30 | #include "ubifs.h" | ||
| 31 | |||
| 32 | /** | ||
| 33 | * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal. | ||
| 34 | * @zr: root of the subtree to traverse | ||
| 35 | * @znode: previous znode | ||
| 36 | * | ||
| 37 | * This function implements levelorder TNC traversal. The LNC is ignored. | ||
| 38 | * Returns the next element or %NULL if @znode is already the last one. | ||
| 39 | */ | ||
| 40 | struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, | ||
| 41 | struct ubifs_znode *znode) | ||
| 42 | { | ||
| 43 | int level, iip, level_search = 0; | ||
| 44 | struct ubifs_znode *zn; | ||
| 45 | |||
| 46 | ubifs_assert(zr); | ||
| 47 | |||
| 48 | if (unlikely(!znode)) | ||
| 49 | return zr; | ||
| 50 | |||
| 51 | if (unlikely(znode == zr)) { | ||
| 52 | if (znode->level == 0) | ||
| 53 | return NULL; | ||
| 54 | return ubifs_tnc_find_child(zr, 0); | ||
| 55 | } | ||
| 56 | |||
| 57 | level = znode->level; | ||
| 58 | |||
| 59 | iip = znode->iip; | ||
| 60 | while (1) { | ||
| 61 | ubifs_assert(znode->level <= zr->level); | ||
| 62 | |||
| 63 | /* | ||
| 64 | * First walk up until there is a znode with next branch to | ||
| 65 | * look at. | ||
| 66 | */ | ||
| 67 | while (znode->parent != zr && iip >= znode->parent->child_cnt) { | ||
| 68 | znode = znode->parent; | ||
| 69 | iip = znode->iip; | ||
| 70 | } | ||
| 71 | |||
| 72 | if (unlikely(znode->parent == zr && | ||
| 73 | iip >= znode->parent->child_cnt)) { | ||
| 74 | /* This level is done, switch to the lower one */ | ||
| 75 | level -= 1; | ||
| 76 | if (level_search || level < 0) | ||
| 77 | /* | ||
| 78 | * We were already looking for znode at lower | ||
| 79 | * level ('level_search'). As we are here | ||
| 80 | * again, it just does not exist. Or all levels | ||
| 81 | * were finished ('level < 0'). | ||
| 82 | */ | ||
| 83 | return NULL; | ||
| 84 | |||
| 85 | level_search = 1; | ||
| 86 | iip = -1; | ||
| 87 | znode = ubifs_tnc_find_child(zr, 0); | ||
| 88 | ubifs_assert(znode); | ||
| 89 | } | ||
| 90 | |||
| 91 | /* Switch to the next index */ | ||
| 92 | zn = ubifs_tnc_find_child(znode->parent, iip + 1); | ||
| 93 | if (!zn) { | ||
| 94 | /* No more children to look at, we have walk up */ | ||
| 95 | iip = znode->parent->child_cnt; | ||
| 96 | continue; | ||
| 97 | } | ||
| 98 | |||
| 99 | /* Walk back down to the level we came from ('level') */ | ||
| 100 | while (zn->level != level) { | ||
| 101 | znode = zn; | ||
| 102 | zn = ubifs_tnc_find_child(zn, 0); | ||
| 103 | if (!zn) { | ||
| 104 | /* | ||
| 105 | * This path is not too deep so it does not | ||
| 106 | * reach 'level'. Try next path. | ||
| 107 | */ | ||
| 108 | iip = znode->iip; | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | if (zn) { | ||
| 114 | ubifs_assert(zn->level >= 0); | ||
| 115 | return zn; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | /** | ||
| 121 | * ubifs_search_zbranch - search znode branch. | ||
| 122 | * @c: UBIFS file-system description object | ||
| 123 | * @znode: znode to search in | ||
| 124 | * @key: key to search for | ||
| 125 | * @n: znode branch slot number is returned here | ||
| 126 | * | ||
| 127 | * This is a helper function which search branch with key @key in @znode using | ||
| 128 | * binary search. The result of the search may be: | ||
| 129 | * o exact match, then %1 is returned, and the slot number of the branch is | ||
| 130 | * stored in @n; | ||
| 131 | * o no exact match, then %0 is returned and the slot number of the left | ||
| 132 | * closest branch is returned in @n; the slot if all keys in this znode are | ||
| 133 | * greater than @key, then %-1 is returned in @n. | ||
| 134 | */ | ||
| 135 | int ubifs_search_zbranch(const struct ubifs_info *c, | ||
| 136 | const struct ubifs_znode *znode, | ||
| 137 | const union ubifs_key *key, int *n) | ||
| 138 | { | ||
| 139 | int beg = 0, end = znode->child_cnt, uninitialized_var(mid); | ||
| 140 | int uninitialized_var(cmp); | ||
| 141 | const struct ubifs_zbranch *zbr = &znode->zbranch[0]; | ||
| 142 | |||
| 143 | ubifs_assert(end > beg); | ||
| 144 | |||
| 145 | while (end > beg) { | ||
| 146 | mid = (beg + end) >> 1; | ||
| 147 | cmp = keys_cmp(c, key, &zbr[mid].key); | ||
| 148 | if (cmp > 0) | ||
| 149 | beg = mid + 1; | ||
| 150 | else if (cmp < 0) | ||
| 151 | end = mid; | ||
| 152 | else { | ||
| 153 | *n = mid; | ||
| 154 | return 1; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | *n = end - 1; | ||
| 159 | |||
| 160 | /* The insert point is after *n */ | ||
| 161 | ubifs_assert(*n >= -1 && *n < znode->child_cnt); | ||
| 162 | if (*n == -1) | ||
| 163 | ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0); | ||
| 164 | else | ||
| 165 | ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0); | ||
| 166 | if (*n + 1 < znode->child_cnt) | ||
| 167 | ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0); | ||
| 168 | |||
| 169 | return 0; | ||
| 170 | } | ||
| 171 | |||
| 172 | /** | ||
| 173 | * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal. | ||
| 174 | * @znode: znode to start at (root of the sub-tree to traverse) | ||
| 175 | * | ||
| 176 | * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is | ||
| 177 | * ignored. | ||
| 178 | */ | ||
| 179 | struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode) | ||
| 180 | { | ||
| 181 | if (unlikely(!znode)) | ||
| 182 | return NULL; | ||
| 183 | |||
| 184 | while (znode->level > 0) { | ||
| 185 | struct ubifs_znode *child; | ||
| 186 | |||
| 187 | child = ubifs_tnc_find_child(znode, 0); | ||
| 188 | if (!child) | ||
| 189 | return znode; | ||
| 190 | znode = child; | ||
| 191 | } | ||
| 192 | |||
| 193 | return znode; | ||
| 194 | } | ||
| 195 | |||
| 196 | /** | ||
| 197 | * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal. | ||
| 198 | * @znode: previous znode | ||
| 199 | * | ||
| 200 | * This function implements postorder TNC traversal. The LNC is ignored. | ||
| 201 | * Returns the next element or %NULL if @znode is already the last one. | ||
| 202 | */ | ||
| 203 | struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) | ||
| 204 | { | ||
| 205 | struct ubifs_znode *zn; | ||
| 206 | |||
| 207 | ubifs_assert(znode); | ||
| 208 | if (unlikely(!znode->parent)) | ||
| 209 | return NULL; | ||
| 210 | |||
| 211 | /* Switch to the next index in the parent */ | ||
| 212 | zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1); | ||
| 213 | if (!zn) | ||
| 214 | /* This is in fact the last child, return parent */ | ||
| 215 | return znode->parent; | ||
| 216 | |||
| 217 | /* Go to the first znode in this new subtree */ | ||
| 218 | return ubifs_tnc_postorder_first(zn); | ||
| 219 | } | ||
| 220 | |||
| 221 | /** | ||
| 222 | * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree. | ||
| 223 | * @znode: znode defining subtree to destroy | ||
| 224 | * | ||
| 225 | * This function destroys subtree of the TNC tree. Returns number of clean | ||
| 226 | * znodes in the subtree. | ||
| 227 | */ | ||
| 228 | long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode) | ||
| 229 | { | ||
| 230 | struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode); | ||
| 231 | long clean_freed = 0; | ||
| 232 | int n; | ||
| 233 | |||
| 234 | ubifs_assert(zn); | ||
| 235 | while (1) { | ||
| 236 | for (n = 0; n < zn->child_cnt; n++) { | ||
| 237 | if (!zn->zbranch[n].znode) | ||
| 238 | continue; | ||
| 239 | |||
| 240 | if (zn->level > 0 && | ||
| 241 | !ubifs_zn_dirty(zn->zbranch[n].znode)) | ||
| 242 | clean_freed += 1; | ||
| 243 | |||
| 244 | cond_resched(); | ||
| 245 | kfree(zn->zbranch[n].znode); | ||
| 246 | } | ||
| 247 | |||
| 248 | if (zn == znode) { | ||
| 249 | if (!ubifs_zn_dirty(zn)) | ||
| 250 | clean_freed += 1; | ||
| 251 | kfree(zn); | ||
| 252 | return clean_freed; | ||
| 253 | } | ||
| 254 | |||
| 255 | zn = ubifs_tnc_postorder_next(zn); | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | /** | ||
| 260 | * read_znode - read an indexing node from flash and fill znode. | ||
| 261 | * @c: UBIFS file-system description object | ||
| 262 | * @lnum: LEB of the indexing node to read | ||
| 263 | * @offs: node offset | ||
| 264 | * @len: node length | ||
| 265 | * @znode: znode to read to | ||
| 266 | * | ||
| 267 | * This function reads an indexing node from the flash media and fills znode | ||
| 268 | * with the read data. Returns zero in case of success and a negative error | ||
| 269 | * code in case of failure. The read indexing node is validated and if anything | ||
| 270 | * is wrong with it, this function prints complaint messages and returns | ||
| 271 | * %-EINVAL. | ||
| 272 | */ | ||
| 273 | static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, | ||
| 274 | struct ubifs_znode *znode) | ||
| 275 | { | ||
| 276 | int i, err, type, cmp; | ||
| 277 | struct ubifs_idx_node *idx; | ||
| 278 | |||
| 279 | idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); | ||
| 280 | if (!idx) | ||
| 281 | return -ENOMEM; | ||
| 282 | |||
| 283 | err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); | ||
| 284 | if (err < 0) { | ||
| 285 | kfree(idx); | ||
| 286 | return err; | ||
| 287 | } | ||
| 288 | |||
| 289 | znode->child_cnt = le16_to_cpu(idx->child_cnt); | ||
| 290 | znode->level = le16_to_cpu(idx->level); | ||
| 291 | |||
| 292 | dbg_tnc("LEB %d:%d, level %d, %d branch", | ||
| 293 | lnum, offs, znode->level, znode->child_cnt); | ||
| 294 | |||
| 295 | if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { | ||
| 296 | dbg_err("current fanout %d, branch count %d", | ||
| 297 | c->fanout, znode->child_cnt); | ||
| 298 | dbg_err("max levels %d, znode level %d", | ||
| 299 | UBIFS_MAX_LEVELS, znode->level); | ||
| 300 | err = 1; | ||
| 301 | goto out_dump; | ||
| 302 | } | ||
| 303 | |||
| 304 | for (i = 0; i < znode->child_cnt; i++) { | ||
| 305 | const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); | ||
| 306 | struct ubifs_zbranch *zbr = &znode->zbranch[i]; | ||
| 307 | |||
| 308 | key_read(c, &br->key, &zbr->key); | ||
| 309 | zbr->lnum = le32_to_cpu(br->lnum); | ||
| 310 | zbr->offs = le32_to_cpu(br->offs); | ||
| 311 | zbr->len = le32_to_cpu(br->len); | ||
| 312 | zbr->znode = NULL; | ||
| 313 | |||
| 314 | /* Validate branch */ | ||
| 315 | |||
| 316 | if (zbr->lnum < c->main_first || | ||
| 317 | zbr->lnum >= c->leb_cnt || zbr->offs < 0 || | ||
| 318 | zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { | ||
| 319 | dbg_err("bad branch %d", i); | ||
| 320 | err = 2; | ||
| 321 | goto out_dump; | ||
| 322 | } | ||
| 323 | |||
| 324 | switch (key_type(c, &zbr->key)) { | ||
| 325 | case UBIFS_INO_KEY: | ||
| 326 | case UBIFS_DATA_KEY: | ||
| 327 | case UBIFS_DENT_KEY: | ||
| 328 | case UBIFS_XENT_KEY: | ||
| 329 | break; | ||
| 330 | default: | ||
| 331 | dbg_msg("bad key type at slot %d: %s", i, | ||
| 332 | DBGKEY(&zbr->key)); | ||
| 333 | err = 3; | ||
| 334 | goto out_dump; | ||
| 335 | } | ||
| 336 | |||
| 337 | if (znode->level) | ||
| 338 | continue; | ||
| 339 | |||
| 340 | type = key_type(c, &zbr->key); | ||
| 341 | if (c->ranges[type].max_len == 0) { | ||
| 342 | if (zbr->len != c->ranges[type].len) { | ||
| 343 | dbg_err("bad target node (type %d) length (%d)", | ||
| 344 | type, zbr->len); | ||
| 345 | dbg_err("have to be %d", c->ranges[type].len); | ||
| 346 | err = 4; | ||
| 347 | goto out_dump; | ||
| 348 | } | ||
| 349 | } else if (zbr->len < c->ranges[type].min_len || | ||
| 350 | zbr->len > c->ranges[type].max_len) { | ||
| 351 | dbg_err("bad target node (type %d) length (%d)", | ||
| 352 | type, zbr->len); | ||
| 353 | dbg_err("have to be in range of %d-%d", | ||
| 354 | c->ranges[type].min_len, | ||
| 355 | c->ranges[type].max_len); | ||
| 356 | err = 5; | ||
| 357 | goto out_dump; | ||
| 358 | } | ||
| 359 | } | ||
| 360 | |||
| 361 | /* | ||
| 362 | * Ensure that the next key is greater or equivalent to the | ||
| 363 | * previous one. | ||
| 364 | */ | ||
| 365 | for (i = 0; i < znode->child_cnt - 1; i++) { | ||
| 366 | const union ubifs_key *key1, *key2; | ||
| 367 | |||
| 368 | key1 = &znode->zbranch[i].key; | ||
| 369 | key2 = &znode->zbranch[i + 1].key; | ||
| 370 | |||
| 371 | cmp = keys_cmp(c, key1, key2); | ||
| 372 | if (cmp > 0) { | ||
| 373 | dbg_err("bad key order (keys %d and %d)", i, i + 1); | ||
| 374 | err = 6; | ||
| 375 | goto out_dump; | ||
| 376 | } else if (cmp == 0 && !is_hash_key(c, key1)) { | ||
| 377 | /* These can only be keys with colliding hash */ | ||
| 378 | dbg_err("keys %d and %d are not hashed but equivalent", | ||
| 379 | i, i + 1); | ||
| 380 | err = 7; | ||
| 381 | goto out_dump; | ||
| 382 | } | ||
| 383 | } | ||
| 384 | |||
| 385 | kfree(idx); | ||
| 386 | return 0; | ||
| 387 | |||
| 388 | out_dump: | ||
| 389 | ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); | ||
| 390 | dbg_dump_node(c, idx); | ||
| 391 | kfree(idx); | ||
| 392 | return -EINVAL; | ||
| 393 | } | ||
| 394 | |||
| 395 | /** | ||
| 396 | * ubifs_load_znode - load znode to TNC cache. | ||
| 397 | * @c: UBIFS file-system description object | ||
| 398 | * @zbr: znode branch | ||
| 399 | * @parent: znode's parent | ||
| 400 | * @iip: index in parent | ||
| 401 | * | ||
| 402 | * This function loads znode pointed to by @zbr into the TNC cache and | ||
| 403 | * returns pointer to it in case of success and a negative error code in case | ||
| 404 | * of failure. | ||
| 405 | */ | ||
| 406 | struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, | ||
| 407 | struct ubifs_zbranch *zbr, | ||
| 408 | struct ubifs_znode *parent, int iip) | ||
| 409 | { | ||
| 410 | int err; | ||
| 411 | struct ubifs_znode *znode; | ||
| 412 | |||
| 413 | ubifs_assert(!zbr->znode); | ||
| 414 | /* | ||
| 415 | * A slab cache is not presently used for znodes because the znode size | ||
| 416 | * depends on the fanout which is stored in the superblock. | ||
| 417 | */ | ||
| 418 | znode = kzalloc(c->max_znode_sz, GFP_NOFS); | ||
| 419 | if (!znode) | ||
| 420 | return ERR_PTR(-ENOMEM); | ||
| 421 | |||
| 422 | err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode); | ||
| 423 | if (err) | ||
| 424 | goto out; | ||
| 425 | |||
| 426 | atomic_long_inc(&c->clean_zn_cnt); | ||
| 427 | |||
| 428 | /* | ||
| 429 | * Increment the global clean znode counter as well. It is OK that | ||
| 430 | * global and per-FS clean znode counters may be inconsistent for some | ||
| 431 | * short time (because we might be preempted at this point), the global | ||
| 432 | * one is only used in shrinker. | ||
| 433 | */ | ||
| 434 | atomic_long_inc(&ubifs_clean_zn_cnt); | ||
| 435 | |||
| 436 | zbr->znode = znode; | ||
| 437 | znode->parent = parent; | ||
| 438 | znode->time = get_seconds(); | ||
| 439 | znode->iip = iip; | ||
| 440 | |||
| 441 | return znode; | ||
| 442 | |||
| 443 | out: | ||
| 444 | kfree(znode); | ||
| 445 | return ERR_PTR(err); | ||
| 446 | } | ||
| 447 | |||
| 448 | /** | ||
| 449 | * ubifs_tnc_read_node - read a leaf node from the flash media. | ||
| 450 | * @c: UBIFS file-system description object | ||
| 451 | * @zbr: key and position of the node | ||
| 452 | * @node: node is returned here | ||
| 453 | * | ||
| 454 | * This function reads a node defined by @zbr from the flash media. Returns | ||
| 455 | * zero in case of success or a negative negative error code in case of | ||
| 456 | * failure. | ||
| 457 | */ | ||
| 458 | int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, | ||
| 459 | void *node) | ||
| 460 | { | ||
| 461 | union ubifs_key key1, *key = &zbr->key; | ||
| 462 | int err, type = key_type(c, key); | ||
| 463 | struct ubifs_wbuf *wbuf; | ||
| 464 | |||
| 465 | /* | ||
| 466 | * 'zbr' has to point to on-flash node. The node may sit in a bud and | ||
| 467 | * may even be in a write buffer, so we have to take care about this. | ||
| 468 | */ | ||
| 469 | wbuf = ubifs_get_wbuf(c, zbr->lnum); | ||
| 470 | if (wbuf) | ||
| 471 | err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len, | ||
| 472 | zbr->lnum, zbr->offs); | ||
| 473 | else | ||
| 474 | err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, | ||
| 475 | zbr->offs); | ||
| 476 | |||
| 477 | if (err) { | ||
| 478 | dbg_tnc("key %s", DBGKEY(key)); | ||
| 479 | return err; | ||
| 480 | } | ||
| 481 | |||
| 482 | /* Make sure the key of the read node is correct */ | ||
| 483 | key_read(c, key, &key1); | ||
| 484 | if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { | ||
| 485 | ubifs_err("bad key in node at LEB %d:%d", | ||
| 486 | zbr->lnum, zbr->offs); | ||
| 487 | dbg_tnc("looked for key %s found node's key %s", | ||
| 488 | DBGKEY(key), DBGKEY1(&key1)); | ||
| 489 | dbg_dump_node(c, node); | ||
| 490 | return -EINVAL; | ||
| 491 | } | ||
| 492 | |||
| 493 | return 0; | ||
| 494 | } | ||
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h new file mode 100644 index 000000000000..0cc7da9bed47 --- /dev/null +++ b/fs/ubifs/ubifs-media.h | |||
| @@ -0,0 +1,745 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file describes UBIFS on-flash format and contains definitions of all the | ||
| 25 | * relevant data structures and constants. | ||
| 26 | * | ||
| 27 | * All UBIFS on-flash objects are stored in the form of nodes. All nodes start | ||
| 28 | * with the UBIFS node magic number and have the same common header. Nodes | ||
| 29 | * always sit at 8-byte aligned positions on the media and node header sizes are | ||
| 30 | * also 8-byte aligned (except for the indexing node and the padding node). | ||
| 31 | */ | ||
| 32 | |||
| 33 | #ifndef __UBIFS_MEDIA_H__ | ||
| 34 | #define __UBIFS_MEDIA_H__ | ||
| 35 | |||
| 36 | /* UBIFS node magic number (must not have the padding byte first or last) */ | ||
| 37 | #define UBIFS_NODE_MAGIC 0x06101831 | ||
| 38 | |||
| 39 | /* UBIFS on-flash format version */ | ||
| 40 | #define UBIFS_FORMAT_VERSION 4 | ||
| 41 | |||
| 42 | /* Minimum logical eraseblock size in bytes */ | ||
| 43 | #define UBIFS_MIN_LEB_SZ (15*1024) | ||
| 44 | |||
| 45 | /* Initial CRC32 value used when calculating CRC checksums */ | ||
| 46 | #define UBIFS_CRC32_INIT 0xFFFFFFFFU | ||
| 47 | |||
| 48 | /* | ||
| 49 | * UBIFS does not try to compress data if its length is less than the below | ||
| 50 | * constant. | ||
| 51 | */ | ||
| 52 | #define UBIFS_MIN_COMPR_LEN 128 | ||
| 53 | |||
| 54 | /* Root inode number */ | ||
| 55 | #define UBIFS_ROOT_INO 1 | ||
| 56 | |||
| 57 | /* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ | ||
| 58 | #define UBIFS_FIRST_INO 64 | ||
| 59 | |||
| 60 | /* | ||
| 61 | * Maximum file name and extended attribute length (must be a multiple of 8, | ||
| 62 | * minus 1). | ||
| 63 | */ | ||
| 64 | #define UBIFS_MAX_NLEN 255 | ||
| 65 | |||
| 66 | /* Maximum number of data journal heads */ | ||
| 67 | #define UBIFS_MAX_JHEADS 1 | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, | ||
| 71 | * which means that it does not treat the underlying media as consisting of | ||
| 72 | * blocks like in case of hard drives. Do not be confused. UBIFS block is just | ||
| 73 | * the maximum amount of data which one data node can have or which can be | ||
| 74 | * attached to an inode node. | ||
| 75 | */ | ||
| 76 | #define UBIFS_BLOCK_SIZE 4096 | ||
| 77 | #define UBIFS_BLOCK_SHIFT 12 | ||
| 78 | #define UBIFS_BLOCK_MASK 0x00000FFF | ||
| 79 | |||
| 80 | /* UBIFS padding byte pattern (must not be first or last byte of node magic) */ | ||
| 81 | #define UBIFS_PADDING_BYTE 0xCE | ||
| 82 | |||
| 83 | /* Maximum possible key length */ | ||
| 84 | #define UBIFS_MAX_KEY_LEN 16 | ||
| 85 | |||
| 86 | /* Key length ("simple" format) */ | ||
| 87 | #define UBIFS_SK_LEN 8 | ||
| 88 | |||
| 89 | /* Minimum index tree fanout */ | ||
| 90 | #define UBIFS_MIN_FANOUT 2 | ||
| 91 | |||
| 92 | /* Maximum number of levels in UBIFS indexing B-tree */ | ||
| 93 | #define UBIFS_MAX_LEVELS 512 | ||
| 94 | |||
| 95 | /* Maximum amount of data attached to an inode in bytes */ | ||
| 96 | #define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE | ||
| 97 | |||
| 98 | /* LEB Properties Tree fanout (must be power of 2) and fanout shift */ | ||
| 99 | #define UBIFS_LPT_FANOUT 4 | ||
| 100 | #define UBIFS_LPT_FANOUT_SHIFT 2 | ||
| 101 | |||
| 102 | /* LEB Properties Tree bit field sizes */ | ||
| 103 | #define UBIFS_LPT_CRC_BITS 16 | ||
| 104 | #define UBIFS_LPT_CRC_BYTES 2 | ||
| 105 | #define UBIFS_LPT_TYPE_BITS 4 | ||
| 106 | |||
| 107 | /* The key is always at the same position in all keyed nodes */ | ||
| 108 | #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) | ||
| 109 | |||
| 110 | /* | ||
| 111 | * LEB Properties Tree node types. | ||
| 112 | * | ||
| 113 | * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) | ||
| 114 | * UBIFS_LPT_NNODE: LPT internal node | ||
| 115 | * UBIFS_LPT_LTAB: LPT's own lprops table | ||
| 116 | * UBIFS_LPT_LSAVE: LPT's save table (big model only) | ||
| 117 | * UBIFS_LPT_NODE_CNT: count of LPT node types | ||
| 118 | * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type | ||
| 119 | */ | ||
| 120 | enum { | ||
| 121 | UBIFS_LPT_PNODE, | ||
| 122 | UBIFS_LPT_NNODE, | ||
| 123 | UBIFS_LPT_LTAB, | ||
| 124 | UBIFS_LPT_LSAVE, | ||
| 125 | UBIFS_LPT_NODE_CNT, | ||
| 126 | UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, | ||
| 127 | }; | ||
| 128 | |||
| 129 | /* | ||
| 130 | * UBIFS inode types. | ||
| 131 | * | ||
| 132 | * UBIFS_ITYPE_REG: regular file | ||
| 133 | * UBIFS_ITYPE_DIR: directory | ||
| 134 | * UBIFS_ITYPE_LNK: soft link | ||
| 135 | * UBIFS_ITYPE_BLK: block device node | ||
| 136 | * UBIFS_ITYPE_CHR: character device node | ||
| 137 | * UBIFS_ITYPE_FIFO: fifo | ||
| 138 | * UBIFS_ITYPE_SOCK: socket | ||
| 139 | * UBIFS_ITYPES_CNT: count of supported file types | ||
| 140 | */ | ||
| 141 | enum { | ||
| 142 | UBIFS_ITYPE_REG, | ||
| 143 | UBIFS_ITYPE_DIR, | ||
| 144 | UBIFS_ITYPE_LNK, | ||
| 145 | UBIFS_ITYPE_BLK, | ||
| 146 | UBIFS_ITYPE_CHR, | ||
| 147 | UBIFS_ITYPE_FIFO, | ||
| 148 | UBIFS_ITYPE_SOCK, | ||
| 149 | UBIFS_ITYPES_CNT, | ||
| 150 | }; | ||
| 151 | |||
| 152 | /* | ||
| 153 | * Supported key hash functions. | ||
| 154 | * | ||
| 155 | * UBIFS_KEY_HASH_R5: R5 hash | ||
| 156 | * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name | ||
| 157 | */ | ||
| 158 | enum { | ||
| 159 | UBIFS_KEY_HASH_R5, | ||
| 160 | UBIFS_KEY_HASH_TEST, | ||
| 161 | }; | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Supported key formats. | ||
| 165 | * | ||
| 166 | * UBIFS_SIMPLE_KEY_FMT: simple key format | ||
| 167 | */ | ||
| 168 | enum { | ||
| 169 | UBIFS_SIMPLE_KEY_FMT, | ||
| 170 | }; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * The simple key format uses 29 bits for storing UBIFS block number and hash | ||
| 174 | * value. | ||
| 175 | */ | ||
| 176 | #define UBIFS_S_KEY_BLOCK_BITS 29 | ||
| 177 | #define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF | ||
| 178 | #define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS | ||
| 179 | #define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Key types. | ||
| 183 | * | ||
| 184 | * UBIFS_INO_KEY: inode node key | ||
| 185 | * UBIFS_DATA_KEY: data node key | ||
| 186 | * UBIFS_DENT_KEY: directory entry node key | ||
| 187 | * UBIFS_XENT_KEY: extended attribute entry key | ||
| 188 | * UBIFS_KEY_TYPES_CNT: number of supported key types | ||
| 189 | */ | ||
| 190 | enum { | ||
| 191 | UBIFS_INO_KEY, | ||
| 192 | UBIFS_DATA_KEY, | ||
| 193 | UBIFS_DENT_KEY, | ||
| 194 | UBIFS_XENT_KEY, | ||
| 195 | UBIFS_KEY_TYPES_CNT, | ||
| 196 | }; | ||
| 197 | |||
| 198 | /* Count of LEBs reserved for the superblock area */ | ||
| 199 | #define UBIFS_SB_LEBS 1 | ||
| 200 | /* Count of LEBs reserved for the master area */ | ||
| 201 | #define UBIFS_MST_LEBS 2 | ||
| 202 | |||
| 203 | /* First LEB of the superblock area */ | ||
| 204 | #define UBIFS_SB_LNUM 0 | ||
| 205 | /* First LEB of the master area */ | ||
| 206 | #define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) | ||
| 207 | /* First LEB of the log area */ | ||
| 208 | #define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) | ||
| 209 | |||
| 210 | /* | ||
| 211 | * The below constants define the absolute minimum values for various UBIFS | ||
| 212 | * media areas. Many of them actually depend of flash geometry and the FS | ||
| 213 | * configuration (number of journal heads, orphan LEBs, etc). This means that | ||
| 214 | * the smallest volume size which can be used for UBIFS cannot be pre-defined | ||
| 215 | * by these constants. The file-system that meets the below limitation will not | ||
| 216 | * necessarily mount. UBIFS does run-time calculations and validates the FS | ||
| 217 | * size. | ||
| 218 | */ | ||
| 219 | |||
| 220 | /* Minimum number of logical eraseblocks in the log */ | ||
| 221 | #define UBIFS_MIN_LOG_LEBS 2 | ||
| 222 | /* Minimum number of bud logical eraseblocks (one for each head) */ | ||
| 223 | #define UBIFS_MIN_BUD_LEBS 3 | ||
| 224 | /* Minimum number of journal logical eraseblocks */ | ||
| 225 | #define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) | ||
| 226 | /* Minimum number of LPT area logical eraseblocks */ | ||
| 227 | #define UBIFS_MIN_LPT_LEBS 2 | ||
| 228 | /* Minimum number of orphan area logical eraseblocks */ | ||
| 229 | #define UBIFS_MIN_ORPH_LEBS 1 | ||
| 230 | /* | ||
| 231 | * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 | ||
| 232 | * for GC, 1 for deletions, and at least 1 for committed data). | ||
| 233 | */ | ||
| 234 | #define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) | ||
| 235 | |||
| 236 | /* Minimum number of logical eraseblocks */ | ||
| 237 | #define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ | ||
| 238 | UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ | ||
| 239 | UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) | ||
| 240 | |||
| 241 | /* Node sizes (N.B. these are guaranteed to be multiples of 8) */ | ||
| 242 | #define UBIFS_CH_SZ sizeof(struct ubifs_ch) | ||
| 243 | #define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node) | ||
| 244 | #define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) | ||
| 245 | #define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) | ||
| 246 | #define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) | ||
| 247 | #define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node) | ||
| 248 | #define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node) | ||
| 249 | #define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node) | ||
| 250 | #define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node) | ||
| 251 | #define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node) | ||
| 252 | #define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) | ||
| 253 | #define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) | ||
| 254 | /* Extended attribute entry nodes are identical to directory entry nodes */ | ||
| 255 | #define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ | ||
| 256 | /* Only this does not have to be multiple of 8 bytes */ | ||
| 257 | #define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch) | ||
| 258 | |||
| 259 | /* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ | ||
| 260 | #define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) | ||
| 261 | #define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) | ||
| 262 | #define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) | ||
| 263 | #define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ | ||
| 264 | |||
| 265 | /* The largest UBIFS node */ | ||
| 266 | #define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ | ||
| 267 | |||
| 268 | /* | ||
| 269 | * On-flash inode flags. | ||
| 270 | * | ||
| 271 | * UBIFS_COMPR_FL: use compression for this inode | ||
| 272 | * UBIFS_SYNC_FL: I/O on this inode has to be synchronous | ||
| 273 | * UBIFS_IMMUTABLE_FL: inode is immutable | ||
| 274 | * UBIFS_APPEND_FL: writes to the inode may only append data | ||
| 275 | * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous | ||
| 276 | * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value | ||
| 277 | * | ||
| 278 | * Note, these are on-flash flags which correspond to ioctl flags | ||
| 279 | * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not | ||
| 280 | * have to be the same. | ||
| 281 | */ | ||
| 282 | enum { | ||
| 283 | UBIFS_COMPR_FL = 0x01, | ||
| 284 | UBIFS_SYNC_FL = 0x02, | ||
| 285 | UBIFS_IMMUTABLE_FL = 0x04, | ||
| 286 | UBIFS_APPEND_FL = 0x08, | ||
| 287 | UBIFS_DIRSYNC_FL = 0x10, | ||
| 288 | UBIFS_XATTR_FL = 0x20, | ||
| 289 | }; | ||
| 290 | |||
| 291 | /* Inode flag bits used by UBIFS */ | ||
| 292 | #define UBIFS_FL_MASK 0x0000001F | ||
| 293 | |||
| 294 | /* | ||
| 295 | * UBIFS compression algorithms. | ||
| 296 | * | ||
| 297 | * UBIFS_COMPR_NONE: no compression | ||
| 298 | * UBIFS_COMPR_LZO: LZO compression | ||
| 299 | * UBIFS_COMPR_ZLIB: ZLIB compression | ||
| 300 | * UBIFS_COMPR_TYPES_CNT: count of supported compression types | ||
| 301 | */ | ||
| 302 | enum { | ||
| 303 | UBIFS_COMPR_NONE, | ||
| 304 | UBIFS_COMPR_LZO, | ||
| 305 | UBIFS_COMPR_ZLIB, | ||
| 306 | UBIFS_COMPR_TYPES_CNT, | ||
| 307 | }; | ||
| 308 | |||
| 309 | /* | ||
| 310 | * UBIFS node types. | ||
| 311 | * | ||
| 312 | * UBIFS_INO_NODE: inode node | ||
| 313 | * UBIFS_DATA_NODE: data node | ||
| 314 | * UBIFS_DENT_NODE: directory entry node | ||
| 315 | * UBIFS_XENT_NODE: extended attribute node | ||
| 316 | * UBIFS_TRUN_NODE: truncation node | ||
| 317 | * UBIFS_PAD_NODE: padding node | ||
| 318 | * UBIFS_SB_NODE: superblock node | ||
| 319 | * UBIFS_MST_NODE: master node | ||
| 320 | * UBIFS_REF_NODE: LEB reference node | ||
| 321 | * UBIFS_IDX_NODE: index node | ||
| 322 | * UBIFS_CS_NODE: commit start node | ||
| 323 | * UBIFS_ORPH_NODE: orphan node | ||
| 324 | * UBIFS_NODE_TYPES_CNT: count of supported node types | ||
| 325 | * | ||
| 326 | * Note, we index arrays by these numbers, so keep them low and contiguous. | ||
| 327 | * Node type constants for inodes, direntries and so on have to be the same as | ||
| 328 | * corresponding key type constants. | ||
| 329 | */ | ||
| 330 | enum { | ||
| 331 | UBIFS_INO_NODE, | ||
| 332 | UBIFS_DATA_NODE, | ||
| 333 | UBIFS_DENT_NODE, | ||
| 334 | UBIFS_XENT_NODE, | ||
| 335 | UBIFS_TRUN_NODE, | ||
| 336 | UBIFS_PAD_NODE, | ||
| 337 | UBIFS_SB_NODE, | ||
| 338 | UBIFS_MST_NODE, | ||
| 339 | UBIFS_REF_NODE, | ||
| 340 | UBIFS_IDX_NODE, | ||
| 341 | UBIFS_CS_NODE, | ||
| 342 | UBIFS_ORPH_NODE, | ||
| 343 | UBIFS_NODE_TYPES_CNT, | ||
| 344 | }; | ||
| 345 | |||
| 346 | /* | ||
| 347 | * Master node flags. | ||
| 348 | * | ||
| 349 | * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty | ||
| 350 | * UBIFS_MST_NO_ORPHS: no orphan inodes present | ||
| 351 | * UBIFS_MST_RCVRY: written by recovery | ||
| 352 | */ | ||
| 353 | enum { | ||
| 354 | UBIFS_MST_DIRTY = 1, | ||
| 355 | UBIFS_MST_NO_ORPHS = 2, | ||
| 356 | UBIFS_MST_RCVRY = 4, | ||
| 357 | }; | ||
| 358 | |||
| 359 | /* | ||
| 360 | * Node group type (used by recovery to recover whole group or none). | ||
| 361 | * | ||
| 362 | * UBIFS_NO_NODE_GROUP: this node is not part of a group | ||
| 363 | * UBIFS_IN_NODE_GROUP: this node is a part of a group | ||
| 364 | * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group | ||
| 365 | */ | ||
| 366 | enum { | ||
| 367 | UBIFS_NO_NODE_GROUP = 0, | ||
| 368 | UBIFS_IN_NODE_GROUP, | ||
| 369 | UBIFS_LAST_OF_NODE_GROUP, | ||
| 370 | }; | ||
| 371 | |||
| 372 | /* | ||
| 373 | * Superblock flags. | ||
| 374 | * | ||
 * UBIFS_FLG_BIGLPT: the "big" LPT model is used if set
| 376 | */ | ||
| 377 | enum { | ||
| 378 | UBIFS_FLG_BIGLPT = 0x02, | ||
| 379 | }; | ||
| 380 | |||
| 381 | /** | ||
| 382 | * struct ubifs_ch - common header node. | ||
| 383 | * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) | ||
| 384 | * @crc: CRC-32 checksum of the node header | ||
| 385 | * @sqnum: sequence number | ||
| 386 | * @len: full node length | ||
| 387 | * @node_type: node type | ||
| 388 | * @group_type: node group type | ||
| 389 | * @padding: reserved for future, zeroes | ||
| 390 | * | ||
| 391 | * Every UBIFS node starts with this common part. If the node has a key, the | ||
| 392 | * key always goes next. | ||
| 393 | */ | ||
struct ubifs_ch {
	__le32 magic;     /* %UBIFS_NODE_MAGIC */
	__le32 crc;       /* CRC-32 checksum of the node header */
	__le64 sqnum;     /* sequence number */
	__le32 len;       /* full node length */
	__u8 node_type;   /* one of the UBIFS_*_NODE constants */
	__u8 group_type;  /* node group type (UBIFS_*_NODE_GROUP) */
	__u8 padding[2];  /* reserved for future, zeroes */
} __attribute__ ((packed)); /* on-flash layout - must stay packed */
| 403 | |||
| 404 | /** | ||
| 405 | * union ubifs_dev_desc - device node descriptor. | ||
| 406 | * @new: new type device descriptor | ||
| 407 | * @huge: huge type device descriptor | ||
| 408 | * | ||
| 409 | * This data structure describes major/minor numbers of a device node. If an | ||
| 410 | * inode is a device node then its data contains an object of this type. UBIFS | ||
| 411 | * uses standard Linux "new" and "huge" device node encodings. | ||
| 412 | */ | ||
| 413 | union ubifs_dev_desc { | ||
| 414 | __le32 new; | ||
| 415 | __le64 huge; | ||
| 416 | } __attribute__ ((packed)); | ||
| 417 | |||
| 418 | /** | ||
| 419 | * struct ubifs_ino_node - inode node. | ||
| 420 | * @ch: common header | ||
| 421 | * @key: node key | ||
| 422 | * @creat_sqnum: sequence number at time of creation | ||
| 423 | * @size: inode size in bytes (amount of uncompressed data) | ||
| 424 | * @atime_sec: access time seconds | ||
| 425 | * @ctime_sec: creation time seconds | ||
| 426 | * @mtime_sec: modification time seconds | ||
| 427 | * @atime_nsec: access time nanoseconds | ||
| 428 | * @ctime_nsec: creation time nanoseconds | ||
| 429 | * @mtime_nsec: modification time nanoseconds | ||
| 430 | * @nlink: number of hard links | ||
| 431 | * @uid: owner ID | ||
| 432 | * @gid: group ID | ||
| 433 | * @mode: access flags | ||
| 434 | * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) | ||
| 435 | * @data_len: inode data length | ||
| 436 | * @xattr_cnt: count of extended attributes this inode has | ||
| 437 | * @xattr_size: summarized size of all extended attributes in bytes | ||
| 438 | * @padding1: reserved for future, zeroes | ||
| 439 | * @xattr_names: sum of lengths of all extended attribute names belonging to | ||
| 440 | * this inode | ||
| 441 | * @compr_type: compression type used for this inode | ||
| 442 | * @padding2: reserved for future, zeroes | ||
| 443 | * @data: data attached to the inode | ||
| 444 | * | ||
| 445 | * Note, even though inode compression type is defined by @compr_type, some | ||
| 446 | * nodes of this inode may be compressed with a different compressor - this | ||
| 447 | * happens if compression type is changed while the inode already has data | ||
| 448 | * nodes. But @compr_type will be used for further writes to the inode. | ||
| 449 | * | ||
| 450 | * Note, do not forget to amend 'zero_ino_node_unused()' function when changing | ||
| 451 | * the padding fields. | ||
| 452 | */ | ||
| 453 | struct ubifs_ino_node { | ||
| 454 | struct ubifs_ch ch; | ||
| 455 | __u8 key[UBIFS_MAX_KEY_LEN]; | ||
| 456 | __le64 creat_sqnum; | ||
| 457 | __le64 size; | ||
| 458 | __le64 atime_sec; | ||
| 459 | __le64 ctime_sec; | ||
| 460 | __le64 mtime_sec; | ||
| 461 | __le32 atime_nsec; | ||
| 462 | __le32 ctime_nsec; | ||
| 463 | __le32 mtime_nsec; | ||
| 464 | __le32 nlink; | ||
| 465 | __le32 uid; | ||
| 466 | __le32 gid; | ||
| 467 | __le32 mode; | ||
| 468 | __le32 flags; | ||
| 469 | __le32 data_len; | ||
| 470 | __le32 xattr_cnt; | ||
| 471 | __le32 xattr_size; | ||
| 472 | __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ | ||
| 473 | __le32 xattr_names; | ||
| 474 | __le16 compr_type; | ||
| 475 | __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ | ||
| 476 | __u8 data[]; | ||
| 477 | } __attribute__ ((packed)); | ||
| 478 | |||
| 479 | /** | ||
| 480 | * struct ubifs_dent_node - directory entry node. | ||
| 481 | * @ch: common header | ||
| 482 | * @key: node key | ||
| 483 | * @inum: target inode number | ||
| 484 | * @padding1: reserved for future, zeroes | ||
| 485 | * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) | ||
| 486 | * @nlen: name length | ||
| 487 | * @padding2: reserved for future, zeroes | ||
| 488 | * @name: zero-terminated name | ||
| 489 | * | ||
| 490 | * Note, do not forget to amend 'zero_dent_node_unused()' function when | ||
| 491 | * changing the padding fields. | ||
| 492 | */ | ||
| 493 | struct ubifs_dent_node { | ||
| 494 | struct ubifs_ch ch; | ||
| 495 | __u8 key[UBIFS_MAX_KEY_LEN]; | ||
| 496 | __le64 inum; | ||
| 497 | __u8 padding1; | ||
| 498 | __u8 type; | ||
| 499 | __le16 nlen; | ||
| 500 | __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ | ||
| 501 | __u8 name[]; | ||
| 502 | } __attribute__ ((packed)); | ||
| 503 | |||
| 504 | /** | ||
| 505 | * struct ubifs_data_node - data node. | ||
| 506 | * @ch: common header | ||
| 507 | * @key: node key | ||
| 508 | * @size: uncompressed data size in bytes | ||
| 509 | * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) | ||
| 510 | * @padding: reserved for future, zeroes | ||
| 511 | * @data: data | ||
| 512 | * | ||
| 513 | * Note, do not forget to amend 'zero_data_node_unused()' function when | ||
| 514 | * changing the padding fields. | ||
| 515 | */ | ||
| 516 | struct ubifs_data_node { | ||
| 517 | struct ubifs_ch ch; | ||
| 518 | __u8 key[UBIFS_MAX_KEY_LEN]; | ||
| 519 | __le32 size; | ||
| 520 | __le16 compr_type; | ||
| 521 | __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ | ||
| 522 | __u8 data[]; | ||
| 523 | } __attribute__ ((packed)); | ||
| 524 | |||
| 525 | /** | ||
| 526 | * struct ubifs_trun_node - truncation node. | ||
| 527 | * @ch: common header | ||
| 528 | * @inum: truncated inode number | ||
| 529 | * @padding: reserved for future, zeroes | ||
| 530 | * @old_size: size before truncation | ||
| 531 | * @new_size: size after truncation | ||
| 532 | * | ||
| 533 | * This node exists only in the journal and never goes to the main area. Note, | ||
| 534 | * do not forget to amend 'zero_trun_node_unused()' function when changing the | ||
| 535 | * padding fields. | ||
| 536 | */ | ||
| 537 | struct ubifs_trun_node { | ||
| 538 | struct ubifs_ch ch; | ||
| 539 | __le32 inum; | ||
| 540 | __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ | ||
| 541 | __le64 old_size; | ||
| 542 | __le64 new_size; | ||
| 543 | } __attribute__ ((packed)); | ||
| 544 | |||
| 545 | /** | ||
| 546 | * struct ubifs_pad_node - padding node. | ||
| 547 | * @ch: common header | ||
| 548 | * @pad_len: how many bytes after this node are unused (because padded) | ||
| 549 | * @padding: reserved for future, zeroes | ||
| 550 | */ | ||
| 551 | struct ubifs_pad_node { | ||
| 552 | struct ubifs_ch ch; | ||
| 553 | __le32 pad_len; | ||
| 554 | } __attribute__ ((packed)); | ||
| 555 | |||
| 556 | /** | ||
| 557 | * struct ubifs_sb_node - superblock node. | ||
| 558 | * @ch: common header | ||
| 559 | * @padding: reserved for future, zeroes | ||
| 560 | * @key_hash: type of hash function used in keys | ||
| 561 | * @key_fmt: format of the key | ||
| 562 | * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) | ||
| 563 | * @min_io_size: minimal input/output unit size | ||
| 564 | * @leb_size: logical eraseblock size in bytes | ||
| 565 | * @leb_cnt: count of LEBs used by file-system | ||
| 566 | * @max_leb_cnt: maximum count of LEBs used by file-system | ||
| 567 | * @max_bud_bytes: maximum amount of data stored in buds | ||
| 568 | * @log_lebs: log size in logical eraseblocks | ||
| 569 | * @lpt_lebs: number of LEBs used for lprops table | ||
| 570 | * @orph_lebs: number of LEBs used for recording orphans | ||
| 571 | * @jhead_cnt: count of journal heads | ||
| 572 | * @fanout: tree fanout (max. number of links per indexing node) | ||
| 573 | * @lsave_cnt: number of LEB numbers in LPT's save table | ||
| 574 | * @fmt_version: UBIFS on-flash format version | ||
| 575 | * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) | ||
| 576 | * @padding1: reserved for future, zeroes | ||
| 577 | * @rp_uid: reserve pool UID | ||
| 578 | * @rp_gid: reserve pool GID | ||
| 579 | * @rp_size: size of the reserved pool in bytes | ||
| 580 | * @padding2: reserved for future, zeroes | ||
| 581 | * @time_gran: time granularity in nanoseconds | ||
| 582 | * @uuid: UUID generated when the file system image was created | ||
| 583 | */ | ||
| 584 | struct ubifs_sb_node { | ||
| 585 | struct ubifs_ch ch; | ||
| 586 | __u8 padding[2]; | ||
| 587 | __u8 key_hash; | ||
| 588 | __u8 key_fmt; | ||
| 589 | __le32 flags; | ||
| 590 | __le32 min_io_size; | ||
| 591 | __le32 leb_size; | ||
| 592 | __le32 leb_cnt; | ||
| 593 | __le32 max_leb_cnt; | ||
| 594 | __le64 max_bud_bytes; | ||
| 595 | __le32 log_lebs; | ||
| 596 | __le32 lpt_lebs; | ||
| 597 | __le32 orph_lebs; | ||
| 598 | __le32 jhead_cnt; | ||
| 599 | __le32 fanout; | ||
| 600 | __le32 lsave_cnt; | ||
| 601 | __le32 fmt_version; | ||
| 602 | __le16 default_compr; | ||
| 603 | __u8 padding1[2]; | ||
| 604 | __le32 rp_uid; | ||
| 605 | __le32 rp_gid; | ||
| 606 | __le64 rp_size; | ||
| 607 | __le32 time_gran; | ||
| 608 | __u8 uuid[16]; | ||
| 609 | __u8 padding2[3972]; | ||
| 610 | } __attribute__ ((packed)); | ||
| 611 | |||
| 612 | /** | ||
| 613 | * struct ubifs_mst_node - master node. | ||
| 614 | * @ch: common header | ||
| 615 | * @highest_inum: highest inode number in the committed index | ||
| 616 | * @cmt_no: commit number | ||
| 617 | * @flags: various flags (%UBIFS_MST_DIRTY, etc) | ||
| 618 | * @log_lnum: start of the log | ||
| 619 | * @root_lnum: LEB number of the root indexing node | ||
| 620 | * @root_offs: offset within @root_lnum | ||
| 621 | * @root_len: root indexing node length | ||
| 622 | * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was | ||
| 623 | * not reserved and should be reserved on mount) | ||
| 624 | * @ihead_lnum: LEB number of index head | ||
| 625 | * @ihead_offs: offset of index head | ||
| 626 | * @index_size: size of index on flash | ||
| 627 | * @total_free: total free space in bytes | ||
| 628 | * @total_dirty: total dirty space in bytes | ||
| 629 | * @total_used: total used space in bytes (includes only data LEBs) | ||
| 630 | * @total_dead: total dead space in bytes (includes only data LEBs) | ||
| 631 | * @total_dark: total dark space in bytes (includes only data LEBs) | ||
| 632 | * @lpt_lnum: LEB number of LPT root nnode | ||
| 633 | * @lpt_offs: offset of LPT root nnode | ||
| 634 | * @nhead_lnum: LEB number of LPT head | ||
| 635 | * @nhead_offs: offset of LPT head | ||
| 636 | * @ltab_lnum: LEB number of LPT's own lprops table | ||
| 637 | * @ltab_offs: offset of LPT's own lprops table | ||
| 638 | * @lsave_lnum: LEB number of LPT's save table (big model only) | ||
| 639 | * @lsave_offs: offset of LPT's save table (big model only) | ||
| 640 | * @lscan_lnum: LEB number of last LPT scan | ||
| 641 | * @empty_lebs: number of empty logical eraseblocks | ||
| 642 | * @idx_lebs: number of indexing logical eraseblocks | ||
| 643 | * @leb_cnt: count of LEBs used by file-system | ||
| 644 | * @padding: reserved for future, zeroes | ||
| 645 | */ | ||
| 646 | struct ubifs_mst_node { | ||
| 647 | struct ubifs_ch ch; | ||
| 648 | __le64 highest_inum; | ||
| 649 | __le64 cmt_no; | ||
| 650 | __le32 flags; | ||
| 651 | __le32 log_lnum; | ||
| 652 | __le32 root_lnum; | ||
| 653 | __le32 root_offs; | ||
| 654 | __le32 root_len; | ||
| 655 | __le32 gc_lnum; | ||
| 656 | __le32 ihead_lnum; | ||
| 657 | __le32 ihead_offs; | ||
| 658 | __le64 index_size; | ||
| 659 | __le64 total_free; | ||
| 660 | __le64 total_dirty; | ||
| 661 | __le64 total_used; | ||
| 662 | __le64 total_dead; | ||
| 663 | __le64 total_dark; | ||
| 664 | __le32 lpt_lnum; | ||
| 665 | __le32 lpt_offs; | ||
| 666 | __le32 nhead_lnum; | ||
| 667 | __le32 nhead_offs; | ||
| 668 | __le32 ltab_lnum; | ||
| 669 | __le32 ltab_offs; | ||
| 670 | __le32 lsave_lnum; | ||
| 671 | __le32 lsave_offs; | ||
| 672 | __le32 lscan_lnum; | ||
| 673 | __le32 empty_lebs; | ||
| 674 | __le32 idx_lebs; | ||
| 675 | __le32 leb_cnt; | ||
| 676 | __u8 padding[344]; | ||
| 677 | } __attribute__ ((packed)); | ||
| 678 | |||
| 679 | /** | ||
| 680 | * struct ubifs_ref_node - logical eraseblock reference node. | ||
| 681 | * @ch: common header | ||
| 682 | * @lnum: the referred logical eraseblock number | ||
| 683 | * @offs: start offset in the referred LEB | ||
| 684 | * @jhead: journal head number | ||
| 685 | * @padding: reserved for future, zeroes | ||
| 686 | */ | ||
| 687 | struct ubifs_ref_node { | ||
| 688 | struct ubifs_ch ch; | ||
| 689 | __le32 lnum; | ||
| 690 | __le32 offs; | ||
| 691 | __le32 jhead; | ||
| 692 | __u8 padding[28]; | ||
| 693 | } __attribute__ ((packed)); | ||
| 694 | |||
| 695 | /** | ||
| 696 | * struct ubifs_branch - key/reference/length branch | ||
| 697 | * @lnum: LEB number of the target node | ||
| 698 | * @offs: offset within @lnum | ||
| 699 | * @len: target node length | ||
| 700 | * @key: key | ||
| 701 | */ | ||
| 702 | struct ubifs_branch { | ||
| 703 | __le32 lnum; | ||
| 704 | __le32 offs; | ||
| 705 | __le32 len; | ||
| 706 | __u8 key[]; | ||
| 707 | } __attribute__ ((packed)); | ||
| 708 | |||
| 709 | /** | ||
| 710 | * struct ubifs_idx_node - indexing node. | ||
| 711 | * @ch: common header | ||
| 712 | * @child_cnt: number of child index nodes | ||
| 713 | * @level: tree level | ||
| 714 | * @branches: LEB number / offset / length / key branches | ||
| 715 | */ | ||
| 716 | struct ubifs_idx_node { | ||
| 717 | struct ubifs_ch ch; | ||
| 718 | __le16 child_cnt; | ||
| 719 | __le16 level; | ||
| 720 | __u8 branches[]; | ||
| 721 | } __attribute__ ((packed)); | ||
| 722 | |||
| 723 | /** | ||
| 724 | * struct ubifs_cs_node - commit start node. | ||
| 725 | * @ch: common header | ||
| 726 | * @cmt_no: commit number | ||
| 727 | */ | ||
| 728 | struct ubifs_cs_node { | ||
| 729 | struct ubifs_ch ch; | ||
| 730 | __le64 cmt_no; | ||
| 731 | } __attribute__ ((packed)); | ||
| 732 | |||
| 733 | /** | ||
| 734 | * struct ubifs_orph_node - orphan node. | ||
| 735 | * @ch: common header | ||
| 736 | * @cmt_no: commit number (also top bit is set on the last node of the commit) | ||
| 737 | * @inos: inode numbers of orphans | ||
| 738 | */ | ||
| 739 | struct ubifs_orph_node { | ||
| 740 | struct ubifs_ch ch; | ||
| 741 | __le64 cmt_no; | ||
| 742 | __le64 inos[]; | ||
| 743 | } __attribute__ ((packed)); | ||
| 744 | |||
| 745 | #endif /* __UBIFS_MEDIA_H__ */ | ||
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h new file mode 100644 index 000000000000..e4f89f271827 --- /dev/null +++ b/fs/ubifs/ubifs.h | |||
| @@ -0,0 +1,1649 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* Implementation version 0.7 */ | ||
| 24 | |||
| 25 | #ifndef __UBIFS_H__ | ||
| 26 | #define __UBIFS_H__ | ||
| 27 | |||
| 28 | #include <asm/div64.h> | ||
| 29 | #include <linux/statfs.h> | ||
| 30 | #include <linux/fs.h> | ||
| 31 | #include <linux/err.h> | ||
| 32 | #include <linux/sched.h> | ||
| 33 | #include <linux/vmalloc.h> | ||
| 34 | #include <linux/spinlock.h> | ||
| 35 | #include <linux/mutex.h> | ||
| 36 | #include <linux/rwsem.h> | ||
| 37 | #include <linux/mtd/ubi.h> | ||
| 38 | #include <linux/pagemap.h> | ||
| 39 | #include <linux/backing-dev.h> | ||
| 40 | #include "ubifs-media.h" | ||
| 41 | |||
| 42 | /* Version of this UBIFS implementation */ | ||
| 43 | #define UBIFS_VERSION 1 | ||
| 44 | |||
| 45 | /* Normal UBIFS messages */ | ||
| 46 | #define ubifs_msg(fmt, ...) \ | ||
| 47 | printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) | ||
| 48 | /* UBIFS error messages */ | ||
| 49 | #define ubifs_err(fmt, ...) \ | ||
| 50 | printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ | ||
| 51 | __func__, ##__VA_ARGS__) | ||
| 52 | /* UBIFS warning messages */ | ||
| 53 | #define ubifs_warn(fmt, ...) \ | ||
| 54 | printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ | ||
| 55 | current->pid, __func__, ##__VA_ARGS__) | ||
| 56 | |||
| 57 | /* UBIFS file system VFS magic number */ | ||
| 58 | #define UBIFS_SUPER_MAGIC 0x24051905 | ||
| 59 | |||
| 60 | /* Number of UBIFS blocks per VFS page */ | ||
| 61 | #define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) | ||
| 62 | #define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) | ||
| 63 | |||
| 64 | /* "File system end of life" sequence number watermark */ | ||
| 65 | #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL | ||
| 66 | #define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL | ||
| 67 | |||
| 68 | /* Minimum amount of data UBIFS writes to the flash */ | ||
| 69 | #define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) | ||
| 70 | |||
| 71 | /* | ||
| 72 | * Currently we do not support inode number overlapping and re-using, so this | ||
| 73 | * watermark defines dangerous inode number level. This should be fixed later, | ||
| 74 | * although it is difficult to exceed current limit. Another option is to use | ||
| 75 | * 64-bit inode numbers, but this means more overhead. | ||
| 76 | */ | ||
| 77 | #define INUM_WARN_WATERMARK 0xFFF00000 | ||
| 78 | #define INUM_WATERMARK 0xFFFFFF00 | ||
| 79 | |||
| 80 | /* Largest key size supported in this implementation */ | ||
| 81 | #define CUR_MAX_KEY_LEN UBIFS_SK_LEN | ||
| 82 | |||
| 83 | /* Maximum number of entries in each LPT (LEB category) heap */ | ||
| 84 | #define LPT_HEAP_SZ 256 | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Background thread name pattern. The numbers are UBI device and volume | ||
| 88 | * numbers. | ||
| 89 | */ | ||
| 90 | #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" | ||
| 91 | |||
| 92 | /* Default write-buffer synchronization timeout (5 secs) */ | ||
| 93 | #define DEFAULT_WBUF_TIMEOUT (5 * HZ) | ||
| 94 | |||
| 95 | /* Maximum possible inode number (only 32-bit inodes are supported now) */ | ||
| 96 | #define MAX_INUM 0xFFFFFFFF | ||
| 97 | |||
| 98 | /* Number of non-data journal heads */ | ||
| 99 | #define NONDATA_JHEADS_CNT 2 | ||
| 100 | |||
| 101 | /* Garbage collector head */ | ||
| 102 | #define GCHD 0 | ||
| 103 | /* Base journal head number */ | ||
| 104 | #define BASEHD 1 | ||
| 105 | /* First "general purpose" journal head */ | ||
| 106 | #define DATAHD 2 | ||
| 107 | |||
| 108 | /* 'No change' value for 'ubifs_change_lp()' */ | ||
| 109 | #define LPROPS_NC 0x80000001 | ||
| 110 | |||
| 111 | /* | ||
| 112 | * There is no notion of truncation key because truncation nodes do not exist | ||
| 113 | * in TNC. However, when replaying, it is handy to introduce fake "truncation" | ||
| 114 | * keys for truncation nodes because the code becomes simpler. So we define | ||
| 115 | * %UBIFS_TRUN_KEY type. | ||
| 116 | */ | ||
| 117 | #define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT | ||
| 118 | |||
| 119 | /* | ||
| 120 | * How much a directory entry/extended attribute entry adds to the parent/host | ||
| 121 | * inode. | ||
| 122 | */ | ||
| 123 | #define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) | ||
| 124 | |||
| 125 | /* How much an extended attribute adds to the host inode */ | ||
| 126 | #define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) | ||
| 127 | |||
| 128 | /* | ||
| 129 | * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered | ||
| 130 | * "old", and znodes which were touched last 'YOUNG_ZNODE_AGE' seconds ago are | ||
| 131 | * considered "young". This is used by shrinker when selecting znode to trim | ||
| 132 | * off. | ||
| 133 | */ | ||
| 134 | #define OLD_ZNODE_AGE 20 | ||
| 135 | #define YOUNG_ZNODE_AGE 5 | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Some compressors, like LZO, may end up with more data than the input buffer. | ||
| 139 | * So UBIFS always allocates larger output buffer, to be sure the compressor | ||
| 140 | * will not corrupt memory in case of worst case compression. | ||
| 141 | */ | ||
| 142 | #define WORST_COMPR_FACTOR 2 | ||
| 143 | |||
| 144 | /* Maximum expected tree height for use by bottom_up_buf */ | ||
| 145 | #define BOTTOM_UP_HEIGHT 64 | ||
| 146 | |||
| 147 | /* | ||
| 148 | * Lockdep classes for UBIFS inode @ui_mutex. | ||
| 149 | */ | ||
| 150 | enum { | ||
| 151 | WB_MUTEX_1 = 0, | ||
| 152 | WB_MUTEX_2 = 1, | ||
| 153 | WB_MUTEX_3 = 2, | ||
| 154 | }; | ||
| 155 | |||
| 156 | /* | ||
| 157 | * Znode flags (actually, bit numbers which store the flags). | ||
| 158 | * | ||
| 159 | * DIRTY_ZNODE: znode is dirty | ||
| 160 | * COW_ZNODE: znode is being committed and a new instance of this znode has to | ||
| 161 | * be created before changing this znode | ||
| 162 | * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is | ||
| 163 | * still in the commit list and the ongoing commit operation | ||
| 164 | * will commit it, and delete this znode after it is done | ||
| 165 | */ | ||
| 166 | enum { | ||
| 167 | DIRTY_ZNODE = 0, | ||
| 168 | COW_ZNODE = 1, | ||
| 169 | OBSOLETE_ZNODE = 2, | ||
| 170 | }; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Commit states. | ||
| 174 | * | ||
| 175 | * COMMIT_RESTING: commit is not wanted | ||
| 176 | * COMMIT_BACKGROUND: background commit has been requested | ||
| 177 | * COMMIT_REQUIRED: commit is required | ||
| 178 | * COMMIT_RUNNING_BACKGROUND: background commit is running | ||
| 179 | * COMMIT_RUNNING_REQUIRED: commit is running and it is required | ||
| 180 | * COMMIT_BROKEN: commit failed | ||
| 181 | */ | ||
| 182 | enum { | ||
| 183 | COMMIT_RESTING = 0, | ||
| 184 | COMMIT_BACKGROUND, | ||
| 185 | COMMIT_REQUIRED, | ||
| 186 | COMMIT_RUNNING_BACKGROUND, | ||
| 187 | COMMIT_RUNNING_REQUIRED, | ||
| 188 | COMMIT_BROKEN, | ||
| 189 | }; | ||
| 190 | |||
| 191 | /* | ||
| 192 | * 'ubifs_scan_a_node()' return values. | ||
| 193 | * | ||
| 194 | * SCANNED_GARBAGE: scanned garbage | ||
| 195 | * SCANNED_EMPTY_SPACE: scanned empty space | ||
| 196 | * SCANNED_A_NODE: scanned a valid node | ||
| 197 | * SCANNED_A_CORRUPT_NODE: scanned a corrupted node | ||
| 198 | * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length | ||
| 199 | * | ||
| 200 | * Greater than zero means: 'scanned that number of padding bytes' | ||
| 201 | */ | ||
| 202 | enum { | ||
| 203 | SCANNED_GARBAGE = 0, | ||
| 204 | SCANNED_EMPTY_SPACE = -1, | ||
| 205 | SCANNED_A_NODE = -2, | ||
| 206 | SCANNED_A_CORRUPT_NODE = -3, | ||
| 207 | SCANNED_A_BAD_PAD_NODE = -4, | ||
| 208 | }; | ||
| 209 | |||
| 210 | /* | ||
| 211 | * LPT cnode flag bits. | ||
| 212 | * | ||
| 213 | * DIRTY_CNODE: cnode is dirty | ||
| 214 | * COW_CNODE: cnode is being committed and must be copied before writing | ||
| 215 | * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), | ||
| 216 | * so it can (and must) be freed when the commit is finished | ||
| 217 | */ | ||
| 218 | enum { | ||
| 219 | DIRTY_CNODE = 0, | ||
| 220 | COW_CNODE = 1, | ||
| 221 | OBSOLETE_CNODE = 2, | ||
| 222 | }; | ||
| 223 | |||
| 224 | /* | ||
| 225 | * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. | ||
| 226 | * | ||
| 227 | * LTAB_DIRTY: ltab node is dirty | ||
| 228 | * LSAVE_DIRTY: lsave node is dirty | ||
| 229 | */ | ||
| 230 | enum { | ||
| 231 | LTAB_DIRTY = 1, | ||
| 232 | LSAVE_DIRTY = 2, | ||
| 233 | }; | ||
| 234 | |||
| 235 | /* | ||
| 236 | * Return codes used by the garbage collector. | ||
| 237 | * @LEB_FREED: the logical eraseblock was freed and is ready to use | ||
| 238 | * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit | ||
| 239 | * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes | ||
| 240 | */ | ||
| 241 | enum { | ||
| 242 | LEB_FREED, | ||
| 243 | LEB_FREED_IDX, | ||
| 244 | LEB_RETAINED, | ||
| 245 | }; | ||
| 246 | |||
| 247 | /** | ||
| 248 | * struct ubifs_old_idx - index node obsoleted since last commit start. | ||
| 249 | * @rb: rb-tree node | ||
| 250 | * @lnum: LEB number of obsoleted index node | ||
| 251 | * @offs: offset of obsoleted index node | ||
| 252 | */ | ||
| 253 | struct ubifs_old_idx { | ||
| 254 | struct rb_node rb; | ||
| 255 | int lnum; | ||
| 256 | int offs; | ||
| 257 | }; | ||
| 258 | |||
| 259 | /* The below union makes it easier to deal with keys */ | ||
| 260 | union ubifs_key { | ||
| 261 | uint8_t u8[CUR_MAX_KEY_LEN]; | ||
| 262 | uint32_t u32[CUR_MAX_KEY_LEN/4]; | ||
| 263 | uint64_t u64[CUR_MAX_KEY_LEN/8]; | ||
| 264 | __le32 j32[CUR_MAX_KEY_LEN/4]; | ||
| 265 | }; | ||
| 266 | |||
| 267 | /** | ||
| 268 | * struct ubifs_scan_node - UBIFS scanned node information. | ||
| 269 | * @list: list of scanned nodes | ||
| 270 | * @key: key of node scanned (if it has one) | ||
| 271 | * @sqnum: sequence number | ||
| 272 | * @type: type of node scanned | ||
| 273 | * @offs: offset within LEB of node scanned | ||
| 274 | * @len: length of node scanned | ||
| 275 | * @node: raw node | ||
| 276 | */ | ||
| 277 | struct ubifs_scan_node { | ||
| 278 | struct list_head list; | ||
| 279 | union ubifs_key key; | ||
| 280 | unsigned long long sqnum; | ||
| 281 | int type; | ||
| 282 | int offs; | ||
| 283 | int len; | ||
| 284 | void *node; | ||
| 285 | }; | ||
| 286 | |||
| 287 | /** | ||
| 288 | * struct ubifs_scan_leb - UBIFS scanned LEB information. | ||
| 289 | * @lnum: logical eraseblock number | ||
| 290 | * @nodes_cnt: number of nodes scanned | ||
| 291 | * @nodes: list of struct ubifs_scan_node | ||
| 292 | * @endpt: end point (and therefore the start of empty space) | ||
| 293 | * @ecc: read returned -EBADMSG | ||
| 294 | * @buf: buffer containing entire LEB scanned | ||
| 295 | */ | ||
| 296 | struct ubifs_scan_leb { | ||
| 297 | int lnum; | ||
| 298 | int nodes_cnt; | ||
| 299 | struct list_head nodes; | ||
| 300 | int endpt; | ||
| 301 | int ecc; | ||
| 302 | void *buf; | ||
| 303 | }; | ||
| 304 | |||
| 305 | /** | ||
| 306 | * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. | ||
| 307 | * @list: list | ||
| 308 | * @lnum: LEB number | ||
| 309 | * @unmap: OK to unmap this LEB | ||
| 310 | * | ||
| 311 | * This data structure is used to temporarily store garbage-collected indexing | ||
| 312 | * LEBs - they are not released immediately, but only after the next commit. | ||
| 313 | * This is needed to guarantee recoverability. | ||
| 314 | */ | ||
| 315 | struct ubifs_gced_idx_leb { | ||
| 316 | struct list_head list; | ||
| 317 | int lnum; | ||
| 318 | int unmap; | ||
| 319 | }; | ||
| 320 | |||
| 321 | /** | ||
| 322 | * struct ubifs_inode - UBIFS in-memory inode description. | ||
| 323 | * @vfs_inode: VFS inode description object | ||
| 324 | * @creat_sqnum: sequence number at time of creation | ||
| 325 | * @xattr_size: summarized size of all extended attributes in bytes | ||
| 326 | * @xattr_cnt: count of extended attributes this inode has | ||
| 327 | * @xattr_names: sum of lengths of all extended attribute names belonging to | ||
| 328 | * this inode | ||
| 329 | * @dirty: non-zero if the inode is dirty | ||
| 330 | * @xattr: non-zero if this is an extended attribute inode | ||
| 331 | * @ui_mutex: serializes inode write-back with the rest of VFS operations, | ||
| 332 | * serializes "clean <-> dirty" state changes, protects @dirty, | ||
| 333 | * @ui_size, and @xattr_size | ||
| 334 | * @ui_lock: protects @synced_i_size | ||
| 335 | * @synced_i_size: synchronized size of inode, i.e. the value of inode size | ||
| 336 | * currently stored on the flash; used only for regular file | ||
| 337 | * inodes | ||
| 338 | * @ui_size: inode size used by UBIFS when writing to flash | ||
| 339 | * @flags: inode flags (@UBIFS_COMPR_FL, etc) | ||
| 340 | * @compr_type: default compression type used for this inode | ||
| 341 | * @data_len: length of the data attached to the inode | ||
| 342 | * @data: inode's data | ||
| 343 | * | ||
| 344 | * @ui_mutex exists for two main reasons. At first it prevents inodes from | ||
| 345 | * being written back while UBIFS is changing them, being in the middle of a VFS | ||
| 346 | * operation. This way UBIFS makes sure the inode fields are consistent. For | ||
| 347 | * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and | ||
| 348 | * write-back must not write any of them before we have finished. | ||
| 349 | * | ||
| 350 | * The second reason is budgeting - UBIFS has to budget all operations. If an | ||
| 351 | * operation is going to mark an inode dirty, it has to allocate budget for | ||
| 352 | * this. It cannot just mark it dirty because there is no guarantee there will | ||
| 353 | * be enough flash space to write the inode back later. This means UBIFS has | ||
| 354 | * to have full control over inode "clean <-> dirty" transitions (and pages | ||
| 355 | * actually). But unfortunately, VFS marks inodes dirty in many places, and it | ||
| 356 | * does not ask the file-system if it is allowed to do so (there is a notifier, | ||
| 357 | * but it is not enough), i.e., there is no mechanism to synchronize with this. | ||
| 358 | * So UBIFS has its own inode dirty flag and its own mutex to serialize | ||
| 359 | * "clean <-> dirty" transitions. | ||
| 360 | * | ||
| 361 | * The @synced_i_size field is used to make sure we never write pages which are | ||
| 362 | * beyond last synchronized inode size. See 'ubifs_writepage()' for more | ||
| 363 | * information. | ||
| 364 | * | ||
| 365 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses | ||
| 366 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot | ||
| 367 | * make sure @inode->i_size is always changed under @ui_mutex, because it | ||
| 368 | * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock | ||
| 369 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are | ||
| 370 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one | ||
| 371 | * could consider to rework locking and base it on "shadow" fields. | ||
| 372 | */ | ||
| 373 | struct ubifs_inode { | ||
| 374 | struct inode vfs_inode; | ||
| 375 | unsigned long long creat_sqnum; | ||
| 376 | unsigned int xattr_size; | ||
| 377 | unsigned int xattr_cnt; | ||
| 378 | unsigned int xattr_names; | ||
| 379 | unsigned int dirty:1; | ||
| 380 | unsigned int xattr:1; | ||
| 381 | struct mutex ui_mutex; | ||
| 382 | spinlock_t ui_lock; | ||
| 383 | loff_t synced_i_size; | ||
| 384 | loff_t ui_size; | ||
| 385 | int flags; | ||
| 386 | int compr_type; | ||
| 387 | int data_len; | ||
| 388 | void *data; | ||
| 389 | }; | ||
| 390 | |||
| 391 | /** | ||
| 392 | * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. | ||
| 393 | * @list: list | ||
| 394 | * @lnum: LEB number of recovered LEB | ||
| 395 | * @endpt: offset where recovery ended | ||
| 396 | * | ||
| 397 | * This structure records a LEB identified during recovery that needs to be | ||
| 398 | * cleaned but was not because UBIFS was mounted read-only. The information | ||
| 399 | * is used to clean the LEB when remounting to read-write mode. | ||
| 400 | */ | ||
| 401 | struct ubifs_unclean_leb { | ||
| 402 | struct list_head list; | ||
| 403 | int lnum; | ||
| 404 | int endpt; | ||
| 405 | }; | ||
| 406 | |||
| 407 | /* | ||
| 408 | * LEB properties flags. | ||
| 409 | * | ||
| 410 | * LPROPS_UNCAT: not categorized | ||
| 411 | * LPROPS_DIRTY: dirty > 0, not index | ||
| 412 | * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index | ||
| 413 | * LPROPS_FREE: free > 0, not empty, not index | ||
| 414 | * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs | ||
| 415 | * LPROPS_EMPTY: LEB is empty, not taken | ||
| 416 | * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken | ||
| 417 | * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken | ||
| 418 | * LPROPS_CAT_MASK: mask for the LEB categories above | ||
| 419 | * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media) | ||
| 420 | * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash) | ||
| 421 | */ | ||
| 422 | enum { | ||
| 423 | LPROPS_UNCAT = 0, | ||
| 424 | LPROPS_DIRTY = 1, | ||
| 425 | LPROPS_DIRTY_IDX = 2, | ||
| 426 | LPROPS_FREE = 3, | ||
| 427 | LPROPS_HEAP_CNT = 3, | ||
| 428 | LPROPS_EMPTY = 4, | ||
| 429 | LPROPS_FREEABLE = 5, | ||
| 430 | LPROPS_FRDI_IDX = 6, | ||
| 431 | LPROPS_CAT_MASK = 15, | ||
| 432 | LPROPS_TAKEN = 16, | ||
| 433 | LPROPS_INDEX = 32, | ||
| 434 | }; | ||
| 435 | |||
| 436 | /** | ||
| 437 | * struct ubifs_lprops - logical eraseblock properties. | ||
| 438 | * @free: amount of free space in bytes | ||
| 439 | * @dirty: amount of dirty space in bytes | ||
| 440 | * @flags: LEB properties flags (see above) | ||
| 441 | * @lnum: LEB number | ||
| 442 | * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE) | ||
| 443 | * @hpos: heap position in heap of same-category lprops (other categories) | ||
| 444 | */ | ||
| 445 | struct ubifs_lprops { | ||
| 446 | int free; | ||
| 447 | int dirty; | ||
| 448 | int flags; | ||
| 449 | int lnum; | ||
| 450 | union { | ||
| 451 | struct list_head list; | ||
| 452 | int hpos; | ||
| 453 | }; | ||
| 454 | }; | ||
| 455 | |||
| 456 | /** | ||
| 457 | * struct ubifs_lpt_lprops - LPT logical eraseblock properties. | ||
| 458 | * @free: amount of free space in bytes | ||
| 459 | * @dirty: amount of dirty space in bytes | ||
| 460 | * @tgc: trivial GC flag (1 => unmap after commit end) | ||
| 461 | * @cmt: commit flag (1 => reserved for commit) | ||
| 462 | */ | ||
| 463 | struct ubifs_lpt_lprops { | ||
| 464 | int free; | ||
| 465 | int dirty; | ||
| 466 | unsigned tgc : 1; | ||
| 467 | unsigned cmt : 1; | ||
| 468 | }; | ||
| 469 | |||
| 470 | /** | ||
| 471 | * struct ubifs_lp_stats - statistics of eraseblocks in the main area. | ||
| 472 | * @empty_lebs: number of empty LEBs | ||
| 473 | * @taken_empty_lebs: number of taken LEBs | ||
| 474 | * @idx_lebs: number of indexing LEBs | ||
| 475 | * @total_free: total free space in bytes | ||
| 476 | * @total_dirty: total dirty space in bytes | ||
| 477 | * @total_used: total used space in bytes (includes only data LEBs) | ||
| 478 | * @total_dead: total dead space in bytes (includes only data LEBs) | ||
| 479 | * @total_dark: total dark space in bytes (includes only data LEBs) | ||
| 480 | * | ||
| 481 | * N.B. total_dirty and total_used are different to other total_* fields, | ||
| 482 | * because they account _all_ LEBs, not just data LEBs. | ||
| 483 | * | ||
| 484 | * 'taken_empty_lebs' counts the LEBs that are in the transient state of having | ||
| 485 | * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed | ||
| 486 | * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used | ||
| 487 | * by itself (in which case 'unused_lebs' would be a better name). In the case | ||
| 488 | * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC, | ||
| 489 | * but unlike other empty LEBs that are 'taken', it may not be written straight | ||
| 490 | * away (i.e. before the next commit start or unmount), so either gc_lnum must | ||
| 491 | * be specially accounted for, or the current approach followed i.e. count it | ||
| 492 | * under 'taken_empty_lebs'. | ||
| 493 | */ | ||
| 494 | struct ubifs_lp_stats { | ||
| 495 | int empty_lebs; | ||
| 496 | int taken_empty_lebs; | ||
| 497 | int idx_lebs; | ||
| 498 | long long total_free; | ||
| 499 | long long total_dirty; | ||
| 500 | long long total_used; | ||
| 501 | long long total_dead; | ||
| 502 | long long total_dark; | ||
| 503 | }; | ||
| 504 | |||
| 505 | struct ubifs_nnode; | ||
| 506 | |||
| 507 | /** | ||
| 508 | * struct ubifs_cnode - LEB Properties Tree common node. | ||
| 509 | * @parent: parent nnode | ||
| 510 | * @cnext: next cnode to commit | ||
| 511 | * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) | ||
| 512 | * @iip: index in parent | ||
| 513 | * @level: level in the tree (zero for pnodes, greater than zero for nnodes) | ||
| 514 | * @num: node number | ||
| 515 | */ | ||
| 516 | struct ubifs_cnode { | ||
| 517 | struct ubifs_nnode *parent; | ||
| 518 | struct ubifs_cnode *cnext; | ||
| 519 | unsigned long flags; | ||
| 520 | int iip; | ||
| 521 | int level; | ||
| 522 | int num; | ||
| 523 | }; | ||
| 524 | |||
| 525 | /** | ||
| 526 | * struct ubifs_pnode - LEB Properties Tree leaf node. | ||
| 527 | * @parent: parent nnode | ||
| 528 | * @cnext: next cnode to commit | ||
| 529 | * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) | ||
| 530 | * @iip: index in parent | ||
| 531 | * @level: level in the tree (always zero for pnodes) | ||
| 532 | * @num: node number | ||
| 533 | * @lprops: LEB properties array | ||
| 534 | */ | ||
| 535 | struct ubifs_pnode { | ||
| 536 | struct ubifs_nnode *parent; | ||
| 537 | struct ubifs_cnode *cnext; | ||
| 538 | unsigned long flags; | ||
| 539 | int iip; | ||
| 540 | int level; | ||
| 541 | int num; | ||
| 542 | struct ubifs_lprops lprops[UBIFS_LPT_FANOUT]; | ||
| 543 | }; | ||
| 544 | |||
| 545 | /** | ||
| 546 | * struct ubifs_nbranch - LEB Properties Tree internal node branch. | ||
| 547 | * @lnum: LEB number of child | ||
| 548 | * @offs: offset of child | ||
| 549 | * @nnode: nnode child | ||
| 550 | * @pnode: pnode child | ||
| 551 | * @cnode: cnode child | ||
| 552 | */ | ||
| 553 | struct ubifs_nbranch { | ||
| 554 | int lnum; | ||
| 555 | int offs; | ||
| 556 | union { | ||
| 557 | struct ubifs_nnode *nnode; | ||
| 558 | struct ubifs_pnode *pnode; | ||
| 559 | struct ubifs_cnode *cnode; | ||
| 560 | }; | ||
| 561 | }; | ||
| 562 | |||
| 563 | /** | ||
| 564 | * struct ubifs_nnode - LEB Properties Tree internal node. | ||
| 565 | * @parent: parent nnode | ||
| 566 | * @cnext: next cnode to commit | ||
| 567 | * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) | ||
| 568 | * @iip: index in parent | ||
| 569 | * @level: level in the tree (always greater than zero for nnodes) | ||
| 570 | * @num: node number | ||
| 571 | * @nbranch: branches to child nodes | ||
| 572 | */ | ||
| 573 | struct ubifs_nnode { | ||
| 574 | struct ubifs_nnode *parent; | ||
| 575 | struct ubifs_cnode *cnext; | ||
| 576 | unsigned long flags; | ||
| 577 | int iip; | ||
| 578 | int level; | ||
| 579 | int num; | ||
| 580 | struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT]; | ||
| 581 | }; | ||
| 582 | |||
| 583 | /** | ||
| 584 | * struct ubifs_lpt_heap - heap of categorized lprops. | ||
| 585 | * @arr: heap array | ||
| 586 | * @cnt: number in heap | ||
| 587 | * @max_cnt: maximum number allowed in heap | ||
| 588 | * | ||
| 589 | * There are %LPROPS_HEAP_CNT heaps. | ||
| 590 | */ | ||
| 591 | struct ubifs_lpt_heap { | ||
| 592 | struct ubifs_lprops **arr; | ||
| 593 | int cnt; | ||
| 594 | int max_cnt; | ||
| 595 | }; | ||
| 596 | |||
| 597 | /* | ||
| 598 | * Return codes for LPT scan callback function. | ||
| 599 | * | ||
| 600 | * LPT_SCAN_CONTINUE: continue scanning | ||
| 601 | * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory | ||
| 602 | * LPT_SCAN_STOP: stop scanning | ||
| 603 | */ | ||
| 604 | enum { | ||
| 605 | LPT_SCAN_CONTINUE = 0, | ||
| 606 | LPT_SCAN_ADD = 1, | ||
| 607 | LPT_SCAN_STOP = 2, | ||
| 608 | }; | ||
| 609 | |||
| 610 | struct ubifs_info; | ||
| 611 | |||
| 612 | /* Callback used by the 'ubifs_lpt_scan_nolock()' function */ | ||
| 613 | typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, | ||
| 614 | const struct ubifs_lprops *lprops, | ||
| 615 | int in_tree, void *data); | ||
| 616 | |||
| 617 | /** | ||
| 618 | * struct ubifs_wbuf - UBIFS write-buffer. | ||
| 619 | * @c: UBIFS file-system description object | ||
| 620 | * @buf: write-buffer (of min. flash I/O unit size) | ||
| 621 | * @lnum: logical eraseblock number the write-buffer points to | ||
| 622 | * @offs: write-buffer offset in this logical eraseblock | ||
| 623 | * @avail: number of bytes available in the write-buffer | ||
| 624 | * @used: number of used bytes in the write-buffer | ||
| 625 | * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, | ||
| 626 | * %UBI_UNKNOWN) | ||
| 627 | * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep | ||
| 628 | * up by 'mutex_lock_nested()). | ||
| 629 | * @sync_callback: write-buffer synchronization callback | ||
| 630 | * @io_mutex: serializes write-buffer I/O | ||
| 631 | * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes | ||
| 632 | * fields | ||
| 633 | * @timer: write-buffer timer | ||
| 634 | * @timeout: timer expire interval in jiffies | ||
| 635 | * @need_sync: it is set if its timer expired and needs sync | ||
| 636 | * @next_ino: points to the next position of the following inode number | ||
| 637 | * @inodes: stores the inode numbers of the nodes which are in wbuf | ||
| 638 | * | ||
| 639 | * The write-buffer synchronization callback is called when the write-buffer is | ||
| 640 | * synchronized in order to notify how much space was wasted due to | ||
| 641 | * write-buffer padding and how much free space is left in the LEB. | ||
| 642 | * | ||
| 643 | * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under | ||
| 644 | * spin-lock or mutex because they are written under both mutex and spin-lock. | ||
| 645 | * @buf is appended to under mutex but overwritten under both mutex and | ||
| 646 | * spin-lock. Thus the data between @buf and @buf + @used can be read under | ||
| 647 | * spinlock. | ||
| 648 | */ | ||
| 649 | struct ubifs_wbuf { | ||
| 650 | struct ubifs_info *c; | ||
| 651 | void *buf; | ||
| 652 | int lnum; | ||
| 653 | int offs; | ||
| 654 | int avail; | ||
| 655 | int used; | ||
| 656 | int dtype; | ||
| 657 | int jhead; | ||
| 658 | int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); | ||
| 659 | struct mutex io_mutex; | ||
| 660 | spinlock_t lock; | ||
| 661 | struct timer_list timer; | ||
| 662 | int timeout; | ||
| 663 | int need_sync; | ||
| 664 | int next_ino; | ||
| 665 | ino_t *inodes; | ||
| 666 | }; | ||
| 667 | |||
| 668 | /** | ||
| 669 | * struct ubifs_bud - bud logical eraseblock. | ||
| 670 | * @lnum: logical eraseblock number | ||
| 671 | * @start: where the (uncommitted) bud data starts | ||
| 672 | * @jhead: journal head number this bud belongs to | ||
| 673 | * @list: link in the list buds belonging to the same journal head | ||
| 674 | * @rb: link in the tree of all buds | ||
| 675 | */ | ||
| 676 | struct ubifs_bud { | ||
| 677 | int lnum; | ||
| 678 | int start; | ||
| 679 | int jhead; | ||
| 680 | struct list_head list; | ||
| 681 | struct rb_node rb; | ||
| 682 | }; | ||
| 683 | |||
| 684 | /** | ||
| 685 | * struct ubifs_jhead - journal head. | ||
| 686 | * @wbuf: head's write-buffer | ||
| 687 | * @buds_list: list of bud LEBs belonging to this journal head | ||
| 688 | * | ||
| 689 | * Note, the @buds list is protected by the @c->buds_lock. | ||
| 690 | */ | ||
| 691 | struct ubifs_jhead { | ||
| 692 | struct ubifs_wbuf wbuf; | ||
| 693 | struct list_head buds_list; | ||
| 694 | }; | ||
| 695 | |||
| 696 | /** | ||
| 697 | * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. | ||
| 698 | * @key: key | ||
| 699 | * @znode: znode address in memory | ||
| 700 | * @lnum: LEB number of the indexing node | ||
| 701 | * @offs: offset of the indexing node within @lnum | ||
| 702 | * @len: target node length | ||
| 703 | */ | ||
| 704 | struct ubifs_zbranch { | ||
| 705 | union ubifs_key key; | ||
| 706 | union { | ||
| 707 | struct ubifs_znode *znode; | ||
| 708 | void *leaf; | ||
| 709 | }; | ||
| 710 | int lnum; | ||
| 711 | int offs; | ||
| 712 | int len; | ||
| 713 | }; | ||
| 714 | |||
| 715 | /** | ||
| 716 | * struct ubifs_znode - in-memory representation of an indexing node. | ||
| 717 | * @parent: parent znode or NULL if it is the root | ||
| 718 | * @cnext: next znode to commit | ||
| 719 | * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) | ||
| 720 | * @time: last access time (seconds) | ||
| 721 | * @level: level of the entry in the TNC tree | ||
| 722 | * @child_cnt: count of child znodes | ||
| 723 | * @iip: index in parent's zbranch array | ||
| 724 | * @alt: lower bound of key range has altered i.e. child inserted at slot 0 | ||
| 725 | * @lnum: LEB number of the corresponding indexing node | ||
| 726 | * @offs: offset of the corresponding indexing node | ||
| 727 | * @len: length of the corresponding indexing node | ||
| 728 | * @zbranch: array of znode branches (@c->fanout elements) | ||
| 729 | */ | ||
| 730 | struct ubifs_znode { | ||
| 731 | struct ubifs_znode *parent; | ||
| 732 | struct ubifs_znode *cnext; | ||
| 733 | unsigned long flags; | ||
| 734 | unsigned long time; | ||
| 735 | int level; | ||
| 736 | int child_cnt; | ||
| 737 | int iip; | ||
| 738 | int alt; | ||
| 739 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 740 | int lnum, offs, len; | ||
| 741 | #endif | ||
| 742 | struct ubifs_zbranch zbranch[]; | ||
| 743 | }; | ||
| 744 | |||
| 745 | /** | ||
| 746 | * struct ubifs_node_range - node length range description data structure. | ||
| 747 | * @len: fixed node length | ||
| 748 | * @min_len: minimum possible node length | ||
| 749 | * @max_len: maximum possible node length | ||
| 750 | * | ||
| 751 | * If @max_len is %0, the node has fixed length @len. | ||
| 752 | */ | ||
| 753 | struct ubifs_node_range { | ||
| 754 | union { | ||
| 755 | int len; | ||
| 756 | int min_len; | ||
| 757 | }; | ||
| 758 | int max_len; | ||
| 759 | }; | ||
| 760 | |||
| 761 | /** | ||
| 762 | * struct ubifs_compressor - UBIFS compressor description structure. | ||
| 763 | * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) | ||
| 764 | * @cc: cryptoapi compressor handle | ||
| 765 | * @comp_mutex: mutex used during compression | ||
| 766 | * @decomp_mutex: mutex used during decompression | ||
| 767 | * @name: compressor name | ||
| 768 | * @capi_name: cryptoapi compressor name | ||
| 769 | */ | ||
| 770 | struct ubifs_compressor { | ||
| 771 | int compr_type; | ||
| 772 | struct crypto_comp *cc; | ||
| 773 | struct mutex *comp_mutex; | ||
| 774 | struct mutex *decomp_mutex; | ||
| 775 | const char *name; | ||
| 776 | const char *capi_name; | ||
| 777 | }; | ||
| 778 | |||
| 779 | /** | ||
| 780 | * struct ubifs_budget_req - budget requirements of an operation. | ||
| 781 | * | ||
| 782 | * @fast: non-zero if the budgeting should try to aquire budget quickly and | ||
| 783 | * should not try to call write-back | ||
| 784 | * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields | ||
| 785 | * have to be re-calculated | ||
| 786 | * @new_page: non-zero if the operation adds a new page | ||
| 787 | * @dirtied_page: non-zero if the operation makes a page dirty | ||
| 788 | * @new_dent: non-zero if the operation adds a new directory entry | ||
| 789 | * @mod_dent: non-zero if the operation removes or modifies an existing | ||
| 790 | * directory entry | ||
| 791 | * @new_ino: non-zero if the operation adds a new inode | ||
| 792 | * @new_ino_d: now much data newly created inode contains | ||
| 793 | * @dirtied_ino: how many inodes the operation makes dirty | ||
| 794 | * @dirtied_ino_d: now much data dirtied inode contains | ||
| 795 | * @idx_growth: how much the index will supposedly grow | ||
| 796 | * @data_growth: how much new data the operation will supposedly add | ||
| 797 | * @dd_growth: how much data that makes other data dirty the operation will | ||
| 798 | * supposedly add | ||
| 799 | * | ||
| 800 | * @idx_growth, @data_growth and @dd_growth are not used in budget request. The | ||
| 801 | * budgeting subsystem caches index and data growth values there to avoid | ||
| 802 | * re-calculating them when the budget is released. However, if @idx_growth is | ||
| 803 | * %-1, it is calculated by the release function using other fields. | ||
| 804 | * | ||
| 805 | * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d | ||
| 806 | * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made | ||
| 807 | * dirty by the re-name operation. | ||
| 808 | */ | ||
| 809 | struct ubifs_budget_req { | ||
| 810 | unsigned int fast:1; | ||
| 811 | unsigned int recalculate:1; | ||
| 812 | unsigned int new_page:1; | ||
| 813 | unsigned int dirtied_page:1; | ||
| 814 | unsigned int new_dent:1; | ||
| 815 | unsigned int mod_dent:1; | ||
| 816 | unsigned int new_ino:1; | ||
| 817 | unsigned int new_ino_d:13; | ||
| 818 | #ifndef UBIFS_DEBUG | ||
| 819 | unsigned int dirtied_ino:4; | ||
| 820 | unsigned int dirtied_ino_d:15; | ||
| 821 | #else | ||
| 822 | /* Not bit-fields to check for overflows */ | ||
| 823 | unsigned int dirtied_ino; | ||
| 824 | unsigned int dirtied_ino_d; | ||
| 825 | #endif | ||
| 826 | int idx_growth; | ||
| 827 | int data_growth; | ||
| 828 | int dd_growth; | ||
| 829 | }; | ||
| 830 | |||
| 831 | /** | ||
| 832 | * struct ubifs_orphan - stores the inode number of an orphan. | ||
| 833 | * @rb: rb-tree node of rb-tree of orphans sorted by inode number | ||
| 834 | * @list: list head of list of orphans in order added | ||
| 835 | * @new_list: list head of list of orphans added since the last commit | ||
| 836 | * @cnext: next orphan to commit | ||
| 837 | * @dnext: next orphan to delete | ||
| 838 | * @inum: inode number | ||
| 839 | * @new: %1 => added since the last commit, otherwise %0 | ||
| 840 | */ | ||
| 841 | struct ubifs_orphan { | ||
| 842 | struct rb_node rb; | ||
| 843 | struct list_head list; | ||
| 844 | struct list_head new_list; | ||
| 845 | struct ubifs_orphan *cnext; | ||
| 846 | struct ubifs_orphan *dnext; | ||
| 847 | ino_t inum; | ||
| 848 | int new; | ||
| 849 | }; | ||
| 850 | |||
| 851 | /** | ||
| 852 | * struct ubifs_mount_opts - UBIFS-specific mount options information. | ||
| 853 | * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) | ||
| 854 | */ | ||
| 855 | struct ubifs_mount_opts { | ||
| 856 | unsigned int unmount_mode:2; | ||
| 857 | }; | ||
| 858 | |||
| 859 | /** | ||
| 860 | * struct ubifs_info - UBIFS file-system description data structure | ||
| 861 | * (per-superblock). | ||
| 862 | * @vfs_sb: VFS @struct super_block object | ||
| 863 | * @bdi: backing device info object to make VFS happy and disable readahead | ||
| 864 | * | ||
| 865 | * @highest_inum: highest used inode number | ||
| 866 | * @vfs_gen: VFS inode generation counter | ||
| 867 | * @max_sqnum: current global sequence number | ||
| 868 | * @cmt_no: commit number (last successfully completed commit) | ||
| 869 | * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters | ||
| 870 | * @fmt_version: UBIFS on-flash format version | ||
| 871 | * @uuid: UUID from super block | ||
| 872 | * | ||
| 873 | * @lhead_lnum: log head logical eraseblock number | ||
| 874 | * @lhead_offs: log head offset | ||
| 875 | * @ltail_lnum: log tail logical eraseblock number (offset is always 0) | ||
| 876 | * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and | ||
| 877 | * @bud_bytes | ||
| 878 | * @min_log_bytes: minimum required number of bytes in the log | ||
 * @cmt_bud_bytes: used during commit to temporarily record the number of
 * bytes in committed buds
| 881 | * | ||
| 882 | * @buds: tree of all buds indexed by bud LEB number | ||
| 883 | * @bud_bytes: how many bytes of flash is used by buds | ||
| 884 | * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud | ||
| 885 | * lists | ||
| 886 | * @jhead_cnt: count of journal heads | ||
| 887 | * @jheads: journal heads (head zero is base head) | ||
| 888 | * @max_bud_bytes: maximum number of bytes allowed in buds | ||
| 889 | * @bg_bud_bytes: number of bud bytes when background commit is initiated | ||
| 890 | * @old_buds: buds to be released after commit ends | ||
| 891 | * @max_bud_cnt: maximum number of buds | ||
| 892 | * | ||
| 893 | * @commit_sem: synchronizes committer with other processes | ||
| 894 | * @cmt_state: commit state | ||
| 895 | * @cs_lock: commit state lock | ||
| 896 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running | ||
| 897 | * @fast_unmount: do not run journal commit before un-mounting | ||
| 898 | * @big_lpt: flag that LPT is too big to write whole during commit | ||
| 899 | * @check_lpt_free: flag that indicates LPT GC may be needed | ||
| 900 | * @nospace: non-zero if the file-system does not have flash space (used as | ||
| 901 | * optimization) | ||
| 902 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
| 903 | * pool is full | ||
| 904 | * | ||
| 905 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and | ||
| 906 | * @calc_idx_sz | ||
| 907 | * @zroot: zbranch which points to the root index node and znode | ||
| 908 | * @cnext: next znode to commit | ||
| 909 | * @enext: next znode to commit to empty space | ||
| 910 | * @gap_lebs: array of LEBs used by the in-gaps commit method | ||
| 911 | * @cbuf: commit buffer | ||
| 912 | * @ileb_buf: buffer for commit in-the-gaps method | ||
| 913 | * @ileb_len: length of data in ileb_buf | ||
| 914 | * @ihead_lnum: LEB number of index head | ||
| 915 | * @ihead_offs: offset of index head | ||
| 916 | * @ilebs: pre-allocated index LEBs | ||
| 917 | * @ileb_cnt: number of pre-allocated index LEBs | ||
| 918 | * @ileb_nxt: next pre-allocated index LEBs | ||
| 919 | * @old_idx: tree of index nodes obsoleted since the last commit start | ||
| 920 | * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c | ||
| 921 | * @new_ihead_lnum: used by debugging to check ihead_lnum | ||
| 922 | * @new_ihead_offs: used by debugging to check ihead_offs | ||
| 923 | * | ||
| 924 | * @mst_node: master node | ||
| 925 | * @mst_offs: offset of valid master node | ||
| 926 | * @mst_mutex: protects the master node area, @mst_node, and @mst_offs | ||
| 927 | * | ||
| 928 | * @log_lebs: number of logical eraseblocks in the log | ||
| 929 | * @log_bytes: log size in bytes | ||
| 930 | * @log_last: last LEB of the log | ||
| 931 | * @lpt_lebs: number of LEBs used for lprops table | ||
| 932 | * @lpt_first: first LEB of the lprops table area | ||
| 933 | * @lpt_last: last LEB of the lprops table area | ||
| 934 | * @orph_lebs: number of LEBs used for the orphan area | ||
| 935 | * @orph_first: first LEB of the orphan area | ||
| 936 | * @orph_last: last LEB of the orphan area | ||
| 937 | * @main_lebs: count of LEBs in the main area | ||
| 938 | * @main_first: first LEB of the main area | ||
| 939 | * @main_bytes: main area size in bytes | ||
| 940 | * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) | ||
| 941 | * | ||
| 942 | * @key_hash_type: type of the key hash | ||
| 943 | * @key_hash: direntry key hash function | ||
| 944 | * @key_fmt: key format | ||
| 945 | * @key_len: key length | ||
| 946 | * @fanout: fanout of the index tree (number of links per indexing node) | ||
| 947 | * | ||
| 948 | * @min_io_size: minimal input/output unit size | ||
| 949 | * @min_io_shift: number of bits in @min_io_size minus one | ||
| 950 | * @leb_size: logical eraseblock size in bytes | ||
| 951 | * @half_leb_size: half LEB size | ||
| 952 | * @leb_cnt: count of logical eraseblocks | ||
| 953 | * @max_leb_cnt: maximum count of logical eraseblocks | ||
| 954 | * @old_leb_cnt: count of logical eraseblocks before re-size | ||
| 955 | * @ro_media: the underlying UBI volume is read-only | ||
| 956 | * | ||
| 957 | * @dirty_pg_cnt: number of dirty pages (not used) | ||
| 958 | * @dirty_zn_cnt: number of dirty znodes | ||
| 959 | * @clean_zn_cnt: number of clean znodes | ||
| 960 | * | ||
| 961 | * @budg_idx_growth: amount of bytes budgeted for index growth | ||
| 962 | * @budg_data_growth: amount of bytes budgeted for cached data | ||
| 963 | * @budg_dd_growth: amount of bytes budgeted for cached data that will make | ||
| 964 | * other data dirty | ||
| 965 | * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, | ||
| 966 | * but which still have to be taken into account because | ||
| 967 | * the index has not been committed so far | ||
| 968 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, | ||
 * @budg_uncommitted_idx, @min_idx_lebs, @old_idx_sz, and @lst
| 970 | * @min_idx_lebs: minimum number of LEBs required for the index | ||
| 971 | * @old_idx_sz: size of index on flash | ||
| 972 | * @calc_idx_sz: temporary variable which is used to calculate new index size | ||
| 973 | * (contains accurate new index size at end of TNC commit start) | ||
| 974 | * @lst: lprops statistics | ||
| 975 | * | ||
| 976 | * @page_budget: budget for a page | ||
| 977 | * @inode_budget: budget for an inode | ||
| 978 | * @dent_budget: budget for a directory entry | ||
| 979 | * | ||
| 980 | * @ref_node_alsz: size of the LEB reference node aligned to the min. flash | ||
| 981 | * I/O unit | ||
| 982 | * @mst_node_alsz: master node aligned size | ||
| 983 | * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary | ||
| 984 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | ||
| 985 | * @max_inode_sz: maximum possible inode size in bytes | ||
| 986 | * @max_znode_sz: size of znode in bytes | ||
| 987 | * @dead_wm: LEB dead space watermark | ||
| 988 | * @dark_wm: LEB dark space watermark | ||
| 989 | * @block_cnt: count of 4KiB blocks on the FS | ||
| 990 | * | ||
| 991 | * @ranges: UBIFS node length ranges | ||
| 992 | * @ubi: UBI volume descriptor | ||
| 993 | * @di: UBI device information | ||
| 994 | * @vi: UBI volume information | ||
| 995 | * | ||
| 996 | * @orph_tree: rb-tree of orphan inode numbers | ||
| 997 | * @orph_list: list of orphan inode numbers in order added | ||
| 998 | * @orph_new: list of orphan inode numbers added since last commit | ||
| 999 | * @orph_cnext: next orphan to commit | ||
| 1000 | * @orph_dnext: next orphan to delete | ||
| 1001 | * @orphan_lock: lock for orph_tree and orph_new | ||
| 1002 | * @orph_buf: buffer for orphan nodes | ||
| 1003 | * @new_orphans: number of orphans since last commit | ||
| 1004 | * @cmt_orphans: number of orphans being committed | ||
| 1005 | * @tot_orphans: number of orphans in the rb_tree | ||
| 1006 | * @max_orphans: maximum number of orphans allowed | ||
| 1007 | * @ohead_lnum: orphan head LEB number | ||
| 1008 | * @ohead_offs: orphan head offset | ||
| 1009 | * @no_orphs: non-zero if there are no orphans | ||
| 1010 | * | ||
| 1011 | * @bgt: UBIFS background thread | ||
| 1012 | * @bgt_name: background thread name | ||
| 1013 | * @need_bgt: if background thread should run | ||
| 1014 | * @need_wbuf_sync: if write-buffers have to be synchronized | ||
| 1015 | * | ||
| 1016 | * @gc_lnum: LEB number used for garbage collection | ||
| 1017 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | ||
| 1018 | * @idx_gc: list of index LEBs that have been garbage collected | ||
| 1019 | * @idx_gc_cnt: number of elements on the idx_gc list | ||
| 1020 | * | ||
| 1021 | * @infos_list: links all 'ubifs_info' objects | ||
| 1022 | * @umount_mutex: serializes shrinker and un-mount | ||
| 1023 | * @shrinker_run_no: shrinker run number | ||
| 1024 | * | ||
| 1025 | * @space_bits: number of bits needed to record free or dirty space | ||
| 1026 | * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT | ||
| 1027 | * @lpt_offs_bits: number of bits needed to record an offset in the LPT | ||
 * @lpt_spc_bits: number of bits needed to record free or dirty space in the LPT
| 1029 | * @pcnt_bits: number of bits needed to record pnode or nnode number | ||
| 1030 | * @lnum_bits: number of bits needed to record LEB number | ||
| 1031 | * @nnode_sz: size of on-flash nnode | ||
| 1032 | * @pnode_sz: size of on-flash pnode | ||
| 1033 | * @ltab_sz: size of on-flash LPT lprops table | ||
| 1034 | * @lsave_sz: size of on-flash LPT save table | ||
| 1035 | * @pnode_cnt: number of pnodes | ||
| 1036 | * @nnode_cnt: number of nnodes | ||
| 1037 | * @lpt_hght: height of the LPT | ||
| 1038 | * @pnodes_have: number of pnodes in memory | ||
| 1039 | * | ||
| 1040 | * @lp_mutex: protects lprops table and all the other lprops-related fields | ||
| 1041 | * @lpt_lnum: LEB number of the root nnode of the LPT | ||
| 1042 | * @lpt_offs: offset of the root nnode of the LPT | ||
| 1043 | * @nhead_lnum: LEB number of LPT head | ||
| 1044 | * @nhead_offs: offset of LPT head | ||
| 1045 | * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab | ||
| 1046 | * @dirty_nn_cnt: number of dirty nnodes | ||
| 1047 | * @dirty_pn_cnt: number of dirty pnodes | ||
| 1048 | * @lpt_sz: LPT size | ||
| 1049 | * @lpt_nod_buf: buffer for an on-flash nnode or pnode | ||
| 1050 | * @lpt_buf: buffer of LEB size used by LPT | ||
| 1051 | * @nroot: address in memory of the root nnode of the LPT | ||
| 1052 | * @lpt_cnext: next LPT node to commit | ||
| 1053 | * @lpt_heap: array of heaps of categorized lprops | ||
| 1054 | * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at | ||
| 1055 | * previous commit start | ||
| 1056 | * @uncat_list: list of un-categorized LEBs | ||
| 1057 | * @empty_list: list of empty LEBs | ||
| 1058 | * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) | ||
| 1059 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) | ||
| 1060 | * @freeable_cnt: number of freeable LEBs in @freeable_list | ||
| 1061 | * | ||
| 1062 | * @ltab_lnum: LEB number of LPT's own lprops table | ||
| 1063 | * @ltab_offs: offset of LPT's own lprops table | ||
| 1064 | * @ltab: LPT's own lprops table | ||
| 1065 | * @ltab_cmt: LPT's own lprops table (commit copy) | ||
| 1066 | * @lsave_cnt: number of LEB numbers in LPT's save table | ||
| 1067 | * @lsave_lnum: LEB number of LPT's save table | ||
| 1068 | * @lsave_offs: offset of LPT's save table | ||
| 1069 | * @lsave: LPT's save table | ||
| 1070 | * @lscan_lnum: LEB number of last LPT scan | ||
| 1071 | * | ||
| 1072 | * @rp_size: size of the reserved pool in bytes | ||
| 1073 | * @report_rp_size: size of the reserved pool reported to user-space | ||
| 1074 | * @rp_uid: reserved pool user ID | ||
| 1075 | * @rp_gid: reserved pool group ID | ||
| 1076 | * | ||
| 1077 | * @empty: if the UBI device is empty | ||
| 1078 | * @replay_tree: temporary tree used during journal replay | ||
| 1079 | * @replay_list: temporary list used during journal replay | ||
| 1080 | * @replay_buds: list of buds to replay | ||
| 1081 | * @cs_sqnum: sequence number of first node in the log (commit start node) | ||
| 1082 | * @replay_sqnum: sequence number of node currently being replayed | ||
| 1083 | * @need_recovery: file-system needs recovery | ||
| 1084 | * @replaying: set to %1 during journal replay | ||
| 1085 | * @unclean_leb_list: LEBs to recover when mounting ro to rw | ||
| 1086 | * @rcvrd_mst_node: recovered master node to write when mounting ro to rw | ||
| 1087 | * @size_tree: inode size information for recovery | ||
| 1088 | * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) | ||
| 1089 | * @mount_opts: UBIFS-specific mount options | ||
| 1090 | * | ||
| 1091 | * @dbg_buf: a buffer of LEB size used for debugging purposes | ||
| 1092 | * @old_zroot: old index root - used by 'dbg_check_old_index()' | ||
| 1093 | * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' | ||
| 1094 | * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' | ||
| 1095 | * @failure_mode: failure mode for recovery testing | ||
| 1096 | * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls | ||
| 1097 | * @fail_timeout: time in jiffies when delay of failure mode expires | ||
| 1098 | * @fail_cnt: current number of calls to failure mode I/O functions | ||
| 1099 | * @fail_cnt_max: number of calls by which to delay failure mode | ||
| 1100 | */ | ||
struct ubifs_info {
	/* VFS superblock and backing device */
	struct super_block *vfs_sb;
	struct backing_dev_info bdi;

	/* General counters and identity (see kernel-doc above) */
	ino_t highest_inum;
	unsigned int vfs_gen;
	unsigned long long max_sqnum;
	unsigned long long cmt_no;
	spinlock_t cnt_lock;
	int fmt_version;
	unsigned char uuid[16];

	/* Journal log head/tail position */
	int lhead_lnum;
	int lhead_offs;
	int ltail_lnum;
	struct mutex log_mutex;
	int min_log_bytes;
	long long cmt_bud_bytes;

	/* Journal heads and buds */
	struct rb_root buds;
	long long bud_bytes;
	spinlock_t buds_lock;
	int jhead_cnt;
	struct ubifs_jhead *jheads;
	long long max_bud_bytes;
	long long bg_bud_bytes;
	struct list_head old_buds;
	int max_bud_cnt;

	/* Commit state and miscellaneous single-bit flags */
	struct rw_semaphore commit_sem;
	int cmt_state;
	spinlock_t cs_lock;
	wait_queue_head_t cmt_wq;
	unsigned int fast_unmount:1;
	unsigned int big_lpt:1;
	unsigned int check_lpt_free:1;
	unsigned int nospace:1;
	unsigned int nospace_rp:1;

	/* TNC (the in-memory index tree) */
	struct mutex tnc_mutex;
	struct ubifs_zbranch zroot;
	struct ubifs_znode *cnext;
	struct ubifs_znode *enext;
	int *gap_lebs;
	void *cbuf;
	void *ileb_buf;
	int ileb_len;
	int ihead_lnum;
	int ihead_offs;
	int *ilebs;
	int ileb_cnt;
	int ileb_nxt;
	struct rb_root old_idx;
	int *bottom_up_buf;
#ifdef CONFIG_UBIFS_FS_DEBUG
	int new_ihead_lnum;
	int new_ihead_offs;
#endif

	/* Master node */
	struct ubifs_mst_node *mst_node;
	int mst_offs;
	struct mutex mst_mutex;

	/* On-media layout: log, LPT, orphan and main area geometry */
	int log_lebs;
	long long log_bytes;
	int log_last;
	int lpt_lebs;
	int lpt_first;
	int lpt_last;
	int orph_lebs;
	int orph_first;
	int orph_last;
	int main_lebs;
	int main_first;
	long long main_bytes;
	int default_compr;

	/* Index key scheme */
	uint8_t key_hash_type;
	uint32_t (*key_hash)(const char *str, int len);
	int key_fmt;
	int key_len;
	int fanout;

	/* Underlying flash media characteristics */
	int min_io_size;
	int min_io_shift;
	int leb_size;
	int half_leb_size;
	int leb_cnt;
	int max_leb_cnt;
	int old_leb_cnt;
	int ro_media;

	/* Dirty/clean object counters */
	atomic_long_t dirty_pg_cnt;
	atomic_long_t dirty_zn_cnt;
	atomic_long_t clean_zn_cnt;

	/* Space budgeting state */
	long long budg_idx_growth;
	long long budg_data_growth;
	long long budg_dd_growth;
	long long budg_uncommitted_idx;
	spinlock_t space_lock;
	int min_idx_lebs;
	unsigned long long old_idx_sz;
	unsigned long long calc_idx_sz;
	struct ubifs_lp_stats lst;

	/* Per-object budget amounts */
	int page_budget;
	int inode_budget;
	int dent_budget;

	/* Node size bounds and space watermarks */
	int ref_node_alsz;
	int mst_node_alsz;
	int min_idx_node_sz;
	int max_idx_node_sz;
	long long max_inode_sz;
	int max_znode_sz;
	int dead_wm;
	int dark_wm;
	int block_cnt;

	/* Node ranges and the underlying UBI volume/device */
	struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT];
	struct ubi_volume_desc *ubi;
	struct ubi_device_info di;
	struct ubi_volume_info vi;

	/* Orphan inode tracking */
	struct rb_root orph_tree;
	struct list_head orph_list;
	struct list_head orph_new;
	struct ubifs_orphan *orph_cnext;
	struct ubifs_orphan *orph_dnext;
	spinlock_t orphan_lock;
	void *orph_buf;
	int new_orphans;
	int cmt_orphans;
	int tot_orphans;
	int max_orphans;
	int ohead_lnum;
	int ohead_offs;
	int no_orphs;

	/* Background thread */
	struct task_struct *bgt;
	char bgt_name[sizeof(BGT_NAME_PATTERN) + 9];
	int need_bgt;
	int need_wbuf_sync;

	/* Garbage collection */
	int gc_lnum;
	void *sbuf;
	struct list_head idx_gc;
	int idx_gc_cnt;

	/* Linkage into the global list of mounted UBIFS instances */
	struct list_head infos_list;
	struct mutex umount_mutex;
	unsigned int shrinker_run_no;

	/* LPT on-media encoding geometry (field/bit widths and node sizes) */
	int space_bits;
	int lpt_lnum_bits;
	int lpt_offs_bits;
	int lpt_spc_bits;
	int pcnt_bits;
	int lnum_bits;
	int nnode_sz;
	int pnode_sz;
	int ltab_sz;
	int lsave_sz;
	int pnode_cnt;
	int nnode_cnt;
	int lpt_hght;
	int pnodes_have;

	/* LPT (LEB properties tree) run-time state and category lists */
	struct mutex lp_mutex;
	int lpt_lnum;
	int lpt_offs;
	int nhead_lnum;
	int nhead_offs;
	int lpt_drty_flgs;
	int dirty_nn_cnt;
	int dirty_pn_cnt;
	long long lpt_sz;
	void *lpt_nod_buf;
	void *lpt_buf;
	struct ubifs_nnode *nroot;
	struct ubifs_cnode *lpt_cnext;
	struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT];
	struct ubifs_lpt_heap dirty_idx;
	struct list_head uncat_list;
	struct list_head empty_list;
	struct list_head freeable_list;
	struct list_head frdi_idx_list;
	int freeable_cnt;

	/* LPT's own lprops table and save table (see kernel-doc above) */
	int ltab_lnum;
	int ltab_offs;
	struct ubifs_lpt_lprops *ltab;
	struct ubifs_lpt_lprops *ltab_cmt;
	int lsave_cnt;
	int lsave_lnum;
	int lsave_offs;
	int *lsave;
	int lscan_lnum;

	/* Reserved pool (space reserved for a privileged user/group) */
	long long rp_size;
	long long report_rp_size;
	uid_t rp_uid;
	gid_t rp_gid;

	/* The below fields are used only during mounting and re-mounting */
	int empty;
	struct rb_root replay_tree;
	struct list_head replay_list;
	struct list_head replay_buds;
	unsigned long long cs_sqnum;
	unsigned long long replay_sqnum;
	int need_recovery;
	int replaying;
	struct list_head unclean_leb_list;
	struct ubifs_mst_node *rcvrd_mst_node;
	struct rb_root size_tree;
	int remounting_rw;
	struct ubifs_mount_opts mount_opts;

	/* Debugging-only state (old index checking, failure injection) */
#ifdef CONFIG_UBIFS_FS_DEBUG
	void *dbg_buf;
	struct ubifs_zbranch old_zroot;
	int old_zroot_level;
	unsigned long long old_zroot_sqnum;
	int failure_mode;
	int fail_delay;
	unsigned long fail_timeout;
	unsigned int fail_cnt;
	unsigned int fail_cnt_max;
#endif
};
| 1333 | |||
/* Globals and operation tables defined in other UBIFS source files */
extern struct list_head ubifs_infos;
extern spinlock_t ubifs_infos_lock;
extern atomic_long_t ubifs_clean_zn_cnt;
extern struct kmem_cache *ubifs_inode_slab;
extern struct super_operations ubifs_super_operations;
extern struct address_space_operations ubifs_file_address_operations;
extern struct file_operations ubifs_file_operations;
extern struct inode_operations ubifs_file_inode_operations;
extern struct file_operations ubifs_dir_operations;
extern struct inode_operations ubifs_dir_inode_operations;
extern struct inode_operations ubifs_symlink_inode_operations;
extern struct backing_dev_info ubifs_backing_dev_info;
extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
| 1347 | |||
| 1348 | /* io.c */ | ||
| 1349 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); | ||
| 1350 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | ||
| 1351 | int dtype); | ||
| 1352 | int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); | ||
| 1353 | int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, | ||
| 1354 | int lnum, int offs); | ||
| 1355 | int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, | ||
| 1356 | int lnum, int offs); | ||
| 1357 | int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, | ||
| 1358 | int offs, int dtype); | ||
| 1359 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | ||
| 1360 | int offs, int quiet); | ||
| 1361 | void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); | ||
| 1362 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); | ||
| 1363 | int ubifs_io_init(struct ubifs_info *c); | ||
| 1364 | void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); | ||
| 1365 | int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf); | ||
| 1366 | int ubifs_bg_wbufs_sync(struct ubifs_info *c); | ||
| 1367 | void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum); | ||
| 1368 | int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); | ||
| 1369 | |||
| 1370 | /* scan.c */ | ||
| 1371 | struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, | ||
| 1372 | int offs, void *sbuf); | ||
| 1373 | void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); | ||
| 1374 | int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | ||
| 1375 | int offs, int quiet); | ||
| 1376 | struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, | ||
| 1377 | int offs, void *sbuf); | ||
| 1378 | void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, | ||
| 1379 | int lnum, int offs); | ||
| 1380 | int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, | ||
| 1381 | void *buf, int offs); | ||
| 1382 | void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, | ||
| 1383 | void *buf); | ||
| 1384 | |||
| 1385 | /* log.c */ | ||
| 1386 | void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); | ||
| 1387 | void ubifs_create_buds_lists(struct ubifs_info *c); | ||
| 1388 | int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); | ||
| 1389 | struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); | ||
| 1390 | struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum); | ||
| 1391 | int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); | ||
| 1392 | int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); | ||
| 1393 | int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); | ||
| 1394 | int ubifs_consolidate_log(struct ubifs_info *c); | ||
| 1395 | |||
| 1396 | /* journal.c */ | ||
| 1397 | int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, | ||
| 1398 | const struct qstr *nm, const struct inode *inode, | ||
| 1399 | int deletion, int xent); | ||
| 1400 | int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | ||
| 1401 | const union ubifs_key *key, const void *buf, int len); | ||
| 1402 | int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, | ||
| 1403 | int last_reference); | ||
| 1404 | int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, | ||
| 1405 | const struct dentry *old_dentry, | ||
| 1406 | const struct inode *new_dir, | ||
| 1407 | const struct dentry *new_dentry, int sync); | ||
| 1408 | int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, | ||
| 1409 | loff_t old_size, loff_t new_size); | ||
| 1410 | int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, | ||
| 1411 | const struct inode *inode, const struct qstr *nm); | ||
| 1412 | int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, | ||
| 1413 | const struct inode *inode2); | ||
| 1414 | |||
| 1415 | /* budget.c */ | ||
| 1416 | int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req); | ||
| 1417 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); | ||
| 1418 | void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | ||
| 1419 | struct ubifs_inode *ui); | ||
| 1420 | int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, | ||
| 1421 | struct ubifs_budget_req *req); | ||
| 1422 | void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | ||
| 1423 | struct ubifs_budget_req *req); | ||
| 1424 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | ||
| 1425 | struct ubifs_budget_req *req); | ||
| 1426 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | ||
| 1427 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | ||
| 1428 | void ubifs_convert_page_budget(struct ubifs_info *c); | ||
| 1429 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | ||
| 1430 | |||
| 1431 | /* find.c */ | ||
| 1432 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | ||
| 1433 | int squeeze); | ||
| 1434 | int ubifs_find_free_leb_for_idx(struct ubifs_info *c); | ||
| 1435 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | ||
| 1436 | int min_space, int pick_free); | ||
| 1437 | int ubifs_find_dirty_idx_leb(struct ubifs_info *c); | ||
| 1438 | int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | ||
| 1439 | |||
| 1440 | /* tnc.c */ | ||
| 1441 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1442 | struct ubifs_znode **zn, int *n); | ||
| 1443 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1444 | void *node); | ||
| 1445 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1446 | void *node, const struct qstr *nm); | ||
| 1447 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1448 | void *node, int *lnum, int *offs); | ||
| 1449 | int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, | ||
| 1450 | int offs, int len); | ||
| 1451 | int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1452 | int old_lnum, int old_offs, int lnum, int offs, int len); | ||
| 1453 | int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1454 | int lnum, int offs, int len, const struct qstr *nm); | ||
| 1455 | int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); | ||
| 1456 | int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1457 | const struct qstr *nm); | ||
| 1458 | int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, | ||
| 1459 | union ubifs_key *to_key); | ||
| 1460 | int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); | ||
| 1461 | struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, | ||
| 1462 | union ubifs_key *key, | ||
| 1463 | const struct qstr *nm); | ||
| 1464 | void ubifs_tnc_close(struct ubifs_info *c); | ||
| 1465 | int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, | ||
| 1466 | int lnum, int offs, int is_idx); | ||
| 1467 | int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, | ||
| 1468 | int lnum, int offs); | ||
| 1469 | /* Shared by tnc.c for tnc_commit.c */ | ||
| 1470 | void destroy_old_idx(struct ubifs_info *c); | ||
| 1471 | int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, | ||
| 1472 | int lnum, int offs); | ||
| 1473 | int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); | ||
| 1474 | |||
| 1475 | /* tnc_misc.c */ | ||
| 1476 | struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, | ||
| 1477 | struct ubifs_znode *znode); | ||
| 1478 | int ubifs_search_zbranch(const struct ubifs_info *c, | ||
| 1479 | const struct ubifs_znode *znode, | ||
| 1480 | const union ubifs_key *key, int *n); | ||
| 1481 | struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); | ||
| 1482 | struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); | ||
| 1483 | long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); | ||
| 1484 | struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, | ||
| 1485 | struct ubifs_zbranch *zbr, | ||
| 1486 | struct ubifs_znode *parent, int iip); | ||
| 1487 | int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, | ||
| 1488 | void *node); | ||
| 1489 | |||
| 1490 | /* tnc_commit.c */ | ||
| 1491 | int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); | ||
| 1492 | int ubifs_tnc_end_commit(struct ubifs_info *c); | ||
| 1493 | |||
| 1494 | /* shrinker.c */ | ||
| 1495 | int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); | ||
| 1496 | |||
| 1497 | /* commit.c */ | ||
| 1498 | int ubifs_bg_thread(void *info); | ||
| 1499 | void ubifs_commit_required(struct ubifs_info *c); | ||
| 1500 | void ubifs_request_bg_commit(struct ubifs_info *c); | ||
| 1501 | int ubifs_run_commit(struct ubifs_info *c); | ||
| 1502 | void ubifs_recovery_commit(struct ubifs_info *c); | ||
| 1503 | int ubifs_gc_should_commit(struct ubifs_info *c); | ||
| 1504 | void ubifs_wait_for_commit(struct ubifs_info *c); | ||
| 1505 | |||
| 1506 | /* master.c */ | ||
| 1507 | int ubifs_read_master(struct ubifs_info *c); | ||
| 1508 | int ubifs_write_master(struct ubifs_info *c); | ||
| 1509 | |||
| 1510 | /* sb.c */ | ||
| 1511 | int ubifs_read_superblock(struct ubifs_info *c); | ||
| 1512 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); | ||
| 1513 | int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); | ||
| 1514 | |||
| 1515 | /* replay.c */ | ||
| 1516 | int ubifs_validate_entry(struct ubifs_info *c, | ||
| 1517 | const struct ubifs_dent_node *dent); | ||
| 1518 | int ubifs_replay_journal(struct ubifs_info *c); | ||
| 1519 | |||
| 1520 | /* gc.c */ | ||
| 1521 | int ubifs_garbage_collect(struct ubifs_info *c, int anyway); | ||
| 1522 | int ubifs_gc_start_commit(struct ubifs_info *c); | ||
| 1523 | int ubifs_gc_end_commit(struct ubifs_info *c); | ||
| 1524 | void ubifs_destroy_idx_gc(struct ubifs_info *c); | ||
| 1525 | int ubifs_get_idx_gc_leb(struct ubifs_info *c); | ||
| 1526 | int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); | ||
| 1527 | |||
| 1528 | /* orphan.c */ | ||
| 1529 | int ubifs_add_orphan(struct ubifs_info *c, ino_t inum); | ||
| 1530 | void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); | ||
| 1531 | int ubifs_orphan_start_commit(struct ubifs_info *c); | ||
| 1532 | int ubifs_orphan_end_commit(struct ubifs_info *c); | ||
| 1533 | int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); | ||
| 1534 | |||
| 1535 | /* lpt.c */ | ||
| 1536 | int ubifs_calc_lpt_geom(struct ubifs_info *c); | ||
| 1537 | int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, | ||
| 1538 | int *lpt_lebs, int *big_lpt); | ||
| 1539 | int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr); | ||
| 1540 | struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum); | ||
| 1541 | struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum); | ||
| 1542 | int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, | ||
| 1543 | ubifs_lpt_scan_callback scan_cb, void *data); | ||
| 1544 | |||
| 1545 | /* Shared by lpt.c for lpt_commit.c */ | ||
| 1546 | void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave); | ||
| 1547 | void ubifs_pack_ltab(struct ubifs_info *c, void *buf, | ||
| 1548 | struct ubifs_lpt_lprops *ltab); | ||
| 1549 | void ubifs_pack_pnode(struct ubifs_info *c, void *buf, | ||
| 1550 | struct ubifs_pnode *pnode); | ||
| 1551 | void ubifs_pack_nnode(struct ubifs_info *c, void *buf, | ||
| 1552 | struct ubifs_nnode *nnode); | ||
| 1553 | struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, | ||
| 1554 | struct ubifs_nnode *parent, int iip); | ||
| 1555 | struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, | ||
| 1556 | struct ubifs_nnode *parent, int iip); | ||
| 1557 | int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip); | ||
| 1558 | void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); | ||
| 1559 | void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); | ||
| 1560 | uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); | ||
| 1561 | struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); | ||
| 1562 | |||
| 1563 | /* lpt_commit.c */ | ||
| 1564 | int ubifs_lpt_start_commit(struct ubifs_info *c); | ||
| 1565 | int ubifs_lpt_end_commit(struct ubifs_info *c); | ||
| 1566 | int ubifs_lpt_post_commit(struct ubifs_info *c); | ||
| 1567 | void ubifs_lpt_free(struct ubifs_info *c, int wr_only); | ||
| 1568 | |||
| 1569 | /* lprops.c */ | ||
| 1570 | void ubifs_get_lprops(struct ubifs_info *c); | ||
| 1571 | const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | ||
| 1572 | const struct ubifs_lprops *lp, | ||
| 1573 | int free, int dirty, int flags, | ||
| 1574 | int idx_gc_cnt); | ||
| 1575 | void ubifs_release_lprops(struct ubifs_info *c); | ||
| 1576 | void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); | ||
| 1577 | void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, | ||
| 1578 | int cat); | ||
| 1579 | void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, | ||
| 1580 | struct ubifs_lprops *new_lprops); | ||
| 1581 | void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops); | ||
| 1582 | int ubifs_categorize_lprops(const struct ubifs_info *c, | ||
| 1583 | const struct ubifs_lprops *lprops); | ||
| 1584 | int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, | ||
| 1585 | int flags_set, int flags_clean, int idx_gc_cnt); | ||
| 1586 | int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, | ||
| 1587 | int flags_set, int flags_clean); | ||
| 1588 | int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp); | ||
| 1589 | const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); | ||
| 1590 | const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); | ||
| 1591 | const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); | ||
| 1592 | const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); | ||
| 1593 | |||
| 1594 | /* file.c */ | ||
| 1595 | int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); | ||
| 1596 | int ubifs_setattr(struct dentry *dentry, struct iattr *attr); | ||
| 1597 | |||
| 1598 | /* dir.c */ | ||
| 1599 | struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, | ||
| 1600 | int mode); | ||
| 1601 | int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 1602 | struct kstat *stat); | ||
| 1603 | |||
| 1604 | /* xattr.c */ | ||
| 1605 | int ubifs_setxattr(struct dentry *dentry, const char *name, | ||
| 1606 | const void *value, size_t size, int flags); | ||
| 1607 | ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, | ||
| 1608 | size_t size); | ||
| 1609 | ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); | ||
| 1610 | int ubifs_removexattr(struct dentry *dentry, const char *name); | ||
| 1611 | |||
| 1612 | /* super.c */ | ||
| 1613 | struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); | ||
| 1614 | |||
| 1615 | /* recovery.c */ | ||
| 1616 | int ubifs_recover_master_node(struct ubifs_info *c); | ||
| 1617 | int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); | ||
| 1618 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | ||
| 1619 | int offs, void *sbuf, int grouped); | ||
| 1620 | struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | ||
| 1621 | int offs, void *sbuf); | ||
| 1622 | int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); | ||
| 1623 | int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); | ||
| 1624 | int ubifs_rcvry_gc_commit(struct ubifs_info *c); | ||
| 1625 | int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, | ||
| 1626 | int deletion, loff_t new_size); | ||
| 1627 | int ubifs_recover_size(struct ubifs_info *c); | ||
| 1628 | void ubifs_destroy_size_tree(struct ubifs_info *c); | ||
| 1629 | |||
| 1630 | /* ioctl.c */ | ||
| 1631 | long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
| 1632 | void ubifs_set_inode_flags(struct inode *inode); | ||
| 1633 | #ifdef CONFIG_COMPAT | ||
| 1634 | long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
| 1635 | #endif | ||
| 1636 | |||
| 1637 | /* compressor.c */ | ||
| 1638 | int __init ubifs_compressors_init(void); | ||
| 1639 | void __exit ubifs_compressors_exit(void); | ||
| 1640 | void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, | ||
| 1641 | int *compr_type); | ||
| 1642 | int ubifs_decompress(const void *buf, int len, void *out, int *out_len, | ||
| 1643 | int compr_type); | ||
| 1644 | |||
| 1645 | #include "debug.h" | ||
| 1646 | #include "misc.h" | ||
| 1647 | #include "key.h" | ||
| 1648 | |||
| 1649 | #endif /* !__UBIFS_H__ */ | ||
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c new file mode 100644 index 000000000000..1388a078e1a9 --- /dev/null +++ b/fs/ubifs/xattr.c | |||
| @@ -0,0 +1,581 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements UBIFS extended attributes support. | ||
| 25 | * | ||
| 26 | * Extended attributes are implemented as regular inodes with attached data, | ||
| 27 | * which limits extended attribute size to UBIFS block size (4KiB). Names of | ||
| 28 | * extended attributes are described by extended attribute entries (xentries), | ||
| 29 | * which are almost identical to directory entries, but have different key type. | ||
| 30 | * | ||
| 31 | * In other words, the situation with extended attributes is very similar to | ||
| 32 | * directories. Indeed, any inode (but of course not xattr inodes) may have a | ||
| 33 | * number of associated xentries, just like directory inodes have associated | ||
| 34 | * directory entries. Extended attribute entries store the name of the extended | ||
| 35 | * attribute, the host inode number, and the extended attribute inode number. | ||
| 36 | * Similarly, direntries store the name, the parent and the target inode | ||
| 37 | * numbers. Thus, most of the common UBIFS mechanisms may be re-used for | ||
| 38 | * extended attributes. | ||
| 39 | * | ||
 * The number of extended attributes is not limited, but there is a Linux
| 41 | * limitation on the maximum possible size of the list of all extended | ||
| 42 | * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure | ||
| 43 | * the sum of all extended attribute names of the inode does not exceed that | ||
| 44 | * limit. | ||
| 45 | * | ||
| 46 | * Extended attributes are synchronous, which means they are written to the | ||
| 47 | * flash media synchronously and there is no write-back for extended attribute | ||
| 48 | * inodes. The extended attribute values are not stored in compressed form on | ||
| 49 | * the media. | ||
| 50 | * | ||
| 51 | * Since extended attributes are represented by regular inodes, they are cached | ||
| 52 | * in the VFS inode cache. The xentries are cached in the LNC cache (see | ||
| 53 | * tnc.c). | ||
| 54 | * | ||
| 55 | * ACL support is not implemented. | ||
| 56 | */ | ||
| 57 | |||
| 58 | #include <linux/xattr.h> | ||
| 59 | #include <linux/posix_acl_xattr.h> | ||
| 60 | #include "ubifs.h" | ||
| 61 | |||
| 62 | /* | ||
| 63 | * Limit the number of extended attributes per inode so that the total size | ||
| 64 | * (xattr_size) is guaranteeded to fit in an 'unsigned int'. | ||
| 65 | */ | ||
| 66 | #define MAX_XATTRS_PER_INODE 65535 | ||
| 67 | |||
| 68 | /* | ||
| 69 | * Extended attribute type constants. | ||
| 70 | * | ||
| 71 | * USER_XATTR: user extended attribute ("user.*") | ||
| 72 | * TRUSTED_XATTR: trusted extended attribute ("trusted.*) | ||
| 73 | * SECURITY_XATTR: security extended attribute ("security.*") | ||
| 74 | */ | ||
| 75 | enum { | ||
| 76 | USER_XATTR, | ||
| 77 | TRUSTED_XATTR, | ||
| 78 | SECURITY_XATTR, | ||
| 79 | }; | ||
| 80 | |||
/*
 * Empty (all-NULL) operation tables. Xattr inodes are not meant to be
 * accessed through the normal VFS paths, so create_xattr() points their
 * inode, address-space and file operations at these dummies.
 */
static struct inode_operations none_inode_operations;
static struct address_space_operations none_address_operations;
static struct file_operations none_file_operations;
| 84 | |||
| 85 | /** | ||
| 86 | * create_xattr - create an extended attribute. | ||
| 87 | * @c: UBIFS file-system description object | ||
| 88 | * @host: host inode | ||
| 89 | * @nm: extended attribute name | ||
| 90 | * @value: extended attribute value | ||
| 91 | * @size: size of extended attribute value | ||
| 92 | * | ||
| 93 | * This is a helper function which creates an extended attribute of name @nm | ||
| 94 | * and value @value for inode @host. The host inode is also updated on flash | ||
| 95 | * because the ctime and extended attribute accounting data changes. This | ||
| 96 | * function returns zero in case of success and a negative error code in case | ||
| 97 | * of failure. | ||
| 98 | */ | ||
| 99 | static int create_xattr(struct ubifs_info *c, struct inode *host, | ||
| 100 | const struct qstr *nm, const void *value, int size) | ||
| 101 | { | ||
| 102 | int err; | ||
| 103 | struct inode *inode; | ||
| 104 | struct ubifs_inode *ui, *host_ui = ubifs_inode(host); | ||
| 105 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | ||
| 106 | .new_ino_d = size, .dirtied_ino = 1, | ||
| 107 | .dirtied_ino_d = host_ui->data_len}; | ||
| 108 | |||
| 109 | if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) | ||
| 110 | return -ENOSPC; | ||
| 111 | /* | ||
| 112 | * Linux limits the maximum size of the extended attribute names list | ||
| 113 | * to %XATTR_LIST_MAX. This means we should not allow creating more | ||
| 114 | * extended attributes if the name list becomes larger. This limitation | ||
| 115 | * is artificial for UBIFS, though. | ||
| 116 | */ | ||
| 117 | if (host_ui->xattr_names + host_ui->xattr_cnt + | ||
| 118 | nm->len + 1 > XATTR_LIST_MAX) | ||
| 119 | return -ENOSPC; | ||
| 120 | |||
| 121 | err = ubifs_budget_space(c, &req); | ||
| 122 | if (err) | ||
| 123 | return err; | ||
| 124 | |||
| 125 | inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); | ||
| 126 | if (IS_ERR(inode)) { | ||
| 127 | err = PTR_ERR(inode); | ||
| 128 | goto out_budg; | ||
| 129 | } | ||
| 130 | |||
| 131 | mutex_lock(&host_ui->ui_mutex); | ||
| 132 | /* Re-define all operations to be "nothing" */ | ||
| 133 | inode->i_mapping->a_ops = &none_address_operations; | ||
| 134 | inode->i_op = &none_inode_operations; | ||
| 135 | inode->i_fop = &none_file_operations; | ||
| 136 | |||
| 137 | inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; | ||
| 138 | ui = ubifs_inode(inode); | ||
| 139 | ui->xattr = 1; | ||
| 140 | ui->flags |= UBIFS_XATTR_FL; | ||
| 141 | ui->data = kmalloc(size, GFP_NOFS); | ||
| 142 | if (!ui->data) { | ||
| 143 | err = -ENOMEM; | ||
| 144 | goto out_unlock; | ||
| 145 | } | ||
| 146 | |||
| 147 | memcpy(ui->data, value, size); | ||
| 148 | host->i_ctime = ubifs_current_time(host); | ||
| 149 | host_ui->xattr_cnt += 1; | ||
| 150 | host_ui->xattr_size += CALC_DENT_SIZE(nm->len); | ||
| 151 | host_ui->xattr_size += CALC_XATTR_BYTES(size); | ||
| 152 | host_ui->xattr_names += nm->len; | ||
| 153 | |||
| 154 | /* | ||
| 155 | * We do not use i_size_write() because nobody can race with us as we | ||
| 156 | * are holding host @host->i_mutex - every xattr operation for this | ||
| 157 | * inode is serialized by it. | ||
| 158 | */ | ||
| 159 | inode->i_size = ui->ui_size = size; | ||
| 160 | ui->data_len = size; | ||
| 161 | err = ubifs_jnl_update(c, host, nm, inode, 0, 1); | ||
| 162 | if (err) | ||
| 163 | goto out_cancel; | ||
| 164 | mutex_unlock(&host_ui->ui_mutex); | ||
| 165 | |||
| 166 | ubifs_release_budget(c, &req); | ||
| 167 | insert_inode_hash(inode); | ||
| 168 | iput(inode); | ||
| 169 | return 0; | ||
| 170 | |||
| 171 | out_cancel: | ||
| 172 | host_ui->xattr_cnt -= 1; | ||
| 173 | host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); | ||
| 174 | host_ui->xattr_size -= CALC_XATTR_BYTES(size); | ||
| 175 | out_unlock: | ||
| 176 | mutex_unlock(&host_ui->ui_mutex); | ||
| 177 | make_bad_inode(inode); | ||
| 178 | iput(inode); | ||
| 179 | out_budg: | ||
| 180 | ubifs_release_budget(c, &req); | ||
| 181 | return err; | ||
| 182 | } | ||
| 183 | |||
| 184 | /** | ||
| 185 | * change_xattr - change an extended attribute. | ||
| 186 | * @c: UBIFS file-system description object | ||
| 187 | * @host: host inode | ||
| 188 | * @inode: extended attribute inode | ||
| 189 | * @value: extended attribute value | ||
| 190 | * @size: size of extended attribute value | ||
| 191 | * | ||
| 192 | * This helper function changes the value of extended attribute @inode with new | ||
| 193 | * data from @value. Returns zero in case of success and a negative error code | ||
| 194 | * in case of failure. | ||
| 195 | */ | ||
| 196 | static int change_xattr(struct ubifs_info *c, struct inode *host, | ||
| 197 | struct inode *inode, const void *value, int size) | ||
| 198 | { | ||
| 199 | int err; | ||
| 200 | struct ubifs_inode *host_ui = ubifs_inode(host); | ||
| 201 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 202 | struct ubifs_budget_req req = { .dirtied_ino = 2, | ||
| 203 | .dirtied_ino_d = size + host_ui->data_len }; | ||
| 204 | |||
| 205 | ubifs_assert(ui->data_len == inode->i_size); | ||
| 206 | err = ubifs_budget_space(c, &req); | ||
| 207 | if (err) | ||
| 208 | return err; | ||
| 209 | |||
| 210 | mutex_lock(&host_ui->ui_mutex); | ||
| 211 | host->i_ctime = ubifs_current_time(host); | ||
| 212 | host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); | ||
| 213 | host_ui->xattr_size += CALC_XATTR_BYTES(size); | ||
| 214 | |||
| 215 | kfree(ui->data); | ||
| 216 | ui->data = kmalloc(size, GFP_NOFS); | ||
| 217 | if (!ui->data) { | ||
| 218 | err = -ENOMEM; | ||
| 219 | goto out_unlock; | ||
| 220 | } | ||
| 221 | |||
| 222 | memcpy(ui->data, value, size); | ||
| 223 | inode->i_size = ui->ui_size = size; | ||
| 224 | ui->data_len = size; | ||
| 225 | |||
| 226 | /* | ||
| 227 | * It is important to write the host inode after the xattr inode | ||
| 228 | * because if the host inode gets synchronized (via 'fsync()'), then | ||
| 229 | * the extended attribute inode gets synchronized, because it goes | ||
| 230 | * before the host inode in the write-buffer. | ||
| 231 | */ | ||
| 232 | err = ubifs_jnl_change_xattr(c, inode, host); | ||
| 233 | if (err) | ||
| 234 | goto out_cancel; | ||
| 235 | mutex_unlock(&host_ui->ui_mutex); | ||
| 236 | |||
| 237 | ubifs_release_budget(c, &req); | ||
| 238 | return 0; | ||
| 239 | |||
| 240 | out_cancel: | ||
| 241 | host_ui->xattr_size -= CALC_XATTR_BYTES(size); | ||
| 242 | host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); | ||
| 243 | make_bad_inode(inode); | ||
| 244 | out_unlock: | ||
| 245 | mutex_unlock(&host_ui->ui_mutex); | ||
| 246 | ubifs_release_budget(c, &req); | ||
| 247 | return err; | ||
| 248 | } | ||
| 249 | |||
| 250 | /** | ||
| 251 | * check_namespace - check extended attribute name-space. | ||
| 252 | * @nm: extended attribute name | ||
| 253 | * | ||
| 254 | * This function makes sure the extended attribute name belongs to one of the | ||
| 255 | * supported extended attribute name-spaces. Returns name-space index in case | ||
| 256 | * of success and a negative error code in case of failure. | ||
| 257 | */ | ||
| 258 | static int check_namespace(const struct qstr *nm) | ||
| 259 | { | ||
| 260 | int type; | ||
| 261 | |||
| 262 | if (nm->len > UBIFS_MAX_NLEN) | ||
| 263 | return -ENAMETOOLONG; | ||
| 264 | |||
| 265 | if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, | ||
| 266 | XATTR_TRUSTED_PREFIX_LEN)) { | ||
| 267 | if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') | ||
| 268 | return -EINVAL; | ||
| 269 | type = TRUSTED_XATTR; | ||
| 270 | } else if (!strncmp(nm->name, XATTR_USER_PREFIX, | ||
| 271 | XATTR_USER_PREFIX_LEN)) { | ||
| 272 | if (nm->name[XATTR_USER_PREFIX_LEN] == '\0') | ||
| 273 | return -EINVAL; | ||
| 274 | type = USER_XATTR; | ||
| 275 | } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, | ||
| 276 | XATTR_SECURITY_PREFIX_LEN)) { | ||
| 277 | if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') | ||
| 278 | return -EINVAL; | ||
| 279 | type = SECURITY_XATTR; | ||
| 280 | } else | ||
| 281 | return -EOPNOTSUPP; | ||
| 282 | |||
| 283 | return type; | ||
| 284 | } | ||
| 285 | |||
| 286 | static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) | ||
| 287 | { | ||
| 288 | struct inode *inode; | ||
| 289 | |||
| 290 | inode = ubifs_iget(c->vfs_sb, inum); | ||
| 291 | if (IS_ERR(inode)) { | ||
| 292 | ubifs_err("dead extended attribute entry, error %d", | ||
| 293 | (int)PTR_ERR(inode)); | ||
| 294 | return inode; | ||
| 295 | } | ||
| 296 | if (ubifs_inode(inode)->xattr) | ||
| 297 | return inode; | ||
| 298 | ubifs_err("corrupt extended attribute entry"); | ||
| 299 | iput(inode); | ||
| 300 | return ERR_PTR(-EINVAL); | ||
| 301 | } | ||
| 302 | |||
| 303 | int ubifs_setxattr(struct dentry *dentry, const char *name, | ||
| 304 | const void *value, size_t size, int flags) | ||
| 305 | { | ||
| 306 | struct inode *inode, *host = dentry->d_inode; | ||
| 307 | struct ubifs_info *c = host->i_sb->s_fs_info; | ||
| 308 | struct qstr nm = { .name = name, .len = strlen(name) }; | ||
| 309 | struct ubifs_dent_node *xent; | ||
| 310 | union ubifs_key key; | ||
| 311 | int err, type; | ||
| 312 | |||
| 313 | dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, | ||
| 314 | host->i_ino, dentry->d_name.len, dentry->d_name.name, size); | ||
| 315 | |||
| 316 | if (size > UBIFS_MAX_INO_DATA) | ||
| 317 | return -ERANGE; | ||
| 318 | |||
| 319 | type = check_namespace(&nm); | ||
| 320 | if (type < 0) | ||
| 321 | return type; | ||
| 322 | |||
| 323 | xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); | ||
| 324 | if (!xent) | ||
| 325 | return -ENOMEM; | ||
| 326 | |||
| 327 | /* | ||
| 328 | * The extended attribute entries are stored in LNC, so multiple | ||
| 329 | * look-ups do not involve reading the flash. | ||
| 330 | */ | ||
| 331 | xent_key_init(c, &key, host->i_ino, &nm); | ||
| 332 | err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); | ||
| 333 | if (err) { | ||
| 334 | if (err != -ENOENT) | ||
| 335 | goto out_free; | ||
| 336 | |||
| 337 | if (flags & XATTR_REPLACE) | ||
| 338 | /* We are asked not to create the xattr */ | ||
| 339 | err = -ENODATA; | ||
| 340 | else | ||
| 341 | err = create_xattr(c, host, &nm, value, size); | ||
| 342 | goto out_free; | ||
| 343 | } | ||
| 344 | |||
| 345 | if (flags & XATTR_CREATE) { | ||
| 346 | /* We are asked not to replace the xattr */ | ||
| 347 | err = -EEXIST; | ||
| 348 | goto out_free; | ||
| 349 | } | ||
| 350 | |||
| 351 | inode = iget_xattr(c, le64_to_cpu(xent->inum)); | ||
| 352 | if (IS_ERR(inode)) { | ||
| 353 | err = PTR_ERR(inode); | ||
| 354 | goto out_free; | ||
| 355 | } | ||
| 356 | |||
| 357 | err = change_xattr(c, host, inode, value, size); | ||
| 358 | iput(inode); | ||
| 359 | |||
| 360 | out_free: | ||
| 361 | kfree(xent); | ||
| 362 | return err; | ||
| 363 | } | ||
| 364 | |||
| 365 | ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, | ||
| 366 | size_t size) | ||
| 367 | { | ||
| 368 | struct inode *inode, *host = dentry->d_inode; | ||
| 369 | struct ubifs_info *c = host->i_sb->s_fs_info; | ||
| 370 | struct qstr nm = { .name = name, .len = strlen(name) }; | ||
| 371 | struct ubifs_inode *ui; | ||
| 372 | struct ubifs_dent_node *xent; | ||
| 373 | union ubifs_key key; | ||
| 374 | int err; | ||
| 375 | |||
| 376 | dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, | ||
| 377 | host->i_ino, dentry->d_name.len, dentry->d_name.name, size); | ||
| 378 | |||
| 379 | err = check_namespace(&nm); | ||
| 380 | if (err < 0) | ||
| 381 | return err; | ||
| 382 | |||
| 383 | xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); | ||
| 384 | if (!xent) | ||
| 385 | return -ENOMEM; | ||
| 386 | |||
| 387 | mutex_lock(&host->i_mutex); | ||
| 388 | xent_key_init(c, &key, host->i_ino, &nm); | ||
| 389 | err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); | ||
| 390 | if (err) { | ||
| 391 | if (err == -ENOENT) | ||
| 392 | err = -ENODATA; | ||
| 393 | goto out_unlock; | ||
| 394 | } | ||
| 395 | |||
| 396 | inode = iget_xattr(c, le64_to_cpu(xent->inum)); | ||
| 397 | if (IS_ERR(inode)) { | ||
| 398 | err = PTR_ERR(inode); | ||
| 399 | goto out_unlock; | ||
| 400 | } | ||
| 401 | |||
| 402 | ui = ubifs_inode(inode); | ||
| 403 | ubifs_assert(inode->i_size == ui->data_len); | ||
| 404 | ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); | ||
| 405 | |||
| 406 | if (buf) { | ||
| 407 | /* If @buf is %NULL we are supposed to return the length */ | ||
| 408 | if (ui->data_len > size) { | ||
| 409 | dbg_err("buffer size %zd, xattr len %d", | ||
| 410 | size, ui->data_len); | ||
| 411 | err = -ERANGE; | ||
| 412 | goto out_iput; | ||
| 413 | } | ||
| 414 | |||
| 415 | memcpy(buf, ui->data, ui->data_len); | ||
| 416 | } | ||
| 417 | err = ui->data_len; | ||
| 418 | |||
| 419 | out_iput: | ||
| 420 | iput(inode); | ||
| 421 | out_unlock: | ||
| 422 | mutex_unlock(&host->i_mutex); | ||
| 423 | kfree(xent); | ||
| 424 | return err; | ||
| 425 | } | ||
| 426 | |||
| 427 | ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
| 428 | { | ||
| 429 | union ubifs_key key; | ||
| 430 | struct inode *host = dentry->d_inode; | ||
| 431 | struct ubifs_info *c = host->i_sb->s_fs_info; | ||
| 432 | struct ubifs_inode *host_ui = ubifs_inode(host); | ||
| 433 | struct ubifs_dent_node *xent, *pxent = NULL; | ||
| 434 | int err, len, written = 0; | ||
| 435 | struct qstr nm = { .name = NULL }; | ||
| 436 | |||
| 437 | dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, | ||
| 438 | dentry->d_name.len, dentry->d_name.name, size); | ||
| 439 | |||
| 440 | len = host_ui->xattr_names + host_ui->xattr_cnt; | ||
| 441 | if (!buffer) | ||
| 442 | /* | ||
| 443 | * We should return the minimum buffer size which will fit a | ||
| 444 | * null-terminated list of all the extended attribute names. | ||
| 445 | */ | ||
| 446 | return len; | ||
| 447 | |||
| 448 | if (len > size) | ||
| 449 | return -ERANGE; | ||
| 450 | |||
| 451 | lowest_xent_key(c, &key, host->i_ino); | ||
| 452 | |||
| 453 | mutex_lock(&host->i_mutex); | ||
| 454 | while (1) { | ||
| 455 | int type; | ||
| 456 | |||
| 457 | xent = ubifs_tnc_next_ent(c, &key, &nm); | ||
| 458 | if (unlikely(IS_ERR(xent))) { | ||
| 459 | err = PTR_ERR(xent); | ||
| 460 | break; | ||
| 461 | } | ||
| 462 | |||
| 463 | nm.name = xent->name; | ||
| 464 | nm.len = le16_to_cpu(xent->nlen); | ||
| 465 | |||
| 466 | type = check_namespace(&nm); | ||
| 467 | if (unlikely(type < 0)) { | ||
| 468 | err = type; | ||
| 469 | break; | ||
| 470 | } | ||
| 471 | |||
| 472 | /* Show trusted namespace only for "power" users */ | ||
| 473 | if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) { | ||
| 474 | memcpy(buffer + written, nm.name, nm.len + 1); | ||
| 475 | written += nm.len + 1; | ||
| 476 | } | ||
| 477 | |||
| 478 | kfree(pxent); | ||
| 479 | pxent = xent; | ||
| 480 | key_read(c, &xent->key, &key); | ||
| 481 | } | ||
| 482 | mutex_unlock(&host->i_mutex); | ||
| 483 | |||
| 484 | kfree(pxent); | ||
| 485 | if (err != -ENOENT) { | ||
| 486 | ubifs_err("cannot find next direntry, error %d", err); | ||
| 487 | return err; | ||
| 488 | } | ||
| 489 | |||
| 490 | ubifs_assert(written <= size); | ||
| 491 | return written; | ||
| 492 | } | ||
| 493 | |||
| 494 | static int remove_xattr(struct ubifs_info *c, struct inode *host, | ||
| 495 | struct inode *inode, const struct qstr *nm) | ||
| 496 | { | ||
| 497 | int err; | ||
| 498 | struct ubifs_inode *host_ui = ubifs_inode(host); | ||
| 499 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 500 | struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, | ||
| 501 | .dirtied_ino_d = host_ui->data_len }; | ||
| 502 | |||
| 503 | ubifs_assert(ui->data_len == inode->i_size); | ||
| 504 | |||
| 505 | err = ubifs_budget_space(c, &req); | ||
| 506 | if (err) | ||
| 507 | return err; | ||
| 508 | |||
| 509 | mutex_lock(&host_ui->ui_mutex); | ||
| 510 | host->i_ctime = ubifs_current_time(host); | ||
| 511 | host_ui->xattr_cnt -= 1; | ||
| 512 | host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); | ||
| 513 | host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); | ||
| 514 | host_ui->xattr_names -= nm->len; | ||
| 515 | |||
| 516 | err = ubifs_jnl_delete_xattr(c, host, inode, nm); | ||
| 517 | if (err) | ||
| 518 | goto out_cancel; | ||
| 519 | mutex_unlock(&host_ui->ui_mutex); | ||
| 520 | |||
| 521 | ubifs_release_budget(c, &req); | ||
| 522 | return 0; | ||
| 523 | |||
| 524 | out_cancel: | ||
| 525 | host_ui->xattr_cnt += 1; | ||
| 526 | host_ui->xattr_size += CALC_DENT_SIZE(nm->len); | ||
| 527 | host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); | ||
| 528 | mutex_unlock(&host_ui->ui_mutex); | ||
| 529 | ubifs_release_budget(c, &req); | ||
| 530 | make_bad_inode(inode); | ||
| 531 | return err; | ||
| 532 | } | ||
| 533 | |||
| 534 | int ubifs_removexattr(struct dentry *dentry, const char *name) | ||
| 535 | { | ||
| 536 | struct inode *inode, *host = dentry->d_inode; | ||
| 537 | struct ubifs_info *c = host->i_sb->s_fs_info; | ||
| 538 | struct qstr nm = { .name = name, .len = strlen(name) }; | ||
| 539 | struct ubifs_dent_node *xent; | ||
| 540 | union ubifs_key key; | ||
| 541 | int err; | ||
| 542 | |||
| 543 | dbg_gen("xattr '%s', ino %lu ('%.*s')", name, | ||
| 544 | host->i_ino, dentry->d_name.len, dentry->d_name.name); | ||
| 545 | ubifs_assert(mutex_is_locked(&host->i_mutex)); | ||
| 546 | |||
| 547 | err = check_namespace(&nm); | ||
| 548 | if (err < 0) | ||
| 549 | return err; | ||
| 550 | |||
| 551 | xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); | ||
| 552 | if (!xent) | ||
| 553 | return -ENOMEM; | ||
| 554 | |||
| 555 | xent_key_init(c, &key, host->i_ino, &nm); | ||
| 556 | err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); | ||
| 557 | if (err) { | ||
| 558 | if (err == -ENOENT) | ||
| 559 | err = -ENODATA; | ||
| 560 | goto out_free; | ||
| 561 | } | ||
| 562 | |||
| 563 | inode = iget_xattr(c, le64_to_cpu(xent->inum)); | ||
| 564 | if (IS_ERR(inode)) { | ||
| 565 | err = PTR_ERR(inode); | ||
| 566 | goto out_free; | ||
| 567 | } | ||
| 568 | |||
| 569 | ubifs_assert(inode->i_nlink == 1); | ||
| 570 | inode->i_nlink = 0; | ||
| 571 | err = remove_xattr(c, host, inode, &nm); | ||
| 572 | if (err) | ||
| 573 | inode->i_nlink = 1; | ||
| 574 | |||
| 575 | /* If @i_nlink is 0, 'iput()' will delete the inode */ | ||
| 576 | iput(inode); | ||
| 577 | |||
| 578 | out_free: | ||
| 579 | kfree(xent); | ||
| 580 | return err; | ||
| 581 | } | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index 7a5f69be6ac2..44cc702f96cc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
| @@ -682,38 +682,26 @@ static int udf_vrs(struct super_block *sb, int silent) | |||
| 682 | /* | 682 | /* |
| 683 | * Check whether there is an anchor block in the given block | 683 | * Check whether there is an anchor block in the given block |
| 684 | */ | 684 | */ |
| 685 | static int udf_check_anchor_block(struct super_block *sb, sector_t block, | 685 | static int udf_check_anchor_block(struct super_block *sb, sector_t block) |
| 686 | bool varconv) | ||
| 687 | { | 686 | { |
| 688 | struct buffer_head *bh = NULL; | 687 | struct buffer_head *bh; |
| 689 | tag *t; | ||
| 690 | uint16_t ident; | 688 | uint16_t ident; |
| 691 | uint32_t location; | ||
| 692 | 689 | ||
| 693 | if (varconv) { | 690 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && |
| 694 | if (udf_fixed_to_variable(block) >= | 691 | udf_fixed_to_variable(block) >= |
| 695 | sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) | 692 | sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) |
| 696 | return 0; | 693 | return 0; |
| 697 | bh = sb_bread(sb, udf_fixed_to_variable(block)); | ||
| 698 | } | ||
| 699 | else | ||
| 700 | bh = sb_bread(sb, block); | ||
| 701 | 694 | ||
| 695 | bh = udf_read_tagged(sb, block, block, &ident); | ||
| 702 | if (!bh) | 696 | if (!bh) |
| 703 | return 0; | 697 | return 0; |
| 704 | |||
| 705 | t = (tag *)bh->b_data; | ||
| 706 | ident = le16_to_cpu(t->tagIdent); | ||
| 707 | location = le32_to_cpu(t->tagLocation); | ||
| 708 | brelse(bh); | 698 | brelse(bh); |
| 709 | if (ident != TAG_IDENT_AVDP) | 699 | |
| 710 | return 0; | 700 | return ident == TAG_IDENT_AVDP; |
| 711 | return location == block; | ||
| 712 | } | 701 | } |
| 713 | 702 | ||
| 714 | /* Search for an anchor volume descriptor pointer */ | 703 | /* Search for an anchor volume descriptor pointer */ |
| 715 | static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, | 704 | static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock) |
| 716 | sector_t lastblock) | ||
| 717 | { | 705 | { |
| 718 | sector_t last[6]; | 706 | sector_t last[6]; |
| 719 | int i; | 707 | int i; |
| @@ -739,7 +727,7 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, | |||
| 739 | sb->s_blocksize_bits) | 727 | sb->s_blocksize_bits) |
| 740 | continue; | 728 | continue; |
| 741 | 729 | ||
| 742 | if (udf_check_anchor_block(sb, last[i], varconv)) { | 730 | if (udf_check_anchor_block(sb, last[i])) { |
| 743 | sbi->s_anchor[0] = last[i]; | 731 | sbi->s_anchor[0] = last[i]; |
| 744 | sbi->s_anchor[1] = last[i] - 256; | 732 | sbi->s_anchor[1] = last[i] - 256; |
| 745 | return last[i]; | 733 | return last[i]; |
| @@ -748,17 +736,17 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, | |||
| 748 | if (last[i] < 256) | 736 | if (last[i] < 256) |
| 749 | continue; | 737 | continue; |
| 750 | 738 | ||
| 751 | if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { | 739 | if (udf_check_anchor_block(sb, last[i] - 256)) { |
| 752 | sbi->s_anchor[1] = last[i] - 256; | 740 | sbi->s_anchor[1] = last[i] - 256; |
| 753 | return last[i]; | 741 | return last[i]; |
| 754 | } | 742 | } |
| 755 | } | 743 | } |
| 756 | 744 | ||
| 757 | if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { | 745 | if (udf_check_anchor_block(sb, sbi->s_session + 256)) { |
| 758 | sbi->s_anchor[0] = sbi->s_session + 256; | 746 | sbi->s_anchor[0] = sbi->s_session + 256; |
| 759 | return last[0]; | 747 | return last[0]; |
| 760 | } | 748 | } |
| 761 | if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { | 749 | if (udf_check_anchor_block(sb, sbi->s_session + 512)) { |
| 762 | sbi->s_anchor[0] = sbi->s_session + 512; | 750 | sbi->s_anchor[0] = sbi->s_session + 512; |
| 763 | return last[0]; | 751 | return last[0]; |
| 764 | } | 752 | } |
| @@ -780,23 +768,24 @@ static void udf_find_anchor(struct super_block *sb) | |||
| 780 | int i; | 768 | int i; |
| 781 | struct udf_sb_info *sbi = UDF_SB(sb); | 769 | struct udf_sb_info *sbi = UDF_SB(sb); |
| 782 | 770 | ||
| 783 | lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); | 771 | lastblock = udf_scan_anchors(sb, sbi->s_last_block); |
| 784 | if (lastblock) | 772 | if (lastblock) |
| 785 | goto check_anchor; | 773 | goto check_anchor; |
| 786 | 774 | ||
| 787 | /* No anchor found? Try VARCONV conversion of block numbers */ | 775 | /* No anchor found? Try VARCONV conversion of block numbers */ |
| 776 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
| 788 | /* Firstly, we try to not convert number of the last block */ | 777 | /* Firstly, we try to not convert number of the last block */ |
| 789 | lastblock = udf_scan_anchors(sb, 1, | 778 | lastblock = udf_scan_anchors(sb, |
| 790 | udf_variable_to_fixed(sbi->s_last_block)); | 779 | udf_variable_to_fixed(sbi->s_last_block)); |
| 791 | if (lastblock) { | 780 | if (lastblock) |
| 792 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | ||
| 793 | goto check_anchor; | 781 | goto check_anchor; |
| 794 | } | ||
| 795 | 782 | ||
| 796 | /* Secondly, we try with converted number of the last block */ | 783 | /* Secondly, we try with converted number of the last block */ |
| 797 | lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); | 784 | lastblock = udf_scan_anchors(sb, sbi->s_last_block); |
| 798 | if (lastblock) | 785 | if (!lastblock) { |
| 799 | UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); | 786 | /* VARCONV didn't help. Clear it. */ |
| 787 | UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); | ||
| 788 | } | ||
| 800 | 789 | ||
| 801 | check_anchor: | 790 | check_anchor: |
| 802 | /* | 791 | /* |
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8fa9c2d70911..8ec865de5f13 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #define UDF_PREALLOCATE | 16 | #define UDF_PREALLOCATE |
| 17 | #define UDF_DEFAULT_PREALLOC_BLOCKS 8 | 17 | #define UDF_DEFAULT_PREALLOC_BLOCKS 8 |
| 18 | 18 | ||
| 19 | #define UDFFS_DEBUG | 19 | #undef UDFFS_DEBUG |
| 20 | 20 | ||
| 21 | #ifdef UDFFS_DEBUG | 21 | #ifdef UDFFS_DEBUG |
| 22 | #define udf_debug(f, a...) \ | 22 | #define udf_debug(f, a...) \ |
diff --git a/fs/utimes.c b/fs/utimes.c index af059d5cb485..b6b664e7145e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
| @@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) | |||
| 40 | 40 | ||
| 41 | #endif | 41 | #endif |
| 42 | 42 | ||
| 43 | static bool nsec_special(long nsec) | ||
| 44 | { | ||
| 45 | return nsec == UTIME_OMIT || nsec == UTIME_NOW; | ||
| 46 | } | ||
| 47 | |||
| 48 | static bool nsec_valid(long nsec) | 43 | static bool nsec_valid(long nsec) |
| 49 | { | 44 | { |
| 50 | if (nsec_special(nsec)) | 45 | if (nsec == UTIME_OMIT || nsec == UTIME_NOW) |
| 51 | return true; | 46 | return true; |
| 52 | 47 | ||
| 53 | return nsec >= 0 && nsec <= 999999999; | 48 | return nsec >= 0 && nsec <= 999999999; |
| @@ -102,7 +97,11 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
| 102 | if (error) | 97 | if (error) |
| 103 | goto dput_and_out; | 98 | goto dput_and_out; |
| 104 | 99 | ||
| 105 | /* Don't worry, the checks are done in inode_change_ok() */ | 100 | if (times && times[0].tv_nsec == UTIME_NOW && |
| 101 | times[1].tv_nsec == UTIME_NOW) | ||
| 102 | times = NULL; | ||
| 103 | |||
| 104 | /* In most cases, the checks are done in inode_change_ok() */ | ||
| 106 | newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; | 105 | newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; |
| 107 | if (times) { | 106 | if (times) { |
| 108 | error = -EPERM; | 107 | error = -EPERM; |
| @@ -124,28 +123,34 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags | |||
| 124 | newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; | 123 | newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; |
| 125 | newattrs.ia_valid |= ATTR_MTIME_SET; | 124 | newattrs.ia_valid |= ATTR_MTIME_SET; |
| 126 | } | 125 | } |
| 127 | } | ||
| 128 | 126 | ||
| 129 | /* | 127 | /* |
| 130 | * If times is NULL or both times are either UTIME_OMIT or | 128 | * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT |
| 131 | * UTIME_NOW, then need to check permissions, because | 129 | * cases, we need to make an extra check that is not done by |
| 132 | * inode_change_ok() won't do it. | 130 | * inode_change_ok(). |
| 133 | */ | 131 | */ |
| 134 | if (!times || (nsec_special(times[0].tv_nsec) && | 132 | if (((times[0].tv_nsec == UTIME_NOW && |
| 135 | nsec_special(times[1].tv_nsec))) { | 133 | times[1].tv_nsec == UTIME_OMIT) |
| 134 | || | ||
| 135 | (times[0].tv_nsec == UTIME_OMIT && | ||
| 136 | times[1].tv_nsec == UTIME_NOW)) | ||
| 137 | && !is_owner_or_cap(inode)) | ||
| 138 | goto mnt_drop_write_and_out; | ||
| 139 | } else { | ||
| 140 | |||
| 141 | /* | ||
| 142 | * If times is NULL (or both times are UTIME_NOW), | ||
| 143 | * then we need to check permissions, because | ||
| 144 | * inode_change_ok() won't do it. | ||
| 145 | */ | ||
| 136 | error = -EACCES; | 146 | error = -EACCES; |
| 137 | if (IS_IMMUTABLE(inode)) | 147 | if (IS_IMMUTABLE(inode)) |
| 138 | goto mnt_drop_write_and_out; | 148 | goto mnt_drop_write_and_out; |
| 139 | 149 | ||
| 140 | if (!is_owner_or_cap(inode)) { | 150 | if (!is_owner_or_cap(inode)) { |
| 141 | if (f) { | 151 | error = permission(inode, MAY_WRITE, NULL); |
| 142 | if (!(f->f_mode & FMODE_WRITE)) | 152 | if (error) |
| 143 | goto mnt_drop_write_and_out; | 153 | goto mnt_drop_write_and_out; |
| 144 | } else { | ||
| 145 | error = vfs_permission(&nd, MAY_WRITE); | ||
| 146 | if (error) | ||
| 147 | goto mnt_drop_write_and_out; | ||
| 148 | } | ||
| 149 | } | 154 | } |
| 150 | } | 155 | } |
| 151 | mutex_lock(&inode->i_mutex); | 156 | mutex_lock(&inode->i_mutex); |
| @@ -169,14 +174,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ | |||
| 169 | if (utimes) { | 174 | if (utimes) { |
| 170 | if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) | 175 | if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) |
| 171 | return -EFAULT; | 176 | return -EFAULT; |
| 172 | if ((tstimes[0].tv_nsec == UTIME_OMIT || | ||
| 173 | tstimes[0].tv_nsec == UTIME_NOW) && | ||
| 174 | tstimes[0].tv_sec != 0) | ||
| 175 | return -EINVAL; | ||
| 176 | if ((tstimes[1].tv_nsec == UTIME_OMIT || | ||
| 177 | tstimes[1].tv_nsec == UTIME_NOW) && | ||
| 178 | tstimes[1].tv_sec != 0) | ||
| 179 | return -EINVAL; | ||
| 180 | 177 | ||
| 181 | /* Nothing to do, we must not even check the path. */ | 178 | /* Nothing to do, we must not even check the path. */ |
| 182 | if (tstimes[0].tv_nsec == UTIME_OMIT && | 179 | if (tstimes[0].tv_nsec == UTIME_OMIT && |
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index a3522727ea5b..b546ba69be82 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c | |||
| @@ -645,7 +645,7 @@ static int vfat_add_entry(struct inode *dir, struct qstr *qname, int is_dir, | |||
| 645 | if (len == 0) | 645 | if (len == 0) |
| 646 | return -ENOENT; | 646 | return -ENOENT; |
| 647 | 647 | ||
| 648 | slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_KERNEL); | 648 | slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_NOFS); |
| 649 | if (slots == NULL) | 649 | if (slots == NULL) |
| 650 | return -ENOMEM; | 650 | return -ENOMEM; |
| 651 | 651 | ||
| @@ -687,7 +687,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 687 | struct dentry *alias; | 687 | struct dentry *alias; |
| 688 | int err, table; | 688 | int err, table; |
| 689 | 689 | ||
| 690 | lock_kernel(); | 690 | lock_super(sb); |
| 691 | table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; | 691 | table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; |
| 692 | dentry->d_op = &vfat_dentry_ops[table]; | 692 | dentry->d_op = &vfat_dentry_ops[table]; |
| 693 | 693 | ||
| @@ -699,7 +699,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 699 | inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); | 699 | inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); |
| 700 | brelse(sinfo.bh); | 700 | brelse(sinfo.bh); |
| 701 | if (IS_ERR(inode)) { | 701 | if (IS_ERR(inode)) { |
| 702 | unlock_kernel(); | 702 | unlock_super(sb); |
| 703 | return ERR_CAST(inode); | 703 | return ERR_CAST(inode); |
| 704 | } | 704 | } |
| 705 | alias = d_find_alias(inode); | 705 | alias = d_find_alias(inode); |
| @@ -708,13 +708,13 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 708 | dput(alias); | 708 | dput(alias); |
| 709 | else { | 709 | else { |
| 710 | iput(inode); | 710 | iput(inode); |
| 711 | unlock_kernel(); | 711 | unlock_super(sb); |
| 712 | return alias; | 712 | return alias; |
| 713 | } | 713 | } |
| 714 | 714 | ||
| 715 | } | 715 | } |
| 716 | error: | 716 | error: |
| 717 | unlock_kernel(); | 717 | unlock_super(sb); |
| 718 | dentry->d_op = &vfat_dentry_ops[table]; | 718 | dentry->d_op = &vfat_dentry_ops[table]; |
| 719 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 719 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
| 720 | dentry = d_splice_alias(inode, dentry); | 720 | dentry = d_splice_alias(inode, dentry); |
| @@ -734,7 +734,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 734 | struct timespec ts; | 734 | struct timespec ts; |
| 735 | int err; | 735 | int err; |
| 736 | 736 | ||
| 737 | lock_kernel(); | 737 | lock_super(sb); |
| 738 | 738 | ||
| 739 | ts = CURRENT_TIME_SEC; | 739 | ts = CURRENT_TIME_SEC; |
| 740 | err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); | 740 | err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); |
| @@ -755,17 +755,18 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 755 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 755 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
| 756 | d_instantiate(dentry, inode); | 756 | d_instantiate(dentry, inode); |
| 757 | out: | 757 | out: |
| 758 | unlock_kernel(); | 758 | unlock_super(sb); |
| 759 | return err; | 759 | return err; |
| 760 | } | 760 | } |
| 761 | 761 | ||
| 762 | static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | 762 | static int vfat_rmdir(struct inode *dir, struct dentry *dentry) |
| 763 | { | 763 | { |
| 764 | struct inode *inode = dentry->d_inode; | 764 | struct inode *inode = dentry->d_inode; |
| 765 | struct super_block *sb = dir->i_sb; | ||
| 765 | struct fat_slot_info sinfo; | 766 | struct fat_slot_info sinfo; |
| 766 | int err; | 767 | int err; |
| 767 | 768 | ||
| 768 | lock_kernel(); | 769 | lock_super(sb); |
| 769 | 770 | ||
| 770 | err = fat_dir_empty(inode); | 771 | err = fat_dir_empty(inode); |
| 771 | if (err) | 772 | if (err) |
| @@ -783,7 +784,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 783 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 784 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
| 784 | fat_detach(inode); | 785 | fat_detach(inode); |
| 785 | out: | 786 | out: |
| 786 | unlock_kernel(); | 787 | unlock_super(sb); |
| 787 | 788 | ||
| 788 | return err; | 789 | return err; |
| 789 | } | 790 | } |
| @@ -791,10 +792,11 @@ out: | |||
| 791 | static int vfat_unlink(struct inode *dir, struct dentry *dentry) | 792 | static int vfat_unlink(struct inode *dir, struct dentry *dentry) |
| 792 | { | 793 | { |
| 793 | struct inode *inode = dentry->d_inode; | 794 | struct inode *inode = dentry->d_inode; |
| 795 | struct super_block *sb = dir->i_sb; | ||
| 794 | struct fat_slot_info sinfo; | 796 | struct fat_slot_info sinfo; |
| 795 | int err; | 797 | int err; |
| 796 | 798 | ||
| 797 | lock_kernel(); | 799 | lock_super(sb); |
| 798 | 800 | ||
| 799 | err = vfat_find(dir, &dentry->d_name, &sinfo); | 801 | err = vfat_find(dir, &dentry->d_name, &sinfo); |
| 800 | if (err) | 802 | if (err) |
| @@ -807,7 +809,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) | |||
| 807 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 809 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
| 808 | fat_detach(inode); | 810 | fat_detach(inode); |
| 809 | out: | 811 | out: |
| 810 | unlock_kernel(); | 812 | unlock_super(sb); |
| 811 | 813 | ||
| 812 | return err; | 814 | return err; |
| 813 | } | 815 | } |
| @@ -820,7 +822,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 820 | struct timespec ts; | 822 | struct timespec ts; |
| 821 | int err, cluster; | 823 | int err, cluster; |
| 822 | 824 | ||
| 823 | lock_kernel(); | 825 | lock_super(sb); |
| 824 | 826 | ||
| 825 | ts = CURRENT_TIME_SEC; | 827 | ts = CURRENT_TIME_SEC; |
| 826 | cluster = fat_alloc_new_dir(dir, &ts); | 828 | cluster = fat_alloc_new_dir(dir, &ts); |
| @@ -849,13 +851,13 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 849 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 851 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
| 850 | d_instantiate(dentry, inode); | 852 | d_instantiate(dentry, inode); |
| 851 | 853 | ||
| 852 | unlock_kernel(); | 854 | unlock_super(sb); |
| 853 | return 0; | 855 | return 0; |
| 854 | 856 | ||
| 855 | out_free: | 857 | out_free: |
| 856 | fat_free_clusters(dir, cluster); | 858 | fat_free_clusters(dir, cluster); |
| 857 | out: | 859 | out: |
| 858 | unlock_kernel(); | 860 | unlock_super(sb); |
| 859 | return err; | 861 | return err; |
| 860 | } | 862 | } |
| 861 | 863 | ||
| @@ -869,11 +871,12 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 869 | struct timespec ts; | 871 | struct timespec ts; |
| 870 | loff_t dotdot_i_pos, new_i_pos; | 872 | loff_t dotdot_i_pos, new_i_pos; |
| 871 | int err, is_dir, update_dotdot, corrupt = 0; | 873 | int err, is_dir, update_dotdot, corrupt = 0; |
| 874 | struct super_block *sb = old_dir->i_sb; | ||
| 872 | 875 | ||
| 873 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; | 876 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; |
| 874 | old_inode = old_dentry->d_inode; | 877 | old_inode = old_dentry->d_inode; |
| 875 | new_inode = new_dentry->d_inode; | 878 | new_inode = new_dentry->d_inode; |
| 876 | lock_kernel(); | 879 | lock_super(sb); |
| 877 | err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); | 880 | err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); |
| 878 | if (err) | 881 | if (err) |
| 879 | goto out; | 882 | goto out; |
| @@ -951,7 +954,7 @@ out: | |||
| 951 | brelse(sinfo.bh); | 954 | brelse(sinfo.bh); |
| 952 | brelse(dotdot_bh); | 955 | brelse(dotdot_bh); |
| 953 | brelse(old_sinfo.bh); | 956 | brelse(old_sinfo.bh); |
| 954 | unlock_kernel(); | 957 | unlock_super(sb); |
| 955 | 958 | ||
| 956 | return err; | 959 | return err; |
| 957 | 960 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index afaee301b0ee..ad3d26ddfe31 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -2427,13 +2427,20 @@ restart: | |||
| 2427 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { | 2427 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { |
| 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); | 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); |
| 2429 | 2429 | ||
| 2430 | /* If I'm the only one writing to this iclog, sync it to disk */ | 2430 | /* |
| 2431 | if (atomic_read(&iclog->ic_refcnt) == 1) { | 2431 | * If I'm the only one writing to this iclog, sync it to disk. |
| 2432 | * We need to do an atomic compare and decrement here to avoid | ||
| 2433 | * racing with concurrent atomic_dec_and_lock() calls in | ||
| 2434 | * xlog_state_release_iclog() when there is more than one | ||
| 2435 | * reference to the iclog. | ||
| 2436 | */ | ||
| 2437 | if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) { | ||
| 2438 | /* we are the only one */ | ||
| 2432 | spin_unlock(&log->l_icloglock); | 2439 | spin_unlock(&log->l_icloglock); |
| 2433 | if ((error = xlog_state_release_iclog(log, iclog))) | 2440 | error = xlog_state_release_iclog(log, iclog); |
| 2441 | if (error) | ||
| 2434 | return error; | 2442 | return error; |
| 2435 | } else { | 2443 | } else { |
| 2436 | atomic_dec(&iclog->ic_refcnt); | ||
| 2437 | spin_unlock(&log->l_icloglock); | 2444 | spin_unlock(&log->l_icloglock); |
| 2438 | } | 2445 | } |
| 2439 | goto restart; | 2446 | goto restart; |
