diff options
| author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 12:06:16 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 12:06:16 -0400 |
| commit | 4a61f17378c2cdd9bd8f34ef8bd7422861d0c1f1 (patch) | |
| tree | a2054556900af8c16fd9f5419f012dcf1ee2995a | |
| parent | d002ec481c24f325ed6cfcb7810d317c015dd1b5 (diff) | |
| parent | 7ecdb70a0ea436c06540140242bfac6ac3babfc0 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6: (292 commits)
[GFS2] Fix endian bug for de_type
[GFS2] Initialize SELinux extended attributes at inode creation time.
[GFS2] Move logging code into log.c (mostly)
[GFS2] Mark nlink cleared so VFS sees it happen
[GFS2] Two redundant casts removed
[GFS2] Remove uneeded endian conversion
[GFS2] Remove duplicate sb reading code
[GFS2] Mark metadata reads for blktrace
[GFS2] Remove iflags.h, use FS_
[GFS2] Fix code style/indent in ops_file.c
[GFS2] streamline-generic_file_-interfaces-and-filemap gfs fix
[GFS2] Remove readv/writev methods and use aio_read/aio_write instead (gfs bits)
[GFS2] inode-diet: Eliminate i_blksize from the inode structure
[GFS2] inode_diet: Replace inode.u.generic_ip with inode.i_private (gfs)
[GFS2] Fix typo in last patch
[GFS2] Fix direct i/o logic in filemap.c
[GFS2] Fix bug in Makefiles for lock modules
[GFS2] Remove (extra) fs_subsys declaration
[GFS2/DLM] Fix trailing whitespace
[GFS2] Tidy up meta_io code
...
126 files changed, 40197 insertions, 6 deletions
| @@ -3578,11 +3578,11 @@ S: Fargo, North Dakota 58122 | |||
| 3578 | S: USA | 3578 | S: USA |
| 3579 | 3579 | ||
| 3580 | N: Steven Whitehouse | 3580 | N: Steven Whitehouse |
| 3581 | E: SteveW@ACM.org | 3581 | E: steve@chygwyn.com |
| 3582 | W: http://www.chygwyn.com/~steve | 3582 | W: http://www.chygwyn.com/~steve |
| 3583 | D: Linux DECnet project: http://www.sucs.swan.ac.uk/~rohan/DECnet/index.html | 3583 | D: Linux DECnet project |
| 3584 | D: Minor debugging of other networking protocols. | 3584 | D: Minor debugging of other networking protocols. |
| 3585 | D: Misc bug fixes and filesystem development | 3585 | D: Misc bug fixes and GFS2 filesystem development |
| 3586 | 3586 | ||
| 3587 | N: Hans-Joachim Widmaier | 3587 | N: Hans-Joachim Widmaier |
| 3588 | E: hjw@zvw.de | 3588 | E: hjw@zvw.de |
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt new file mode 100644 index 000000000000..593004b6bbab --- /dev/null +++ b/Documentation/filesystems/gfs2.txt | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | Global File System | ||
| 2 | ------------------ | ||
| 3 | |||
| 4 | http://sources.redhat.com/cluster/ | ||
| 5 | |||
| 6 | GFS is a cluster file system. It allows a cluster of computers to | ||
| 7 | simultaneously use a block device that is shared between them (with FC, | ||
| 8 | iSCSI, NBD, etc). GFS reads and writes to the block device like a local | ||
| 9 | file system, but also uses a lock module to allow the computers to coordinate | ||
| 10 | their I/O so file system consistency is maintained. One of the nifty | ||
| 11 | features of GFS is perfect consistency -- changes made to the file system | ||
| 12 | on one machine show up immediately on all other machines in the cluster. | ||
| 13 | |||
| 14 | GFS uses interchangeable inter-node locking mechanisms. Different lock | ||
| 15 | modules can plug into GFS and each file system selects the appropriate | ||
| 16 | lock module at mount time. Lock modules include: | ||
| 17 | |||
| 18 | lock_nolock -- allows gfs to be used as a local file system | ||
| 19 | |||
| 20 | lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking | ||
| 21 | The dlm is found at linux/fs/dlm/ | ||
| 22 | |||
| 23 | In addition to interfacing with an external locking manager, a gfs lock | ||
| 24 | module is responsible for interacting with external cluster management | ||
| 25 | systems. Lock_dlm depends on user space cluster management systems found | ||
| 26 | at the URL above. | ||
| 27 | |||
| 28 | To use gfs as a local file system, no external clustering systems are | ||
| 29 | needed, simply: | ||
| 30 | |||
| 31 | $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device | ||
| 32 | $ mount -t gfs2 /dev/block_device /dir | ||
| 33 | |||
| 34 | GFS2 is not on-disk compatible with previous versions of GFS. | ||
| 35 | |||
| 36 | The following man pages can be found at the URL above: | ||
| 37 | gfs2_fsck to repair a filesystem | ||
| 38 | gfs2_grow to expand a filesystem online | ||
| 39 | gfs2_jadd to add journals to a filesystem online | ||
| 40 | gfs2_tool to manipulate, examine and tune a filesystem | ||
| 41 | gfs2_quota to examine and change quota values in a filesystem | ||
| 42 | mount.gfs2 to help mount(8) mount a filesystem | ||
| 43 | mkfs.gfs2 to make a filesystem | ||
diff --git a/MAINTAINERS b/MAINTAINERS index 8c35b3c503aa..17becb9b1a96 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -898,6 +898,16 @@ M: jack@suse.cz | |||
| 898 | L: linux-kernel@vger.kernel.org | 898 | L: linux-kernel@vger.kernel.org |
| 899 | S: Maintained | 899 | S: Maintained |
| 900 | 900 | ||
| 901 | DISTRIBUTED LOCK MANAGER | ||
| 902 | P: Patrick Caulfield | ||
| 903 | M: pcaulfie@redhat.com | ||
| 904 | P: David Teigland | ||
| 905 | M: teigland@redhat.com | ||
| 906 | L: cluster-devel@redhat.com | ||
| 907 | W: http://sources.redhat.com/cluster/ | ||
| 908 | T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs-2.6.git | ||
| 909 | S: Supported | ||
| 910 | |||
| 901 | DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER | 911 | DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER |
| 902 | P: Tobias Ringstrom | 912 | P: Tobias Ringstrom |
| 903 | M: tori@unhappy.mine.nu | 913 | M: tori@unhappy.mine.nu |
| @@ -1173,6 +1183,14 @@ M: khc@pm.waw.pl | |||
| 1173 | W: http://www.kernel.org/pub/linux/utils/net/hdlc/ | 1183 | W: http://www.kernel.org/pub/linux/utils/net/hdlc/ |
| 1174 | S: Maintained | 1184 | S: Maintained |
| 1175 | 1185 | ||
| 1186 | GFS2 FILE SYSTEM | ||
| 1187 | P: Steven Whitehouse | ||
| 1188 | M: swhiteho@redhat.com | ||
| 1189 | L: cluster-devel@redhat.com | ||
| 1190 | W: http://sources.redhat.com/cluster/ | ||
| 1191 | T: git kernel.org:/pub/scm/linux/kernel/git/steve/gfs-2.6.git | ||
| 1192 | S: Supported | ||
| 1193 | |||
| 1176 | GIGASET ISDN DRIVERS | 1194 | GIGASET ISDN DRIVERS |
| 1177 | P: Hansjoerg Lipp | 1195 | P: Hansjoerg Lipp |
| 1178 | M: hjlipp@web.de | 1196 | M: hjlipp@web.de |
diff --git a/fs/Kconfig b/fs/Kconfig index 674cfbb83a95..599de54451af 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -325,6 +325,7 @@ config FS_POSIX_ACL | |||
| 325 | default n | 325 | default n |
| 326 | 326 | ||
| 327 | source "fs/xfs/Kconfig" | 327 | source "fs/xfs/Kconfig" |
| 328 | source "fs/gfs2/Kconfig" | ||
| 328 | 329 | ||
| 329 | config OCFS2_FS | 330 | config OCFS2_FS |
| 330 | tristate "OCFS2 file system support" | 331 | tristate "OCFS2 file system support" |
| @@ -1995,6 +1996,7 @@ endmenu | |||
| 1995 | endif | 1996 | endif |
| 1996 | 1997 | ||
| 1997 | source "fs/nls/Kconfig" | 1998 | source "fs/nls/Kconfig" |
| 1999 | source "fs/dlm/Kconfig" | ||
| 1998 | 2000 | ||
| 1999 | endmenu | 2001 | endmenu |
| 2000 | 2002 | ||
diff --git a/fs/Makefile b/fs/Makefile index fd24d67a7cdb..df614eacee86 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -57,6 +57,7 @@ obj-$(CONFIG_CONFIGFS_FS) += configfs/ | |||
| 57 | obj-y += devpts/ | 57 | obj-y += devpts/ |
| 58 | 58 | ||
| 59 | obj-$(CONFIG_PROFILING) += dcookies.o | 59 | obj-$(CONFIG_PROFILING) += dcookies.o |
| 60 | obj-$(CONFIG_DLM) += dlm/ | ||
| 60 | 61 | ||
| 61 | # Do not add any filesystems before this line | 62 | # Do not add any filesystems before this line |
| 62 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 63 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
| @@ -110,3 +111,4 @@ obj-$(CONFIG_HOSTFS) += hostfs/ | |||
| 110 | obj-$(CONFIG_HPPFS) += hppfs/ | 111 | obj-$(CONFIG_HPPFS) += hppfs/ |
| 111 | obj-$(CONFIG_DEBUG_FS) += debugfs/ | 112 | obj-$(CONFIG_DEBUG_FS) += debugfs/ |
| 112 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ | 113 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ |
| 114 | obj-$(CONFIG_GFS2_FS) += gfs2/ | ||
diff --git a/fs/configfs/item.c b/fs/configfs/item.c index e07485ac50ad..24421209f854 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c | |||
| @@ -224,4 +224,4 @@ EXPORT_SYMBOL(config_item_init); | |||
| 224 | EXPORT_SYMBOL(config_group_init); | 224 | EXPORT_SYMBOL(config_group_init); |
| 225 | EXPORT_SYMBOL(config_item_get); | 225 | EXPORT_SYMBOL(config_item_get); |
| 226 | EXPORT_SYMBOL(config_item_put); | 226 | EXPORT_SYMBOL(config_item_put); |
| 227 | 227 | EXPORT_SYMBOL(config_group_find_obj); | |
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig new file mode 100644 index 000000000000..490f85b3fa59 --- /dev/null +++ b/fs/dlm/Kconfig | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | menu "Distributed Lock Manager" | ||
| 2 | depends on INET && EXPERIMENTAL | ||
| 3 | |||
| 4 | config DLM | ||
| 5 | tristate "Distributed Lock Manager (DLM)" | ||
| 6 | depends on IPV6 || IPV6=n | ||
| 7 | depends on IP_SCTP | ||
| 8 | select CONFIGFS_FS | ||
| 9 | help | ||
| 10 | A general purpose distributed lock manager for kernel or userspace | ||
| 11 | applications. | ||
| 12 | |||
| 13 | config DLM_DEBUG | ||
| 14 | bool "DLM debugging" | ||
| 15 | depends on DLM | ||
| 16 | help | ||
| 17 | Under the debugfs mount point, the name of each lockspace will | ||
| 18 | appear as a file in the "dlm" directory. The output is the | ||
| 19 | list of resource and locks the local node knows about. | ||
| 20 | |||
| 21 | endmenu | ||
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile new file mode 100644 index 000000000000..1832e0297f7d --- /dev/null +++ b/fs/dlm/Makefile | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | obj-$(CONFIG_DLM) += dlm.o | ||
| 2 | dlm-y := ast.o \ | ||
| 3 | config.o \ | ||
| 4 | dir.o \ | ||
| 5 | lock.o \ | ||
| 6 | lockspace.o \ | ||
| 7 | lowcomms.o \ | ||
| 8 | main.o \ | ||
| 9 | member.o \ | ||
| 10 | memory.o \ | ||
| 11 | midcomms.o \ | ||
| 12 | rcom.o \ | ||
| 13 | recover.o \ | ||
| 14 | recoverd.o \ | ||
| 15 | requestqueue.o \ | ||
| 16 | user.o \ | ||
| 17 | util.o | ||
| 18 | dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o | ||
| 19 | |||
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c new file mode 100644 index 000000000000..f91d39cb1e0b --- /dev/null +++ b/fs/dlm/ast.c | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lock.h" | ||
| 16 | #include "user.h" | ||
| 17 | |||
| 18 | #define WAKE_ASTS 0 | ||
| 19 | |||
| 20 | static struct list_head ast_queue; | ||
| 21 | static spinlock_t ast_queue_lock; | ||
| 22 | static struct task_struct * astd_task; | ||
| 23 | static unsigned long astd_wakeflags; | ||
| 24 | static struct mutex astd_running; | ||
| 25 | |||
| 26 | |||
| 27 | void dlm_del_ast(struct dlm_lkb *lkb) | ||
| 28 | { | ||
| 29 | spin_lock(&ast_queue_lock); | ||
| 30 | if (lkb->lkb_ast_type & (AST_COMP | AST_BAST)) | ||
| 31 | list_del(&lkb->lkb_astqueue); | ||
| 32 | spin_unlock(&ast_queue_lock); | ||
| 33 | } | ||
| 34 | |||
| 35 | void dlm_add_ast(struct dlm_lkb *lkb, int type) | ||
| 36 | { | ||
| 37 | if (lkb->lkb_flags & DLM_IFL_USER) { | ||
| 38 | dlm_user_add_ast(lkb, type); | ||
| 39 | return; | ||
| 40 | } | ||
| 41 | DLM_ASSERT(lkb->lkb_astaddr != DLM_FAKE_USER_AST, dlm_print_lkb(lkb);); | ||
| 42 | |||
| 43 | spin_lock(&ast_queue_lock); | ||
| 44 | if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { | ||
| 45 | kref_get(&lkb->lkb_ref); | ||
| 46 | list_add_tail(&lkb->lkb_astqueue, &ast_queue); | ||
| 47 | } | ||
| 48 | lkb->lkb_ast_type |= type; | ||
| 49 | spin_unlock(&ast_queue_lock); | ||
| 50 | |||
| 51 | set_bit(WAKE_ASTS, &astd_wakeflags); | ||
| 52 | wake_up_process(astd_task); | ||
| 53 | } | ||
| 54 | |||
| 55 | static void process_asts(void) | ||
| 56 | { | ||
| 57 | struct dlm_ls *ls = NULL; | ||
| 58 | struct dlm_rsb *r = NULL; | ||
| 59 | struct dlm_lkb *lkb; | ||
| 60 | void (*cast) (long param); | ||
| 61 | void (*bast) (long param, int mode); | ||
| 62 | int type = 0, found, bmode; | ||
| 63 | |||
| 64 | for (;;) { | ||
| 65 | found = 0; | ||
| 66 | spin_lock(&ast_queue_lock); | ||
| 67 | list_for_each_entry(lkb, &ast_queue, lkb_astqueue) { | ||
| 68 | r = lkb->lkb_resource; | ||
| 69 | ls = r->res_ls; | ||
| 70 | |||
| 71 | if (dlm_locking_stopped(ls)) | ||
| 72 | continue; | ||
| 73 | |||
| 74 | list_del(&lkb->lkb_astqueue); | ||
| 75 | type = lkb->lkb_ast_type; | ||
| 76 | lkb->lkb_ast_type = 0; | ||
| 77 | found = 1; | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | spin_unlock(&ast_queue_lock); | ||
| 81 | |||
| 82 | if (!found) | ||
| 83 | break; | ||
| 84 | |||
| 85 | cast = lkb->lkb_astaddr; | ||
| 86 | bast = lkb->lkb_bastaddr; | ||
| 87 | bmode = lkb->lkb_bastmode; | ||
| 88 | |||
| 89 | if ((type & AST_COMP) && cast) | ||
| 90 | cast(lkb->lkb_astparam); | ||
| 91 | |||
| 92 | /* FIXME: Is it safe to look at lkb_grmode here | ||
| 93 | without doing a lock_rsb() ? | ||
| 94 | Look at other checks in v1 to avoid basts. */ | ||
| 95 | |||
| 96 | if ((type & AST_BAST) && bast) | ||
| 97 | if (!dlm_modes_compat(lkb->lkb_grmode, bmode)) | ||
| 98 | bast(lkb->lkb_astparam, bmode); | ||
| 99 | |||
| 100 | /* this removes the reference added by dlm_add_ast | ||
| 101 | and may result in the lkb being freed */ | ||
| 102 | dlm_put_lkb(lkb); | ||
| 103 | |||
| 104 | schedule(); | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline int no_asts(void) | ||
| 109 | { | ||
| 110 | int ret; | ||
| 111 | |||
| 112 | spin_lock(&ast_queue_lock); | ||
| 113 | ret = list_empty(&ast_queue); | ||
| 114 | spin_unlock(&ast_queue_lock); | ||
| 115 | return ret; | ||
| 116 | } | ||
| 117 | |||
| 118 | static int dlm_astd(void *data) | ||
| 119 | { | ||
| 120 | while (!kthread_should_stop()) { | ||
| 121 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 122 | if (!test_bit(WAKE_ASTS, &astd_wakeflags)) | ||
| 123 | schedule(); | ||
| 124 | set_current_state(TASK_RUNNING); | ||
| 125 | |||
| 126 | mutex_lock(&astd_running); | ||
| 127 | if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) | ||
| 128 | process_asts(); | ||
| 129 | mutex_unlock(&astd_running); | ||
| 130 | } | ||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | |||
| 134 | void dlm_astd_wake(void) | ||
| 135 | { | ||
| 136 | if (!no_asts()) { | ||
| 137 | set_bit(WAKE_ASTS, &astd_wakeflags); | ||
| 138 | wake_up_process(astd_task); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | int dlm_astd_start(void) | ||
| 143 | { | ||
| 144 | struct task_struct *p; | ||
| 145 | int error = 0; | ||
| 146 | |||
| 147 | INIT_LIST_HEAD(&ast_queue); | ||
| 148 | spin_lock_init(&ast_queue_lock); | ||
| 149 | mutex_init(&astd_running); | ||
| 150 | |||
| 151 | p = kthread_run(dlm_astd, NULL, "dlm_astd"); | ||
| 152 | if (IS_ERR(p)) | ||
| 153 | error = PTR_ERR(p); | ||
| 154 | else | ||
| 155 | astd_task = p; | ||
| 156 | return error; | ||
| 157 | } | ||
| 158 | |||
| 159 | void dlm_astd_stop(void) | ||
| 160 | { | ||
| 161 | kthread_stop(astd_task); | ||
| 162 | } | ||
| 163 | |||
| 164 | void dlm_astd_suspend(void) | ||
| 165 | { | ||
| 166 | mutex_lock(&astd_running); | ||
| 167 | } | ||
| 168 | |||
| 169 | void dlm_astd_resume(void) | ||
| 170 | { | ||
| 171 | mutex_unlock(&astd_running); | ||
| 172 | } | ||
| 173 | |||
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h new file mode 100644 index 000000000000..6ee276c74c52 --- /dev/null +++ b/fs/dlm/ast.h | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | #ifndef __ASTD_DOT_H__ | ||
| 14 | #define __ASTD_DOT_H__ | ||
| 15 | |||
| 16 | void dlm_add_ast(struct dlm_lkb *lkb, int type); | ||
| 17 | void dlm_del_ast(struct dlm_lkb *lkb); | ||
| 18 | |||
| 19 | void dlm_astd_wake(void); | ||
| 20 | int dlm_astd_start(void); | ||
| 21 | void dlm_astd_stop(void); | ||
| 22 | void dlm_astd_suspend(void); | ||
| 23 | void dlm_astd_resume(void); | ||
| 24 | |||
| 25 | #endif | ||
| 26 | |||
diff --git a/fs/dlm/config.c b/fs/dlm/config.c new file mode 100644 index 000000000000..88553054bbfa --- /dev/null +++ b/fs/dlm/config.c | |||
| @@ -0,0 +1,789 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include <linux/kernel.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | #include <linux/configfs.h> | ||
| 17 | #include <net/sock.h> | ||
| 18 | |||
| 19 | #include "config.h" | ||
| 20 | #include "lowcomms.h" | ||
| 21 | |||
| 22 | /* | ||
| 23 | * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/nodeid | ||
| 24 | * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight | ||
| 25 | * /config/dlm/<cluster>/comms/<comm>/nodeid | ||
| 26 | * /config/dlm/<cluster>/comms/<comm>/local | ||
| 27 | * /config/dlm/<cluster>/comms/<comm>/addr | ||
| 28 | * The <cluster> level is useless, but I haven't figured out how to avoid it. | ||
| 29 | */ | ||
| 30 | |||
| 31 | static struct config_group *space_list; | ||
| 32 | static struct config_group *comm_list; | ||
| 33 | static struct comm *local_comm; | ||
| 34 | |||
| 35 | struct clusters; | ||
| 36 | struct cluster; | ||
| 37 | struct spaces; | ||
| 38 | struct space; | ||
| 39 | struct comms; | ||
| 40 | struct comm; | ||
| 41 | struct nodes; | ||
| 42 | struct node; | ||
| 43 | |||
| 44 | static struct config_group *make_cluster(struct config_group *, const char *); | ||
| 45 | static void drop_cluster(struct config_group *, struct config_item *); | ||
| 46 | static void release_cluster(struct config_item *); | ||
| 47 | static struct config_group *make_space(struct config_group *, const char *); | ||
| 48 | static void drop_space(struct config_group *, struct config_item *); | ||
| 49 | static void release_space(struct config_item *); | ||
| 50 | static struct config_item *make_comm(struct config_group *, const char *); | ||
| 51 | static void drop_comm(struct config_group *, struct config_item *); | ||
| 52 | static void release_comm(struct config_item *); | ||
| 53 | static struct config_item *make_node(struct config_group *, const char *); | ||
| 54 | static void drop_node(struct config_group *, struct config_item *); | ||
| 55 | static void release_node(struct config_item *); | ||
| 56 | |||
| 57 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | ||
| 58 | char *buf); | ||
| 59 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | ||
| 60 | const char *buf, size_t len); | ||
| 61 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | ||
| 62 | char *buf); | ||
| 63 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | ||
| 64 | const char *buf, size_t len); | ||
| 65 | |||
| 66 | static ssize_t comm_nodeid_read(struct comm *cm, char *buf); | ||
| 67 | static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len); | ||
| 68 | static ssize_t comm_local_read(struct comm *cm, char *buf); | ||
| 69 | static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len); | ||
| 70 | static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len); | ||
| 71 | static ssize_t node_nodeid_read(struct node *nd, char *buf); | ||
| 72 | static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); | ||
| 73 | static ssize_t node_weight_read(struct node *nd, char *buf); | ||
| 74 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); | ||
| 75 | |||
| 76 | enum { | ||
| 77 | COMM_ATTR_NODEID = 0, | ||
| 78 | COMM_ATTR_LOCAL, | ||
| 79 | COMM_ATTR_ADDR, | ||
| 80 | }; | ||
| 81 | |||
| 82 | struct comm_attribute { | ||
| 83 | struct configfs_attribute attr; | ||
| 84 | ssize_t (*show)(struct comm *, char *); | ||
| 85 | ssize_t (*store)(struct comm *, const char *, size_t); | ||
| 86 | }; | ||
| 87 | |||
| 88 | static struct comm_attribute comm_attr_nodeid = { | ||
| 89 | .attr = { .ca_owner = THIS_MODULE, | ||
| 90 | .ca_name = "nodeid", | ||
| 91 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 92 | .show = comm_nodeid_read, | ||
| 93 | .store = comm_nodeid_write, | ||
| 94 | }; | ||
| 95 | |||
| 96 | static struct comm_attribute comm_attr_local = { | ||
| 97 | .attr = { .ca_owner = THIS_MODULE, | ||
| 98 | .ca_name = "local", | ||
| 99 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 100 | .show = comm_local_read, | ||
| 101 | .store = comm_local_write, | ||
| 102 | }; | ||
| 103 | |||
| 104 | static struct comm_attribute comm_attr_addr = { | ||
| 105 | .attr = { .ca_owner = THIS_MODULE, | ||
| 106 | .ca_name = "addr", | ||
| 107 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 108 | .store = comm_addr_write, | ||
| 109 | }; | ||
| 110 | |||
| 111 | static struct configfs_attribute *comm_attrs[] = { | ||
| 112 | [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, | ||
| 113 | [COMM_ATTR_LOCAL] = &comm_attr_local.attr, | ||
| 114 | [COMM_ATTR_ADDR] = &comm_attr_addr.attr, | ||
| 115 | NULL, | ||
| 116 | }; | ||
| 117 | |||
| 118 | enum { | ||
| 119 | NODE_ATTR_NODEID = 0, | ||
| 120 | NODE_ATTR_WEIGHT, | ||
| 121 | }; | ||
| 122 | |||
| 123 | struct node_attribute { | ||
| 124 | struct configfs_attribute attr; | ||
| 125 | ssize_t (*show)(struct node *, char *); | ||
| 126 | ssize_t (*store)(struct node *, const char *, size_t); | ||
| 127 | }; | ||
| 128 | |||
| 129 | static struct node_attribute node_attr_nodeid = { | ||
| 130 | .attr = { .ca_owner = THIS_MODULE, | ||
| 131 | .ca_name = "nodeid", | ||
| 132 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 133 | .show = node_nodeid_read, | ||
| 134 | .store = node_nodeid_write, | ||
| 135 | }; | ||
| 136 | |||
| 137 | static struct node_attribute node_attr_weight = { | ||
| 138 | .attr = { .ca_owner = THIS_MODULE, | ||
| 139 | .ca_name = "weight", | ||
| 140 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 141 | .show = node_weight_read, | ||
| 142 | .store = node_weight_write, | ||
| 143 | }; | ||
| 144 | |||
| 145 | static struct configfs_attribute *node_attrs[] = { | ||
| 146 | [NODE_ATTR_NODEID] = &node_attr_nodeid.attr, | ||
| 147 | [NODE_ATTR_WEIGHT] = &node_attr_weight.attr, | ||
| 148 | NULL, | ||
| 149 | }; | ||
| 150 | |||
| 151 | struct clusters { | ||
| 152 | struct configfs_subsystem subsys; | ||
| 153 | }; | ||
| 154 | |||
| 155 | struct cluster { | ||
| 156 | struct config_group group; | ||
| 157 | }; | ||
| 158 | |||
| 159 | struct spaces { | ||
| 160 | struct config_group ss_group; | ||
| 161 | }; | ||
| 162 | |||
| 163 | struct space { | ||
| 164 | struct config_group group; | ||
| 165 | struct list_head members; | ||
| 166 | struct mutex members_lock; | ||
| 167 | int members_count; | ||
| 168 | }; | ||
| 169 | |||
| 170 | struct comms { | ||
| 171 | struct config_group cs_group; | ||
| 172 | }; | ||
| 173 | |||
| 174 | struct comm { | ||
| 175 | struct config_item item; | ||
| 176 | int nodeid; | ||
| 177 | int local; | ||
| 178 | int addr_count; | ||
| 179 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | ||
| 180 | }; | ||
| 181 | |||
| 182 | struct nodes { | ||
| 183 | struct config_group ns_group; | ||
| 184 | }; | ||
| 185 | |||
| 186 | struct node { | ||
| 187 | struct config_item item; | ||
| 188 | struct list_head list; /* space->members */ | ||
| 189 | int nodeid; | ||
| 190 | int weight; | ||
| 191 | }; | ||
| 192 | |||
| 193 | static struct configfs_group_operations clusters_ops = { | ||
| 194 | .make_group = make_cluster, | ||
| 195 | .drop_item = drop_cluster, | ||
| 196 | }; | ||
| 197 | |||
| 198 | static struct configfs_item_operations cluster_ops = { | ||
| 199 | .release = release_cluster, | ||
| 200 | }; | ||
| 201 | |||
| 202 | static struct configfs_group_operations spaces_ops = { | ||
| 203 | .make_group = make_space, | ||
| 204 | .drop_item = drop_space, | ||
| 205 | }; | ||
| 206 | |||
| 207 | static struct configfs_item_operations space_ops = { | ||
| 208 | .release = release_space, | ||
| 209 | }; | ||
| 210 | |||
| 211 | static struct configfs_group_operations comms_ops = { | ||
| 212 | .make_item = make_comm, | ||
| 213 | .drop_item = drop_comm, | ||
| 214 | }; | ||
| 215 | |||
| 216 | static struct configfs_item_operations comm_ops = { | ||
| 217 | .release = release_comm, | ||
| 218 | .show_attribute = show_comm, | ||
| 219 | .store_attribute = store_comm, | ||
| 220 | }; | ||
| 221 | |||
| 222 | static struct configfs_group_operations nodes_ops = { | ||
| 223 | .make_item = make_node, | ||
| 224 | .drop_item = drop_node, | ||
| 225 | }; | ||
| 226 | |||
| 227 | static struct configfs_item_operations node_ops = { | ||
| 228 | .release = release_node, | ||
| 229 | .show_attribute = show_node, | ||
| 230 | .store_attribute = store_node, | ||
| 231 | }; | ||
| 232 | |||
| 233 | static struct config_item_type clusters_type = { | ||
| 234 | .ct_group_ops = &clusters_ops, | ||
| 235 | .ct_owner = THIS_MODULE, | ||
| 236 | }; | ||
| 237 | |||
| 238 | static struct config_item_type cluster_type = { | ||
| 239 | .ct_item_ops = &cluster_ops, | ||
| 240 | .ct_owner = THIS_MODULE, | ||
| 241 | }; | ||
| 242 | |||
| 243 | static struct config_item_type spaces_type = { | ||
| 244 | .ct_group_ops = &spaces_ops, | ||
| 245 | .ct_owner = THIS_MODULE, | ||
| 246 | }; | ||
| 247 | |||
| 248 | static struct config_item_type space_type = { | ||
| 249 | .ct_item_ops = &space_ops, | ||
| 250 | .ct_owner = THIS_MODULE, | ||
| 251 | }; | ||
| 252 | |||
| 253 | static struct config_item_type comms_type = { | ||
| 254 | .ct_group_ops = &comms_ops, | ||
| 255 | .ct_owner = THIS_MODULE, | ||
| 256 | }; | ||
| 257 | |||
| 258 | static struct config_item_type comm_type = { | ||
| 259 | .ct_item_ops = &comm_ops, | ||
| 260 | .ct_attrs = comm_attrs, | ||
| 261 | .ct_owner = THIS_MODULE, | ||
| 262 | }; | ||
| 263 | |||
| 264 | static struct config_item_type nodes_type = { | ||
| 265 | .ct_group_ops = &nodes_ops, | ||
| 266 | .ct_owner = THIS_MODULE, | ||
| 267 | }; | ||
| 268 | |||
| 269 | static struct config_item_type node_type = { | ||
| 270 | .ct_item_ops = &node_ops, | ||
| 271 | .ct_attrs = node_attrs, | ||
| 272 | .ct_owner = THIS_MODULE, | ||
| 273 | }; | ||
| 274 | |||
| 275 | static struct cluster *to_cluster(struct config_item *i) | ||
| 276 | { | ||
| 277 | return i ? container_of(to_config_group(i), struct cluster, group):NULL; | ||
| 278 | } | ||
| 279 | |||
| 280 | static struct space *to_space(struct config_item *i) | ||
| 281 | { | ||
| 282 | return i ? container_of(to_config_group(i), struct space, group) : NULL; | ||
| 283 | } | ||
| 284 | |||
| 285 | static struct comm *to_comm(struct config_item *i) | ||
| 286 | { | ||
| 287 | return i ? container_of(i, struct comm, item) : NULL; | ||
| 288 | } | ||
| 289 | |||
| 290 | static struct node *to_node(struct config_item *i) | ||
| 291 | { | ||
| 292 | return i ? container_of(i, struct node, item) : NULL; | ||
| 293 | } | ||
| 294 | |||
| 295 | static struct config_group *make_cluster(struct config_group *g, | ||
| 296 | const char *name) | ||
| 297 | { | ||
| 298 | struct cluster *cl = NULL; | ||
| 299 | struct spaces *sps = NULL; | ||
| 300 | struct comms *cms = NULL; | ||
| 301 | void *gps = NULL; | ||
| 302 | |||
| 303 | cl = kzalloc(sizeof(struct cluster), GFP_KERNEL); | ||
| 304 | gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); | ||
| 305 | sps = kzalloc(sizeof(struct spaces), GFP_KERNEL); | ||
| 306 | cms = kzalloc(sizeof(struct comms), GFP_KERNEL); | ||
| 307 | |||
| 308 | if (!cl || !gps || !sps || !cms) | ||
| 309 | goto fail; | ||
| 310 | |||
| 311 | config_group_init_type_name(&cl->group, name, &cluster_type); | ||
| 312 | config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type); | ||
| 313 | config_group_init_type_name(&cms->cs_group, "comms", &comms_type); | ||
| 314 | |||
| 315 | cl->group.default_groups = gps; | ||
| 316 | cl->group.default_groups[0] = &sps->ss_group; | ||
| 317 | cl->group.default_groups[1] = &cms->cs_group; | ||
| 318 | cl->group.default_groups[2] = NULL; | ||
| 319 | |||
| 320 | space_list = &sps->ss_group; | ||
| 321 | comm_list = &cms->cs_group; | ||
| 322 | return &cl->group; | ||
| 323 | |||
| 324 | fail: | ||
| 325 | kfree(cl); | ||
| 326 | kfree(gps); | ||
| 327 | kfree(sps); | ||
| 328 | kfree(cms); | ||
| 329 | return NULL; | ||
| 330 | } | ||
| 331 | |||
| 332 | static void drop_cluster(struct config_group *g, struct config_item *i) | ||
| 333 | { | ||
| 334 | struct cluster *cl = to_cluster(i); | ||
| 335 | struct config_item *tmp; | ||
| 336 | int j; | ||
| 337 | |||
| 338 | for (j = 0; cl->group.default_groups[j]; j++) { | ||
| 339 | tmp = &cl->group.default_groups[j]->cg_item; | ||
| 340 | cl->group.default_groups[j] = NULL; | ||
| 341 | config_item_put(tmp); | ||
| 342 | } | ||
| 343 | |||
| 344 | space_list = NULL; | ||
| 345 | comm_list = NULL; | ||
| 346 | |||
| 347 | config_item_put(i); | ||
| 348 | } | ||
| 349 | |||
| 350 | static void release_cluster(struct config_item *i) | ||
| 351 | { | ||
| 352 | struct cluster *cl = to_cluster(i); | ||
| 353 | kfree(cl->group.default_groups); | ||
| 354 | kfree(cl); | ||
| 355 | } | ||
| 356 | |||
| 357 | static struct config_group *make_space(struct config_group *g, const char *name) | ||
| 358 | { | ||
| 359 | struct space *sp = NULL; | ||
| 360 | struct nodes *nds = NULL; | ||
| 361 | void *gps = NULL; | ||
| 362 | |||
| 363 | sp = kzalloc(sizeof(struct space), GFP_KERNEL); | ||
| 364 | gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); | ||
| 365 | nds = kzalloc(sizeof(struct nodes), GFP_KERNEL); | ||
| 366 | |||
| 367 | if (!sp || !gps || !nds) | ||
| 368 | goto fail; | ||
| 369 | |||
| 370 | config_group_init_type_name(&sp->group, name, &space_type); | ||
| 371 | config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type); | ||
| 372 | |||
| 373 | sp->group.default_groups = gps; | ||
| 374 | sp->group.default_groups[0] = &nds->ns_group; | ||
| 375 | sp->group.default_groups[1] = NULL; | ||
| 376 | |||
| 377 | INIT_LIST_HEAD(&sp->members); | ||
| 378 | mutex_init(&sp->members_lock); | ||
| 379 | sp->members_count = 0; | ||
| 380 | return &sp->group; | ||
| 381 | |||
| 382 | fail: | ||
| 383 | kfree(sp); | ||
| 384 | kfree(gps); | ||
| 385 | kfree(nds); | ||
| 386 | return NULL; | ||
| 387 | } | ||
| 388 | |||
| 389 | static void drop_space(struct config_group *g, struct config_item *i) | ||
| 390 | { | ||
| 391 | struct space *sp = to_space(i); | ||
| 392 | struct config_item *tmp; | ||
| 393 | int j; | ||
| 394 | |||
| 395 | /* assert list_empty(&sp->members) */ | ||
| 396 | |||
| 397 | for (j = 0; sp->group.default_groups[j]; j++) { | ||
| 398 | tmp = &sp->group.default_groups[j]->cg_item; | ||
| 399 | sp->group.default_groups[j] = NULL; | ||
| 400 | config_item_put(tmp); | ||
| 401 | } | ||
| 402 | |||
| 403 | config_item_put(i); | ||
| 404 | } | ||
| 405 | |||
| 406 | static void release_space(struct config_item *i) | ||
| 407 | { | ||
| 408 | struct space *sp = to_space(i); | ||
| 409 | kfree(sp->group.default_groups); | ||
| 410 | kfree(sp); | ||
| 411 | } | ||
| 412 | |||
| 413 | static struct config_item *make_comm(struct config_group *g, const char *name) | ||
| 414 | { | ||
| 415 | struct comm *cm; | ||
| 416 | |||
| 417 | cm = kzalloc(sizeof(struct comm), GFP_KERNEL); | ||
| 418 | if (!cm) | ||
| 419 | return NULL; | ||
| 420 | |||
| 421 | config_item_init_type_name(&cm->item, name, &comm_type); | ||
| 422 | cm->nodeid = -1; | ||
| 423 | cm->local = 0; | ||
| 424 | cm->addr_count = 0; | ||
| 425 | return &cm->item; | ||
| 426 | } | ||
| 427 | |||
/* Remove a comm entry: forget it as the local comm if needed, close its
   lowcomms connection, and free every address it accumulated. */
static void drop_comm(struct config_group *g, struct config_item *i)
{
	struct comm *cm = to_comm(i);
	if (local_comm == cm)
		local_comm = NULL;
	dlm_lowcomms_close(cm->nodeid);
	/* post-decrement walks addr_count-1 .. 0; addr_count ends at -1,
	   which is fine since the object is released right after */
	while (cm->addr_count--)
		kfree(cm->addr[cm->addr_count]);
	config_item_put(i);
}
| 438 | |||
/* Final release of a comm entry. */
static void release_comm(struct config_item *i)
{
	kfree(to_comm(i));
}
| 444 | |||
| 445 | static struct config_item *make_node(struct config_group *g, const char *name) | ||
| 446 | { | ||
| 447 | struct space *sp = to_space(g->cg_item.ci_parent); | ||
| 448 | struct node *nd; | ||
| 449 | |||
| 450 | nd = kzalloc(sizeof(struct node), GFP_KERNEL); | ||
| 451 | if (!nd) | ||
| 452 | return NULL; | ||
| 453 | |||
| 454 | config_item_init_type_name(&nd->item, name, &node_type); | ||
| 455 | nd->nodeid = -1; | ||
| 456 | nd->weight = 1; /* default weight of 1 if none is set */ | ||
| 457 | |||
| 458 | mutex_lock(&sp->members_lock); | ||
| 459 | list_add(&nd->list, &sp->members); | ||
| 460 | sp->members_count++; | ||
| 461 | mutex_unlock(&sp->members_lock); | ||
| 462 | |||
| 463 | return &nd->item; | ||
| 464 | } | ||
| 465 | |||
| 466 | static void drop_node(struct config_group *g, struct config_item *i) | ||
| 467 | { | ||
| 468 | struct space *sp = to_space(g->cg_item.ci_parent); | ||
| 469 | struct node *nd = to_node(i); | ||
| 470 | |||
| 471 | mutex_lock(&sp->members_lock); | ||
| 472 | list_del(&nd->list); | ||
| 473 | sp->members_count--; | ||
| 474 | mutex_unlock(&sp->members_lock); | ||
| 475 | |||
| 476 | config_item_put(i); | ||
| 477 | } | ||
| 478 | |||
/* Final release of a node entry. */
static void release_node(struct config_item *i)
{
	kfree(to_node(i));
}
| 484 | |||
/* Root configfs subsystem, registered under the name "dlm". */
static struct clusters clusters_root = {
	.subsys = {
		.su_group = {
			.cg_item = {
				.ci_namebuf = "dlm",
				.ci_type = &clusters_type,
			},
		},
	},
};
| 495 | |||
/* Initialize and register the "dlm" configfs subsystem.
   Returns 0 or the error from configfs_register_subsystem(). */
int dlm_config_init(void)
{
	config_group_init(&clusters_root.subsys.su_group);
	init_MUTEX(&clusters_root.subsys.su_sem);
	return configfs_register_subsystem(&clusters_root.subsys);
}
| 502 | |||
/* Unregister the "dlm" configfs subsystem. */
void dlm_config_exit(void)
{
	configfs_unregister_subsystem(&clusters_root.subsys);
}
| 507 | |||
| 508 | /* | ||
| 509 | * Functions for user space to read/write attributes | ||
| 510 | */ | ||
| 511 | |||
| 512 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | ||
| 513 | char *buf) | ||
| 514 | { | ||
| 515 | struct comm *cm = to_comm(i); | ||
| 516 | struct comm_attribute *cma = | ||
| 517 | container_of(a, struct comm_attribute, attr); | ||
| 518 | return cma->show ? cma->show(cm, buf) : 0; | ||
| 519 | } | ||
| 520 | |||
| 521 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | ||
| 522 | const char *buf, size_t len) | ||
| 523 | { | ||
| 524 | struct comm *cm = to_comm(i); | ||
| 525 | struct comm_attribute *cma = | ||
| 526 | container_of(a, struct comm_attribute, attr); | ||
| 527 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; | ||
| 528 | } | ||
| 529 | |||
/* <comm>/nodeid reader: prints the configured nodeid (-1 if unset). */
static ssize_t comm_nodeid_read(struct comm *cm, char *buf)
{
	return sprintf(buf, "%d\n", cm->nodeid);
}

/* <comm>/nodeid writer: parses a leading integer with simple_strtol;
   trailing characters are silently ignored, no range validation. */
static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len)
{
	cm->nodeid = simple_strtol(buf, NULL, 0);
	return len;
}

/* <comm>/local reader: non-zero means this comm describes the local node. */
static ssize_t comm_local_read(struct comm *cm, char *buf)
{
	return sprintf(buf, "%d\n", cm->local);
}
| 545 | |||
| 546 | static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) | ||
| 547 | { | ||
| 548 | cm->local= simple_strtol(buf, NULL, 0); | ||
| 549 | if (cm->local && !local_comm) | ||
| 550 | local_comm = cm; | ||
| 551 | return len; | ||
| 552 | } | ||
| 553 | |||
/*
 * <comm>/addr writer: userspace writes a whole binary
 * struct sockaddr_storage per write().  Up to DLM_MAX_ADDR_COUNT
 * addresses accumulate in cm->addr[]; each is an owned kzalloc'd copy
 * freed in drop_comm().
 */
static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len)
{
	struct sockaddr_storage *addr;

	/* exactly one full sockaddr_storage per write */
	if (len != sizeof(struct sockaddr_storage))
		return -EINVAL;

	if (cm->addr_count >= DLM_MAX_ADDR_COUNT)
		return -ENOSPC;

	addr = kzalloc(sizeof(*addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	memcpy(addr, buf, len);
	cm->addr[cm->addr_count++] = addr;
	return len;
}
| 572 | |||
| 573 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | ||
| 574 | char *buf) | ||
| 575 | { | ||
| 576 | struct node *nd = to_node(i); | ||
| 577 | struct node_attribute *nda = | ||
| 578 | container_of(a, struct node_attribute, attr); | ||
| 579 | return nda->show ? nda->show(nd, buf) : 0; | ||
| 580 | } | ||
| 581 | |||
| 582 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | ||
| 583 | const char *buf, size_t len) | ||
| 584 | { | ||
| 585 | struct node *nd = to_node(i); | ||
| 586 | struct node_attribute *nda = | ||
| 587 | container_of(a, struct node_attribute, attr); | ||
| 588 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; | ||
| 589 | } | ||
| 590 | |||
/* <node>/nodeid reader. */
static ssize_t node_nodeid_read(struct node *nd, char *buf)
{
	return sprintf(buf, "%d\n", nd->nodeid);
}

/* <node>/nodeid writer: simple_strtol, no validation. */
static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len)
{
	nd->nodeid = simple_strtol(buf, NULL, 0);
	return len;
}

/* <node>/weight reader: weight defaults to 1 (set in make_node). */
static ssize_t node_weight_read(struct node *nd, char *buf)
{
	return sprintf(buf, "%d\n", nd->weight);
}

/* <node>/weight writer: simple_strtol, no validation. */
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
{
	nd->weight = simple_strtol(buf, NULL, 0);
	return len;
}
| 612 | |||
| 613 | /* | ||
| 614 | * Functions for the dlm to get the info that's been configured | ||
| 615 | */ | ||
| 616 | |||
/* Look up a lockspace directory by name.  Returns a referenced space
   (config_group_find_obj takes a reference) or NULL; pair with
   put_space().  NULL space_list means no cluster dir exists yet. */
static struct space *get_space(char *name)
{
	if (!space_list)
		return NULL;
	return to_space(config_group_find_obj(space_list, name));
}

/* Drop the reference taken by get_space(). */
static void put_space(struct space *sp)
{
	config_item_put(&sp->group.cg_item);
}
| 628 | |||
/*
 * Find a comm entry either by nodeid (when nodeid != 0) or by matching
 * its first address against @addr (when nodeid == 0).  On success a
 * reference is taken on the item; pair with put_comm().  Returns NULL
 * when no comms directory exists or no entry matches.
 */
static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
{
	struct config_item *i;
	struct comm *cm = NULL;
	int found = 0;

	if (!comm_list)
		return NULL;

	/* the subsystem semaphore protects the cg_children list */
	down(&clusters_root.subsys.su_sem);

	list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
		cm = to_comm(i);

		if (nodeid) {
			if (cm->nodeid != nodeid)
				continue;
			found = 1;
			break;
		} else {
			if (!cm->addr_count ||
			    memcmp(cm->addr[0], addr, sizeof(*addr)))
				continue;
			found = 1;
			break;
		}
	}
	up(&clusters_root.subsys.su_sem);

	/* NOTE(review): the reference is taken after su_sem is released;
	   presumably nothing removes the item in that window — confirm */
	if (found)
		config_item_get(i);
	else
		cm = NULL;
	return cm;
}

/* Drop the reference taken by get_comm(). */
static void put_comm(struct comm *cm)
{
	config_item_put(&cm->item);
}
| 669 | |||
| 670 | /* caller must free mem */ | ||
| 671 | int dlm_nodeid_list(char *lsname, int **ids_out) | ||
| 672 | { | ||
| 673 | struct space *sp; | ||
| 674 | struct node *nd; | ||
| 675 | int i = 0, rv = 0; | ||
| 676 | int *ids; | ||
| 677 | |||
| 678 | sp = get_space(lsname); | ||
| 679 | if (!sp) | ||
| 680 | return -EEXIST; | ||
| 681 | |||
| 682 | mutex_lock(&sp->members_lock); | ||
| 683 | if (!sp->members_count) { | ||
| 684 | rv = 0; | ||
| 685 | goto out; | ||
| 686 | } | ||
| 687 | |||
| 688 | ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); | ||
| 689 | if (!ids) { | ||
| 690 | rv = -ENOMEM; | ||
| 691 | goto out; | ||
| 692 | } | ||
| 693 | |||
| 694 | rv = sp->members_count; | ||
| 695 | list_for_each_entry(nd, &sp->members, list) | ||
| 696 | ids[i++] = nd->nodeid; | ||
| 697 | |||
| 698 | if (rv != i) | ||
| 699 | printk("bad nodeid count %d %d\n", rv, i); | ||
| 700 | |||
| 701 | *ids_out = ids; | ||
| 702 | out: | ||
| 703 | mutex_unlock(&sp->members_lock); | ||
| 704 | put_space(sp); | ||
| 705 | return rv; | ||
| 706 | } | ||
| 707 | |||
| 708 | int dlm_node_weight(char *lsname, int nodeid) | ||
| 709 | { | ||
| 710 | struct space *sp; | ||
| 711 | struct node *nd; | ||
| 712 | int w = -EEXIST; | ||
| 713 | |||
| 714 | sp = get_space(lsname); | ||
| 715 | if (!sp) | ||
| 716 | goto out; | ||
| 717 | |||
| 718 | mutex_lock(&sp->members_lock); | ||
| 719 | list_for_each_entry(nd, &sp->members, list) { | ||
| 720 | if (nd->nodeid != nodeid) | ||
| 721 | continue; | ||
| 722 | w = nd->weight; | ||
| 723 | break; | ||
| 724 | } | ||
| 725 | mutex_unlock(&sp->members_lock); | ||
| 726 | put_space(sp); | ||
| 727 | out: | ||
| 728 | return w; | ||
| 729 | } | ||
| 730 | |||
| 731 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | ||
| 732 | { | ||
| 733 | struct comm *cm = get_comm(nodeid, NULL); | ||
| 734 | if (!cm) | ||
| 735 | return -EEXIST; | ||
| 736 | if (!cm->addr_count) | ||
| 737 | return -ENOENT; | ||
| 738 | memcpy(addr, cm->addr[0], sizeof(*addr)); | ||
| 739 | put_comm(cm); | ||
| 740 | return 0; | ||
| 741 | } | ||
| 742 | |||
| 743 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | ||
| 744 | { | ||
| 745 | struct comm *cm = get_comm(0, addr); | ||
| 746 | if (!cm) | ||
| 747 | return -EEXIST; | ||
| 748 | *nodeid = cm->nodeid; | ||
| 749 | put_comm(cm); | ||
| 750 | return 0; | ||
| 751 | } | ||
| 752 | |||
/* Nodeid of the comm marked "local", or 0 if none has been marked yet. */
int dlm_our_nodeid(void)
{
	return local_comm ? local_comm->nodeid : 0;
}
| 757 | |||
| 758 | /* num 0 is first addr, num 1 is second addr */ | ||
| 759 | int dlm_our_addr(struct sockaddr_storage *addr, int num) | ||
| 760 | { | ||
| 761 | if (!local_comm) | ||
| 762 | return -1; | ||
| 763 | if (num + 1 > local_comm->addr_count) | ||
| 764 | return -1; | ||
| 765 | memcpy(addr, local_comm->addr[num], sizeof(*addr)); | ||
| 766 | return 0; | ||
| 767 | } | ||
| 768 | |||
/* Config file defaults */
#define DEFAULT_TCP_PORT 21064
#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 256
#define DEFAULT_LKBTBL_SIZE 1024
#define DEFAULT_DIRTBL_SIZE 512
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5

/* The live tunables read by the rest of the dlm; initialized to the
   compiled-in defaults above. */
struct dlm_config_info dlm_config = {
	.tcp_port = DEFAULT_TCP_PORT,
	.buffer_size = DEFAULT_BUFFER_SIZE,
	.rsbtbl_size = DEFAULT_RSBTBL_SIZE,
	.lkbtbl_size = DEFAULT_LKBTBL_SIZE,
	.dirtbl_size = DEFAULT_DIRTBL_SIZE,
	.recover_timer = DEFAULT_RECOVER_TIMER,
	.toss_secs = DEFAULT_TOSS_SECS,
	.scan_secs = DEFAULT_SCAN_SECS
};
| 789 | |||
diff --git a/fs/dlm/config.h b/fs/dlm/config.h new file mode 100644 index 000000000000..9da7839958a9 --- /dev/null +++ b/fs/dlm/config.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__

/* maximum number of addresses storable per comm entry */
#define DLM_MAX_ADDR_COUNT 3

/* Tunables exposed through configfs; defaults live in config.c. */
struct dlm_config_info {
	int tcp_port;
	int buffer_size;
	int rsbtbl_size;
	int lkbtbl_size;
	int dirtbl_size;
	int recover_timer;
	int toss_secs;
	int scan_secs;
};

extern struct dlm_config_info dlm_config;

int dlm_config_init(void);
void dlm_config_exit(void);
int dlm_node_weight(char *lsname, int nodeid);
/* returns member count; caller must kfree(*ids_out) */
int dlm_nodeid_list(char *lsname, int **ids_out);
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num);

#endif				/* __CONFIG_DOT_H__ */
| 42 | |||
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c new file mode 100644 index 000000000000..ca94a837a5bb --- /dev/null +++ b/fs/dlm/debug_fs.c | |||
| @@ -0,0 +1,387 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | #include <linux/pagemap.h> | ||
| 14 | #include <linux/seq_file.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | #include <linux/ctype.h> | ||
| 17 | #include <linux/debugfs.h> | ||
| 18 | |||
| 19 | #include "dlm_internal.h" | ||
| 20 | |||
#define DLM_DEBUG_BUF_LEN 4096
static char debug_buf[DLM_DEBUG_BUF_LEN];
static struct mutex debug_buf_lock;	/* serializes use of debug_buf */

static struct dentry *dlm_root;		/* debugfs "dlm" directory */

/* Iterator state for walking every rsb in a lockspace hash table. */
struct rsb_iter {
	int entry;		/* current hash bucket index */
	struct dlm_ls *ls;
	struct list_head *next;	/* next list node within the bucket */
	struct dlm_rsb *rsb;	/* rsb most recently stepped to */
};
| 33 | |||
| 34 | /* | ||
| 35 | * dump all rsb's in the lockspace hash table | ||
| 36 | */ | ||
| 37 | |||
| 38 | static char *print_lockmode(int mode) | ||
| 39 | { | ||
| 40 | switch (mode) { | ||
| 41 | case DLM_LOCK_IV: | ||
| 42 | return "--"; | ||
| 43 | case DLM_LOCK_NL: | ||
| 44 | return "NL"; | ||
| 45 | case DLM_LOCK_CR: | ||
| 46 | return "CR"; | ||
| 47 | case DLM_LOCK_CW: | ||
| 48 | return "CW"; | ||
| 49 | case DLM_LOCK_PR: | ||
| 50 | return "PR"; | ||
| 51 | case DLM_LOCK_PW: | ||
| 52 | return "PW"; | ||
| 53 | case DLM_LOCK_EX: | ||
| 54 | return "EX"; | ||
| 55 | default: | ||
| 56 | return "??"; | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
/* Print one lock (lkb) line for the rsb dump: id, granted mode,
   requested mode when converting/waiting, remote/master info, and any
   outstanding wait_type. */
static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
		       struct dlm_rsb *res)
{
	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));

	/* requested mode only matters while a convert/wait is pending */
	if (lkb->lkb_status == DLM_LKSTS_CONVERT
	    || lkb->lkb_status == DLM_LKSTS_WAITING)
		seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode));

	if (lkb->lkb_nodeid) {
		if (lkb->lkb_nodeid != res->res_nodeid)
			seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid,
				   lkb->lkb_remid);
		else
			seq_printf(s, " Master: %08x", lkb->lkb_remid);
	}

	if (lkb->lkb_wait_type)
		seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);

	seq_printf(s, "\n");
}
| 82 | |||
/* Dump one rsb to the seq_file: name, master info, LVB contents,
   recovery state, and every lkb on its four queues.  Always returns 0. */
static int print_resource(struct dlm_rsb *res, struct seq_file *s)
{
	struct dlm_lkb *lkb;
	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;

	/* name printed byte-by-byte; non-printable bytes become '.' */
	seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
	for (i = 0; i < res->res_length; i++) {
		if (isprint(res->res_name[i]))
			seq_printf(s, "%c", res->res_name[i]);
		else
			seq_printf(s, "%c", '.');
	}
	/* res_nodeid: >0 remote master, 0 we are master, -1 lookup pending */
	if (res->res_nodeid > 0)
		seq_printf(s, "\" \nLocal Copy, Master is node %d\n",
			   res->res_nodeid);
	else if (res->res_nodeid == 0)
		seq_printf(s, "\" \nMaster Copy\n");
	else if (res->res_nodeid == -1)
		seq_printf(s, "\" \nLooking up master (lkid %x)\n",
			   res->res_first_lkid);
	else
		seq_printf(s, "\" \nInvalid master %d\n", res->res_nodeid);

	/* Print the LVB: */
	if (res->res_lvbptr) {
		seq_printf(s, "LVB: ");
		for (i = 0; i < lvblen; i++) {
			if (i == lvblen / 2)
				seq_printf(s, "\n     ");	/* split into two rows */
			seq_printf(s, "%02x ",
				   (unsigned char) res->res_lvbptr[i]);
		}
		if (rsb_flag(res, RSB_VALNOTVALID))
			seq_printf(s, " (INVALID)");
		seq_printf(s, "\n");
	}

	root_list = !list_empty(&res->res_root_list);
	recover_list = !list_empty(&res->res_recover_list);

	if (root_list || recover_list) {
		seq_printf(s, "Recovery: root %d recover %d flags %lx "
			   "count %d\n", root_list, recover_list,
			   res->res_flags, res->res_recover_locks_count);
	}

	/* Print the locks attached to this resource */
	seq_printf(s, "Granted Queue\n");
	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
		print_lock(s, lkb, res);

	seq_printf(s, "Conversion Queue\n");
	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
		print_lock(s, lkb, res);

	seq_printf(s, "Waiting Queue\n");
	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
		print_lock(s, lkb, res);

	/* lookup queue header is only printed when non-empty */
	if (list_empty(&res->res_lookup))
		goto out;

	seq_printf(s, "Lookup Queue\n");
	list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
		seq_printf(s, "%08x %s", lkb->lkb_id,
			   print_lockmode(lkb->lkb_rqmode));
		if (lkb->lkb_wait_type)
			seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
		seq_printf(s, "\n");
	}
 out:
	return 0;
}
| 156 | |||
/*
 * Advance the iterator to the next rsb, crossing hash buckets as
 * needed.  Returns 0 with ri->rsb set, or 1 when the whole table has
 * been exhausted.
 */
static int rsb_iter_next(struct rsb_iter *ri)
{
	struct dlm_ls *ls = ri->ls;
	int i;

	if (!ri->next) {
 top:
		/* Find the next non-empty hash bucket */
		for (i = ri->entry; i < ls->ls_rsbtbl_size; i++) {
			read_lock(&ls->ls_rsbtbl[i].lock);
			if (!list_empty(&ls->ls_rsbtbl[i].list)) {
				ri->next = ls->ls_rsbtbl[i].list.next;
				read_unlock(&ls->ls_rsbtbl[i].lock);
				break;
			}
			read_unlock(&ls->ls_rsbtbl[i].lock);
		}
		ri->entry = i;

		if (ri->entry >= ls->ls_rsbtbl_size)
			return 1;	/* table exhausted */
	} else {
		i = ri->entry;
		read_lock(&ls->ls_rsbtbl[i].lock);
		ri->next = ri->next->next;
		if (ri->next->next == ls->ls_rsbtbl[i].list.next) {
			/* End of list - move to next bucket */
			ri->next = NULL;
			ri->entry++;
			read_unlock(&ls->ls_rsbtbl[i].lock);
			goto top;
		}
		read_unlock(&ls->ls_rsbtbl[i].lock);
	}
	/* NOTE(review): ri->next is dereferenced here after the bucket lock
	   has been dropped; presumably safe only if the rsb cannot be freed
	   concurrently — confirm against rsbtbl lifetime rules */
	ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);

	return 0;
}
| 195 | |||
/* Release an iterator allocated by rsb_iter_init(). */
static void rsb_iter_free(struct rsb_iter *ri)
{
	kfree(ri);
}
| 200 | |||
| 201 | static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls) | ||
| 202 | { | ||
| 203 | struct rsb_iter *ri; | ||
| 204 | |||
| 205 | ri = kmalloc(sizeof *ri, GFP_KERNEL); | ||
| 206 | if (!ri) | ||
| 207 | return NULL; | ||
| 208 | |||
| 209 | ri->ls = ls; | ||
| 210 | ri->entry = 0; | ||
| 211 | ri->next = NULL; | ||
| 212 | |||
| 213 | if (rsb_iter_next(ri)) { | ||
| 214 | rsb_iter_free(ri); | ||
| 215 | return NULL; | ||
| 216 | } | ||
| 217 | |||
| 218 | return ri; | ||
| 219 | } | ||
| 220 | |||
| 221 | static void *rsb_seq_start(struct seq_file *file, loff_t *pos) | ||
| 222 | { | ||
| 223 | struct rsb_iter *ri; | ||
| 224 | loff_t n = *pos; | ||
| 225 | |||
| 226 | ri = rsb_iter_init(file->private); | ||
| 227 | if (!ri) | ||
| 228 | return NULL; | ||
| 229 | |||
| 230 | while (n--) { | ||
| 231 | if (rsb_iter_next(ri)) { | ||
| 232 | rsb_iter_free(ri); | ||
| 233 | return NULL; | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | return ri; | ||
| 238 | } | ||
| 239 | |||
| 240 | static void *rsb_seq_next(struct seq_file *file, void *iter_ptr, loff_t *pos) | ||
| 241 | { | ||
| 242 | struct rsb_iter *ri = iter_ptr; | ||
| 243 | |||
| 244 | (*pos)++; | ||
| 245 | |||
| 246 | if (rsb_iter_next(ri)) { | ||
| 247 | rsb_iter_free(ri); | ||
| 248 | return NULL; | ||
| 249 | } | ||
| 250 | |||
| 251 | return ri; | ||
| 252 | } | ||
| 253 | |||
/* seq_file stop: the iterator frees itself at end-of-table in
   rsb_seq_next/rsb_seq_start, so nothing to do here. */
static void rsb_seq_stop(struct seq_file *file, void *iter_ptr)
{
	/* nothing for now */
}
| 258 | |||
| 259 | static int rsb_seq_show(struct seq_file *file, void *iter_ptr) | ||
| 260 | { | ||
| 261 | struct rsb_iter *ri = iter_ptr; | ||
| 262 | |||
| 263 | print_resource(ri->rsb, file); | ||
| 264 | |||
| 265 | return 0; | ||
| 266 | } | ||
| 267 | |||
/* seq_file operations for the per-lockspace rsb dump file. */
static struct seq_operations rsb_seq_ops = {
	.start = rsb_seq_start,
	.next  = rsb_seq_next,
	.stop  = rsb_seq_stop,
	.show  = rsb_seq_show,
};
| 274 | |||
| 275 | static int rsb_open(struct inode *inode, struct file *file) | ||
| 276 | { | ||
| 277 | struct seq_file *seq; | ||
| 278 | int ret; | ||
| 279 | |||
| 280 | ret = seq_open(file, &rsb_seq_ops); | ||
| 281 | if (ret) | ||
| 282 | return ret; | ||
| 283 | |||
| 284 | seq = file->private_data; | ||
| 285 | seq->private = inode->i_private; | ||
| 286 | |||
| 287 | return 0; | ||
| 288 | } | ||
| 289 | |||
/* file_operations for the per-lockspace rsb dump (seq_file backed). */
static struct file_operations rsb_fops = {
	.owner   = THIS_MODULE,
	.open    = rsb_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release
};
| 297 | |||
| 298 | /* | ||
| 299 | * dump lkb's on the ls_waiters list | ||
| 300 | */ | ||
| 301 | |||
/* open() for the waiters file: stash the lockspace for waiters_read(). */
static int waiters_open(struct inode *inode, struct file *file)
{
	file->private_data = inode->i_private;
	return 0;
}
| 307 | |||
/* Dump the lkb's on ls_waiters into the shared debug_buf (serialized by
   debug_buf_lock) and copy the requested window to userspace.  Output is
   silently truncated if it exceeds DLM_DEBUG_BUF_LEN. */
static ssize_t waiters_read(struct file *file, char __user *userbuf,
			    size_t count, loff_t *ppos)
{
	struct dlm_ls *ls = file->private_data;
	struct dlm_lkb *lkb;
	size_t len = DLM_DEBUG_BUF_LEN, pos = 0, ret, rv;

	mutex_lock(&debug_buf_lock);
	mutex_lock(&ls->ls_waiters_mutex);
	memset(debug_buf, 0, sizeof(debug_buf));

	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
		/* NOTE(review): snprintf returns int; a negative error would
		   wrap when stored into size_t ret, but the >= len - pos test
		   then stops the loop, so the result is only truncation */
		ret = snprintf(debug_buf + pos, len - pos, "%x %d %d %s\n",
			       lkb->lkb_id, lkb->lkb_wait_type,
			       lkb->lkb_nodeid, lkb->lkb_resource->res_name);
		if (ret >= len - pos)
			break;
		pos += ret;
	}
	mutex_unlock(&ls->ls_waiters_mutex);

	rv = simple_read_from_buffer(userbuf, count, ppos, debug_buf, pos);
	mutex_unlock(&debug_buf_lock);
	return rv;
}
| 333 | |||
/* file_operations for the per-lockspace waiters dump. */
static struct file_operations waiters_fops = {
	.owner = THIS_MODULE,
	.open  = waiters_open,
	.read  = waiters_read
};
| 339 | |||
| 340 | int dlm_create_debug_file(struct dlm_ls *ls) | ||
| 341 | { | ||
| 342 | char name[DLM_LOCKSPACE_LEN+8]; | ||
| 343 | |||
| 344 | ls->ls_debug_rsb_dentry = debugfs_create_file(ls->ls_name, | ||
| 345 | S_IFREG | S_IRUGO, | ||
| 346 | dlm_root, | ||
| 347 | ls, | ||
| 348 | &rsb_fops); | ||
| 349 | if (!ls->ls_debug_rsb_dentry) | ||
| 350 | return -ENOMEM; | ||
| 351 | |||
| 352 | memset(name, 0, sizeof(name)); | ||
| 353 | snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name); | ||
| 354 | |||
| 355 | ls->ls_debug_waiters_dentry = debugfs_create_file(name, | ||
| 356 | S_IFREG | S_IRUGO, | ||
| 357 | dlm_root, | ||
| 358 | ls, | ||
| 359 | &waiters_fops); | ||
| 360 | if (!ls->ls_debug_waiters_dentry) { | ||
| 361 | debugfs_remove(ls->ls_debug_rsb_dentry); | ||
| 362 | return -ENOMEM; | ||
| 363 | } | ||
| 364 | |||
| 365 | return 0; | ||
| 366 | } | ||
| 367 | |||
| 368 | void dlm_delete_debug_file(struct dlm_ls *ls) | ||
| 369 | { | ||
| 370 | if (ls->ls_debug_rsb_dentry) | ||
| 371 | debugfs_remove(ls->ls_debug_rsb_dentry); | ||
| 372 | if (ls->ls_debug_waiters_dentry) | ||
| 373 | debugfs_remove(ls->ls_debug_waiters_dentry); | ||
| 374 | } | ||
| 375 | |||
/* Create the top-level debugfs "dlm" directory and init the buffer
   lock.  Returns 0 or -ENOMEM. */
int dlm_register_debugfs(void)
{
	mutex_init(&debug_buf_lock);
	dlm_root = debugfs_create_dir("dlm", NULL);
	return dlm_root ? 0 : -ENOMEM;
}
| 382 | |||
/* Remove the top-level debugfs "dlm" directory. */
void dlm_unregister_debugfs(void)
{
	debugfs_remove(dlm_root);
}
| 387 | |||
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c new file mode 100644 index 000000000000..46754553fdcc --- /dev/null +++ b/fs/dlm/dir.c | |||
| @@ -0,0 +1,423 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lockspace.h" | ||
| 16 | #include "member.h" | ||
| 17 | #include "lowcomms.h" | ||
| 18 | #include "rcom.h" | ||
| 19 | #include "config.h" | ||
| 20 | #include "memory.h" | ||
| 21 | #include "recover.h" | ||
| 22 | #include "util.h" | ||
| 23 | #include "lock.h" | ||
| 24 | #include "dir.h" | ||
| 25 | |||
| 26 | |||
/* Stash a direntry on the recovery free list for reuse by
   get_free_de(); freed for real by dlm_clear_free_entries(). */
static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
{
	spin_lock(&ls->ls_recover_list_lock);
	list_add(&de->list, &ls->ls_recover_list);
	spin_unlock(&ls->ls_recover_list_lock);
}
| 33 | |||
| 34 | static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | ||
| 35 | { | ||
| 36 | int found = 0; | ||
| 37 | struct dlm_direntry *de; | ||
| 38 | |||
| 39 | spin_lock(&ls->ls_recover_list_lock); | ||
| 40 | list_for_each_entry(de, &ls->ls_recover_list, list) { | ||
| 41 | if (de->length == len) { | ||
| 42 | list_del(&de->list); | ||
| 43 | de->master_nodeid = 0; | ||
| 44 | memset(de->name, 0, len); | ||
| 45 | found = 1; | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | spin_unlock(&ls->ls_recover_list_lock); | ||
| 50 | |||
| 51 | if (!found) | ||
| 52 | de = allocate_direntry(ls, len); | ||
| 53 | return de; | ||
| 54 | } | ||
| 55 | |||
/* Free every direntry cached on the recovery free list. */
void dlm_clear_free_entries(struct dlm_ls *ls)
{
	struct dlm_direntry *de;

	spin_lock(&ls->ls_recover_list_lock);
	while (!list_empty(&ls->ls_recover_list)) {
		de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
				list);
		list_del(&de->list);
		free_direntry(de);
	}
	spin_unlock(&ls->ls_recover_list_lock);
}
| 69 | |||
| 70 | /* | ||
| 71 | * We use the upper 16 bits of the hash value to select the directory node. | ||
| 72 | * Low bits are used for distribution of rsb's among hash buckets on each node. | ||
| 73 | * | ||
| 74 | * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of | ||
| 75 | * num_nodes to the hash value. This value in the desired range is used as an | ||
| 76 | * offset into the sorted list of nodeid's to give the particular nodeid. | ||
| 77 | */ | ||
| 78 | |||
/* Map a resource hash to the nodeid of its directory node; see the
   comment above for the upper/lower hash-bit split. */
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
{
	struct list_head *tmp;
	struct dlm_member *memb = NULL;
	uint32_t node, n = 0;
	int nodeid;

	/* single-node lockspace: we are always the directory node */
	if (ls->ls_num_nodes == 1) {
		nodeid = dlm_our_nodeid();
		goto out;
	}

	/* fast path: index the prebuilt array (presumably weighted, since
	   the modulus is ls_total_weight — see make_member_array) */
	if (ls->ls_node_array) {
		node = (hash >> 16) % ls->ls_total_weight;
		nodeid = ls->ls_node_array[node];
		goto out;
	}

	/* make_member_array() failed to kmalloc ls_node_array... */

	/* slow path: walk the member list to the node'th entry */
	node = (hash >> 16) % ls->ls_num_nodes;

	list_for_each(tmp, &ls->ls_nodes) {
		if (n++ != node)
			continue;
		memb = list_entry(tmp, struct dlm_member, list);
		break;
	}

	DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
				 ls->ls_num_nodes, n, node););
	nodeid = memb->nodeid;
 out:
	return nodeid;
}
| 114 | |||
| 115 | int dlm_dir_nodeid(struct dlm_rsb *r) | ||
| 116 | { | ||
| 117 | return dlm_hash2nodeid(r->res_ls, r->res_hash); | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) | ||
| 121 | { | ||
| 122 | uint32_t val; | ||
| 123 | |||
| 124 | val = jhash(name, len, 0); | ||
| 125 | val &= (ls->ls_dirtbl_size - 1); | ||
| 126 | |||
| 127 | return val; | ||
| 128 | } | ||
| 129 | |||
| 130 | static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) | ||
| 131 | { | ||
| 132 | uint32_t bucket; | ||
| 133 | |||
| 134 | bucket = dir_hash(ls, de->name, de->length); | ||
| 135 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | ||
| 136 | } | ||
| 137 | |||
| 138 | static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, | ||
| 139 | int namelen, uint32_t bucket) | ||
| 140 | { | ||
| 141 | struct dlm_direntry *de; | ||
| 142 | |||
| 143 | list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { | ||
| 144 | if (de->length == namelen && !memcmp(name, de->name, namelen)) | ||
| 145 | goto out; | ||
| 146 | } | ||
| 147 | de = NULL; | ||
| 148 | out: | ||
| 149 | return de; | ||
| 150 | } | ||
| 151 | |||
/* Remove the directory entry for 'name' on behalf of node 'nodeid',
   which must currently be recorded as the entry's master.  A missing
   entry or a master mismatch is logged and otherwise ignored. */

void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
{
	struct dlm_direntry *de;
	uint32_t bucket;

	bucket = dir_hash(ls, name, namelen);

	write_lock(&ls->ls_dirtbl[bucket].lock);

	de = search_bucket(ls, name, namelen, bucket);

	if (!de) {
		log_error(ls, "remove fr %u none", nodeid);
		goto out;
	}

	/* only the recorded master may remove its own entry */
	if (de->master_nodeid != nodeid) {
		log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
		goto out;
	}

	list_del(&de->list);
	free_direntry(de);
 out:
	write_unlock(&ls->ls_dirtbl[bucket].lock);
}
| 178 | |||
/* Empty the entire directory hash table, parking every entry on the
   recover list for possible reuse by get_free_de().  The recover list
   must be empty on entry (it only holds entries between a clear and
   the end of recovery). */

void dlm_dir_clear(struct dlm_ls *ls)
{
	struct list_head *head;
	struct dlm_direntry *de;
	int i;

	DLM_ASSERT(list_empty(&ls->ls_recover_list), );

	for (i = 0; i < ls->ls_dirtbl_size; i++) {
		write_lock(&ls->ls_dirtbl[i].lock);
		head = &ls->ls_dirtbl[i].list;
		while (!list_empty(head)) {
			de = list_entry(head->next, struct dlm_direntry, list);
			list_del(&de->list);
			put_free_de(ls, de);
		}
		write_unlock(&ls->ls_dirtbl[i].lock);
	}
}
| 198 | |||
/* Rebuild this node's portion of the resource directory after a
   change in lockspace membership.  Every member node is asked, via
   repeated dlm_rcom_names() requests, for the names of resources it
   masters whose directory node is us; the replies are parsed as
   big-endian namelen/name records and inserted into the directory
   hash table.  last_name/last_len carry the final name of each reply
   into the next request so the remote side can resume its scan.

   Returns 0 on success, nonzero on failure or interrupted recovery.
   NOTE(review): a stopped recovery propagates the positive value
   returned by dlm_recovery_stopped() as "error"; callers appear to
   test only for nonzero — confirm. */

int dlm_recover_directory(struct dlm_ls *ls)
{
	struct dlm_member *memb;
	struct dlm_direntry *de;
	char *b, *last_name = NULL;
	int error = -ENOMEM, last_len, count = 0;
	uint16_t namelen;

	log_debug(ls, "dlm_recover_directory");

	/* no directory to rebuild; just mark this stage complete */
	if (dlm_no_directory(ls))
		goto out_status;

	dlm_dir_clear(ls);

	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
	if (!last_name)
		goto out;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		memset(last_name, 0, DLM_RESNAME_MAXLEN);
		last_len = 0;

		for (;;) {
			error = dlm_recovery_stopped(ls);
			if (error)
				goto out_free;

			/* reply lands in ls->ls_recover_buf */
			error = dlm_rcom_names(ls, memb->nodeid,
					       last_name, last_len);
			if (error)
				goto out_free;

			schedule();

			/*
			 * pick namelen/name pairs out of received buffer
			 */

			b = ls->ls_recover_buf + sizeof(struct dlm_rcom);

			for (;;) {
				memcpy(&namelen, b, sizeof(uint16_t));
				namelen = be16_to_cpu(namelen);
				b += sizeof(uint16_t);

				/* namelen of 0xFFFF marks end of names for
				   this node; namelen of 0 marks end of the
				   buffer */

				if (namelen == 0xFFFF)
					goto done;
				if (!namelen)
					break;

				error = -ENOMEM;
				de = get_free_de(ls, namelen);
				if (!de)
					goto out_free;

				de->master_nodeid = memb->nodeid;
				de->length = namelen;
				last_len = namelen;
				memcpy(de->name, b, namelen);
				memcpy(last_name, b, namelen);
				b += namelen;

				add_entry_to_hash(ls, de);
				count++;
			}
		}
	 done:
		;
	}

 out_status:
	error = 0;
	dlm_set_recover_status(ls, DLM_RS_DIR);
	log_debug(ls, "dlm_recover_directory %d entries", count);
 out_free:
	kfree(last_name);
 out:
	dlm_clear_free_entries(ls);
	return error;
}
| 284 | |||
/* Look up the directory entry for 'name', creating it with 'nodeid'
   as master when it does not exist.  On return *r_nodeid holds the
   master nodeid.  Returns -EEXIST if the requester is already the
   recorded master, -ENOMEM on allocation failure, 0 otherwise.

   The bucket lock is dropped around the allocation, so the bucket is
   searched again afterwards in case another thread inserted the same
   name meanwhile.
   NOTE(review): when that race is lost to an entry already mastered
   by 'nodeid', this path returns 0 rather than the -EEXIST the first
   lookup would produce — confirm this is intentional. */

static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
		     int namelen, int *r_nodeid)
{
	struct dlm_direntry *de, *tmp;
	uint32_t bucket;

	bucket = dir_hash(ls, name, namelen);

	write_lock(&ls->ls_dirtbl[bucket].lock);
	de = search_bucket(ls, name, namelen, bucket);
	if (de) {
		*r_nodeid = de->master_nodeid;
		write_unlock(&ls->ls_dirtbl[bucket].lock);
		if (*r_nodeid == nodeid)
			return -EEXIST;
		return 0;
	}

	/* drop the lock for the (possibly sleeping) allocation */
	write_unlock(&ls->ls_dirtbl[bucket].lock);

	de = allocate_direntry(ls, namelen);
	if (!de)
		return -ENOMEM;

	de->master_nodeid = nodeid;
	de->length = namelen;
	memcpy(de->name, name, namelen);

	write_lock(&ls->ls_dirtbl[bucket].lock);
	tmp = search_bucket(ls, name, namelen, bucket);
	if (tmp) {
		/* lost the race: keep the existing entry */
		free_direntry(de);
		de = tmp;
	} else {
		list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
	}
	*r_nodeid = de->master_nodeid;
	write_unlock(&ls->ls_dirtbl[bucket].lock);
	return 0;
}
| 325 | |||
/* Public wrapper around get_entry(): resolve (or create) the
   directory entry for 'name' and report its master in *r_nodeid. */

int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
		   int *r_nodeid)
{
	int rv;

	rv = get_entry(ls, nodeid, name, namelen, r_nodeid);
	return rv;
}
| 331 | |||
/* Copy the names of master rsb's into the buffer provided.
   Only select names whose dir node is the given nodeid.

   This is the serving side of directory recovery: node 'nodeid' is
   rebuilding its directory (see dlm_recover_directory()) and asks us
   for the names of resources we master whose directory node is
   'nodeid'.  inbuf/inlen carry the last name of the previous reply so
   the scan can resume where it left off; outbuf is filled with
   big-endian namelen/name records.  A 0 namelen record means the
   buffer is full (the caller asks again); a 0xFFFF record means the
   list is complete. */

void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
			   char *outbuf, int outlen, int nodeid)
{
	struct list_head *list;
	struct dlm_rsb *start_r = NULL, *r = NULL;
	int offset = 0, start_namelen, error, dir_nodeid;
	char *start_name;
	uint16_t be_namelen;

	/*
	 * Find the rsb where we left off (or start again)
	 */

	start_namelen = inlen;
	start_name = inbuf;

	if (start_namelen > 1) {
		/*
		 * We could also use a find_rsb_root() function here that
		 * searched the ls_root_list.
		 */
		error = dlm_find_rsb(ls, start_name, start_namelen, R_MASTER,
				     &start_r);
		DLM_ASSERT(!error && start_r,
			   printk("error %d\n", error););
		DLM_ASSERT(!list_empty(&start_r->res_root_list),
			   dlm_print_rsb(start_r););
		dlm_put_rsb(start_r);
	}

	/*
	 * Send rsb names for rsb's we're master of and whose directory node
	 * matches the requesting node.
	 */

	down_read(&ls->ls_root_sem);
	if (start_r)
		list = start_r->res_root_list.next;
	else
		list = ls->ls_root_list.next;

	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
		r = list_entry(list, struct dlm_rsb, res_root_list);
		if (r->res_nodeid)
			continue;	/* not mastered locally */

		dir_nodeid = dlm_dir_nodeid(r);
		if (dir_nodeid != nodeid)
			continue;

		/*
		 * The block ends when we can't fit the following in the
		 * remaining buffer space:
		 * namelen (uint16_t) +
		 * name (r->res_length) +
		 * end-of-block record 0x0000 (uint16_t)
		 */

		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
			/* Write end-of-block record */
			be_namelen = 0;
			memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
			offset += sizeof(uint16_t);
			goto out;
		}

		be_namelen = cpu_to_be16(r->res_length);
		memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
		offset += sizeof(uint16_t);
		memcpy(outbuf + offset, r->res_name, r->res_length);
		offset += r->res_length;
	}

	/*
	 * If we've reached the end of the list (and there's room) write a
	 * terminating record.
	 */

	if ((list == &ls->ls_root_list) &&
	    (offset + sizeof(uint16_t) <= outlen)) {
		/* 0xFFFF is byte-symmetric, so no endian conversion needed */
		be_namelen = 0xFFFF;
		memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
		offset += sizeof(uint16_t);
	}

 out:
	up_read(&ls->ls_root_sem);
}
| 423 | |||
diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h new file mode 100644 index 000000000000..0b0eb1267b6e --- /dev/null +++ b/fs/dlm/dir.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__


/* Resource directory interface (dir.c): maps each resource name to
   the nodeid of the node mastering that resource, and rebuilds the
   mapping during lockspace recovery. */

int dlm_dir_nodeid(struct dlm_rsb *rsb);
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash);
void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len);
void dlm_dir_clear(struct dlm_ls *ls);
void dlm_clear_free_entries(struct dlm_ls *ls);
int dlm_recover_directory(struct dlm_ls *ls);
int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
		   int *r_nodeid);
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
			   char *outbuf, int outlen, int nodeid);

#endif				/* __DIR_DOT_H__ */
| 30 | |||
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h new file mode 100644 index 000000000000..1e5cd67e1b7a --- /dev/null +++ b/fs/dlm/dlm_internal.h | |||
| @@ -0,0 +1,543 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #ifndef __DLM_INTERNAL_DOT_H__ | ||
| 15 | #define __DLM_INTERNAL_DOT_H__ | ||
| 16 | |||
| 17 | /* | ||
| 18 | * This is the main header file to be included in each DLM source file. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/module.h> | ||
| 22 | #include <linux/slab.h> | ||
| 23 | #include <linux/sched.h> | ||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/ctype.h> | ||
| 26 | #include <linux/spinlock.h> | ||
| 27 | #include <linux/vmalloc.h> | ||
| 28 | #include <linux/list.h> | ||
| 29 | #include <linux/errno.h> | ||
| 30 | #include <linux/random.h> | ||
| 31 | #include <linux/delay.h> | ||
| 32 | #include <linux/socket.h> | ||
| 33 | #include <linux/kthread.h> | ||
| 34 | #include <linux/kobject.h> | ||
| 35 | #include <linux/kref.h> | ||
| 36 | #include <linux/kernel.h> | ||
| 37 | #include <linux/jhash.h> | ||
| 38 | #include <linux/miscdevice.h> | ||
| 39 | #include <linux/mutex.h> | ||
| 40 | #include <asm/semaphore.h> | ||
| 41 | #include <asm/uaccess.h> | ||
| 42 | |||
| 43 | #include <linux/dlm.h> | ||
| 44 | |||
/* Maximum length of a lockspace name. */
#define DLM_LOCKSPACE_LEN	64

/* Size of the temp buffer midcomms allocates on the stack.
   We try to make this large enough so most messages fit.
   FIXME: should sctp make this unnecessary? */

#define DLM_INBUF_LEN		148

/* Forward declarations so code can pass pointers to these types
   before the full definitions below. */
struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
struct dlm_lkbtable;
struct dlm_rsbtable;
struct dlm_dirtable;
struct dlm_direntry;
struct dlm_recover;
struct dlm_header;
struct dlm_message;
struct dlm_rcom;
struct dlm_mhandle;

/* Logging helpers: log_error() prefixes the lockspace name,
   log_print() is for messages without lockspace context. */
#define log_print(fmt, args...) \
	printk(KERN_ERR "dlm: "fmt"\n" , ##args)
#define log_error(ls, fmt, args...) \
	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)

/* log_debug() is compiled out unless DLM_LOG_DEBUG is defined. */
#define DLM_LOG_DEBUG
#ifdef DLM_LOG_DEBUG
#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
#else
#define log_debug(ls, fmt, args...)
#endif

/* Fatal assertion: prints diagnostics, runs the caller-supplied
   statement block (the second parameter, deliberately named "do" —
   macro parameters may shadow keywords) for extra context, then BUGs
   and panics.  Does not return on failure. */
#define DLM_ASSERT(x, do) \
{ \
  if (!(x)) \
  { \
    printk(KERN_ERR "\nDLM: Assertion failed on line %d of file %s\n" \
	       "DLM: assertion: \"%s\"\n" \
	       "DLM: time = %lu\n", \
	       __LINE__, __FILE__, #x, jiffies); \
    {do} \
    printk("\n"); \
    BUG(); \
    panic("DLM: Record message above and reboot.\n"); \
  } \
}

/* Sentinel ast address marking a user-space lock request. */
#define DLM_FAKE_USER_AST ERR_PTR(-EINVAL)
| 95 | |||
| 96 | |||
/* One resource-directory entry: maps a resource name to the nodeid
   mastering it.  Allocated with extra trailing bytes for the name;
   name[1] is the pre-C99 flexible-array idiom. */
struct dlm_direntry {
	struct list_head	list;		/* hash bucket or recover list */
	uint32_t		master_nodeid;	/* node mastering this resource */
	uint32_t		length;		/* valid bytes in name[] */
	char			name[1];	/* resource name, not NUL terminated */
};

/* One bucket of the resource-directory hash table. */
struct dlm_dirtable {
	struct list_head	list;	/* chain of dlm_direntry */
	rwlock_t		lock;	/* protects this bucket's chain */
};

/* One bucket of the rsb hash table. */
struct dlm_rsbtable {
	struct list_head	list;	/* active rsbs */
	struct list_head	toss;	/* NOTE(review): presumably inactive rsbs pending removal — confirm */
	rwlock_t		lock;
};

/* One bucket of the lkb id table. */
struct dlm_lkbtable {
	struct list_head	list;
	rwlock_t		lock;
	uint16_t		counter;	/* presumably feeds lkb id generation — confirm */
};
| 120 | |||
/*
 * Lockspace member (per node in a ls)
 */

struct dlm_member {
	struct list_head	list;	/* on ls_nodes or ls_nodes_gone */
	int			nodeid;
	int			weight;	/* share used when building ls_node_array */
};

/*
 * Save and manage recovery state for a lockspace.
 */

struct dlm_recover {
	struct list_head	list;
	int			*nodeids;	/* nodeids of current members */
	int			node_count;
	uint64_t		seq;		/* recovery sequence number */
};

/*
 * Pass input args to second stage locking function.
 */

struct dlm_args {
	uint32_t		flags;		/* DLM_LKF_ external flags */
	void			*astaddr;	/* completion ast */
	long			astparam;	/* caller's ast argument */
	void			*bastaddr;	/* blocking ast */
	int			mode;		/* requested lock mode */
	struct dlm_lksb		*lksb;		/* caller's status block */
};
| 154 | |||
| 155 | |||
| 156 | /* | ||
| 157 | * Lock block | ||
| 158 | * | ||
| 159 | * A lock can be one of three types: | ||
| 160 | * | ||
| 161 | * local copy lock is mastered locally | ||
| 162 | * (lkb_nodeid is zero and DLM_LKF_MSTCPY is not set) | ||
| 163 | * process copy lock is mastered on a remote node | ||
| 164 | * (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is not set) | ||
| 165 | * master copy master node's copy of a lock owned by remote node | ||
| 166 | * (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is set) | ||
| 167 | * | ||
| 168 | * lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or | ||
| 169 | * dlm_unlock. The dlm does not modify these or use any private flags in | ||
| 170 | * this field; it only contains DLM_LKF_ flags from dlm.h. These flags | ||
| 171 | * are sent as-is to the remote master when the lock is remote. | ||
| 172 | * | ||
| 173 | * lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h. | ||
| 174 | * Some internal flags are shared between the master and process nodes; | ||
| 175 | * these shared flags are kept in the lower two bytes. One of these | ||
| 176 | * flags set on the master copy will be propagated to the process copy | ||
| 177 | * and v.v. Other internal flags are private to the master or process | ||
| 178 | * node (e.g. DLM_IFL_MSTCPY). These are kept in the high two bytes. | ||
| 179 | * | ||
| 180 | * lkb_sbflags: status block flags. These flags are copied directly into | ||
| 181 | * the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion | ||
| 182 | * ast. All defined in dlm.h with DLM_SBF_ prefix. | ||
| 183 | * | ||
| 184 | * lkb_status: the lock status indicates which rsb queue the lock is | ||
| 185 | * on, grant, convert, or wait. DLM_LKSTS_ WAITING/GRANTED/CONVERT | ||
| 186 | * | ||
| 187 | * lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a | ||
| 188 | * reply is needed. Only set when the lkb is on the lockspace waiters | ||
| 189 | * list awaiting a reply from a remote node. | ||
| 190 | * | ||
| 191 | * lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb | ||
| 192 | * is a master copy, nodeid specifies the remote lock holder, when the | ||
| 193 | * lkb is a process copy, the nodeid specifies the lock master. | ||
| 194 | */ | ||
| 195 | |||
/* lkb_ast_type */

#define AST_COMP		1	/* completion ast queued */
#define AST_BAST		2	/* blocking ast queued */

/* lkb_status: which rsb queue the lock is on */

#define DLM_LKSTS_WAITING	1
#define DLM_LKSTS_GRANTED	2
#define DLM_LKSTS_CONVERT	3

/* lkb_flags: internal flags; low two bytes are shared between master
   and process copies, high two bytes are node-private (see the
   commentary above) */

#define DLM_IFL_MSTCPY		0x00010000
#define DLM_IFL_RESEND		0x00020000
#define DLM_IFL_DEAD		0x00040000
#define DLM_IFL_USER		0x00000001
#define DLM_IFL_ORPHAN		0x00000002

/* Lock block: one lock on one resource.  See the long comment above
   for the local/process/master copy distinctions and flag semantics. */
struct dlm_lkb {
	struct dlm_rsb		*lkb_resource;	/* the rsb */
	struct kref		lkb_ref;
	int			lkb_nodeid;	/* copied from rsb */
	int			lkb_ownpid;	/* pid of lock owner */
	uint32_t		lkb_id;		/* our lock ID */
	uint32_t		lkb_remid;	/* lock ID on remote partner */
	uint32_t		lkb_exflags;	/* external flags from caller */
	uint32_t		lkb_sbflags;	/* lksb flags */
	uint32_t		lkb_flags;	/* internal flags */
	uint32_t		lkb_lvbseq;	/* lvb sequence number */

	int8_t			lkb_status;	/* granted, waiting, convert */
	int8_t			lkb_rqmode;	/* requested lock mode */
	int8_t			lkb_grmode;	/* granted lock mode */
	int8_t			lkb_bastmode;	/* requested mode */
	int8_t			lkb_highbast;	/* highest mode bast sent for */

	int8_t			lkb_wait_type;	/* type of reply waiting for */
	int8_t			lkb_ast_type;	/* type of ast queued for */

	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
	struct list_head	lkb_statequeue;	/* rsb g/c/w list */
	struct list_head	lkb_rsb_lookup;	/* waiting for rsb lookup */
	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
	struct list_head	lkb_astqueue;	/* need ast to be sent */
	struct list_head	lkb_ownqueue;	/* list of locks for a process */

	char			*lkb_lvbptr;	/* lock value block — length presumably ls_lvblen; confirm */
	struct dlm_lksb		*lkb_lksb;	/* caller's status block */
	void			*lkb_astaddr;	/* caller's ast function */
	void			*lkb_bastaddr;	/* caller's bast function */
	long			lkb_astparam;	/* caller's ast arg */
};
| 249 | |||
| 250 | |||
/* Resource block: one named lock resource, holding its master nodeid,
   the grant/convert/wait queues of lkbs, and recovery state.  The
   name is stored inline after the struct (pre-C99 flexible array). */
struct dlm_rsb {
	struct dlm_ls		*res_ls;	/* the lockspace */
	struct kref		res_ref;
	struct mutex		res_mutex;
	unsigned long		res_flags;	/* bits from enum rsb_flags */
	int			res_length;	/* length of rsb name */
	int			res_nodeid;	/* zero when mastered locally */
	uint32_t		res_lvbseq;
	uint32_t		res_hash;	/* jhash of the name */
	uint32_t		res_bucket;	/* rsbtbl */
	unsigned long		res_toss_time;
	uint32_t		res_first_lkid;
	struct list_head	res_lookup;	/* lkbs waiting on first */
	struct list_head	res_hashchain;	/* rsbtbl */
	struct list_head	res_grantqueue;
	struct list_head	res_convertqueue;
	struct list_head	res_waitqueue;

	struct list_head	res_root_list;	/* used for recovery */
	struct list_head	res_recover_list; /* used for recovery */
	int			res_recover_locks_count;

	char			*res_lvbptr;	/* lock value block contents */
	char			res_name[1];	/* resource name, not NUL terminated */
};
| 276 | |||
/* find_rsb() flags */

#define R_MASTER		1	/* only return rsb if it's a master */
#define R_CREATE		2	/* create/add rsb if not found */

/* rsb_flags: bit numbers within res_flags */

enum rsb_flags {
	RSB_MASTER_UNCERTAIN,
	RSB_VALNOTVALID,
	RSB_VALNOTVALID_PREV,
	RSB_NEW_MASTER,
	RSB_NEW_MASTER2,
	RSB_RECOVER_CONVERT,
	RSB_LOCKS_PURGED,
};

/* Set an rsb flag bit.  __set_bit() is the non-atomic variant;
   callers presumably serialize on the rsb (res_mutex) — confirm. */
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	__set_bit(flag, &r->res_flags);
}

/* Clear an rsb flag bit (non-atomic; same serialization caveat). */
static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	__clear_bit(flag, &r->res_flags);
}

/* Test an rsb flag bit; returns nonzero if set. */
static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	return test_bit(flag, &r->res_flags);
}
| 308 | |||
| 309 | |||
/* dlm_header is first element of all structs sent between nodes */

#define DLM_HEADER_MAJOR	0x00020000
#define DLM_HEADER_MINOR	0x00000001

/* h_cmd values: dispatch between normal messages and recovery
   commands */
#define DLM_MSG			1
#define DLM_RCOM		2

struct dlm_header {
	uint32_t		h_version;	/* major|minor protocol version */
	uint32_t		h_lockspace;	/* presumably the global lockspace id — confirm */
	uint32_t		h_nodeid;	/* nodeid of sender */
	uint16_t		h_length;	/* presumably total length incl. header — confirm */
	uint8_t			h_cmd;		/* DLM_MSG, DLM_RCOM */
	uint8_t			h_pad;
};
| 326 | |||
| 327 | |||
/* m_type values: request/convert/unlock/cancel plus their replies,
   grant and bast notifications, and directory lookup traffic */

#define DLM_MSG_REQUEST		1
#define DLM_MSG_CONVERT		2
#define DLM_MSG_UNLOCK		3
#define DLM_MSG_CANCEL		4
#define DLM_MSG_REQUEST_REPLY	5
#define DLM_MSG_CONVERT_REPLY	6
#define DLM_MSG_UNLOCK_REPLY	7
#define DLM_MSG_CANCEL_REPLY	8
#define DLM_MSG_GRANT		9
#define DLM_MSG_BAST		10
#define DLM_MSG_LOOKUP		11
#define DLM_MSG_REMOVE		12
#define DLM_MSG_LOOKUP_REPLY	13

/* On-wire locking message; mirrors the lkb fields of the same names. */
struct dlm_message {
	struct dlm_header	m_header;
	uint32_t		m_type;		/* DLM_MSG_ */
	uint32_t		m_nodeid;
	uint32_t		m_pid;
	uint32_t		m_lkid;		/* lkid on sender */
	uint32_t		m_remid;	/* lkid on receiver */
	uint32_t		m_parent_lkid;
	uint32_t		m_parent_remid;
	uint32_t		m_exflags;
	uint32_t		m_sbflags;
	uint32_t		m_flags;
	uint32_t		m_lvbseq;
	uint32_t		m_hash;
	int			m_status;
	int			m_grmode;
	int			m_rqmode;
	int			m_bastmode;
	int			m_asts;
	int			m_result;	/* 0 or -EXXX */
	char			m_extra[0];	/* name or lvb */
};
| 364 | |||
| 365 | |||
/* ls_recover_status bits: one local/all-nodes pair per recovery
   stage; set via dlm_set_recover_status() */

#define DLM_RS_NODES		0x00000001
#define DLM_RS_NODES_ALL	0x00000002
#define DLM_RS_DIR		0x00000004
#define DLM_RS_DIR_ALL		0x00000008
#define DLM_RS_LOCKS		0x00000010
#define DLM_RS_LOCKS_ALL	0x00000020
#define DLM_RS_DONE		0x00000040
#define DLM_RS_DONE_ALL		0x00000080

/* rc_type values for recovery commands and their replies */

#define DLM_RCOM_STATUS		1
#define DLM_RCOM_NAMES		2
#define DLM_RCOM_LOOKUP		3
#define DLM_RCOM_LOCK		4
#define DLM_RCOM_STATUS_REPLY	5
#define DLM_RCOM_NAMES_REPLY	6
#define DLM_RCOM_LOOKUP_REPLY	7
#define DLM_RCOM_LOCK_REPLY	8

/* On-wire recovery command; payload format in rc_buf depends on
   rc_type (e.g. namelen/name records for DLM_RCOM_NAMES_REPLY). */
struct dlm_rcom {
	struct dlm_header	rc_header;
	uint32_t		rc_type;	/* DLM_RCOM_ */
	int			rc_result;	/* multi-purpose */
	uint64_t		rc_id;		/* match reply with request */
	char			rc_buf[0];
};

/* Lockspace parameters exchanged so both sides can verify they
   agree. */
struct rcom_config {
	uint32_t		rf_lvblen;
	uint32_t		rf_lsflags;
	uint64_t		rf_unused;
};

/* Wire format for one lock transferred during recovery; mirrors the
   lkb fields of the same names. */
struct rcom_lock {
	uint32_t		rl_ownpid;
	uint32_t		rl_lkid;
	uint32_t		rl_remid;
	uint32_t		rl_parent_lkid;
	uint32_t		rl_parent_remid;
	uint32_t		rl_exflags;
	uint32_t		rl_flags;
	uint32_t		rl_lvbseq;
	int			rl_result;
	int8_t			rl_rqmode;
	int8_t			rl_grmode;
	int8_t			rl_status;
	int8_t			rl_asts;
	uint16_t		rl_wait_type;
	uint16_t		rl_namelen;
	char			rl_name[DLM_RESNAME_MAXLEN];
	char			rl_lvb[0];
};
| 417 | |||
/* Per-lockspace state: the rsb/lkb/directory hash tables, membership
   lists, and the recovery machinery.  The lockspace name is stored
   inline at the end (pre-C99 flexible array). */
struct dlm_ls {
	struct list_head	ls_list;	/* list of lockspaces */
	dlm_lockspace_t		*ls_local_handle;
	uint32_t		ls_global_id;	/* global unique lockspace ID */
	uint32_t		ls_exflags;
	int			ls_lvblen;
	int			ls_count;	/* reference count */
	unsigned long		ls_flags;	/* LSFL_ */
	struct kobject		ls_kobj;

	struct dlm_rsbtable	*ls_rsbtbl;
	uint32_t		ls_rsbtbl_size;

	struct dlm_lkbtable	*ls_lkbtbl;
	uint32_t		ls_lkbtbl_size;

	struct dlm_dirtable	*ls_dirtbl;
	uint32_t		ls_dirtbl_size;	/* power of two (dir_hash masks with size-1) */

	struct mutex		ls_waiters_mutex;
	struct list_head	ls_waiters;	/* lkbs needing a reply */

	struct list_head	ls_nodes;	/* current nodes in ls */
	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
	int			ls_num_nodes;	/* number of nodes in ls */
	int			ls_low_nodeid;
	int			ls_total_weight;
	int			*ls_node_array;	/* weight-expanded sorted nodeids; may be NULL on alloc failure */

	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
	struct dlm_message	ls_stub_ms;	/* for faking a reply */

	struct dentry		*ls_debug_rsb_dentry;	/* debugfs */
	struct dentry		*ls_debug_waiters_dentry; /* debugfs */

	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
	int			ls_uevent_result;

	struct miscdevice	ls_device;

	/* recovery related */

	struct timer_list	ls_timer;
	struct task_struct	*ls_recoverd_task;
	struct mutex		ls_recoverd_active;
	spinlock_t		ls_recover_lock;
	uint32_t		ls_recover_status;	/* DLM_RS_ */
	uint64_t		ls_recover_seq;
	struct dlm_recover	*ls_recover_args;
	struct rw_semaphore	ls_in_recovery;	/* block local requests */
	struct list_head	ls_requestqueue;/* queue remote requests */
	struct mutex		ls_requestqueue_mutex;
	char			*ls_recover_buf;	/* rcom reply landing buffer */
	int			ls_recover_nodeid;	/* for debugging */
	uint64_t		ls_rcom_seq;
	struct list_head	ls_recover_list;	/* spare direntries (dir.c) */
	spinlock_t		ls_recover_list_lock;
	int			ls_recover_list_count;
	wait_queue_head_t	ls_wait_general;
	struct mutex		ls_clear_proc_locks;

	struct list_head	ls_root_list;	/* root resources */
	struct rw_semaphore	ls_root_sem;	/* protect root_list */

	int			ls_namelen;
	char			ls_name[1];
};
| 486 | |||
/* ls_flags bit numbers */

#define LSFL_WORK		0
#define LSFL_RUNNING		1	/* normal locking may proceed */
#define LSFL_RECOVERY_STOP	2	/* abort in-progress recovery */
#define LSFL_RCOM_READY		3
#define LSFL_UEVENT_WAIT	4

/* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */

struct dlm_user_args {
	struct dlm_user_proc	*proc;	/* each process that opens the lockspace
					   device has private data
					   (dlm_user_proc) on the struct file,
					   the process's locks point back to it*/
	struct dlm_lksb		lksb;		/* kernel-side copy of the status block */
	int			old_mode;
	int			update_user_lvb;
	struct dlm_lksb __user	*user_lksb;	/* caller's lksb to copy back to */
	void __user		*castparam;	/* completion ast param */
	void __user		*castaddr;	/* completion ast function */
	void __user		*bastparam;	/* blocking ast param */
	void __user		*bastaddr;	/* blocking ast function */
};

/* proc->flags bit numbers */
#define DLM_PROC_FLAGS_CLOSING	1
#define DLM_PROC_FLAGS_COMPAT	2

/* locks list is kept so we can remove all a process's locks when it
   exits (or orphan those that are persistent) */

struct dlm_user_proc {
	dlm_lockspace_t		*lockspace;	/* lockspace the device was opened on */
	unsigned long		flags;		/* DLM_PROC_FLAGS */
	struct list_head	asts;		/* pending asts for this process */
	spinlock_t		asts_spin;
	struct list_head	locks;		/* locks owned by this process */
	spinlock_t		locks_spin;
	wait_queue_head_t	wait;
};
| 526 | |||
| 527 | static inline int dlm_locking_stopped(struct dlm_ls *ls) | ||
| 528 | { | ||
| 529 | return !test_bit(LSFL_RUNNING, &ls->ls_flags); | ||
| 530 | } | ||
| 531 | |||
| 532 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) | ||
| 533 | { | ||
| 534 | return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | ||
| 535 | } | ||
| 536 | |||
| 537 | static inline int dlm_no_directory(struct dlm_ls *ls) | ||
| 538 | { | ||
| 539 | return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0; | ||
| 540 | } | ||
| 541 | |||
| 542 | #endif /* __DLM_INTERNAL_DOT_H__ */ | ||
| 543 | |||
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c new file mode 100644 index 000000000000..3f2befa4797b --- /dev/null +++ b/fs/dlm/lock.c | |||
| @@ -0,0 +1,3871 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | /* Central locking logic has four stages: | ||
| 14 | |||
| 15 | dlm_lock() | ||
| 16 | dlm_unlock() | ||
| 17 | |||
| 18 | request_lock(ls, lkb) | ||
| 19 | convert_lock(ls, lkb) | ||
| 20 | unlock_lock(ls, lkb) | ||
| 21 | cancel_lock(ls, lkb) | ||
| 22 | |||
| 23 | _request_lock(r, lkb) | ||
| 24 | _convert_lock(r, lkb) | ||
| 25 | _unlock_lock(r, lkb) | ||
| 26 | _cancel_lock(r, lkb) | ||
| 27 | |||
| 28 | do_request(r, lkb) | ||
| 29 | do_convert(r, lkb) | ||
| 30 | do_unlock(r, lkb) | ||
| 31 | do_cancel(r, lkb) | ||
| 32 | |||
| 33 | Stage 1 (lock, unlock) is mainly about checking input args and | ||
| 34 | splitting into one of the four main operations: | ||
| 35 | |||
| 36 | dlm_lock = request_lock | ||
| 37 | dlm_lock+CONVERT = convert_lock | ||
| 38 | dlm_unlock = unlock_lock | ||
| 39 | dlm_unlock+CANCEL = cancel_lock | ||
| 40 | |||
| 41 | Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is | ||
| 42 | provided to the next stage. | ||
| 43 | |||
| 44 | Stage 3, _xxxx_lock(), determines if the operation is local or remote. | ||
| 45 | When remote, it calls send_xxxx(), when local it calls do_xxxx(). | ||
| 46 | |||
| 47 | Stage 4, do_xxxx(), is the guts of the operation. It manipulates the | ||
| 48 | given rsb and lkb and queues callbacks. | ||
| 49 | |||
| 50 | For remote operations, send_xxxx() results in the corresponding do_xxxx() | ||
| 51 | function being executed on the remote node. The connecting send/receive | ||
| 52 | calls on local (L) and remote (R) nodes: | ||
| 53 | |||
| 54 | L: send_xxxx() -> R: receive_xxxx() | ||
| 55 | R: do_xxxx() | ||
| 56 | L: receive_xxxx_reply() <- R: send_xxxx_reply() | ||
| 57 | */ | ||
| 58 | #include <linux/types.h> | ||
| 59 | #include "dlm_internal.h" | ||
| 60 | #include <linux/dlm_device.h> | ||
| 61 | #include "memory.h" | ||
| 62 | #include "lowcomms.h" | ||
| 63 | #include "requestqueue.h" | ||
| 64 | #include "util.h" | ||
| 65 | #include "dir.h" | ||
| 66 | #include "member.h" | ||
| 67 | #include "lockspace.h" | ||
| 68 | #include "ast.h" | ||
| 69 | #include "lock.h" | ||
| 70 | #include "rcom.h" | ||
| 71 | #include "recover.h" | ||
| 72 | #include "lvb_table.h" | ||
| 73 | #include "user.h" | ||
| 74 | #include "config.h" | ||
| 75 | |||
| 76 | static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 77 | static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 78 | static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 79 | static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 80 | static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 81 | static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode); | ||
| 82 | static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 83 | static int send_remove(struct dlm_rsb *r); | ||
| 84 | static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
| 85 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
| 86 | struct dlm_message *ms); | ||
| 87 | static int receive_extralen(struct dlm_message *ms); | ||
| 88 | |||
| 89 | /* | ||
| 90 | * Lock compatibilty matrix - thanks Steve | ||
| 91 | * UN = Unlocked state. Not really a state, used as a flag | ||
| 92 | * PD = Padding. Used to make the matrix a nice power of two in size | ||
| 93 | * Other states are the same as the VMS DLM. | ||
| 94 | * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same) | ||
| 95 | */ | ||
| 96 | |||
| 97 | static const int __dlm_compat_matrix[8][8] = { | ||
| 98 | /* UN NL CR CW PR PW EX PD */ | ||
| 99 | {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */ | ||
| 100 | {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */ | ||
| 101 | {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */ | ||
| 102 | {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */ | ||
| 103 | {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */ | ||
| 104 | {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */ | ||
| 105 | {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */ | ||
| 106 | {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | ||
| 107 | }; | ||
| 108 | |||
| 109 | /* | ||
| 110 | * This defines the direction of transfer of LVB data. | ||
| 111 | * Granted mode is the row; requested mode is the column. | ||
| 112 | * Usage: matrix[grmode+1][rqmode+1] | ||
| 113 | * 1 = LVB is returned to the caller | ||
| 114 | * 0 = LVB is written to the resource | ||
| 115 | * -1 = nothing happens to the LVB | ||
| 116 | */ | ||
| 117 | |||
| 118 | const int dlm_lvb_operations[8][8] = { | ||
| 119 | /* UN NL CR CW PR PW EX PD*/ | ||
| 120 | { -1, 1, 1, 1, 1, 1, 1, -1 }, /* UN */ | ||
| 121 | { -1, 1, 1, 1, 1, 1, 1, 0 }, /* NL */ | ||
| 122 | { -1, -1, 1, 1, 1, 1, 1, 0 }, /* CR */ | ||
| 123 | { -1, -1, -1, 1, 1, 1, 1, 0 }, /* CW */ | ||
| 124 | { -1, -1, -1, -1, 1, 1, 1, 0 }, /* PR */ | ||
| 125 | { -1, 0, 0, 0, 0, 0, 1, 0 }, /* PW */ | ||
| 126 | { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */ | ||
| 127 | { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */ | ||
| 128 | }; | ||
| 129 | |||
| 130 | #define modes_compat(gr, rq) \ | ||
| 131 | __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1] | ||
| 132 | |||
| 133 | int dlm_modes_compat(int mode1, int mode2) | ||
| 134 | { | ||
| 135 | return __dlm_compat_matrix[mode1 + 1][mode2 + 1]; | ||
| 136 | } | ||
| 137 | |||
| 138 | /* | ||
| 139 | * Compatibility matrix for conversions with QUECVT set. | ||
| 140 | * Granted mode is the row; requested mode is the column. | ||
| 141 | * Usage: matrix[grmode+1][rqmode+1] | ||
| 142 | */ | ||
| 143 | |||
| 144 | static const int __quecvt_compat_matrix[8][8] = { | ||
| 145 | /* UN NL CR CW PR PW EX PD */ | ||
| 146 | {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */ | ||
| 147 | {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */ | ||
| 148 | {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */ | ||
| 149 | {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */ | ||
| 150 | {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */ | ||
| 151 | {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */ | ||
| 152 | {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */ | ||
| 153 | {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | ||
| 154 | }; | ||
| 155 | |||
| 156 | void dlm_print_lkb(struct dlm_lkb *lkb) | ||
| 157 | { | ||
| 158 | printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" | ||
| 159 | " status %d rqmode %d grmode %d wait_type %d ast_type %d\n", | ||
| 160 | lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, | ||
| 161 | lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, | ||
| 162 | lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); | ||
| 163 | } | ||
| 164 | |||
| 165 | void dlm_print_rsb(struct dlm_rsb *r) | ||
| 166 | { | ||
| 167 | printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", | ||
| 168 | r->res_nodeid, r->res_flags, r->res_first_lkid, | ||
| 169 | r->res_recover_locks_count, r->res_name); | ||
| 170 | } | ||
| 171 | |||
| 172 | void dlm_dump_rsb(struct dlm_rsb *r) | ||
| 173 | { | ||
| 174 | struct dlm_lkb *lkb; | ||
| 175 | |||
| 176 | dlm_print_rsb(r); | ||
| 177 | |||
| 178 | printk(KERN_ERR "rsb: root_list empty %d recover_list empty %d\n", | ||
| 179 | list_empty(&r->res_root_list), list_empty(&r->res_recover_list)); | ||
| 180 | printk(KERN_ERR "rsb lookup list\n"); | ||
| 181 | list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) | ||
| 182 | dlm_print_lkb(lkb); | ||
| 183 | printk(KERN_ERR "rsb grant queue:\n"); | ||
| 184 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) | ||
| 185 | dlm_print_lkb(lkb); | ||
| 186 | printk(KERN_ERR "rsb convert queue:\n"); | ||
| 187 | list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) | ||
| 188 | dlm_print_lkb(lkb); | ||
| 189 | printk(KERN_ERR "rsb wait queue:\n"); | ||
| 190 | list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) | ||
| 191 | dlm_print_lkb(lkb); | ||
| 192 | } | ||
| 193 | |||
| 194 | /* Threads cannot use the lockspace while it's being recovered */ | ||
| 195 | |||
| 196 | static inline void lock_recovery(struct dlm_ls *ls) | ||
| 197 | { | ||
| 198 | down_read(&ls->ls_in_recovery); | ||
| 199 | } | ||
| 200 | |||
| 201 | static inline void unlock_recovery(struct dlm_ls *ls) | ||
| 202 | { | ||
| 203 | up_read(&ls->ls_in_recovery); | ||
| 204 | } | ||
| 205 | |||
| 206 | static inline int lock_recovery_try(struct dlm_ls *ls) | ||
| 207 | { | ||
| 208 | return down_read_trylock(&ls->ls_in_recovery); | ||
| 209 | } | ||
| 210 | |||
| 211 | static inline int can_be_queued(struct dlm_lkb *lkb) | ||
| 212 | { | ||
| 213 | return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE); | ||
| 214 | } | ||
| 215 | |||
| 216 | static inline int force_blocking_asts(struct dlm_lkb *lkb) | ||
| 217 | { | ||
| 218 | return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST); | ||
| 219 | } | ||
| 220 | |||
| 221 | static inline int is_demoted(struct dlm_lkb *lkb) | ||
| 222 | { | ||
| 223 | return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); | ||
| 224 | } | ||
| 225 | |||
| 226 | static inline int is_remote(struct dlm_rsb *r) | ||
| 227 | { | ||
| 228 | DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); | ||
| 229 | return !!r->res_nodeid; | ||
| 230 | } | ||
| 231 | |||
| 232 | static inline int is_process_copy(struct dlm_lkb *lkb) | ||
| 233 | { | ||
| 234 | return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY)); | ||
| 235 | } | ||
| 236 | |||
| 237 | static inline int is_master_copy(struct dlm_lkb *lkb) | ||
| 238 | { | ||
| 239 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) | ||
| 240 | DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb);); | ||
| 241 | return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0; | ||
| 242 | } | ||
| 243 | |||
| 244 | static inline int middle_conversion(struct dlm_lkb *lkb) | ||
| 245 | { | ||
| 246 | if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) || | ||
| 247 | (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW)) | ||
| 248 | return 1; | ||
| 249 | return 0; | ||
| 250 | } | ||
| 251 | |||
| 252 | static inline int down_conversion(struct dlm_lkb *lkb) | ||
| 253 | { | ||
| 254 | return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); | ||
| 255 | } | ||
| 256 | |||
| 257 | static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | ||
| 258 | { | ||
| 259 | if (is_master_copy(lkb)) | ||
| 260 | return; | ||
| 261 | |||
| 262 | DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); | ||
| 263 | |||
| 264 | lkb->lkb_lksb->sb_status = rv; | ||
| 265 | lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; | ||
| 266 | |||
| 267 | dlm_add_ast(lkb, AST_COMP); | ||
| 268 | } | ||
| 269 | |||
| 270 | static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) | ||
| 271 | { | ||
| 272 | if (is_master_copy(lkb)) | ||
| 273 | send_bast(r, lkb, rqmode); | ||
| 274 | else { | ||
| 275 | lkb->lkb_bastmode = rqmode; | ||
| 276 | dlm_add_ast(lkb, AST_BAST); | ||
| 277 | } | ||
| 278 | } | ||
| 279 | |||
| 280 | /* | ||
| 281 | * Basic operations on rsb's and lkb's | ||
| 282 | */ | ||
| 283 | |||
| 284 | static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) | ||
| 285 | { | ||
| 286 | struct dlm_rsb *r; | ||
| 287 | |||
| 288 | r = allocate_rsb(ls, len); | ||
| 289 | if (!r) | ||
| 290 | return NULL; | ||
| 291 | |||
| 292 | r->res_ls = ls; | ||
| 293 | r->res_length = len; | ||
| 294 | memcpy(r->res_name, name, len); | ||
| 295 | mutex_init(&r->res_mutex); | ||
| 296 | |||
| 297 | INIT_LIST_HEAD(&r->res_lookup); | ||
| 298 | INIT_LIST_HEAD(&r->res_grantqueue); | ||
| 299 | INIT_LIST_HEAD(&r->res_convertqueue); | ||
| 300 | INIT_LIST_HEAD(&r->res_waitqueue); | ||
| 301 | INIT_LIST_HEAD(&r->res_root_list); | ||
| 302 | INIT_LIST_HEAD(&r->res_recover_list); | ||
| 303 | |||
| 304 | return r; | ||
| 305 | } | ||
| 306 | |||
| 307 | static int search_rsb_list(struct list_head *head, char *name, int len, | ||
| 308 | unsigned int flags, struct dlm_rsb **r_ret) | ||
| 309 | { | ||
| 310 | struct dlm_rsb *r; | ||
| 311 | int error = 0; | ||
| 312 | |||
| 313 | list_for_each_entry(r, head, res_hashchain) { | ||
| 314 | if (len == r->res_length && !memcmp(name, r->res_name, len)) | ||
| 315 | goto found; | ||
| 316 | } | ||
| 317 | return -EBADR; | ||
| 318 | |||
| 319 | found: | ||
| 320 | if (r->res_nodeid && (flags & R_MASTER)) | ||
| 321 | error = -ENOTBLK; | ||
| 322 | *r_ret = r; | ||
| 323 | return error; | ||
| 324 | } | ||
| 325 | |||
| 326 | static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, | ||
| 327 | unsigned int flags, struct dlm_rsb **r_ret) | ||
| 328 | { | ||
| 329 | struct dlm_rsb *r; | ||
| 330 | int error; | ||
| 331 | |||
| 332 | error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); | ||
| 333 | if (!error) { | ||
| 334 | kref_get(&r->res_ref); | ||
| 335 | goto out; | ||
| 336 | } | ||
| 337 | error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); | ||
| 338 | if (error) | ||
| 339 | goto out; | ||
| 340 | |||
| 341 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); | ||
| 342 | |||
| 343 | if (dlm_no_directory(ls)) | ||
| 344 | goto out; | ||
| 345 | |||
| 346 | if (r->res_nodeid == -1) { | ||
| 347 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | ||
| 348 | r->res_first_lkid = 0; | ||
| 349 | } else if (r->res_nodeid > 0) { | ||
| 350 | rsb_set_flag(r, RSB_MASTER_UNCERTAIN); | ||
| 351 | r->res_first_lkid = 0; | ||
| 352 | } else { | ||
| 353 | DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r);); | ||
| 354 | DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),); | ||
| 355 | } | ||
| 356 | out: | ||
| 357 | *r_ret = r; | ||
| 358 | return error; | ||
| 359 | } | ||
| 360 | |||
| 361 | static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, | ||
| 362 | unsigned int flags, struct dlm_rsb **r_ret) | ||
| 363 | { | ||
| 364 | int error; | ||
| 365 | write_lock(&ls->ls_rsbtbl[b].lock); | ||
| 366 | error = _search_rsb(ls, name, len, b, flags, r_ret); | ||
| 367 | write_unlock(&ls->ls_rsbtbl[b].lock); | ||
| 368 | return error; | ||
| 369 | } | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Find rsb in rsbtbl and potentially create/add one | ||
| 373 | * | ||
| 374 | * Delaying the release of rsb's has a similar benefit to applications keeping | ||
| 375 | * NL locks on an rsb, but without the guarantee that the cached master value | ||
| 376 | * will still be valid when the rsb is reused. Apps aren't always smart enough | ||
| 377 | * to keep NL locks on an rsb that they may lock again shortly; this can lead | ||
| 378 | * to excessive master lookups and removals if we don't delay the release. | ||
| 379 | * | ||
| 380 | * Searching for an rsb means looking through both the normal list and toss | ||
| 381 | * list. When found on the toss list the rsb is moved to the normal list with | ||
| 382 | * ref count of 1; when found on normal list the ref count is incremented. | ||
| 383 | */ | ||
| 384 | |||
| 385 | static int find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
| 386 | unsigned int flags, struct dlm_rsb **r_ret) | ||
| 387 | { | ||
| 388 | struct dlm_rsb *r, *tmp; | ||
| 389 | uint32_t hash, bucket; | ||
| 390 | int error = 0; | ||
| 391 | |||
| 392 | if (dlm_no_directory(ls)) | ||
| 393 | flags |= R_CREATE; | ||
| 394 | |||
| 395 | hash = jhash(name, namelen, 0); | ||
| 396 | bucket = hash & (ls->ls_rsbtbl_size - 1); | ||
| 397 | |||
| 398 | error = search_rsb(ls, name, namelen, bucket, flags, &r); | ||
| 399 | if (!error) | ||
| 400 | goto out; | ||
| 401 | |||
| 402 | if (error == -EBADR && !(flags & R_CREATE)) | ||
| 403 | goto out; | ||
| 404 | |||
| 405 | /* the rsb was found but wasn't a master copy */ | ||
| 406 | if (error == -ENOTBLK) | ||
| 407 | goto out; | ||
| 408 | |||
| 409 | error = -ENOMEM; | ||
| 410 | r = create_rsb(ls, name, namelen); | ||
| 411 | if (!r) | ||
| 412 | goto out; | ||
| 413 | |||
| 414 | r->res_hash = hash; | ||
| 415 | r->res_bucket = bucket; | ||
| 416 | r->res_nodeid = -1; | ||
| 417 | kref_init(&r->res_ref); | ||
| 418 | |||
| 419 | /* With no directory, the master can be set immediately */ | ||
| 420 | if (dlm_no_directory(ls)) { | ||
| 421 | int nodeid = dlm_dir_nodeid(r); | ||
| 422 | if (nodeid == dlm_our_nodeid()) | ||
| 423 | nodeid = 0; | ||
| 424 | r->res_nodeid = nodeid; | ||
| 425 | } | ||
| 426 | |||
| 427 | write_lock(&ls->ls_rsbtbl[bucket].lock); | ||
| 428 | error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); | ||
| 429 | if (!error) { | ||
| 430 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
| 431 | free_rsb(r); | ||
| 432 | r = tmp; | ||
| 433 | goto out; | ||
| 434 | } | ||
| 435 | list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); | ||
| 436 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
| 437 | error = 0; | ||
| 438 | out: | ||
| 439 | *r_ret = r; | ||
| 440 | return error; | ||
| 441 | } | ||
| 442 | |||
| 443 | int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
| 444 | unsigned int flags, struct dlm_rsb **r_ret) | ||
| 445 | { | ||
| 446 | return find_rsb(ls, name, namelen, flags, r_ret); | ||
| 447 | } | ||
| 448 | |||
| 449 | /* This is only called to add a reference when the code already holds | ||
| 450 | a valid reference to the rsb, so there's no need for locking. */ | ||
| 451 | |||
| 452 | static inline void hold_rsb(struct dlm_rsb *r) | ||
| 453 | { | ||
| 454 | kref_get(&r->res_ref); | ||
| 455 | } | ||
| 456 | |||
| 457 | void dlm_hold_rsb(struct dlm_rsb *r) | ||
| 458 | { | ||
| 459 | hold_rsb(r); | ||
| 460 | } | ||
| 461 | |||
| 462 | static void toss_rsb(struct kref *kref) | ||
| 463 | { | ||
| 464 | struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); | ||
| 465 | struct dlm_ls *ls = r->res_ls; | ||
| 466 | |||
| 467 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); | ||
| 468 | kref_init(&r->res_ref); | ||
| 469 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); | ||
| 470 | r->res_toss_time = jiffies; | ||
| 471 | if (r->res_lvbptr) { | ||
| 472 | free_lvb(r->res_lvbptr); | ||
| 473 | r->res_lvbptr = NULL; | ||
| 474 | } | ||
| 475 | } | ||
| 476 | |||
| 477 | /* When all references to the rsb are gone it's transfered to | ||
| 478 | the tossed list for later disposal. */ | ||
| 479 | |||
| 480 | static void put_rsb(struct dlm_rsb *r) | ||
| 481 | { | ||
| 482 | struct dlm_ls *ls = r->res_ls; | ||
| 483 | uint32_t bucket = r->res_bucket; | ||
| 484 | |||
| 485 | write_lock(&ls->ls_rsbtbl[bucket].lock); | ||
| 486 | kref_put(&r->res_ref, toss_rsb); | ||
| 487 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
| 488 | } | ||
| 489 | |||
| 490 | void dlm_put_rsb(struct dlm_rsb *r) | ||
| 491 | { | ||
| 492 | put_rsb(r); | ||
| 493 | } | ||
| 494 | |||
| 495 | /* See comment for unhold_lkb */ | ||
| 496 | |||
| 497 | static void unhold_rsb(struct dlm_rsb *r) | ||
| 498 | { | ||
| 499 | int rv; | ||
| 500 | rv = kref_put(&r->res_ref, toss_rsb); | ||
| 501 | DLM_ASSERT(!rv, dlm_dump_rsb(r);); | ||
| 502 | } | ||
| 503 | |||
| 504 | static void kill_rsb(struct kref *kref) | ||
| 505 | { | ||
| 506 | struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); | ||
| 507 | |||
| 508 | /* All work is done after the return from kref_put() so we | ||
| 509 | can release the write_lock before the remove and free. */ | ||
| 510 | |||
| 511 | DLM_ASSERT(list_empty(&r->res_lookup), dlm_dump_rsb(r);); | ||
| 512 | DLM_ASSERT(list_empty(&r->res_grantqueue), dlm_dump_rsb(r);); | ||
| 513 | DLM_ASSERT(list_empty(&r->res_convertqueue), dlm_dump_rsb(r);); | ||
| 514 | DLM_ASSERT(list_empty(&r->res_waitqueue), dlm_dump_rsb(r);); | ||
| 515 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_dump_rsb(r);); | ||
| 516 | DLM_ASSERT(list_empty(&r->res_recover_list), dlm_dump_rsb(r);); | ||
| 517 | } | ||
| 518 | |||
| 519 | /* Attaching/detaching lkb's from rsb's is for rsb reference counting. | ||
| 520 | The rsb must exist as long as any lkb's for it do. */ | ||
| 521 | |||
| 522 | static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 523 | { | ||
| 524 | hold_rsb(r); | ||
| 525 | lkb->lkb_resource = r; | ||
| 526 | } | ||
| 527 | |||
| 528 | static void detach_lkb(struct dlm_lkb *lkb) | ||
| 529 | { | ||
| 530 | if (lkb->lkb_resource) { | ||
| 531 | put_rsb(lkb->lkb_resource); | ||
| 532 | lkb->lkb_resource = NULL; | ||
| 533 | } | ||
| 534 | } | ||
| 535 | |||
| 536 | static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | ||
| 537 | { | ||
| 538 | struct dlm_lkb *lkb, *tmp; | ||
| 539 | uint32_t lkid = 0; | ||
| 540 | uint16_t bucket; | ||
| 541 | |||
| 542 | lkb = allocate_lkb(ls); | ||
| 543 | if (!lkb) | ||
| 544 | return -ENOMEM; | ||
| 545 | |||
| 546 | lkb->lkb_nodeid = -1; | ||
| 547 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
| 548 | kref_init(&lkb->lkb_ref); | ||
| 549 | INIT_LIST_HEAD(&lkb->lkb_ownqueue); | ||
| 550 | |||
| 551 | get_random_bytes(&bucket, sizeof(bucket)); | ||
| 552 | bucket &= (ls->ls_lkbtbl_size - 1); | ||
| 553 | |||
| 554 | write_lock(&ls->ls_lkbtbl[bucket].lock); | ||
| 555 | |||
| 556 | /* counter can roll over so we must verify lkid is not in use */ | ||
| 557 | |||
| 558 | while (lkid == 0) { | ||
| 559 | lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); | ||
| 560 | |||
| 561 | list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, | ||
| 562 | lkb_idtbl_list) { | ||
| 563 | if (tmp->lkb_id != lkid) | ||
| 564 | continue; | ||
| 565 | lkid = 0; | ||
| 566 | break; | ||
| 567 | } | ||
| 568 | } | ||
| 569 | |||
| 570 | lkb->lkb_id = lkid; | ||
| 571 | list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); | ||
| 572 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
| 573 | |||
| 574 | *lkb_ret = lkb; | ||
| 575 | return 0; | ||
| 576 | } | ||
| 577 | |||
| 578 | static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) | ||
| 579 | { | ||
| 580 | uint16_t bucket = lkid & 0xFFFF; | ||
| 581 | struct dlm_lkb *lkb; | ||
| 582 | |||
| 583 | list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { | ||
| 584 | if (lkb->lkb_id == lkid) | ||
| 585 | return lkb; | ||
| 586 | } | ||
| 587 | return NULL; | ||
| 588 | } | ||
| 589 | |||
| 590 | static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) | ||
| 591 | { | ||
| 592 | struct dlm_lkb *lkb; | ||
| 593 | uint16_t bucket = lkid & 0xFFFF; | ||
| 594 | |||
| 595 | if (bucket >= ls->ls_lkbtbl_size) | ||
| 596 | return -EBADSLT; | ||
| 597 | |||
| 598 | read_lock(&ls->ls_lkbtbl[bucket].lock); | ||
| 599 | lkb = __find_lkb(ls, lkid); | ||
| 600 | if (lkb) | ||
| 601 | kref_get(&lkb->lkb_ref); | ||
| 602 | read_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
| 603 | |||
| 604 | *lkb_ret = lkb; | ||
| 605 | return lkb ? 0 : -ENOENT; | ||
| 606 | } | ||
| 607 | |||
| 608 | static void kill_lkb(struct kref *kref) | ||
| 609 | { | ||
| 610 | struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref); | ||
| 611 | |||
| 612 | /* All work is done after the return from kref_put() so we | ||
| 613 | can release the write_lock before the detach_lkb */ | ||
| 614 | |||
| 615 | DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); | ||
| 616 | } | ||
| 617 | |||
| 618 | /* __put_lkb() is used when an lkb may not have an rsb attached to | ||
| 619 | it so we need to provide the lockspace explicitly */ | ||
| 620 | |||
| 621 | static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
| 622 | { | ||
| 623 | uint16_t bucket = lkb->lkb_id & 0xFFFF; | ||
| 624 | |||
| 625 | write_lock(&ls->ls_lkbtbl[bucket].lock); | ||
| 626 | if (kref_put(&lkb->lkb_ref, kill_lkb)) { | ||
| 627 | list_del(&lkb->lkb_idtbl_list); | ||
| 628 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
| 629 | |||
| 630 | detach_lkb(lkb); | ||
| 631 | |||
| 632 | /* for local/process lkbs, lvbptr points to caller's lksb */ | ||
| 633 | if (lkb->lkb_lvbptr && is_master_copy(lkb)) | ||
| 634 | free_lvb(lkb->lkb_lvbptr); | ||
| 635 | free_lkb(lkb); | ||
| 636 | return 1; | ||
| 637 | } else { | ||
| 638 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
| 639 | return 0; | ||
| 640 | } | ||
| 641 | } | ||
| 642 | |||
| 643 | int dlm_put_lkb(struct dlm_lkb *lkb) | ||
| 644 | { | ||
| 645 | struct dlm_ls *ls; | ||
| 646 | |||
| 647 | DLM_ASSERT(lkb->lkb_resource, dlm_print_lkb(lkb);); | ||
| 648 | DLM_ASSERT(lkb->lkb_resource->res_ls, dlm_print_lkb(lkb);); | ||
| 649 | |||
| 650 | ls = lkb->lkb_resource->res_ls; | ||
| 651 | return __put_lkb(ls, lkb); | ||
| 652 | } | ||
| 653 | |||
| 654 | /* This is only called to add a reference when the code already holds | ||
| 655 | a valid reference to the lkb, so there's no need for locking. */ | ||
| 656 | |||
| 657 | static inline void hold_lkb(struct dlm_lkb *lkb) | ||
| 658 | { | ||
| 659 | kref_get(&lkb->lkb_ref); | ||
| 660 | } | ||
| 661 | |||
| 662 | /* This is called when we need to remove a reference and are certain | ||
| 663 | it's not the last ref. e.g. del_lkb is always called between a | ||
| 664 | find_lkb/put_lkb and is always the inverse of a previous add_lkb. | ||
| 665 | put_lkb would work fine, but would involve unnecessary locking */ | ||
| 666 | |||
| 667 | static inline void unhold_lkb(struct dlm_lkb *lkb) | ||
| 668 | { | ||
| 669 | int rv; | ||
| 670 | rv = kref_put(&lkb->lkb_ref, kill_lkb); | ||
| 671 | DLM_ASSERT(!rv, dlm_print_lkb(lkb);); | ||
| 672 | } | ||
| 673 | |||
| 674 | static void lkb_add_ordered(struct list_head *new, struct list_head *head, | ||
| 675 | int mode) | ||
| 676 | { | ||
| 677 | struct dlm_lkb *lkb = NULL; | ||
| 678 | |||
| 679 | list_for_each_entry(lkb, head, lkb_statequeue) | ||
| 680 | if (lkb->lkb_rqmode < mode) | ||
| 681 | break; | ||
| 682 | |||
| 683 | if (!lkb) | ||
| 684 | list_add_tail(new, head); | ||
| 685 | else | ||
| 686 | __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); | ||
| 687 | } | ||
| 688 | |||
| 689 | /* add/remove lkb to rsb's grant/convert/wait queue */ | ||
| 690 | |||
| 691 | static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status) | ||
| 692 | { | ||
| 693 | kref_get(&lkb->lkb_ref); | ||
| 694 | |||
| 695 | DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); | ||
| 696 | |||
| 697 | lkb->lkb_status = status; | ||
| 698 | |||
| 699 | switch (status) { | ||
| 700 | case DLM_LKSTS_WAITING: | ||
| 701 | if (lkb->lkb_exflags & DLM_LKF_HEADQUE) | ||
| 702 | list_add(&lkb->lkb_statequeue, &r->res_waitqueue); | ||
| 703 | else | ||
| 704 | list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue); | ||
| 705 | break; | ||
| 706 | case DLM_LKSTS_GRANTED: | ||
| 707 | /* convention says granted locks kept in order of grmode */ | ||
| 708 | lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue, | ||
| 709 | lkb->lkb_grmode); | ||
| 710 | break; | ||
| 711 | case DLM_LKSTS_CONVERT: | ||
| 712 | if (lkb->lkb_exflags & DLM_LKF_HEADQUE) | ||
| 713 | list_add(&lkb->lkb_statequeue, &r->res_convertqueue); | ||
| 714 | else | ||
| 715 | list_add_tail(&lkb->lkb_statequeue, | ||
| 716 | &r->res_convertqueue); | ||
| 717 | break; | ||
| 718 | default: | ||
| 719 | DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status);); | ||
| 720 | } | ||
| 721 | } | ||
| 722 | |||
| 723 | static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 724 | { | ||
| 725 | lkb->lkb_status = 0; | ||
| 726 | list_del(&lkb->lkb_statequeue); | ||
| 727 | unhold_lkb(lkb); | ||
| 728 | } | ||
| 729 | |||
| 730 | static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts) | ||
| 731 | { | ||
| 732 | hold_lkb(lkb); | ||
| 733 | del_lkb(r, lkb); | ||
| 734 | add_lkb(r, lkb, sts); | ||
| 735 | unhold_lkb(lkb); | ||
| 736 | } | ||
| 737 | |||
| 738 | /* add/remove lkb from global waiters list of lkb's waiting for | ||
| 739 | a reply from a remote node */ | ||
| 740 | |||
| 741 | static void add_to_waiters(struct dlm_lkb *lkb, int mstype) | ||
| 742 | { | ||
| 743 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
| 744 | |||
| 745 | mutex_lock(&ls->ls_waiters_mutex); | ||
| 746 | if (lkb->lkb_wait_type) { | ||
| 747 | log_print("add_to_waiters error %d", lkb->lkb_wait_type); | ||
| 748 | goto out; | ||
| 749 | } | ||
| 750 | lkb->lkb_wait_type = mstype; | ||
| 751 | kref_get(&lkb->lkb_ref); | ||
| 752 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | ||
| 753 | out: | ||
| 754 | mutex_unlock(&ls->ls_waiters_mutex); | ||
| 755 | } | ||
| 756 | |||
| 757 | static int _remove_from_waiters(struct dlm_lkb *lkb) | ||
| 758 | { | ||
| 759 | int error = 0; | ||
| 760 | |||
| 761 | if (!lkb->lkb_wait_type) { | ||
| 762 | log_print("remove_from_waiters error"); | ||
| 763 | error = -EINVAL; | ||
| 764 | goto out; | ||
| 765 | } | ||
| 766 | lkb->lkb_wait_type = 0; | ||
| 767 | list_del(&lkb->lkb_wait_reply); | ||
| 768 | unhold_lkb(lkb); | ||
| 769 | out: | ||
| 770 | return error; | ||
| 771 | } | ||
| 772 | |||
| 773 | static int remove_from_waiters(struct dlm_lkb *lkb) | ||
| 774 | { | ||
| 775 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
| 776 | int error; | ||
| 777 | |||
| 778 | mutex_lock(&ls->ls_waiters_mutex); | ||
| 779 | error = _remove_from_waiters(lkb); | ||
| 780 | mutex_unlock(&ls->ls_waiters_mutex); | ||
| 781 | return error; | ||
| 782 | } | ||
| 783 | |||
| 784 | static void dir_remove(struct dlm_rsb *r) | ||
| 785 | { | ||
| 786 | int to_nodeid; | ||
| 787 | |||
| 788 | if (dlm_no_directory(r->res_ls)) | ||
| 789 | return; | ||
| 790 | |||
| 791 | to_nodeid = dlm_dir_nodeid(r); | ||
| 792 | if (to_nodeid != dlm_our_nodeid()) | ||
| 793 | send_remove(r); | ||
| 794 | else | ||
| 795 | dlm_dir_remove_entry(r->res_ls, to_nodeid, | ||
| 796 | r->res_name, r->res_length); | ||
| 797 | } | ||
| 798 | |||
| 799 | /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is | ||
| 800 | found since they are in order of newest to oldest? */ | ||
| 801 | |||
/* Free one hash bucket's tossed rsb's whose toss time has expired.
   Returns the number of rsb's freed.  The bucket lock is dropped and
   retaken on each pass so the actual free and directory update happen
   outside the lock. */

static int shrink_bucket(struct dlm_ls *ls, int b)
{
	struct dlm_rsb *r;
	int count = 0, found;

	for (;;) {
		found = 0;
		write_lock(&ls->ls_rsbtbl[b].lock);
		/* scan from the tail: the toss list is ordered newest to
		   oldest (see the FIXME above about early exit) */
		list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
					    res_hashchain) {
			if (!time_after_eq(jiffies, r->res_toss_time +
					   dlm_config.toss_secs * HZ))
				continue;
			found = 1;
			break;
		}

		if (!found) {
			write_unlock(&ls->ls_rsbtbl[b].lock);
			break;
		}

		if (kref_put(&r->res_ref, kill_rsb)) {
			/* last reference dropped: unhash under the lock,
			   then free (and drop any directory entry) unlocked */
			list_del(&r->res_hashchain);
			write_unlock(&ls->ls_rsbtbl[b].lock);

			if (is_master(r))
				dir_remove(r);
			free_rsb(r);
			count++;
		} else {
			/* still referenced; unexpected for a tossed rsb */
			write_unlock(&ls->ls_rsbtbl[b].lock);
			log_error(ls, "tossed rsb in use %s", r->res_name);
		}
	}

	return count;
}
| 840 | |||
| 841 | void dlm_scan_rsbs(struct dlm_ls *ls) | ||
| 842 | { | ||
| 843 | int i; | ||
| 844 | |||
| 845 | if (dlm_locking_stopped(ls)) | ||
| 846 | return; | ||
| 847 | |||
| 848 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
| 849 | shrink_bucket(ls, i); | ||
| 850 | cond_resched(); | ||
| 851 | } | ||
| 852 | } | ||
| 853 | |||
| 854 | /* lkb is master or local copy */ | ||
| 855 | |||
/* Update lock value block state when a lock is granted on the master or
   local copy.  The dlm_lvb_operations table, indexed by (granted mode,
   requested mode), decides which way the lvb moves. */

static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	int b, len = r->res_ls->ls_lvblen;

	/* b=1 lvb returned to caller
	   b=0 lvb written to rsb or invalidated
	   b=-1 do nothing */

	b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];

	if (b == 1) {
		/* copy the rsb's lvb out to the caller's buffer */
		if (!lkb->lkb_lvbptr)
			return;

		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			return;

		if (!r->res_lvbptr)
			return;

		memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len);
		lkb->lkb_lvbseq = r->res_lvbseq;

	} else if (b == 0) {
		if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
			/* caller asked to invalidate the lvb instead */
			rsb_set_flag(r, RSB_VALNOTVALID);
			return;
		}

		if (!lkb->lkb_lvbptr)
			return;

		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			return;

		/* rsb lvb storage is allocated lazily on first write */
		if (!r->res_lvbptr)
			r->res_lvbptr = allocate_lvb(r->res_ls);

		if (!r->res_lvbptr)
			return;

		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len);
		r->res_lvbseq++;
		lkb->lkb_lvbseq = r->res_lvbseq;
		rsb_clear_flag(r, RSB_VALNOTVALID);
	}

	/* report an invalid lvb to the caller via the status block flags */
	if (rsb_flag(r, RSB_VALNOTVALID))
		lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID;
}
| 906 | |||
/* Write the lkb's lvb back to the rsb on unlock.  Only locks held in
   PW or EX mode are allowed to modify the lvb. */

static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	if (lkb->lkb_grmode < DLM_LOCK_PW)
		return;

	if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
		/* caller asked to invalidate the lvb instead of writing it */
		rsb_set_flag(r, RSB_VALNOTVALID);
		return;
	}

	if (!lkb->lkb_lvbptr)
		return;

	if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
		return;

	/* rsb lvb storage is allocated lazily on first write */
	if (!r->res_lvbptr)
		r->res_lvbptr = allocate_lvb(r->res_ls);

	if (!r->res_lvbptr)
		return;

	memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
	r->res_lvbseq++;
	rsb_clear_flag(r, RSB_VALNOTVALID);
}
| 933 | |||
| 934 | /* lkb is process copy (pc) */ | ||
| 935 | |||
| 936 | static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
| 937 | struct dlm_message *ms) | ||
| 938 | { | ||
| 939 | int b; | ||
| 940 | |||
| 941 | if (!lkb->lkb_lvbptr) | ||
| 942 | return; | ||
| 943 | |||
| 944 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
| 945 | return; | ||
| 946 | |||
| 947 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | ||
| 948 | if (b == 1) { | ||
| 949 | int len = receive_extralen(ms); | ||
| 950 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | ||
| 951 | lkb->lkb_lvbseq = ms->m_lvbseq; | ||
| 952 | } | ||
| 953 | } | ||
| 954 | |||
| 955 | /* Manipulate lkb's on rsb's convert/granted/waiting queues | ||
| 956 | remove_lock -- used for unlock, removes lkb from granted | ||
| 957 | revert_lock -- used for cancel, moves lkb from convert to granted | ||
| 958 | grant_lock -- used for request and convert, adds lkb to granted or | ||
| 959 | moves lkb from convert or waiting to granted | ||
| 960 | |||
| 961 | Each of these is used for master or local copy lkb's. There is | ||
| 962 | also a _pc() variation used to make the corresponding change on | ||
| 963 | a process copy (pc) lkb. */ | ||
| 964 | |||
| 965 | static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 966 | { | ||
| 967 | del_lkb(r, lkb); | ||
| 968 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
| 969 | /* this unhold undoes the original ref from create_lkb() | ||
| 970 | so this leads to the lkb being freed */ | ||
| 971 | unhold_lkb(lkb); | ||
| 972 | } | ||
| 973 | |||
/* Unlock on the master/local copy: write the lvb back first, then take
 * the lkb off its queue. */
static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	set_lvb_unlock(r, lkb);
	_remove_lock(r, lkb);
}
| 979 | |||
/* Unlock on a process copy; lvb handling is skipped for process copies. */
static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	_remove_lock(r, lkb);
}
| 984 | |||
| 985 | static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 986 | { | ||
| 987 | lkb->lkb_rqmode = DLM_LOCK_IV; | ||
| 988 | |||
| 989 | switch (lkb->lkb_status) { | ||
| 990 | case DLM_LKSTS_GRANTED: | ||
| 991 | break; | ||
| 992 | case DLM_LKSTS_CONVERT: | ||
| 993 | move_lkb(r, lkb, DLM_LKSTS_GRANTED); | ||
| 994 | break; | ||
| 995 | case DLM_LKSTS_WAITING: | ||
| 996 | del_lkb(r, lkb); | ||
| 997 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
| 998 | /* this unhold undoes the original ref from create_lkb() | ||
| 999 | so this leads to the lkb being freed */ | ||
| 1000 | unhold_lkb(lkb); | ||
| 1001 | break; | ||
| 1002 | default: | ||
| 1003 | log_print("invalid status for revert %d", lkb->lkb_status); | ||
| 1004 | } | ||
| 1005 | } | ||
| 1006 | |||
/* Process-copy revert; identical to the master/local case. */
static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	revert_lock(r, lkb);
}
| 1011 | |||
| 1012 | static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 1013 | { | ||
| 1014 | if (lkb->lkb_grmode != lkb->lkb_rqmode) { | ||
| 1015 | lkb->lkb_grmode = lkb->lkb_rqmode; | ||
| 1016 | if (lkb->lkb_status) | ||
| 1017 | move_lkb(r, lkb, DLM_LKSTS_GRANTED); | ||
| 1018 | else | ||
| 1019 | add_lkb(r, lkb, DLM_LKSTS_GRANTED); | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | lkb->lkb_rqmode = DLM_LOCK_IV; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 1026 | { | ||
| 1027 | set_lvb_lock(r, lkb); | ||
| 1028 | _grant_lock(r, lkb); | ||
| 1029 | lkb->lkb_highbast = 0; | ||
| 1030 | } | ||
| 1031 | |||
/* Grant on a process copy, taking any lvb data from the reply message. */
static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
			  struct dlm_message *ms)
{
	set_lvb_lock_pc(r, lkb, ms);
	_grant_lock(r, lkb);
}
| 1038 | |||
| 1039 | /* called by grant_pending_locks() which means an async grant message must | ||
| 1040 | be sent to the requesting node in addition to granting the lock if the | ||
| 1041 | lkb belongs to a remote node. */ | ||
| 1042 | |||
/* Grant a queued lock and notify its owner: a remote owner (master
 * copy) gets a grant message, a local owner gets a completion ast. */
static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	grant_lock(r, lkb);
	if (!is_master_copy(lkb))
		queue_cast(r, lkb, 0);
	else
		send_grant(r, lkb);
}
| 1051 | |||
| 1052 | static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) | ||
| 1053 | { | ||
| 1054 | struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, | ||
| 1055 | lkb_statequeue); | ||
| 1056 | if (lkb->lkb_id == first->lkb_id) | ||
| 1057 | return 1; | ||
| 1058 | |||
| 1059 | return 0; | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | /* Check if the given lkb conflicts with another lkb on the queue. */ | ||
| 1063 | |||
| 1064 | static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) | ||
| 1065 | { | ||
| 1066 | struct dlm_lkb *this; | ||
| 1067 | |||
| 1068 | list_for_each_entry(this, head, lkb_statequeue) { | ||
| 1069 | if (this == lkb) | ||
| 1070 | continue; | ||
| 1071 | if (!modes_compat(this, lkb)) | ||
| 1072 | return 1; | ||
| 1073 | } | ||
| 1074 | return 0; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | /* | ||
| 1078 | * "A conversion deadlock arises with a pair of lock requests in the converting | ||
| 1079 | * queue for one resource. The granted mode of each lock blocks the requested | ||
| 1080 | * mode of the other lock." | ||
| 1081 | * | ||
| 1082 | * Part 2: if the granted mode of lkb is preventing the first lkb in the | ||
| 1083 | * convert queue from being granted, then demote lkb (set grmode to NL). | ||
| 1084 | * This second form requires that we check for conv-deadlk even when | ||
| 1085 | * now == 0 in _can_be_granted(). | ||
| 1086 | * | ||
| 1087 | * Example: | ||
| 1088 | * Granted Queue: empty | ||
| 1089 | * Convert Queue: NL->EX (first lock) | ||
| 1090 | * PR->EX (second lock) | ||
| 1091 | * | ||
| 1092 | * The first lock can't be granted because of the granted mode of the second | ||
| 1093 | * lock and the second lock can't be granted because it's not first in the | ||
| 1094 | * list. We demote the granted mode of the second lock (the lkb passed to this | ||
| 1095 | * function). | ||
| 1096 | * | ||
| 1097 | * After the resolution, the "grant pending" function needs to go back and try | ||
| 1098 | * to grant locks on the convert queue again since the first lock can now be | ||
| 1099 | * granted. | ||
| 1100 | */ | ||
| 1101 | |||
/* Return 1 if lkb is involved in a conversion deadlock on this rsb's
   convert queue (see the discussion above), 0 otherwise. */

static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
{
	struct dlm_lkb *this, *first = NULL, *self = NULL;

	list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
		if (!first)
			first = this;
		if (this == lkb) {
			self = lkb;
			continue;
		}

		/* pairwise deadlock: each lock's granted mode blocks the
		   other's requested mode */
		if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
			return 1;
	}

	/* if lkb is on the convert queue and is preventing the first
	   from being granted, then there's deadlock and we demote lkb.
	   multiple converting locks may need to do this before the first
	   converting lock can be granted. */

	if (self && self != first) {
		if (!modes_compat(lkb, first) &&
		    !queue_conflict(&rsb->res_grantqueue, first))
			return 1;
	}

	return 0;
}
| 1131 | |||
| 1132 | /* | ||
| 1133 | * Return 1 if the lock can be granted, 0 otherwise. | ||
| 1134 | * Also detect and resolve conversion deadlocks. | ||
| 1135 | * | ||
| 1136 | * lkb is the lock to be granted | ||
| 1137 | * | ||
| 1138 | * now is 1 if the function is being called in the context of the | ||
| 1139 | * immediate request, it is 0 if called later, after the lock has been | ||
| 1140 | * queued. | ||
| 1141 | * | ||
| 1142 | * References are from chapter 6 of "VAXcluster Principles" by Roy Davis | ||
| 1143 | */ | ||
| 1144 | |||
static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
{
	/* conv is true when this is a conversion (a granted mode exists) */
	int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV);

	/*
	 * 6-10: Version 5.4 introduced an option to address the phenomenon of
	 * a new request for a NL mode lock being blocked.
	 *
	 * 6-11: If the optional EXPEDITE flag is used with the new NL mode
	 * request, then it would be granted. In essence, the use of this flag
	 * tells the Lock Manager to expedite this request by not considering
	 * what may be in the CONVERTING or WAITING queues... As of this
	 * writing, the EXPEDITE flag can be used only with new requests for NL
	 * mode locks. This flag is not valid for conversion requests.
	 *
	 * A shortcut. Earlier checks return an error if EXPEDITE is used in a
	 * conversion or used with a non-NL requested mode. We also know an
	 * EXPEDITE request is always granted immediately, so now must always
	 * be 1. The full condition to grant an expedite request: (now &&
	 * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can
	 * therefore be shortened to just checking the flag.
	 */

	if (lkb->lkb_exflags & DLM_LKF_EXPEDITE)
		return 1;

	/*
	 * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be
	 * added to the remaining conditions.
	 */

	if (queue_conflict(&r->res_grantqueue, lkb))
		goto out;

	/*
	 * 6-3: By default, a conversion request is immediately granted if the
	 * requested mode is compatible with the modes of all other granted
	 * locks
	 */

	if (queue_conflict(&r->res_convertqueue, lkb))
		goto out;

	/*
	 * 6-5: But the default algorithm for deciding whether to grant or
	 * queue conversion requests does not by itself guarantee that such
	 * requests are serviced on a "first come first serve" basis. This, in
	 * turn, can lead to a phenomenon known as "indefinite postponement".
	 *
	 * 6-7: This issue is dealt with by using the optional QUECVT flag with
	 * the system service employed to request a lock conversion. This flag
	 * forces certain conversion requests to be queued, even if they are
	 * compatible with the granted modes of other locks on the same
	 * resource. Thus, the use of this flag results in conversion requests
	 * being ordered on a "first come first serve" basis.
	 *
	 * DCT: This condition is all about new conversions being able to occur
	 * "in place" while the lock remains on the granted queue (assuming
	 * nothing else conflicts.) IOW if QUECVT isn't set, a conversion
	 * doesn't _have_ to go onto the convert queue where it's processed in
	 * order. The "now" variable is necessary to distinguish converts
	 * being received and processed for the first time now, because once a
	 * convert is moved to the conversion queue the condition below applies
	 * requiring fifo granting.
	 */

	if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT))
		return 1;

	/*
	 * The NOORDER flag is set to avoid the standard vms rules on grant
	 * order.
	 */

	if (lkb->lkb_exflags & DLM_LKF_NOORDER)
		return 1;

	/*
	 * 6-3: Once in that queue [CONVERTING], a conversion request cannot be
	 * granted until all other conversion requests ahead of it are granted
	 * and/or canceled.
	 */

	if (!now && conv && first_in_list(lkb, &r->res_convertqueue))
		return 1;

	/*
	 * 6-4: By default, a new request is immediately granted only if all
	 * three of the following conditions are satisfied when the request is
	 * issued:
	 * - The queue of ungranted conversion requests for the resource is
	 *   empty.
	 * - The queue of ungranted new requests for the resource is empty.
	 * - The mode of the new request is compatible with the most
	 *   restrictive mode of all granted locks on the resource.
	 */

	if (now && !conv && list_empty(&r->res_convertqueue) &&
	    list_empty(&r->res_waitqueue))
		return 1;

	/*
	 * 6-4: Once a lock request is in the queue of ungranted new requests,
	 * it cannot be granted until the queue of ungranted conversion
	 * requests is empty, all ungranted new requests ahead of it are
	 * granted and/or canceled, and it is compatible with the granted mode
	 * of the most restrictive lock granted on the resource.
	 */

	if (!now && !conv && list_empty(&r->res_convertqueue) &&
	    first_in_list(lkb, &r->res_waitqueue))
		return 1;

 out:
	/*
	 * The following, enabled by CONVDEADLK, departs from VMS.
	 */

	if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
	    conversion_deadlock_detect(r, lkb)) {
		/* resolve the deadlock by demoting our granted mode to NL
		   and reporting it via the DEMOTED status flag */
		lkb->lkb_grmode = DLM_LOCK_NL;
		lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
	}

	return 0;
}
| 1271 | |||
| 1272 | /* | ||
| 1273 | * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a | ||
| 1274 | * simple way to provide a big optimization to applications that can use them. | ||
| 1275 | */ | ||
| 1276 | |||
| 1277 | static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) | ||
| 1278 | { | ||
| 1279 | uint32_t flags = lkb->lkb_exflags; | ||
| 1280 | int rv; | ||
| 1281 | int8_t alt = 0, rqmode = lkb->lkb_rqmode; | ||
| 1282 | |||
| 1283 | rv = _can_be_granted(r, lkb, now); | ||
| 1284 | if (rv) | ||
| 1285 | goto out; | ||
| 1286 | |||
| 1287 | if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) | ||
| 1288 | goto out; | ||
| 1289 | |||
| 1290 | if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) | ||
| 1291 | alt = DLM_LOCK_PR; | ||
| 1292 | else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) | ||
| 1293 | alt = DLM_LOCK_CW; | ||
| 1294 | |||
| 1295 | if (alt) { | ||
| 1296 | lkb->lkb_rqmode = alt; | ||
| 1297 | rv = _can_be_granted(r, lkb, now); | ||
| 1298 | if (rv) | ||
| 1299 | lkb->lkb_sbflags |= DLM_SBF_ALTMODE; | ||
| 1300 | else | ||
| 1301 | lkb->lkb_rqmode = rqmode; | ||
| 1302 | } | ||
| 1303 | out: | ||
| 1304 | return rv; | ||
| 1305 | } | ||
| 1306 | |||
/* Grant whatever can be granted on the convert queue.  The scan is
   restarted after any grant (earlier entries may now be grantable) and
   once more if a conversion deadlock demotion occurred.  Returns the
   highest requested mode (at least "high") among locks left ungranted. */

static int grant_pending_convert(struct dlm_rsb *r, int high)
{
	struct dlm_lkb *lkb, *s;
	int hi, demoted, quit, grant_restart, demote_restart;

	quit = 0;
restart:
	grant_restart = 0;
	demote_restart = 0;
	hi = DLM_LOCK_IV;

	list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
		demoted = is_demoted(lkb);
		if (can_be_granted(r, lkb, 0)) {
			grant_lock_pending(r, lkb);
			grant_restart = 1;
		} else {
			hi = max_t(int, lkb->lkb_rqmode, hi);
			/* can_be_granted() may demote lkb as a side effect */
			if (!demoted && is_demoted(lkb))
				demote_restart = 1;
		}
	}

	if (grant_restart)
		goto restart;
	if (demote_restart && !quit) {
		quit = 1;	/* demotions trigger one extra pass only */
		goto restart;
	}

	return max_t(int, high, hi);
}
| 1339 | |||
| 1340 | static int grant_pending_wait(struct dlm_rsb *r, int high) | ||
| 1341 | { | ||
| 1342 | struct dlm_lkb *lkb, *s; | ||
| 1343 | |||
| 1344 | list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { | ||
| 1345 | if (can_be_granted(r, lkb, 0)) | ||
| 1346 | grant_lock_pending(r, lkb); | ||
| 1347 | else | ||
| 1348 | high = max_t(int, lkb->lkb_rqmode, high); | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | return high; | ||
| 1352 | } | ||
| 1353 | |||
/* Master-side scan after a lock changes: grant queued converts and
   requests, then send blocking ASTs to granted locks that block the
   highest requested mode still waiting. */

static void grant_pending_locks(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb, *s;
	int high = DLM_LOCK_IV;

	DLM_ASSERT(is_master(r), dlm_dump_rsb(r););

	high = grant_pending_convert(r, high);
	high = grant_pending_wait(r, high);

	/* DLM_LOCK_IV means nothing was left ungranted */
	if (high == DLM_LOCK_IV)
		return;

	/*
	 * If there are locks left on the wait/convert queue then send blocking
	 * ASTs to granted locks based on the largest requested mode (high)
	 * found above. FIXME: highbast < high comparison not valid for PR/CW.
	 */

	list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
		if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) &&
		    !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
			queue_bast(r, lkb, high);
			lkb->lkb_highbast = high;
		}
	}
}
| 1381 | |||
| 1382 | static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, | ||
| 1383 | struct dlm_lkb *lkb) | ||
| 1384 | { | ||
| 1385 | struct dlm_lkb *gr; | ||
| 1386 | |||
| 1387 | list_for_each_entry(gr, head, lkb_statequeue) { | ||
| 1388 | if (gr->lkb_bastaddr && | ||
| 1389 | gr->lkb_highbast < lkb->lkb_rqmode && | ||
| 1390 | !modes_compat(gr, lkb)) { | ||
| 1391 | queue_bast(r, gr, lkb->lkb_rqmode); | ||
| 1392 | gr->lkb_highbast = lkb->lkb_rqmode; | ||
| 1393 | } | ||
| 1394 | } | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 1398 | { | ||
| 1399 | send_bast_queue(r, &r->res_grantqueue, lkb); | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 1403 | { | ||
| 1404 | send_bast_queue(r, &r->res_grantqueue, lkb); | ||
| 1405 | send_bast_queue(r, &r->res_convertqueue, lkb); | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | /* set_master(r, lkb) -- set the master nodeid of a resource | ||
| 1409 | |||
| 1410 | The purpose of this function is to set the nodeid field in the given | ||
| 1411 | lkb using the nodeid field in the given rsb. If the rsb's nodeid is | ||
| 1412 | known, it can just be copied to the lkb and the function will return | ||
| 1413 | 0. If the rsb's nodeid is _not_ known, it needs to be looked up | ||
| 1414 | before it can be copied to the lkb. | ||
| 1415 | |||
| 1416 | When the rsb nodeid is being looked up remotely, the initial lkb | ||
| 1417 | causing the lookup is kept on the ls_waiters list waiting for the | ||
| 1418 | lookup reply. Other lkb's waiting for the same rsb lookup are kept | ||
| 1419 | on the rsb's res_lookup list until the master is verified. | ||
| 1420 | |||
| 1421 | Return values: | ||
| 1422 | 0: nodeid is set in rsb/lkb and the caller should go ahead and use it | ||
| 1423 | 1: the rsb master is not available and the lkb has been placed on | ||
| 1424 | a wait queue | ||
| 1425 | */ | ||
| 1426 | |||
/* See the block comment above for the contract: returns 0 when
   lkb_nodeid has been set, 1 when lkb has been queued pending a
   master lookup. */

static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	struct dlm_ls *ls = r->res_ls;
	int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();

	if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
		/* use the remembered nodeid but record this lkb as
		   first_lkid so the master can be confirmed by its reply */
		rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
		r->res_first_lkid = lkb->lkb_id;
		lkb->lkb_nodeid = r->res_nodeid;
		return 0;
	}

	if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) {
		/* another lkb's lookup is in flight; wait behind it */
		list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup);
		return 1;
	}

	if (r->res_nodeid == 0) {
		/* we are the master */
		lkb->lkb_nodeid = 0;
		return 0;
	}

	if (r->res_nodeid > 0) {
		/* master is known and remote */
		lkb->lkb_nodeid = r->res_nodeid;
		return 0;
	}

	/* res_nodeid == -1: master unknown, consult the directory */
	DLM_ASSERT(r->res_nodeid == -1, dlm_dump_rsb(r););

	dir_nodeid = dlm_dir_nodeid(r);

	if (dir_nodeid != our_nodeid) {
		/* ask the remote directory node; reply handled later */
		r->res_first_lkid = lkb->lkb_id;
		send_lookup(r, lkb);
		return 1;
	}

	for (;;) {
		/* It's possible for dlm_scand to remove an old rsb for
		   this same resource from the toss list, us to create
		   a new one, look up the master locally, and find it
		   already exists just before dlm_scand does the
		   dir_remove() on the previous rsb. */

		error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
				       r->res_length, &ret_nodeid);
		if (!error)
			break;
		log_debug(ls, "dir_lookup error %d %s", error, r->res_name);
		schedule();
	}

	if (ret_nodeid == our_nodeid) {
		/* directory says we are the master */
		r->res_first_lkid = 0;
		r->res_nodeid = 0;
		lkb->lkb_nodeid = 0;
	} else {
		r->res_first_lkid = lkb->lkb_id;
		r->res_nodeid = ret_nodeid;
		lkb->lkb_nodeid = ret_nodeid;
	}
	return 0;
}
| 1490 | |||
| 1491 | static void process_lookup_list(struct dlm_rsb *r) | ||
| 1492 | { | ||
| 1493 | struct dlm_lkb *lkb, *safe; | ||
| 1494 | |||
| 1495 | list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { | ||
| 1496 | list_del(&lkb->lkb_rsb_lookup); | ||
| 1497 | _request_lock(r, lkb); | ||
| 1498 | schedule(); | ||
| 1499 | } | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | /* confirm_master -- confirm (or deny) an rsb's master nodeid */ | ||
| 1503 | |||
/* Handle the outcome of the first lock request sent to a looked-up
   master: on success the master is confirmed and queued lookups are
   retried; on -EAGAIN the next waiter (if any) becomes first_lkid. */

static void confirm_master(struct dlm_rsb *r, int error)
{
	struct dlm_lkb *lkb;

	/* no lookup outstanding, nothing to confirm */
	if (!r->res_first_lkid)
		return;

	switch (error) {
	case 0:
	case -EINPROGRESS:
		/* request was granted or queued: master confirmed */
		r->res_first_lkid = 0;
		process_lookup_list(r);
		break;

	case -EAGAIN:
		/* the remote master didn't queue our NOQUEUE request;
		   make a waiting lkb the first_lkid */

		r->res_first_lkid = 0;

		if (!list_empty(&r->res_lookup)) {
			lkb = list_entry(r->res_lookup.next, struct dlm_lkb,
					 lkb_rsb_lookup);
			list_del(&lkb->lkb_rsb_lookup);
			r->res_first_lkid = lkb->lkb_id;
			_request_lock(r, lkb);
		} else
			r->res_nodeid = -1;	/* master unknown again */
		break;

	default:
		log_error(r->res_ls, "confirm_master unknown error %d", error);
	}
}
| 1538 | |||
| 1539 | static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, | ||
| 1540 | int namelen, uint32_t parent_lkid, void *ast, | ||
| 1541 | void *astarg, void *bast, struct dlm_args *args) | ||
| 1542 | { | ||
| 1543 | int rv = -EINVAL; | ||
| 1544 | |||
| 1545 | /* check for invalid arg usage */ | ||
| 1546 | |||
| 1547 | if (mode < 0 || mode > DLM_LOCK_EX) | ||
| 1548 | goto out; | ||
| 1549 | |||
| 1550 | if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN)) | ||
| 1551 | goto out; | ||
| 1552 | |||
| 1553 | if (flags & DLM_LKF_CANCEL) | ||
| 1554 | goto out; | ||
| 1555 | |||
| 1556 | if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT)) | ||
| 1557 | goto out; | ||
| 1558 | |||
| 1559 | if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT)) | ||
| 1560 | goto out; | ||
| 1561 | |||
| 1562 | if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE) | ||
| 1563 | goto out; | ||
| 1564 | |||
| 1565 | if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT) | ||
| 1566 | goto out; | ||
| 1567 | |||
| 1568 | if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT) | ||
| 1569 | goto out; | ||
| 1570 | |||
| 1571 | if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE) | ||
| 1572 | goto out; | ||
| 1573 | |||
| 1574 | if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL) | ||
| 1575 | goto out; | ||
| 1576 | |||
| 1577 | if (!ast || !lksb) | ||
| 1578 | goto out; | ||
| 1579 | |||
| 1580 | if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) | ||
| 1581 | goto out; | ||
| 1582 | |||
| 1583 | /* parent/child locks not yet supported */ | ||
| 1584 | if (parent_lkid) | ||
| 1585 | goto out; | ||
| 1586 | |||
| 1587 | if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) | ||
| 1588 | goto out; | ||
| 1589 | |||
| 1590 | /* these args will be copied to the lkb in validate_lock_args, | ||
| 1591 | it cannot be done now because when converting locks, fields in | ||
| 1592 | an active lkb cannot be modified before locking the rsb */ | ||
| 1593 | |||
| 1594 | args->flags = flags; | ||
| 1595 | args->astaddr = ast; | ||
| 1596 | args->astparam = (long) astarg; | ||
| 1597 | args->bastaddr = bast; | ||
| 1598 | args->mode = mode; | ||
| 1599 | args->lksb = lksb; | ||
| 1600 | rv = 0; | ||
| 1601 | out: | ||
| 1602 | return rv; | ||
| 1603 | } | ||
| 1604 | |||
| 1605 | static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) | ||
| 1606 | { | ||
| 1607 | if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK | | ||
| 1608 | DLM_LKF_FORCEUNLOCK)) | ||
| 1609 | return -EINVAL; | ||
| 1610 | |||
| 1611 | args->flags = flags; | ||
| 1612 | args->astparam = (long) astarg; | ||
| 1613 | return 0; | ||
| 1614 | } | ||
| 1615 | |||
/* Copy the pre-checked args into the lkb.  For conversions, first make
   sure the lkb is in a state that can be converted.  Returns 0,
   -EINVAL for an invalid conversion, or -EBUSY if the lkb already has
   an operation outstanding. */

static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
			      struct dlm_args *args)
{
	int rv = -EINVAL;

	if (args->flags & DLM_LKF_CONVERT) {
		/* a master copy lkb cannot be converted directly */
		if (lkb->lkb_flags & DLM_IFL_MSTCPY)
			goto out;

		/* QUECVT is only allowed for mode transitions permitted
		   by the quecvt compatibility matrix */
		if (args->flags & DLM_LKF_QUECVT &&
		    !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
			goto out;

		rv = -EBUSY;
		if (lkb->lkb_status != DLM_LKSTS_GRANTED)
			goto out;

		/* an outstanding remote operation must complete first */
		if (lkb->lkb_wait_type)
			goto out;
	}

	lkb->lkb_exflags = args->flags;
	lkb->lkb_sbflags = 0;
	lkb->lkb_astaddr = args->astaddr;
	lkb->lkb_astparam = args->astparam;
	lkb->lkb_bastaddr = args->bastaddr;
	lkb->lkb_rqmode = args->mode;
	lkb->lkb_lksb = args->lksb;
	lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
	lkb->lkb_ownpid = (int) current->pid;
	rv = 0;
 out:
	return rv;
}
| 1650 | |||
/*
 * Check that an unlock/cancel is legal for this lkb.  FORCEUNLOCK skips
 * all state checks; otherwise cancel is only valid for a non-granted lock
 * and plain unlock only for a granted one.  Returns -EINVAL, -EBUSY (an
 * operation is already outstanding), or 0, copying the args on success.
 */
static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
{
	int rv = -EINVAL;

	/* a master-copy lkb is only manipulated via remote messages */
	if (lkb->lkb_flags & DLM_IFL_MSTCPY)
		goto out;

	if (args->flags & DLM_LKF_FORCEUNLOCK)
		goto out_ok;

	/* cancel targets a waiting/converting lock, not a granted one */
	if (args->flags & DLM_LKF_CANCEL &&
	    lkb->lkb_status == DLM_LKSTS_GRANTED)
		goto out;

	/* plain unlock requires the lock to be granted */
	if (!(args->flags & DLM_LKF_CANCEL) &&
	    lkb->lkb_status != DLM_LKSTS_GRANTED)
		goto out;

	rv = -EBUSY;
	if (lkb->lkb_wait_type)
		goto out;

 out_ok:
	lkb->lkb_exflags = args->flags;
	lkb->lkb_sbflags = 0;
	lkb->lkb_astparam = args->astparam;

	rv = 0;
 out:
	return rv;
}
| 1682 | |||
| 1683 | /* | ||
| 1684 | * Four stage 4 varieties: | ||
| 1685 | * do_request(), do_convert(), do_unlock(), do_cancel() | ||
| 1686 | * These are called on the master node for the given lock and | ||
| 1687 | * from the central locking logic. | ||
| 1688 | */ | ||
| 1689 | |||
/*
 * Master-side handling of a new lock request: grant immediately if
 * possible, otherwise queue it (-EINPROGRESS) if the request flags allow
 * waiting, otherwise fail with -EAGAIN.  Completion/blocking asts are
 * queued or sent as appropriate.
 */
static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	int error = 0;

	if (can_be_granted(r, lkb, 1)) {
		grant_lock(r, lkb);
		queue_cast(r, lkb, 0);
		goto out;
	}

	if (can_be_queued(lkb)) {
		error = -EINPROGRESS;
		add_lkb(r, lkb, DLM_LKSTS_WAITING);
		send_blocking_asts(r, lkb);
		goto out;
	}

	error = -EAGAIN;
	if (force_blocking_asts(lkb))
		send_blocking_asts_all(r, lkb);
	queue_cast(r, lkb, -EAGAIN);

 out:
	return error;
}
| 1715 | |||
/*
 * Master-side handling of a conversion request; same grant/queue/fail
 * structure as do_request().  Because changing an existing lock's mode may
 * unblock other lkbs, grant_pending_locks() is called after a grant or a
 * demotion.
 */
static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	int error = 0;

	/* changing an existing lock may allow others to be granted */

	if (can_be_granted(r, lkb, 1)) {
		grant_lock(r, lkb);
		queue_cast(r, lkb, 0);
		grant_pending_locks(r);
		goto out;
	}

	if (can_be_queued(lkb)) {
		/* a demotion releases grant-mode strength even while the
		   convert itself waits, so retry the pending queue now */
		if (is_demoted(lkb))
			grant_pending_locks(r);
		error = -EINPROGRESS;
		del_lkb(r, lkb);
		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
		send_blocking_asts(r, lkb);
		goto out;
	}

	error = -EAGAIN;
	if (force_blocking_asts(lkb))
		send_blocking_asts_all(r, lkb);
	queue_cast(r, lkb, -EAGAIN);

 out:
	return error;
}
| 1747 | |||
/*
 * Master-side unlock: remove the lkb, queue the -DLM_EUNLOCK completion
 * ast, and retry any locks that the removal may have unblocked.  Always
 * returns -DLM_EUNLOCK (the conventional "unlocked" status, not an error).
 */
static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	remove_lock(r, lkb);
	queue_cast(r, lkb, -DLM_EUNLOCK);
	grant_pending_locks(r);
	return -DLM_EUNLOCK;
}
| 1755 | |||
/* FIXME: if revert_lock() finds that the lkb is granted, we should
   skip the queue_cast(ECANCEL).  It indicates that the request/convert
   completed (and queued a normal ast) just before the cancel; we don't
   want to clobber the sb_result for the normal ast with ECANCEL. */

/*
 * Master-side cancel: revert the lkb to its pre-request/convert state,
 * queue the -DLM_ECANCEL completion ast, and retry pending locks.
 * Always returns -DLM_ECANCEL (status value, not an error).
 */
static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	revert_lock(r, lkb);
	queue_cast(r, lkb, -DLM_ECANCEL);
	grant_pending_locks(r);
	return -DLM_ECANCEL;
}
| 1768 | |||
| 1769 | /* | ||
| 1770 | * Four stage 3 varieties: | ||
| 1771 | * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock() | ||
| 1772 | */ | ||
| 1773 | |||
/* add a new lkb to a possibly new rsb, called by requesting process */

/*
 * Stage-3 request: resolve the master for the rsb, then either process the
 * request locally or send it to the remote master.  A positive return from
 * set_master means the lookup is in flight and the request will be retried
 * later, so 0 is returned to the caller.
 */
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	int error;

	/* set_master: sets lkb nodeid from r */

	error = set_master(r, lkb);
	if (error < 0)
		goto out;
	if (error) {
		/* master lookup in progress; request continues asynchronously */
		error = 0;
		goto out;
	}

	if (is_remote(r))
		/* receive_request() calls do_request() on remote node */
		error = send_request(r, lkb);
	else
		error = do_request(r, lkb);
 out:
	return error;
}
| 1798 | |||
/* change some property of an existing lkb, e.g. mode */

/*
 * Stage-3 convert: route the conversion to the remote master (where
 * receive_convert() will run do_convert()) or handle it locally.
 */
static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	if (is_remote(r))
		return send_convert(r, lkb);

	return do_convert(r, lkb);
}
| 1813 | |||
/* remove an existing lkb from the granted queue */

/*
 * Stage-3 unlock: route the unlock to the remote master (where
 * receive_unlock() will run do_unlock()) or handle it locally.
 */
static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	if (is_remote(r))
		return send_unlock(r, lkb);

	return do_unlock(r, lkb);
}
| 1828 | |||
/* remove an existing lkb from the convert or wait queue */

/*
 * Stage-3 cancel: route the cancel to the remote master (where
 * receive_cancel() will run do_cancel()) or handle it locally.
 */
static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	if (is_remote(r))
		return send_cancel(r, lkb);

	return do_cancel(r, lkb);
}
| 1843 | |||
| 1844 | /* | ||
| 1845 | * Four stage 2 varieties: | ||
| 1846 | * request_lock(), convert_lock(), unlock_lock(), cancel_lock() | ||
| 1847 | */ | ||
| 1848 | |||
/*
 * Stage-2 request: validate the args, find or create the rsb for the
 * given name, attach the new lkb to it, and hand off to _request_lock()
 * under the rsb lock.  The caller's lksb gets the new lock id.
 */
static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
			int len, struct dlm_args *args)
{
	struct dlm_rsb *r;
	int error;

	error = validate_lock_args(ls, lkb, args);
	if (error)
		goto out;

	/* R_CREATE: make the rsb if it doesn't already exist */
	error = find_rsb(ls, name, len, R_CREATE, &r);
	if (error)
		goto out;

	lock_rsb(r);

	attach_lkb(r, lkb);
	lkb->lkb_lksb->sb_lkid = lkb->lkb_id;

	error = _request_lock(r, lkb);

	unlock_rsb(r);
	put_rsb(r);

 out:
	return error;
}
| 1876 | |||
/*
 * Stage-2 convert: the lkb already has a resource, so hold and lock it,
 * validate the convert args (under the rsb lock, since they inspect lkb
 * state), and hand off to _convert_lock().
 */
static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
			struct dlm_args *args)
{
	struct dlm_rsb *r;
	int error;

	r = lkb->lkb_resource;

	hold_rsb(r);
	lock_rsb(r);

	error = validate_lock_args(ls, lkb, args);
	if (error)
		goto out;

	error = _convert_lock(r, lkb);
 out:
	unlock_rsb(r);
	put_rsb(r);
	return error;
}
| 1898 | |||
/*
 * Stage-2 unlock: hold and lock the lkb's resource, validate the unlock
 * args against the lkb state, and hand off to _unlock_lock().
 */
static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
		       struct dlm_args *args)
{
	struct dlm_rsb *r;
	int error;

	r = lkb->lkb_resource;

	hold_rsb(r);
	lock_rsb(r);

	error = validate_unlock_args(lkb, args);
	if (error)
		goto out;

	error = _unlock_lock(r, lkb);
 out:
	unlock_rsb(r);
	put_rsb(r);
	return error;
}
| 1920 | |||
/*
 * Stage-2 cancel: mirrors unlock_lock() — same validation (cancel and
 * unlock share validate_unlock_args) but dispatches to _cancel_lock().
 */
static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
		       struct dlm_args *args)
{
	struct dlm_rsb *r;
	int error;

	r = lkb->lkb_resource;

	hold_rsb(r);
	lock_rsb(r);

	error = validate_unlock_args(lkb, args);
	if (error)
		goto out;

	error = _cancel_lock(r, lkb);
 out:
	unlock_rsb(r);
	put_rsb(r);
	return error;
}
| 1942 | |||
| 1943 | /* | ||
| 1944 | * Two stage 1 varieties: dlm_lock() and dlm_unlock() | ||
| 1945 | */ | ||
| 1946 | |||
/*
 * dlm_lock - acquire or convert a lock (public DLM entry point)
 *
 * A DLM_LKF_CONVERT request looks up the existing lkb by lksb->sb_lkid;
 * otherwise a new lkb is created.  -EINPROGRESS ("request queued") and
 * -EAGAIN ("try again later") are both reported to the caller as success
 * (0); the final result is delivered asynchronously via the ast.
 * Recovery is locked out for the duration of the call.
 */
int dlm_lock(dlm_lockspace_t *lockspace,
	     int mode,
	     struct dlm_lksb *lksb,
	     uint32_t flags,
	     void *name,
	     unsigned int namelen,
	     uint32_t parent_lkid,
	     void (*ast) (void *astarg),
	     void *astarg,
	     void (*bast) (void *astarg, int mode))
{
	struct dlm_ls *ls;
	struct dlm_lkb *lkb;
	struct dlm_args args;
	int error, convert = flags & DLM_LKF_CONVERT;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	lock_recovery(ls);

	if (convert)
		error = find_lkb(ls, lksb->sb_lkid, &lkb);
	else
		error = create_lkb(ls, &lkb);

	if (error)
		goto out;

	error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
			      astarg, bast, &args);
	if (error)
		goto out_put;

	if (convert)
		error = convert_lock(ls, lkb, &args);
	else
		error = request_lock(ls, lkb, name, namelen, &args);

	if (error == -EINPROGRESS)
		error = 0;
 out_put:
	/* drop the find_lkb ref on convert; drop the create_lkb ref when a
	   new request failed before the lock took hold */
	if (convert || error)
		__put_lkb(ls, lkb);
	if (error == -EAGAIN)
		error = 0;
 out:
	unlock_recovery(ls);
	dlm_put_lockspace(ls);
	return error;
}
| 1999 | |||
/*
 * dlm_unlock - release or cancel a lock (public DLM entry point)
 *
 * DLM_LKF_CANCEL cancels an in-progress request/convert; otherwise the
 * granted lock identified by lkid is released.  The conventional status
 * values -DLM_EUNLOCK and -DLM_ECANCEL are mapped to 0 for the caller;
 * the completion ast carries the detailed result.  Recovery is locked
 * out for the duration of the call.
 */
int dlm_unlock(dlm_lockspace_t *lockspace,
	       uint32_t lkid,
	       uint32_t flags,
	       struct dlm_lksb *lksb,
	       void *astarg)
{
	struct dlm_ls *ls;
	struct dlm_lkb *lkb;
	struct dlm_args args;
	int error;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	lock_recovery(ls);

	error = find_lkb(ls, lkid, &lkb);
	if (error)
		goto out;

	error = set_unlock_args(flags, astarg, &args);
	if (error)
		goto out_put;

	if (flags & DLM_LKF_CANCEL)
		error = cancel_lock(ls, lkb, &args);
	else
		error = unlock_lock(ls, lkb, &args);

	if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL)
		error = 0;
 out_put:
	dlm_put_lkb(lkb);
 out:
	unlock_recovery(ls);
	dlm_put_lockspace(ls);
	return error;
}
| 2039 | |||
| 2040 | /* | ||
| 2041 | * send/receive routines for remote operations and replies | ||
| 2042 | * | ||
| 2043 | * send_args | ||
| 2044 | * send_common | ||
| 2045 | * send_request receive_request | ||
| 2046 | * send_convert receive_convert | ||
| 2047 | * send_unlock receive_unlock | ||
| 2048 | * send_cancel receive_cancel | ||
| 2049 | * send_grant receive_grant | ||
| 2050 | * send_bast receive_bast | ||
| 2051 | * send_lookup receive_lookup | ||
| 2052 | * send_remove receive_remove | ||
| 2053 | * | ||
| 2054 | * send_common_reply | ||
| 2055 | * receive_request_reply send_request_reply | ||
| 2056 | * receive_convert_reply send_convert_reply | ||
| 2057 | * receive_unlock_reply send_unlock_reply | ||
| 2058 | * receive_cancel_reply send_cancel_reply | ||
| 2059 | * receive_lookup_reply send_lookup_reply | ||
| 2060 | */ | ||
| 2061 | |||
/*
 * Allocate and initialize an outgoing message buffer of the right size for
 * the message type: name-carrying messages append the resource name, and
 * lvb-carrying messages append the lockspace's lvb length.  On success
 * returns 0 and hands back the message (*ms_ret) and the lowcomms handle
 * (*mh_ret) needed to commit it; returns -ENOBUFS when no buffer is
 * available.
 */
static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
			  int to_nodeid, int mstype,
			  struct dlm_message **ms_ret,
			  struct dlm_mhandle **mh_ret)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	char *mb;
	int mb_len = sizeof(struct dlm_message);

	switch (mstype) {
	case DLM_MSG_REQUEST:
	case DLM_MSG_LOOKUP:
	case DLM_MSG_REMOVE:
		/* these carry the resource name in m_extra */
		mb_len += r->res_length;
		break;
	case DLM_MSG_CONVERT:
	case DLM_MSG_UNLOCK:
	case DLM_MSG_REQUEST_REPLY:
	case DLM_MSG_CONVERT_REPLY:
	case DLM_MSG_GRANT:
		/* these may carry the lvb in m_extra */
		if (lkb && lkb->lkb_lvbptr)
			mb_len += r->res_ls->ls_lvblen;
		break;
	}

	/* get_buffer gives us a message handle (mh) that we need to
	   pass into lowcomms_commit and a message buffer (mb) that we
	   write our data into */

	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
	if (!mh)
		return -ENOBUFS;

	memset(mb, 0, mb_len);

	ms = (struct dlm_message *) mb;

	ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
	ms->m_header.h_lockspace = r->res_ls->ls_global_id;
	ms->m_header.h_nodeid = dlm_our_nodeid();
	ms->m_header.h_length = mb_len;
	ms->m_header.h_cmd = DLM_MSG;

	ms->m_type = mstype;

	*mh_ret = mh;
	*ms_ret = ms;
	return 0;
}
| 2112 | |||
/* further lowcomms enhancements or alternate implementations may make
   the return value from this function useful at some point */

/*
 * Convert the message to wire byte order and commit the buffer to
 * lowcomms for transmission.  Currently always returns 0 (see above).
 */
static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
{
	dlm_message_out(ms);
	dlm_lowcomms_commit_buffer(mh);
	return 0;
}
| 2122 | |||
/*
 * Copy the lkb/rsb state that every outgoing message carries into ms.
 * The variable-length m_extra holds the resource name for request/lookup
 * messages, or the lvb for everything else that has one.
 */
static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
		      struct dlm_message *ms)
{
	ms->m_nodeid   = lkb->lkb_nodeid;
	ms->m_pid      = lkb->lkb_ownpid;
	ms->m_lkid     = lkb->lkb_id;
	ms->m_remid    = lkb->lkb_remid;
	ms->m_exflags  = lkb->lkb_exflags;
	ms->m_sbflags  = lkb->lkb_sbflags;
	ms->m_flags    = lkb->lkb_flags;
	ms->m_lvbseq   = lkb->lkb_lvbseq;
	ms->m_status   = lkb->lkb_status;
	ms->m_grmode   = lkb->lkb_grmode;
	ms->m_rqmode   = lkb->lkb_rqmode;
	ms->m_hash     = r->res_hash;

	/* m_result and m_bastmode are set from function args,
	   not from lkb fields */

	/* tell the remote end which ast callbacks this lock has */
	if (lkb->lkb_bastaddr)
		ms->m_asts |= AST_BAST;
	if (lkb->lkb_astaddr)
		ms->m_asts |= AST_COMP;

	if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
		memcpy(ms->m_extra, r->res_name, r->res_length);

	else if (lkb->lkb_lvbptr)
		memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);

}
| 2154 | |||
/*
 * Common path for sending a request/convert/unlock/cancel to the master.
 * The lkb is put on the waiters list *before* sending so a fast reply
 * cannot race past it; on any send failure it is taken off again.
 */
static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int to_nodeid, error;

	add_to_waiters(lkb, mstype);

	to_nodeid = r->res_nodeid;

	error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
	if (error)
		goto fail;

	send_args(r, lkb, ms);

	error = send_message(mh, ms);
	if (error)
		goto fail;
	return 0;

 fail:
	remove_from_waiters(lkb);
	return error;
}
| 2180 | |||
/* Send a new lock request to the master node. */
static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	return send_common(r, lkb, DLM_MSG_REQUEST);
}
| 2185 | |||
/*
 * Send a conversion request to the master.  Down-conversions always
 * succeed on the master and get no reply, so a stub reply is synthesized
 * locally to complete the operation immediately.
 */
static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	int error;

	error = send_common(r, lkb, DLM_MSG_CONVERT);

	/* down conversions go without a reply from the master */
	if (!error && down_conversion(lkb)) {
		remove_from_waiters(lkb);
		r->res_ls->ls_stub_ms.m_result = 0;
		r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
		__receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
	}

	return error;
}
| 2202 | |||
/* FIXME: if this lkb is the only lock we hold on the rsb, then set
   MASTER_UNCERTAIN to force the next request on the rsb to confirm
   that the master is still correct. */

/* Send an unlock request to the master node. */
static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	return send_common(r, lkb, DLM_MSG_UNLOCK);
}
| 2211 | |||
/* Send a cancel request to the master node. */
static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	return send_common(r, lkb, DLM_MSG_CANCEL);
}
| 2216 | |||
/*
 * Master -> process-copy node: notify the lock holder that a queued
 * request/convert has now been granted (m_result 0).
 */
static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int to_nodeid, error;

	to_nodeid = lkb->lkb_nodeid;

	error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh);
	if (error)
		goto out;

	send_args(r, lkb, ms);

	ms->m_result = 0;

	error = send_message(mh, ms);
 out:
	return error;
}
| 2237 | |||
/*
 * Master -> process-copy node: deliver a blocking ast telling the holder
 * its lock is blocking a request for the given mode.  No lkb is passed to
 * create_message since a bast never carries an lvb.
 */
static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int to_nodeid, error;

	to_nodeid = lkb->lkb_nodeid;

	error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh);
	if (error)
		goto out;

	send_args(r, lkb, ms);

	ms->m_bastmode = mode;

	error = send_message(mh, ms);
 out:
	return error;
}
| 2258 | |||
/*
 * Ask the directory node for this rsb who the master is.  Like
 * send_common(), the lkb goes on the waiters list before sending and is
 * removed again on failure.
 */
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int to_nodeid, error;

	add_to_waiters(lkb, DLM_MSG_LOOKUP);

	to_nodeid = dlm_dir_nodeid(r);

	error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
	if (error)
		goto fail;

	send_args(r, lkb, ms);

	error = send_message(mh, ms);
	if (error)
		goto fail;
	return 0;

 fail:
	remove_from_waiters(lkb);
	return error;
}
| 2284 | |||
/*
 * Tell the directory node to remove its entry for this rsb.  The message
 * carries only the resource name and hash, no lkb state.
 */
static int send_remove(struct dlm_rsb *r)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int to_nodeid, error;

	to_nodeid = dlm_dir_nodeid(r);

	error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh);
	if (error)
		goto out;

	memcpy(ms->m_extra, r->res_name, r->res_length);
	ms->m_hash = r->res_hash;

	error = send_message(mh, ms);
 out:
	return error;
}
| 2304 | |||
/*
 * Common path for sending any reply back to the node that sent us an
 * operation; rv is the operation's result, placed in m_result.
 */
static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
			     int mstype, int rv)
{
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int to_nodeid, error;

	to_nodeid = lkb->lkb_nodeid;

	error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
	if (error)
		goto out;

	send_args(r, lkb, ms);

	ms->m_result = rv;

	error = send_message(mh, ms);
 out:
	return error;
}
| 2326 | |||
/* Thin wrappers: one reply sender per operation type. */

static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
{
	return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv);
}

static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
{
	return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv);
}

static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
{
	return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv);
}

static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
{
	return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv);
}
| 2346 | |||
/*
 * Reply to a directory lookup: echo the requester's lkid and report the
 * master nodeid (ret_nodeid) plus the lookup result (rv).  Uses the
 * lockspace's stub rsb since no real resource is involved on this node.
 */
static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
			     int ret_nodeid, int rv)
{
	struct dlm_rsb *r = &ls->ls_stub_rsb;
	struct dlm_message *ms;
	struct dlm_mhandle *mh;
	int error, nodeid = ms_in->m_header.h_nodeid;

	error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
	if (error)
		goto out;

	ms->m_lkid = ms_in->m_lkid;
	ms->m_result = rv;
	ms->m_nodeid = ret_nodeid;

	error = send_message(mh, ms);
 out:
	return error;
}
| 2367 | |||
/* which args we save from a received message depends heavily on the type
   of message, unlike the send side where we can safely send everything about
   the lkb for any type of message */

/*
 * Adopt the sender's external flags and the low 16 bits of the internal
 * flags; the high 16 bits are node-local and preserved.
 */
static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
{
	lkb->lkb_exflags = ms->m_exflags;
	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
			 (ms->m_flags & 0x0000FFFF);
}
| 2378 | |||
/*
 * Reply variant of receive_flags(): takes the status-block flags instead
 * of the external flags, with the same low/high split of lkb_flags.
 */
static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
{
	lkb->lkb_sbflags = ms->m_sbflags;
	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
			 (ms->m_flags & 0x0000FFFF);
}
| 2385 | |||
/* Length of the variable-length m_extra data in a received message. */
static int receive_extralen(struct dlm_message *ms)
{
	return (ms->m_header.h_length - sizeof(struct dlm_message));
}
| 2390 | |||
/*
 * If the lock uses a value block, copy the lvb carried in m_extra into the
 * lkb, allocating the lvb buffer on first use.  Returns -ENOMEM if the
 * allocation fails, else 0.
 */
static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
		       struct dlm_message *ms)
{
	int len;

	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
		if (!lkb->lkb_lvbptr)
			lkb->lkb_lvbptr = allocate_lvb(ls);
		if (!lkb->lkb_lvbptr)
			return -ENOMEM;
		len = receive_extralen(ms);
		memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
	}
	return 0;
}
| 2406 | |||
/*
 * Initialize a freshly-created master-copy lkb from an incoming request
 * message.  The ast/bast "addresses" are just non-NULL markers recording
 * whether the remote holder registered those callbacks.
 */
static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
				struct dlm_message *ms)
{
	lkb->lkb_nodeid = ms->m_header.h_nodeid;
	lkb->lkb_ownpid = ms->m_pid;
	lkb->lkb_remid = ms->m_lkid;
	lkb->lkb_grmode = DLM_LOCK_IV;
	lkb->lkb_rqmode = ms->m_rqmode;
	lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST);
	lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP);

	DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););

	if (receive_lvb(ls, lkb, ms))
		return -ENOMEM;

	return 0;
}
| 2425 | |||
/*
 * Validate and apply an incoming convert to a master-copy lkb: the sender
 * must be the lock's owner node, the lkb must be a master copy, and the
 * lock must currently be granted.  Returns -EINVAL, -EBUSY, -ENOMEM, or 0.
 */
static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
				struct dlm_message *ms)
{
	if (lkb->lkb_nodeid != ms->m_header.h_nodeid) {
		log_error(ls, "convert_args nodeid %d %d lkid %x %x",
			  lkb->lkb_nodeid, ms->m_header.h_nodeid,
			  lkb->lkb_id, lkb->lkb_remid);
		return -EINVAL;
	}

	if (!is_master_copy(lkb))
		return -EINVAL;

	if (lkb->lkb_status != DLM_LKSTS_GRANTED)
		return -EBUSY;

	if (receive_lvb(ls, lkb, ms))
		return -ENOMEM;

	lkb->lkb_rqmode = ms->m_rqmode;
	lkb->lkb_lvbseq = ms->m_lvbseq;

	return 0;
}
| 2450 | |||
/*
 * Validate an incoming unlock against a master-copy lkb and absorb the
 * lvb it may carry.  Returns -EINVAL, -ENOMEM, or 0.
 */
static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
			       struct dlm_message *ms)
{
	if (!is_master_copy(lkb))
		return -EINVAL;
	if (receive_lvb(ls, lkb, ms))
		return -ENOMEM;
	return 0;
}
| 2460 | |||
/* We fill in the stub-lkb fields with the info that send_xxxx_reply()
   uses to send a reply and that the remote end uses to process the reply. */

/*
 * Prepare the lockspace's stub lkb so an error reply can be sent even when
 * the real lkb could not be found or created.
 */
static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb = &ls->ls_stub_lkb;
	lkb->lkb_nodeid = ms->m_header.h_nodeid;
	lkb->lkb_remid = ms->m_lkid;
}
| 2470 | |||
/*
 * Handle an incoming lock request on the master node: create a master-copy
 * lkb, find/attach the rsb, run do_request(), and reply with the result.
 * Any early failure is reported back via the stub rsb/lkb so the requester
 * still gets a reply.
 */
static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error, namelen;

	error = create_lkb(ls, &lkb);
	if (error)
		goto fail;

	receive_flags(lkb, ms);
	lkb->lkb_flags |= DLM_IFL_MSTCPY;
	error = receive_request_args(ls, lkb, ms);
	if (error) {
		__put_lkb(ls, lkb);
		goto fail;
	}

	namelen = receive_extralen(ms);

	/* R_MASTER: we are expected to be the master for this name */
	error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r);
	if (error) {
		__put_lkb(ls, lkb);
		goto fail;
	}

	lock_rsb(r);

	attach_lkb(r, lkb);
	error = do_request(r, lkb);
	send_request_reply(r, lkb, error);

	unlock_rsb(r);
	put_rsb(r);

	/* -EINPROGRESS means the lkb was queued and lives on; any real
	   error drops the reference created above */
	if (error == -EINPROGRESS)
		error = 0;
	if (error)
		dlm_put_lkb(lkb);
	return;

 fail:
	setup_stub_lkb(ls, ms);
	send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
}
| 2516 | |||
/*
 * Handle an incoming conversion on the master node.  Down-conversions get
 * no reply (the sender completes them locally with a stub reply, see
 * send_convert()); everything else is answered with the do_convert result.
 */
static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error, reply = 1;

	error = find_lkb(ls, ms->m_remid, &lkb);
	if (error)
		goto fail;

	r = lkb->lkb_resource;

	hold_rsb(r);
	lock_rsb(r);

	receive_flags(lkb, ms);
	error = receive_convert_args(ls, lkb, ms);
	if (error)
		goto out;
	reply = !down_conversion(lkb);

	error = do_convert(r, lkb);
 out:
	if (reply)
		send_convert_reply(r, lkb, error);

	unlock_rsb(r);
	put_rsb(r);
	dlm_put_lkb(lkb);
	return;

 fail:
	setup_stub_lkb(ls, ms);
	send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
}
| 2552 | |||
/*
 * Handle an incoming unlock on the master node and reply with the result.
 * If the lkb cannot be found, the error is reported via the stub rsb/lkb.
 */
static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error;

	error = find_lkb(ls, ms->m_remid, &lkb);
	if (error)
		goto fail;

	r = lkb->lkb_resource;

	hold_rsb(r);
	lock_rsb(r);

	receive_flags(lkb, ms);
	error = receive_unlock_args(ls, lkb, ms);
	if (error)
		goto out;

	error = do_unlock(r, lkb);
 out:
	send_unlock_reply(r, lkb, error);

	unlock_rsb(r);
	put_rsb(r);
	dlm_put_lkb(lkb);
	return;

 fail:
	setup_stub_lkb(ls, ms);
	send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
}
| 2586 | |||
/*
 * Handle an incoming cancel on the master node and reply with the result.
 * Unlike unlock/convert there are no extra args to validate beyond flags.
 */
static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error;

	error = find_lkb(ls, ms->m_remid, &lkb);
	if (error)
		goto fail;

	receive_flags(lkb, ms);

	r = lkb->lkb_resource;

	hold_rsb(r);
	lock_rsb(r);

	error = do_cancel(r, lkb);
	send_cancel_reply(r, lkb, error);

	unlock_rsb(r);
	put_rsb(r);
	dlm_put_lkb(lkb);
	return;

 fail:
	setup_stub_lkb(ls, ms);
	send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
}
| 2616 | |||
| 2617 | static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2618 | { | ||
| 2619 | struct dlm_lkb *lkb; | ||
| 2620 | struct dlm_rsb *r; | ||
| 2621 | int error; | ||
| 2622 | |||
| 2623 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
| 2624 | if (error) { | ||
| 2625 | log_error(ls, "receive_grant no lkb"); | ||
| 2626 | return; | ||
| 2627 | } | ||
| 2628 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
| 2629 | |||
| 2630 | r = lkb->lkb_resource; | ||
| 2631 | |||
| 2632 | hold_rsb(r); | ||
| 2633 | lock_rsb(r); | ||
| 2634 | |||
| 2635 | receive_flags_reply(lkb, ms); | ||
| 2636 | grant_lock_pc(r, lkb, ms); | ||
| 2637 | queue_cast(r, lkb, 0); | ||
| 2638 | |||
| 2639 | unlock_rsb(r); | ||
| 2640 | put_rsb(r); | ||
| 2641 | dlm_put_lkb(lkb); | ||
| 2642 | } | ||
| 2643 | |||
| 2644 | static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2645 | { | ||
| 2646 | struct dlm_lkb *lkb; | ||
| 2647 | struct dlm_rsb *r; | ||
| 2648 | int error; | ||
| 2649 | |||
| 2650 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
| 2651 | if (error) { | ||
| 2652 | log_error(ls, "receive_bast no lkb"); | ||
| 2653 | return; | ||
| 2654 | } | ||
| 2655 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
| 2656 | |||
| 2657 | r = lkb->lkb_resource; | ||
| 2658 | |||
| 2659 | hold_rsb(r); | ||
| 2660 | lock_rsb(r); | ||
| 2661 | |||
| 2662 | queue_bast(r, lkb, ms->m_bastmode); | ||
| 2663 | |||
| 2664 | unlock_rsb(r); | ||
| 2665 | put_rsb(r); | ||
| 2666 | dlm_put_lkb(lkb); | ||
| 2667 | } | ||
| 2668 | |||
| 2669 | static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2670 | { | ||
| 2671 | int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid; | ||
| 2672 | |||
| 2673 | from_nodeid = ms->m_header.h_nodeid; | ||
| 2674 | our_nodeid = dlm_our_nodeid(); | ||
| 2675 | |||
| 2676 | len = receive_extralen(ms); | ||
| 2677 | |||
| 2678 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); | ||
| 2679 | if (dir_nodeid != our_nodeid) { | ||
| 2680 | log_error(ls, "lookup dir_nodeid %d from %d", | ||
| 2681 | dir_nodeid, from_nodeid); | ||
| 2682 | error = -EINVAL; | ||
| 2683 | ret_nodeid = -1; | ||
| 2684 | goto out; | ||
| 2685 | } | ||
| 2686 | |||
| 2687 | error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid); | ||
| 2688 | |||
| 2689 | /* Optimization: we're master so treat lookup as a request */ | ||
| 2690 | if (!error && ret_nodeid == our_nodeid) { | ||
| 2691 | receive_request(ls, ms); | ||
| 2692 | return; | ||
| 2693 | } | ||
| 2694 | out: | ||
| 2695 | send_lookup_reply(ls, ms, ret_nodeid, error); | ||
| 2696 | } | ||
| 2697 | |||
| 2698 | static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2699 | { | ||
| 2700 | int len, dir_nodeid, from_nodeid; | ||
| 2701 | |||
| 2702 | from_nodeid = ms->m_header.h_nodeid; | ||
| 2703 | |||
| 2704 | len = receive_extralen(ms); | ||
| 2705 | |||
| 2706 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); | ||
| 2707 | if (dir_nodeid != dlm_our_nodeid()) { | ||
| 2708 | log_error(ls, "remove dir entry dir_nodeid %d from %d", | ||
| 2709 | dir_nodeid, from_nodeid); | ||
| 2710 | return; | ||
| 2711 | } | ||
| 2712 | |||
| 2713 | dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); | ||
| 2714 | } | ||
| 2715 | |||
/* Process the master's reply to a request we sent — or to a lookup
   that the dir node handled as a request (see the optimization in
   receive_lookup). */

static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error, mstype;

	error = find_lkb(ls, ms->m_remid, &lkb);
	if (error) {
		log_error(ls, "receive_request_reply no lkb");
		return;
	}
	DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););

	/* remember what we were waiting for before clearing wait state */
	mstype = lkb->lkb_wait_type;
	error = remove_from_waiters(lkb);
	if (error) {
		log_error(ls, "receive_request_reply not on waiters");
		goto out;
	}

	/* this is the value returned from do_request() on the master */
	error = ms->m_result;

	r = lkb->lkb_resource;
	hold_rsb(r);
	lock_rsb(r);

	/* Optimization: the dir node was also the master, so it took our
	   lookup as a request and sent request reply instead of lookup reply */
	if (mstype == DLM_MSG_LOOKUP) {
		r->res_nodeid = ms->m_header.h_nodeid;
		lkb->lkb_nodeid = r->res_nodeid;
	}

	switch (error) {
	case -EAGAIN:
		/* request would block (be queued) on remote master;
		   the unhold undoes the original ref from create_lkb()
		   so it leads to the lkb being freed */
		queue_cast(r, lkb, -EAGAIN);
		confirm_master(r, -EAGAIN);
		unhold_lkb(lkb);
		break;

	case -EINPROGRESS:
	case 0:
		/* request was queued or granted on remote master */
		receive_flags_reply(lkb, ms);
		/* save the master's lkid for future messages to it */
		lkb->lkb_remid = ms->m_lkid;
		if (error)
			add_lkb(r, lkb, DLM_LKSTS_WAITING);
		else {
			grant_lock_pc(r, lkb, ms);
			queue_cast(r, lkb, 0);
		}
		confirm_master(r, error);
		break;

	case -EBADR:
	case -ENOTBLK:
		/* find_rsb failed to find rsb or rsb wasn't master */
		/* master is unknown again; NOTE(review): presumably
		   _request_lock() restarts from the lookup stage — confirm */
		r->res_nodeid = -1;
		lkb->lkb_nodeid = -1;
		_request_lock(r, lkb);
		break;

	default:
		log_error(ls, "receive_request_reply error %d", error);
	}

	unlock_rsb(r);
	put_rsb(r);
 out:
	dlm_put_lkb(lkb);
}
| 2791 | |||
/* Apply the master's convert result (ms->m_result) to our process
   copy.  Caller holds the rsb lock. */

static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
				    struct dlm_message *ms)
{
	int error = ms->m_result;

	/* this is the value returned from do_convert() on the master */

	switch (error) {
	case -EAGAIN:
		/* convert would block (be queued) on remote master */
		queue_cast(r, lkb, -EAGAIN);
		break;

	case -EINPROGRESS:
		/* convert was queued on remote master */
		del_lkb(r, lkb);
		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
		break;

	case 0:
		/* convert was granted on remote master */
		receive_flags_reply(lkb, ms);
		grant_lock_pc(r, lkb, ms);
		queue_cast(r, lkb, 0);
		break;

	default:
		log_error(r->res_ls, "receive_convert_reply error %d", error);
	}
}
| 2822 | |||
| 2823 | static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
| 2824 | { | ||
| 2825 | struct dlm_rsb *r = lkb->lkb_resource; | ||
| 2826 | |||
| 2827 | hold_rsb(r); | ||
| 2828 | lock_rsb(r); | ||
| 2829 | |||
| 2830 | __receive_convert_reply(r, lkb, ms); | ||
| 2831 | |||
| 2832 | unlock_rsb(r); | ||
| 2833 | put_rsb(r); | ||
| 2834 | } | ||
| 2835 | |||
| 2836 | static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2837 | { | ||
| 2838 | struct dlm_lkb *lkb; | ||
| 2839 | int error; | ||
| 2840 | |||
| 2841 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
| 2842 | if (error) { | ||
| 2843 | log_error(ls, "receive_convert_reply no lkb"); | ||
| 2844 | return; | ||
| 2845 | } | ||
| 2846 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
| 2847 | |||
| 2848 | error = remove_from_waiters(lkb); | ||
| 2849 | if (error) { | ||
| 2850 | log_error(ls, "receive_convert_reply not on waiters"); | ||
| 2851 | goto out; | ||
| 2852 | } | ||
| 2853 | |||
| 2854 | _receive_convert_reply(lkb, ms); | ||
| 2855 | out: | ||
| 2856 | dlm_put_lkb(lkb); | ||
| 2857 | } | ||
| 2858 | |||
| 2859 | static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
| 2860 | { | ||
| 2861 | struct dlm_rsb *r = lkb->lkb_resource; | ||
| 2862 | int error = ms->m_result; | ||
| 2863 | |||
| 2864 | hold_rsb(r); | ||
| 2865 | lock_rsb(r); | ||
| 2866 | |||
| 2867 | /* this is the value returned from do_unlock() on the master */ | ||
| 2868 | |||
| 2869 | switch (error) { | ||
| 2870 | case -DLM_EUNLOCK: | ||
| 2871 | receive_flags_reply(lkb, ms); | ||
| 2872 | remove_lock_pc(r, lkb); | ||
| 2873 | queue_cast(r, lkb, -DLM_EUNLOCK); | ||
| 2874 | break; | ||
| 2875 | default: | ||
| 2876 | log_error(r->res_ls, "receive_unlock_reply error %d", error); | ||
| 2877 | } | ||
| 2878 | |||
| 2879 | unlock_rsb(r); | ||
| 2880 | put_rsb(r); | ||
| 2881 | } | ||
| 2882 | |||
| 2883 | static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2884 | { | ||
| 2885 | struct dlm_lkb *lkb; | ||
| 2886 | int error; | ||
| 2887 | |||
| 2888 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
| 2889 | if (error) { | ||
| 2890 | log_error(ls, "receive_unlock_reply no lkb"); | ||
| 2891 | return; | ||
| 2892 | } | ||
| 2893 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
| 2894 | |||
| 2895 | error = remove_from_waiters(lkb); | ||
| 2896 | if (error) { | ||
| 2897 | log_error(ls, "receive_unlock_reply not on waiters"); | ||
| 2898 | goto out; | ||
| 2899 | } | ||
| 2900 | |||
| 2901 | _receive_unlock_reply(lkb, ms); | ||
| 2902 | out: | ||
| 2903 | dlm_put_lkb(lkb); | ||
| 2904 | } | ||
| 2905 | |||
| 2906 | static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
| 2907 | { | ||
| 2908 | struct dlm_rsb *r = lkb->lkb_resource; | ||
| 2909 | int error = ms->m_result; | ||
| 2910 | |||
| 2911 | hold_rsb(r); | ||
| 2912 | lock_rsb(r); | ||
| 2913 | |||
| 2914 | /* this is the value returned from do_cancel() on the master */ | ||
| 2915 | |||
| 2916 | switch (error) { | ||
| 2917 | case -DLM_ECANCEL: | ||
| 2918 | receive_flags_reply(lkb, ms); | ||
| 2919 | revert_lock_pc(r, lkb); | ||
| 2920 | queue_cast(r, lkb, -DLM_ECANCEL); | ||
| 2921 | break; | ||
| 2922 | default: | ||
| 2923 | log_error(r->res_ls, "receive_cancel_reply error %d", error); | ||
| 2924 | } | ||
| 2925 | |||
| 2926 | unlock_rsb(r); | ||
| 2927 | put_rsb(r); | ||
| 2928 | } | ||
| 2929 | |||
| 2930 | static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 2931 | { | ||
| 2932 | struct dlm_lkb *lkb; | ||
| 2933 | int error; | ||
| 2934 | |||
| 2935 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
| 2936 | if (error) { | ||
| 2937 | log_error(ls, "receive_cancel_reply no lkb"); | ||
| 2938 | return; | ||
| 2939 | } | ||
| 2940 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
| 2941 | |||
| 2942 | error = remove_from_waiters(lkb); | ||
| 2943 | if (error) { | ||
| 2944 | log_error(ls, "receive_cancel_reply not on waiters"); | ||
| 2945 | goto out; | ||
| 2946 | } | ||
| 2947 | |||
| 2948 | _receive_cancel_reply(lkb, ms); | ||
| 2949 | out: | ||
| 2950 | dlm_put_lkb(lkb); | ||
| 2951 | } | ||
| 2952 | |||
/* Handle the dir node's reply to our DLM_MSG_LOOKUP: record the master
   nodeid it returned and restart the request on the lkb that was
   waiting for the lookup. */

static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error, ret_nodeid;

	/* unlike the other replies, a lookup reply references our lkb
	   via m_lkid rather than m_remid */
	error = find_lkb(ls, ms->m_lkid, &lkb);
	if (error) {
		log_error(ls, "receive_lookup_reply no lkb");
		return;
	}

	error = remove_from_waiters(lkb);
	if (error) {
		log_error(ls, "receive_lookup_reply not on waiters");
		goto out;
	}

	/* this is the value returned by dlm_dir_lookup on dir node
	   FIXME: will a non-zero error ever be returned? */
	error = ms->m_result;

	r = lkb->lkb_resource;
	hold_rsb(r);
	lock_rsb(r);

	ret_nodeid = ms->m_nodeid;
	if (ret_nodeid == dlm_our_nodeid()) {
		/* we are the master (res_nodeid 0 means local) */
		r->res_nodeid = 0;
		ret_nodeid = 0;
		r->res_first_lkid = 0;
	} else {
		/* set_master() will copy res_nodeid to lkb_nodeid */
		r->res_nodeid = ret_nodeid;
	}

	_request_lock(r, lkb);

	/* other lkbs may have queued up behind this lookup */
	if (!ret_nodeid)
		process_lookup_list(r);

	unlock_rsb(r);
	put_rsb(r);
 out:
	dlm_put_lkb(lkb);
}
| 2999 | |||
/* Dispatch one lock message received from 'nodeid'.  'recovery' is
   nonzero when dlm_recoverd calls this to drain the saved requestqueue;
   otherwise a fresh message is being delivered off the wire (and is
   first passed through dlm_message_in() — NOTE(review): presumably the
   endian conversion, skipped for requeued messages which were already
   converted on first receipt).  Returns -EINVAL only when no matching
   lockspace is found; otherwise 0, even when the message was deferred
   to the requestqueue. */

int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
{
	struct dlm_message *ms = (struct dlm_message *) hd;
	struct dlm_ls *ls;
	int error;

	if (!recovery)
		dlm_message_in(ms);

	ls = dlm_find_lockspace_global(hd->h_lockspace);
	if (!ls) {
		log_print("drop message %d from %d for unknown lockspace %d",
			  ms->m_type, nodeid, hd->h_lockspace);
		return -EINVAL;
	}

	/* recovery may have just ended leaving a bunch of backed-up requests
	   in the requestqueue; wait while dlm_recoverd clears them */

	if (!recovery)
		dlm_wait_requestqueue(ls);

	/* recovery may have just started while there were a bunch of
	   in-flight requests -- save them in requestqueue to be processed
	   after recovery.  we can't let dlm_recvd block on the recovery
	   lock.  if dlm_recoverd is calling this function to clear the
	   requestqueue, it needs to be interrupted (-EINTR) if another
	   recovery operation is starting. */

	while (1) {
		if (dlm_locking_stopped(ls)) {
			if (!recovery)
				dlm_add_requestqueue(ls, nodeid, hd);
			error = -EINTR;
			goto out;
		}

		if (lock_recovery_try(ls))
			break;
		schedule();
	}

	switch (ms->m_type) {

	/* messages sent to a master node */

	case DLM_MSG_REQUEST:
		receive_request(ls, ms);
		break;

	case DLM_MSG_CONVERT:
		receive_convert(ls, ms);
		break;

	case DLM_MSG_UNLOCK:
		receive_unlock(ls, ms);
		break;

	case DLM_MSG_CANCEL:
		receive_cancel(ls, ms);
		break;

	/* messages sent from a master node (replies to above) */

	case DLM_MSG_REQUEST_REPLY:
		receive_request_reply(ls, ms);
		break;

	case DLM_MSG_CONVERT_REPLY:
		receive_convert_reply(ls, ms);
		break;

	case DLM_MSG_UNLOCK_REPLY:
		receive_unlock_reply(ls, ms);
		break;

	case DLM_MSG_CANCEL_REPLY:
		receive_cancel_reply(ls, ms);
		break;

	/* messages sent from a master node (only two types of async msg) */

	case DLM_MSG_GRANT:
		receive_grant(ls, ms);
		break;

	case DLM_MSG_BAST:
		receive_bast(ls, ms);
		break;

	/* messages sent to a dir node */

	case DLM_MSG_LOOKUP:
		receive_lookup(ls, ms);
		break;

	case DLM_MSG_REMOVE:
		receive_remove(ls, ms);
		break;

	/* messages sent from a dir node (remove has no reply) */

	case DLM_MSG_LOOKUP_REPLY:
		receive_lookup_reply(ls, ms);
		break;

	default:
		log_error(ls, "unknown message type %d", ms->m_type);
	}

	unlock_recovery(ls);
 out:
	dlm_put_lockspace(ls);
	/* let the ast delivery thread run on any newly queued asts */
	dlm_astd_wake();
	return 0;
}
| 3116 | |||
| 3117 | |||
| 3118 | /* | ||
| 3119 | * Recovery related | ||
| 3120 | */ | ||
| 3121 | |||
/* Fake a conversion reply for an lkb that was waiting on a convert
   when its master went away.  Mid-conversions (between PR and CW) get
   a stub -EINPROGRESS reply; up-conversions are flagged for resend. */

static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
	if (middle_conversion(lkb)) {
		/* hold/unhold brackets the refs the stub reply drops */
		hold_lkb(lkb);
		ls->ls_stub_ms.m_result = -EINPROGRESS;
		_remove_from_waiters(lkb);
		_receive_convert_reply(lkb, &ls->ls_stub_ms);

		/* Same special case as in receive_rcom_lock_args() */
		lkb->lkb_grmode = DLM_LOCK_IV;
		rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT);
		unhold_lkb(lkb);

	} else if (lkb->lkb_rqmode >= lkb->lkb_grmode) {
		lkb->lkb_flags |= DLM_IFL_RESEND;
	}

	/* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down
	   conversions are async; there's no reply from the remote master */
}
| 3142 | |||
| 3143 | /* A waiting lkb needs recovery if the master node has failed, or | ||
| 3144 | the master node is changing (only when no directory is used) */ | ||
| 3145 | |||
| 3146 | static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
| 3147 | { | ||
| 3148 | if (dlm_is_removed(ls, lkb->lkb_nodeid)) | ||
| 3149 | return 1; | ||
| 3150 | |||
| 3151 | if (!dlm_no_directory(ls)) | ||
| 3152 | return 0; | ||
| 3153 | |||
| 3154 | if (dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid) | ||
| 3155 | return 1; | ||
| 3156 | |||
| 3157 | return 0; | ||
| 3158 | } | ||
| 3159 | |||
/* Recovery for locks that are waiting for replies from nodes that are now
   gone.  We can just complete unlocks and cancels by faking a reply from the
   dead node.  Requests and up-conversions we flag to be resent after
   recovery.  Down-conversions can just be completed with a fake reply like
   unlocks.  Conversions between PR and CW need special attention. */

void dlm_recover_waiters_pre(struct dlm_ls *ls)
{
	struct dlm_lkb *lkb, *safe;

	mutex_lock(&ls->ls_waiters_mutex);

	list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
		log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
			  lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);

		/* all outstanding lookups, regardless of destination  will be
		   resent after recovery is done */

		if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) {
			lkb->lkb_flags |= DLM_IFL_RESEND;
			continue;
		}

		if (!waiter_needs_recovery(ls, lkb))
			continue;

		switch (lkb->lkb_wait_type) {

		case DLM_MSG_REQUEST:
			lkb->lkb_flags |= DLM_IFL_RESEND;
			break;

		case DLM_MSG_CONVERT:
			recover_convert_waiter(ls, lkb);
			break;

		case DLM_MSG_UNLOCK:
			/* complete the unlock with a faked successful reply;
			   hold/put brackets the refs the reply path drops */
			hold_lkb(lkb);
			ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
			_remove_from_waiters(lkb);
			_receive_unlock_reply(lkb, &ls->ls_stub_ms);
			dlm_put_lkb(lkb);
			break;

		case DLM_MSG_CANCEL:
			/* complete the cancel with a faked reply, as above */
			hold_lkb(lkb);
			ls->ls_stub_ms.m_result = -DLM_ECANCEL;
			_remove_from_waiters(lkb);
			_receive_cancel_reply(lkb, &ls->ls_stub_ms);
			dlm_put_lkb(lkb);
			break;

		default:
			log_error(ls, "invalid lkb wait_type %d",
				  lkb->lkb_wait_type);
		}
		schedule();
	}
	mutex_unlock(&ls->ls_waiters_mutex);
}
| 3221 | |||
| 3222 | static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | ||
| 3223 | { | ||
| 3224 | struct dlm_lkb *lkb; | ||
| 3225 | int rv = 0; | ||
| 3226 | |||
| 3227 | mutex_lock(&ls->ls_waiters_mutex); | ||
| 3228 | list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { | ||
| 3229 | if (lkb->lkb_flags & DLM_IFL_RESEND) { | ||
| 3230 | rv = lkb->lkb_wait_type; | ||
| 3231 | _remove_from_waiters(lkb); | ||
| 3232 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | ||
| 3233 | break; | ||
| 3234 | } | ||
| 3235 | } | ||
| 3236 | mutex_unlock(&ls->ls_waiters_mutex); | ||
| 3237 | |||
| 3238 | if (!rv) | ||
| 3239 | lkb = NULL; | ||
| 3240 | *lkb_ret = lkb; | ||
| 3241 | return rv; | ||
| 3242 | } | ||
| 3243 | |||
/* Deal with lookups and lkb's marked RESEND from _pre.  We may now be the
   master or dir-node for r.  Processing the lkb may result in it being placed
   back on waiters. */

int dlm_recover_waiters_post(struct dlm_ls *ls)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *r;
	int error = 0, mstype;

	while (1) {
		/* bail out if another recovery started while we worked */
		if (dlm_locking_stopped(ls)) {
			log_debug(ls, "recover_waiters_post aborted");
			error = -EINTR;
			break;
		}

		mstype = remove_resend_waiter(ls, &lkb);
		if (!mstype)
			break;

		r = lkb->lkb_resource;

		log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
			  lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);

		switch (mstype) {

		case DLM_MSG_LOOKUP:
			hold_rsb(r);
			lock_rsb(r);
			_request_lock(r, lkb);
			if (is_master(r))
				confirm_master(r, 0);
			unlock_rsb(r);
			put_rsb(r);
			break;

		case DLM_MSG_REQUEST:
			hold_rsb(r);
			lock_rsb(r);
			_request_lock(r, lkb);
			if (is_master(r))
				confirm_master(r, 0);
			unlock_rsb(r);
			put_rsb(r);
			break;

		case DLM_MSG_CONVERT:
			hold_rsb(r);
			lock_rsb(r);
			_convert_lock(r, lkb);
			unlock_rsb(r);
			put_rsb(r);
			break;

		default:
			log_error(ls, "recover_waiters_post type %d", mstype);
		}
	}

	return error;
}
| 3307 | |||
| 3308 | static void purge_queue(struct dlm_rsb *r, struct list_head *queue, | ||
| 3309 | int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb)) | ||
| 3310 | { | ||
| 3311 | struct dlm_ls *ls = r->res_ls; | ||
| 3312 | struct dlm_lkb *lkb, *safe; | ||
| 3313 | |||
| 3314 | list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { | ||
| 3315 | if (test(ls, lkb)) { | ||
| 3316 | rsb_set_flag(r, RSB_LOCKS_PURGED); | ||
| 3317 | del_lkb(r, lkb); | ||
| 3318 | /* this put should free the lkb */ | ||
| 3319 | if (!dlm_put_lkb(lkb)) | ||
| 3320 | log_error(ls, "purged lkb not released"); | ||
| 3321 | } | ||
| 3322 | } | ||
| 3323 | } | ||
| 3324 | |||
| 3325 | static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
| 3326 | { | ||
| 3327 | return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid)); | ||
| 3328 | } | ||
| 3329 | |||
/* Purge predicate: matches any master-copy lkb, regardless of whether
   its owner node is still a lockspace member. */

static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
	return is_master_copy(lkb);
}
| 3334 | |||
| 3335 | static void purge_dead_locks(struct dlm_rsb *r) | ||
| 3336 | { | ||
| 3337 | purge_queue(r, &r->res_grantqueue, &purge_dead_test); | ||
| 3338 | purge_queue(r, &r->res_convertqueue, &purge_dead_test); | ||
| 3339 | purge_queue(r, &r->res_waitqueue, &purge_dead_test); | ||
| 3340 | } | ||
| 3341 | |||
| 3342 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r) | ||
| 3343 | { | ||
| 3344 | purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test); | ||
| 3345 | purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test); | ||
| 3346 | purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test); | ||
| 3347 | } | ||
| 3348 | |||
| 3349 | /* Get rid of locks held by nodes that are gone. */ | ||
| 3350 | |||
| 3351 | int dlm_purge_locks(struct dlm_ls *ls) | ||
| 3352 | { | ||
| 3353 | struct dlm_rsb *r; | ||
| 3354 | |||
| 3355 | log_debug(ls, "dlm_purge_locks"); | ||
| 3356 | |||
| 3357 | down_write(&ls->ls_root_sem); | ||
| 3358 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
| 3359 | hold_rsb(r); | ||
| 3360 | lock_rsb(r); | ||
| 3361 | if (is_master(r)) | ||
| 3362 | purge_dead_locks(r); | ||
| 3363 | unlock_rsb(r); | ||
| 3364 | unhold_rsb(r); | ||
| 3365 | |||
| 3366 | schedule(); | ||
| 3367 | } | ||
| 3368 | up_write(&ls->ls_root_sem); | ||
| 3369 | |||
| 3370 | return 0; | ||
| 3371 | } | ||
| 3372 | |||
| 3373 | static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) | ||
| 3374 | { | ||
| 3375 | struct dlm_rsb *r, *r_ret = NULL; | ||
| 3376 | |||
| 3377 | read_lock(&ls->ls_rsbtbl[bucket].lock); | ||
| 3378 | list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { | ||
| 3379 | if (!rsb_flag(r, RSB_LOCKS_PURGED)) | ||
| 3380 | continue; | ||
| 3381 | hold_rsb(r); | ||
| 3382 | rsb_clear_flag(r, RSB_LOCKS_PURGED); | ||
| 3383 | r_ret = r; | ||
| 3384 | break; | ||
| 3385 | } | ||
| 3386 | read_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
| 3387 | return r_ret; | ||
| 3388 | } | ||
| 3389 | |||
| 3390 | void dlm_grant_after_purge(struct dlm_ls *ls) | ||
| 3391 | { | ||
| 3392 | struct dlm_rsb *r; | ||
| 3393 | int bucket = 0; | ||
| 3394 | |||
| 3395 | while (1) { | ||
| 3396 | r = find_purged_rsb(ls, bucket); | ||
| 3397 | if (!r) { | ||
| 3398 | if (bucket == ls->ls_rsbtbl_size - 1) | ||
| 3399 | break; | ||
| 3400 | bucket++; | ||
| 3401 | continue; | ||
| 3402 | } | ||
| 3403 | lock_rsb(r); | ||
| 3404 | if (is_master(r)) { | ||
| 3405 | grant_pending_locks(r); | ||
| 3406 | confirm_master(r, 0); | ||
| 3407 | } | ||
| 3408 | unlock_rsb(r); | ||
| 3409 | put_rsb(r); | ||
| 3410 | schedule(); | ||
| 3411 | } | ||
| 3412 | } | ||
| 3413 | |||
| 3414 | static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid, | ||
| 3415 | uint32_t remid) | ||
| 3416 | { | ||
| 3417 | struct dlm_lkb *lkb; | ||
| 3418 | |||
| 3419 | list_for_each_entry(lkb, head, lkb_statequeue) { | ||
| 3420 | if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid) | ||
| 3421 | return lkb; | ||
| 3422 | } | ||
| 3423 | return NULL; | ||
| 3424 | } | ||
| 3425 | |||
| 3426 | static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid, | ||
| 3427 | uint32_t remid) | ||
| 3428 | { | ||
| 3429 | struct dlm_lkb *lkb; | ||
| 3430 | |||
| 3431 | lkb = search_remid_list(&r->res_grantqueue, nodeid, remid); | ||
| 3432 | if (lkb) | ||
| 3433 | return lkb; | ||
| 3434 | lkb = search_remid_list(&r->res_convertqueue, nodeid, remid); | ||
| 3435 | if (lkb) | ||
| 3436 | return lkb; | ||
| 3437 | lkb = search_remid_list(&r->res_waitqueue, nodeid, remid); | ||
| 3438 | if (lkb) | ||
| 3439 | return lkb; | ||
| 3440 | return NULL; | ||
| 3441 | } | ||
| 3442 | |||
/* Fill in a new master-copy lkb from the rcom_lock struct a lock
   holder sent us during recovery.  Returns -ENOMEM if the lock needs
   an lvb that can't be allocated. */

static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
				  struct dlm_rsb *r, struct dlm_rcom *rc)
{
	struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
	int lvblen;

	lkb->lkb_nodeid = rc->rc_header.h_nodeid;
	lkb->lkb_ownpid = rl->rl_ownpid;
	lkb->lkb_remid = rl->rl_lkid;
	lkb->lkb_exflags = rl->rl_exflags;
	/* keep only the low 16 flag bits from the wire, then mark ours
	   as a master copy */
	lkb->lkb_flags = rl->rl_flags & 0x0000FFFF;
	lkb->lkb_flags |= DLM_IFL_MSTCPY;
	lkb->lkb_lvbseq = rl->rl_lvbseq;
	lkb->lkb_rqmode = rl->rl_rqmode;
	lkb->lkb_grmode = rl->rl_grmode;
	/* don't set lkb_status because add_lkb wants to itself */

	/* record only whether each ast type was requested (addresses are
	   meaningless on this node) */
	lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST);
	lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP);

	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
		lkb->lkb_lvbptr = allocate_lvb(ls);
		if (!lkb->lkb_lvbptr)
			return -ENOMEM;
		/* the lvb bytes follow the rcom_lock struct in rc_buf */
		lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
			 sizeof(struct rcom_lock);
		memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen);
	}

	/* Conversions between PR and CW (middle modes) need special handling.
	   The real granted mode of these converting locks cannot be determined
	   until all locks have been rebuilt on the rsb (recover_conversion) */

	if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) {
		rl->rl_status = DLM_LKSTS_CONVERT;
		lkb->lkb_grmode = DLM_LOCK_IV;
		rsb_set_flag(r, RSB_RECOVER_CONVERT);
	}

	return 0;
}
| 3484 | |||
/* This lkb may have been recovered in a previous aborted recovery so we need
   to check if the rsb already has an lkb with the given remote nodeid/lkid.
   If so we just send back a standard reply.  If not, we create a new lkb with
   the given values and send back our lkid.  We send back our lkid by sending
   back the rcom_lock struct we got but with the remid field filled in. */

int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
{
	struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
	struct dlm_rsb *r;
	struct dlm_lkb *lkb;
	int error;

	/* locks with a parent lkid are not supported here */
	if (rl->rl_parent_lkid) {
		error = -EOPNOTSUPP;
		goto out;
	}

	error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r);
	if (error)
		goto out;

	lock_rsb(r);

	/* already rebuilt by an earlier (aborted) recovery pass? */
	lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid);
	if (lkb) {
		error = -EEXIST;
		goto out_remid;
	}

	error = create_lkb(ls, &lkb);
	if (error)
		goto out_unlock;

	error = receive_rcom_lock_args(ls, lkb, r, rc);
	if (error) {
		__put_lkb(ls, lkb);
		goto out_unlock;
	}

	attach_lkb(r, lkb);
	add_lkb(r, lkb, rl->rl_status);
	error = 0;

 out_remid:
	/* this is the new value returned to the lock holder for
	   saving in its process-copy lkb */
	rl->rl_remid = lkb->lkb_id;

 out_unlock:
	unlock_rsb(r);
	put_rsb(r);
 out:
	if (error)
		log_print("recover_master_copy %d %x", error, rl->rl_lkid);
	/* the result travels back to the holder inside the same struct */
	rl->rl_result = error;
	return error;
}
| 3543 | |||
/* Handle the reply to dlm_recover_master_copy(): record the new master's
   lkid (rl_remid) in our process-copy lkb and ack the recovery waiter. */

int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
{
	struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
	struct dlm_rsb *r;
	struct dlm_lkb *lkb;
	int error;

	error = find_lkb(ls, rl->rl_lkid, &lkb);
	if (error) {
		log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid);
		return error;
	}

	DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););

	/* the master's result for this lock */
	error = rl->rl_result;

	r = lkb->lkb_resource;
	hold_rsb(r);
	lock_rsb(r);

	switch (error) {
	case -EEXIST:
		/* master already had this lock from an earlier, aborted
		   recovery pass; treat the same as a fresh success */
		log_debug(ls, "master copy exists %x", lkb->lkb_id);
		/* fall through */
	case 0:
		lkb->lkb_remid = rl->rl_remid;
		break;
	default:
		log_error(ls, "dlm_recover_process_copy unknown error %d %x",
			  error, lkb->lkb_id);
	}

	/* an ack for dlm_recover_locks() which waits for replies from
	   all the locks it sends to new masters */
	dlm_recovered_lock(r);

	unlock_rsb(r);
	put_rsb(r);
	dlm_put_lkb(lkb);

	return 0;
}
| 3587 | |||
/* Acquire a new lock on behalf of a userspace process.  ua was heap
   allocated by the caller; on success its ownership passes to the lkb
   (freed by free_lkb()), on the early error paths it is freed here. */

int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
		     int mode, uint32_t flags, void *name, unsigned int namelen,
		     uint32_t parent_lkid)
{
	struct dlm_lkb *lkb;
	struct dlm_args args;
	int error;

	lock_recovery(ls);

	error = create_lkb(ls, &lkb);
	if (error) {
		/* ua not yet attached to an lkb, so we free it */
		kfree(ua);
		goto out;
	}

	/* the lvb lives in the kernel; userspace copies it in and out */
	if (flags & DLM_LKF_VALBLK) {
		ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
		if (!ua->lksb.sb_lvbptr) {
			kfree(ua);
			__put_lkb(ls, lkb);
			error = -ENOMEM;
			goto out;
		}
	}

	/* After ua is attached to lkb it will be freed by free_lkb().
	   When DLM_IFL_USER is set, the dlm knows that this is a userspace
	   lock and that lkb_astparam is the dlm_user_args structure. */

	error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
	lkb->lkb_flags |= DLM_IFL_USER;
	ua->old_mode = DLM_LOCK_IV;

	if (error) {
		/* ua is owned by the lkb now and freed along with it */
		__put_lkb(ls, lkb);
		goto out;
	}

	error = request_lock(ls, lkb, name, namelen, &args);

	switch (error) {
	case 0:
		break;
	case -EINPROGRESS:
		/* granted asynchronously; not an error for the caller */
		error = 0;
		break;
	case -EAGAIN:
		/* NOTE(review): returns 0 but frees the lkb; presumably the
		   failure reaches the user via the callback path -- confirm */
		error = 0;
		/* fall through */
	default:
		__put_lkb(ls, lkb);
		goto out;
	}

	/* add this new lkb to the per-process list of locks */
	spin_lock(&ua->proc->locks_spin);
	kref_get(&lkb->lkb_ref);
	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
	spin_unlock(&ua->proc->locks_spin);
 out:
	unlock_recovery(ls);
	return error;
}
| 3653 | |||
/* Convert the mode of an existing userspace lock.  ua_tmp carries the new
   callback arguments copied in from the user's write(); it is always freed
   before returning.  The persistent dlm_user_args hangs off the lkb. */

int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
{
	struct dlm_lkb *lkb;
	struct dlm_args args;
	struct dlm_user_args *ua;
	int error;

	lock_recovery(ls);

	error = find_lkb(ls, lkid, &lkb);
	if (error)
		goto out;

	/* user can change the params on its lock when it converts it, or
	   add an lvb that didn't exist before */

	ua = (struct dlm_user_args *)lkb->lkb_astparam;

	if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
		ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
		if (!ua->lksb.sb_lvbptr) {
			error = -ENOMEM;
			goto out_put;
		}
	}
	if (lvb_in && ua->lksb.sb_lvbptr)
		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);

	/* refresh callback params from the latest write() */
	ua->castparam = ua_tmp->castparam;
	ua->castaddr = ua_tmp->castaddr;
	ua->bastparam = ua_tmp->bastparam;
	ua->bastaddr = ua_tmp->bastaddr;
	ua->user_lksb = ua_tmp->user_lksb;
	ua->old_mode = lkb->lkb_grmode;

	error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
			      ua, DLM_FAKE_USER_AST, &args);
	if (error)
		goto out_put;

	error = convert_lock(ls, lkb, &args);

	/* asynchronous completion is not an error for the caller */
	if (error == -EINPROGRESS || error == -EAGAIN)
		error = 0;
 out_put:
	dlm_put_lkb(lkb);
 out:
	unlock_recovery(ls);
	kfree(ua_tmp);
	return error;
}
| 3706 | |||
/* Unlock a userspace lock and drop it from the per-process list. */

int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
		    uint32_t flags, uint32_t lkid, char *lvb_in)
{
	struct dlm_lkb *lkb;
	struct dlm_args args;
	struct dlm_user_args *ua;
	int error;

	lock_recovery(ls);

	error = find_lkb(ls, lkid, &lkb);
	if (error)
		goto out;

	ua = (struct dlm_user_args *)lkb->lkb_astparam;

	/* a final lvb value may accompany the unlock */
	if (lvb_in && ua->lksb.sb_lvbptr)
		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
	ua->castparam = ua_tmp->castparam;
	ua->user_lksb = ua_tmp->user_lksb;

	error = set_unlock_args(flags, ua, &args);
	if (error)
		goto out_put;

	error = unlock_lock(ls, lkb, &args);

	/* -DLM_EUNLOCK is the normal "unlock completed" result */
	if (error == -DLM_EUNLOCK)
		error = 0;
	if (error)
		goto out_put;

	spin_lock(&ua->proc->locks_spin);
	list_del_init(&lkb->lkb_ownqueue);
	spin_unlock(&ua->proc->locks_spin);

	/* this removes the reference for the proc->locks list added by
	   dlm_user_request */
	unhold_lkb(lkb);
 out_put:
	dlm_put_lkb(lkb);
 out:
	unlock_recovery(ls);
	/* NOTE(review): ua_tmp is not freed here, unlike dlm_user_convert;
	   presumably the caller owns it -- confirm against user.c */
	return error;
}
| 3752 | |||
| 3753 | int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | ||
| 3754 | uint32_t flags, uint32_t lkid) | ||
| 3755 | { | ||
| 3756 | struct dlm_lkb *lkb; | ||
| 3757 | struct dlm_args args; | ||
| 3758 | struct dlm_user_args *ua; | ||
| 3759 | int error; | ||
| 3760 | |||
| 3761 | lock_recovery(ls); | ||
| 3762 | |||
| 3763 | error = find_lkb(ls, lkid, &lkb); | ||
| 3764 | if (error) | ||
| 3765 | goto out; | ||
| 3766 | |||
| 3767 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | ||
| 3768 | ua->castparam = ua_tmp->castparam; | ||
| 3769 | ua->user_lksb = ua_tmp->user_lksb; | ||
| 3770 | |||
| 3771 | error = set_unlock_args(flags, ua, &args); | ||
| 3772 | if (error) | ||
| 3773 | goto out_put; | ||
| 3774 | |||
| 3775 | error = cancel_lock(ls, lkb, &args); | ||
| 3776 | |||
| 3777 | if (error == -DLM_ECANCEL) | ||
| 3778 | error = 0; | ||
| 3779 | if (error) | ||
| 3780 | goto out_put; | ||
| 3781 | |||
| 3782 | /* this lkb was removed from the WAITING queue */ | ||
| 3783 | if (lkb->lkb_grmode == DLM_LOCK_IV) { | ||
| 3784 | spin_lock(&ua->proc->locks_spin); | ||
| 3785 | list_del_init(&lkb->lkb_ownqueue); | ||
| 3786 | spin_unlock(&ua->proc->locks_spin); | ||
| 3787 | unhold_lkb(lkb); | ||
| 3788 | } | ||
| 3789 | out_put: | ||
| 3790 | dlm_put_lkb(lkb); | ||
| 3791 | out: | ||
| 3792 | unlock_recovery(ls); | ||
| 3793 | return error; | ||
| 3794 | } | ||
| 3795 | |||
| 3796 | static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
| 3797 | { | ||
| 3798 | struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; | ||
| 3799 | |||
| 3800 | if (ua->lksb.sb_lvbptr) | ||
| 3801 | kfree(ua->lksb.sb_lvbptr); | ||
| 3802 | kfree(ua); | ||
| 3803 | lkb->lkb_astparam = (long)NULL; | ||
| 3804 | |||
| 3805 | /* TODO: propogate to master if needed */ | ||
| 3806 | return 0; | ||
| 3807 | } | ||
| 3808 | |||
| 3809 | /* The force flag allows the unlock to go ahead even if the lkb isn't granted. | ||
| 3810 | Regardless of what rsb queue the lock is on, it's removed and freed. */ | ||
| 3811 | |||
| 3812 | static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
| 3813 | { | ||
| 3814 | struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; | ||
| 3815 | struct dlm_args args; | ||
| 3816 | int error; | ||
| 3817 | |||
| 3818 | /* FIXME: we need to handle the case where the lkb is in limbo | ||
| 3819 | while the rsb is being looked up, currently we assert in | ||
| 3820 | _unlock_lock/is_remote because rsb nodeid is -1. */ | ||
| 3821 | |||
| 3822 | set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args); | ||
| 3823 | |||
| 3824 | error = unlock_lock(ls, lkb, &args); | ||
| 3825 | if (error == -DLM_EUNLOCK) | ||
| 3826 | error = 0; | ||
| 3827 | return error; | ||
| 3828 | } | ||
| 3829 | |||
/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which
   1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
   which we clear here. */

/* proc CLOSING flag is set so no more device_reads should look at proc->asts
   list, and no more device_writes should add lkb's to proc->locks list; so we
   shouldn't need to take asts_spin or locks_spin here.  this assumes that
   device reads/writes/closes are serialized -- FIXME: we may need to serialize
   them ourself. */

void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
{
	struct dlm_lkb *lkb, *safe;

	lock_recovery(ls);
	mutex_lock(&ls->ls_clear_proc_locks);

	list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
		/* drop any undelivered ast queued for this lkb, and the
		   reference the ast queue held */
		if (lkb->lkb_ast_type) {
			list_del(&lkb->lkb_astqueue);
			unhold_lkb(lkb);
		}

		list_del_init(&lkb->lkb_ownqueue);

		/* persistent locks survive the process as orphans;
		   everything else is force-unlocked */
		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
			lkb->lkb_flags |= DLM_IFL_ORPHAN;
			orphan_proc_lock(ls, lkb);
		} else {
			lkb->lkb_flags |= DLM_IFL_DEAD;
			unlock_proc_lock(ls, lkb);
		}

		/* this removes the reference for the proc->locks list
		   added by dlm_user_request, it may result in the lkb
		   being freed */

		dlm_put_lkb(lkb);
	}
	mutex_unlock(&ls->ls_clear_proc_locks);
	unlock_recovery(ls);
}
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h new file mode 100644 index 000000000000..0843a3073ec3 --- /dev/null +++ b/fs/dlm/lock.h | |||
| @@ -0,0 +1,62 @@ | |||
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#ifndef __LOCK_DOT_H__
#define __LOCK_DOT_H__

/* debug printing helpers */
void dlm_print_rsb(struct dlm_rsb *r);
void dlm_dump_rsb(struct dlm_rsb *r);
void dlm_print_lkb(struct dlm_lkb *lkb);

/* core lock-manager entry points */
int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery);
int dlm_modes_compat(int mode1, int mode2);
int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
	unsigned int flags, struct dlm_rsb **r_ret);
void dlm_put_rsb(struct dlm_rsb *r);
void dlm_hold_rsb(struct dlm_rsb *r);
int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);

/* recovery support */
int dlm_purge_locks(struct dlm_ls *ls);
void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
void dlm_grant_after_purge(struct dlm_ls *ls);
int dlm_recover_waiters_post(struct dlm_ls *ls);
void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);

/* userspace (device interface) operations */
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
	uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
	int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
	uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
	uint32_t flags, uint32_t lkid);
void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);

/* true when this node is the resource master (res_nodeid == 0) */
static inline int is_master(struct dlm_rsb *r)
{
	return !r->res_nodeid;
}

static inline void lock_rsb(struct dlm_rsb *r)
{
	mutex_lock(&r->res_mutex);
}

static inline void unlock_rsb(struct dlm_rsb *r)
{
	mutex_unlock(&r->res_mutex);
}

#endif
| 62 | |||
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c new file mode 100644 index 000000000000..109333c8ecb9 --- /dev/null +++ b/fs/dlm/lockspace.c | |||
| @@ -0,0 +1,717 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lockspace.h" | ||
| 16 | #include "member.h" | ||
| 17 | #include "recoverd.h" | ||
| 18 | #include "ast.h" | ||
| 19 | #include "dir.h" | ||
| 20 | #include "lowcomms.h" | ||
| 21 | #include "config.h" | ||
| 22 | #include "memory.h" | ||
| 23 | #include "lock.h" | ||
| 24 | #include "recover.h" | ||
| 25 | |||
#ifdef CONFIG_DLM_DEBUG
int dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
/* no-op stubs when debugfs support is compiled out */
static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif

static int ls_count;			/* lockspaces created; guarded by ls_lock */
static struct mutex ls_lock;		/* serializes lockspace creation */
static struct list_head lslist;		/* all lockspaces; guarded by lslist_lock */
static spinlock_t lslist_lock;
static struct task_struct * scand_task;	/* the dlm_scand kthread */
| 39 | |||
| 40 | |||
| 41 | static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) | ||
| 42 | { | ||
| 43 | ssize_t ret = len; | ||
| 44 | int n = simple_strtol(buf, NULL, 0); | ||
| 45 | |||
| 46 | switch (n) { | ||
| 47 | case 0: | ||
| 48 | dlm_ls_stop(ls); | ||
| 49 | break; | ||
| 50 | case 1: | ||
| 51 | dlm_ls_start(ls); | ||
| 52 | break; | ||
| 53 | default: | ||
| 54 | ret = -EINVAL; | ||
| 55 | } | ||
| 56 | return ret; | ||
| 57 | } | ||
| 58 | |||
/* "event_done": userspace writes the result of handling an online/offline
   uevent; wakes the waiter in do_uevent() */
static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
	set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
	wake_up(&ls->ls_uevent_wait);
	return len;
}

/* "id": the lockspace's global id, shown and set through sysfs */
static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
}

static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	ls->ls_global_id = simple_strtoul(buf, NULL, 0);
	return len;
}

/* "recover_status": recovery status bits, read-only */
static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
{
	uint32_t status = dlm_recover_status(ls);
	return snprintf(buf, PAGE_SIZE, "%x\n", status);
}

/* "recover_nodeid": nodeid involved in the current recovery, read-only */
static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
}
| 88 | |||
/* One sysfs file on a lockspace kobject: name/mode plus optional
   show/store handlers. */

struct dlm_attr {
	struct attribute attr;
	ssize_t (*show)(struct dlm_ls *, char *);
	ssize_t (*store)(struct dlm_ls *, const char *, size_t);
};

/* "control": write-only stop/start switch */
static struct dlm_attr dlm_attr_control = {
	.attr = {.name = "control", .mode = S_IWUSR},
	.store = dlm_control_store
};

/* "event_done": write-only uevent acknowledgement */
static struct dlm_attr dlm_attr_event = {
	.attr = {.name = "event_done", .mode = S_IWUSR},
	.store = dlm_event_store
};

/* "id": read/write global lockspace id */
static struct dlm_attr dlm_attr_id = {
	.attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
	.show = dlm_id_show,
	.store = dlm_id_store
};

/* "recover_status": read-only */
static struct dlm_attr dlm_attr_recover_status = {
	.attr = {.name = "recover_status", .mode = S_IRUGO},
	.show = dlm_recover_status_show
};

/* "recover_nodeid": read-only */
static struct dlm_attr dlm_attr_recover_nodeid = {
	.attr = {.name = "recover_nodeid", .mode = S_IRUGO},
	.show = dlm_recover_nodeid_show
};

/* NULL-terminated default-attribute list handed to the ktype */
static struct attribute *dlm_attrs[] = {
	&dlm_attr_control.attr,
	&dlm_attr_event.attr,
	&dlm_attr_id.attr,
	&dlm_attr_recover_status.attr,
	&dlm_attr_recover_nodeid.attr,
	NULL,
};
| 129 | |||
| 130 | static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr, | ||
| 131 | char *buf) | ||
| 132 | { | ||
| 133 | struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); | ||
| 134 | struct dlm_attr *a = container_of(attr, struct dlm_attr, attr); | ||
| 135 | return a->show ? a->show(ls, buf) : 0; | ||
| 136 | } | ||
| 137 | |||
| 138 | static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr, | ||
| 139 | const char *buf, size_t len) | ||
| 140 | { | ||
| 141 | struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); | ||
| 142 | struct dlm_attr *a = container_of(attr, struct dlm_attr, attr); | ||
| 143 | return a->store ? a->store(ls, buf, len) : len; | ||
| 144 | } | ||
| 145 | |||
/* glue connecting the generic kobject sysfs ops to the dlm_attr table */
static struct sysfs_ops dlm_attr_ops = {
	.show = dlm_attr_show,
	.store = dlm_attr_store,
};

static struct kobj_type dlm_ktype = {
	.default_attrs = dlm_attrs,
	.sysfs_ops = &dlm_attr_ops,
};

/* the "dlm" kset under the kernel subsystem; each lockspace kobject
   is registered as a member of it */
static struct kset dlm_kset = {
	.subsys = &kernel_subsys,
	.kobj = {.name = "dlm",},
	.ktype = &dlm_ktype,
};
| 161 | |||
| 162 | static int kobject_setup(struct dlm_ls *ls) | ||
| 163 | { | ||
| 164 | char lsname[DLM_LOCKSPACE_LEN]; | ||
| 165 | int error; | ||
| 166 | |||
| 167 | memset(lsname, 0, DLM_LOCKSPACE_LEN); | ||
| 168 | snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name); | ||
| 169 | |||
| 170 | error = kobject_set_name(&ls->ls_kobj, "%s", lsname); | ||
| 171 | if (error) | ||
| 172 | return error; | ||
| 173 | |||
| 174 | ls->ls_kobj.kset = &dlm_kset; | ||
| 175 | ls->ls_kobj.ktype = &dlm_ktype; | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | static int do_uevent(struct dlm_ls *ls, int in) | ||
| 180 | { | ||
| 181 | int error; | ||
| 182 | |||
| 183 | if (in) | ||
| 184 | kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE); | ||
| 185 | else | ||
| 186 | kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); | ||
| 187 | |||
| 188 | error = wait_event_interruptible(ls->ls_uevent_wait, | ||
| 189 | test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); | ||
| 190 | if (error) | ||
| 191 | goto out; | ||
| 192 | |||
| 193 | error = ls->ls_uevent_result; | ||
| 194 | out: | ||
| 195 | return error; | ||
| 196 | } | ||
| 197 | |||
| 198 | |||
/* Module-load setup: initialize the global lockspace bookkeeping and
   register the "dlm" kset in sysfs. */

int dlm_lockspace_init(void)
{
	int error;

	ls_count = 0;
	mutex_init(&ls_lock);
	INIT_LIST_HEAD(&lslist);
	spin_lock_init(&lslist_lock);

	error = kset_register(&dlm_kset);
	if (error)
		printk("dlm_lockspace_init: cannot register kset %d\n", error);
	return error;
}

/* Module-unload teardown. */
void dlm_lockspace_exit(void)
{
	kset_unregister(&dlm_kset);
}
| 218 | |||
/* Background thread: periodically run dlm_scan_rsbs() on every lockspace
   (which ages out unused rsbs), sleeping scan_secs between passes. */

static int dlm_scand(void *data)
{
	struct dlm_ls *ls;

	while (!kthread_should_stop()) {
		/* NOTE(review): lslist is walked without lslist_lock here;
		   presumably safe only if lockspaces cannot disappear while
		   this thread runs -- confirm */
		list_for_each_entry(ls, &lslist, ls_list)
			dlm_scan_rsbs(ls);
		schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
	}
	return 0;
}
| 230 | |||
| 231 | static int dlm_scand_start(void) | ||
| 232 | { | ||
| 233 | struct task_struct *p; | ||
| 234 | int error = 0; | ||
| 235 | |||
| 236 | p = kthread_run(dlm_scand, NULL, "dlm_scand"); | ||
| 237 | if (IS_ERR(p)) | ||
| 238 | error = PTR_ERR(p); | ||
| 239 | else | ||
| 240 | scand_task = p; | ||
| 241 | return error; | ||
| 242 | } | ||
| 243 | |||
| 244 | static void dlm_scand_stop(void) | ||
| 245 | { | ||
| 246 | kthread_stop(scand_task); | ||
| 247 | } | ||
| 248 | |||
| 249 | static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen) | ||
| 250 | { | ||
| 251 | struct dlm_ls *ls; | ||
| 252 | |||
| 253 | spin_lock(&lslist_lock); | ||
| 254 | |||
| 255 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 256 | if (ls->ls_namelen == namelen && | ||
| 257 | memcmp(ls->ls_name, name, namelen) == 0) | ||
| 258 | goto out; | ||
| 259 | } | ||
| 260 | ls = NULL; | ||
| 261 | out: | ||
| 262 | spin_unlock(&lslist_lock); | ||
| 263 | return ls; | ||
| 264 | } | ||
| 265 | |||
| 266 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) | ||
| 267 | { | ||
| 268 | struct dlm_ls *ls; | ||
| 269 | |||
| 270 | spin_lock(&lslist_lock); | ||
| 271 | |||
| 272 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 273 | if (ls->ls_global_id == id) { | ||
| 274 | ls->ls_count++; | ||
| 275 | goto out; | ||
| 276 | } | ||
| 277 | } | ||
| 278 | ls = NULL; | ||
| 279 | out: | ||
| 280 | spin_unlock(&lslist_lock); | ||
| 281 | return ls; | ||
| 282 | } | ||
| 283 | |||
| 284 | struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace) | ||
| 285 | { | ||
| 286 | struct dlm_ls *ls; | ||
| 287 | |||
| 288 | spin_lock(&lslist_lock); | ||
| 289 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 290 | if (ls->ls_local_handle == lockspace) { | ||
| 291 | ls->ls_count++; | ||
| 292 | goto out; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | ls = NULL; | ||
| 296 | out: | ||
| 297 | spin_unlock(&lslist_lock); | ||
| 298 | return ls; | ||
| 299 | } | ||
| 300 | |||
| 301 | struct dlm_ls *dlm_find_lockspace_device(int minor) | ||
| 302 | { | ||
| 303 | struct dlm_ls *ls; | ||
| 304 | |||
| 305 | spin_lock(&lslist_lock); | ||
| 306 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 307 | if (ls->ls_device.minor == minor) { | ||
| 308 | ls->ls_count++; | ||
| 309 | goto out; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | ls = NULL; | ||
| 313 | out: | ||
| 314 | spin_unlock(&lslist_lock); | ||
| 315 | return ls; | ||
| 316 | } | ||
| 317 | |||
| 318 | void dlm_put_lockspace(struct dlm_ls *ls) | ||
| 319 | { | ||
| 320 | spin_lock(&lslist_lock); | ||
| 321 | ls->ls_count--; | ||
| 322 | spin_unlock(&lslist_lock); | ||
| 323 | } | ||
| 324 | |||
| 325 | static void remove_lockspace(struct dlm_ls *ls) | ||
| 326 | { | ||
| 327 | for (;;) { | ||
| 328 | spin_lock(&lslist_lock); | ||
| 329 | if (ls->ls_count == 0) { | ||
| 330 | list_del(&ls->ls_list); | ||
| 331 | spin_unlock(&lslist_lock); | ||
| 332 | return; | ||
| 333 | } | ||
| 334 | spin_unlock(&lslist_lock); | ||
| 335 | ssleep(1); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
/* Start the daemon threads shared by all lockspaces; on failure the
   already-started threads are unwound in reverse order. */

static int threads_start(void)
{
	int error;

	/* Thread which process lock requests for all lockspace's */
	error = dlm_astd_start();
	if (error) {
		log_print("cannot start dlm_astd thread %d", error);
		goto fail;
	}

	error = dlm_scand_start();
	if (error) {
		log_print("cannot start dlm_scand thread %d", error);
		goto astd_fail;
	}

	/* Thread for sending/receiving messages for all lockspace's */
	error = dlm_lowcomms_start();
	if (error) {
		log_print("cannot start dlm lowcomms %d", error);
		goto scand_fail;
	}

	return 0;

 scand_fail:
	dlm_scand_stop();
 astd_fail:
	dlm_astd_stop();
 fail:
	return error;
}

/* Stop all shared daemon threads. */
static void threads_stop(void)
{
	dlm_scand_stop();
	dlm_lowcomms_stop();
	dlm_astd_stop();
}
| 379 | |||
/* Create and initialize a lockspace, register its kobject, start its
   recovery daemon and wait for the initial uevent handshake with
   userspace.  On success *lockspace is set and 0 returned; returns
   -EEXIST (with *lockspace set) if a lockspace of that name exists.
   Errors unwind everything in reverse order via the goto chain. */

static int new_lockspace(char *name, int namelen, void **lockspace,
			 uint32_t flags, int lvblen)
{
	struct dlm_ls *ls;
	int i, size, error = -ENOMEM;

	if (namelen > DLM_LOCKSPACE_LEN)
		return -EINVAL;

	/* lvb length must be a non-zero multiple of 8 */
	if (!lvblen || (lvblen % 8))
		return -EINVAL;

	if (!try_module_get(THIS_MODULE))
		return -EINVAL;

	ls = dlm_find_lockspace_name(name, namelen);
	if (ls) {
		*lockspace = ls;
		module_put(THIS_MODULE);
		return -EEXIST;
	}

	/* the name is stored in the flexible tail of the struct;
	   kzalloc guarantees it is NUL-terminated */
	ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
	if (!ls)
		goto out;
	memcpy(ls->ls_name, name, namelen);
	ls->ls_namelen = namelen;
	ls->ls_exflags = flags;
	ls->ls_lvblen = lvblen;
	ls->ls_count = 0;
	ls->ls_flags = 0;

	/* hash table of resources (rsbs), with a "toss" list per bucket
	   for unused entries */
	size = dlm_config.rsbtbl_size;
	ls->ls_rsbtbl_size = size;

	ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
	if (!ls->ls_rsbtbl)
		goto out_lsfree;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list);
		INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss);
		rwlock_init(&ls->ls_rsbtbl[i].lock);
	}

	/* hash table of locks (lkbs); per-bucket counter seeds lock ids */
	size = dlm_config.lkbtbl_size;
	ls->ls_lkbtbl_size = size;

	ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
	if (!ls->ls_lkbtbl)
		goto out_rsbfree;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
		rwlock_init(&ls->ls_lkbtbl[i].lock);
		ls->ls_lkbtbl[i].counter = 1;
	}

	/* resource directory hash table */
	size = dlm_config.dirtbl_size;
	ls->ls_dirtbl_size = size;

	ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
	if (!ls->ls_dirtbl)
		goto out_lkbfree;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
		rwlock_init(&ls->ls_dirtbl[i].lock);
	}

	INIT_LIST_HEAD(&ls->ls_waiters);
	mutex_init(&ls->ls_waiters_mutex);

	INIT_LIST_HEAD(&ls->ls_nodes);
	INIT_LIST_HEAD(&ls->ls_nodes_gone);
	ls->ls_num_nodes = 0;
	ls->ls_low_nodeid = 0;
	ls->ls_total_weight = 0;
	ls->ls_node_array = NULL;

	memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
	ls->ls_stub_rsb.res_ls = ls;

	ls->ls_debug_rsb_dentry = NULL;
	ls->ls_debug_waiters_dentry = NULL;

	init_waitqueue_head(&ls->ls_uevent_wait);
	ls->ls_uevent_result = 0;

	/* recovery state */
	ls->ls_recoverd_task = NULL;
	mutex_init(&ls->ls_recoverd_active);
	spin_lock_init(&ls->ls_recover_lock);
	ls->ls_recover_status = 0;
	ls->ls_recover_seq = 0;
	ls->ls_recover_args = NULL;
	init_rwsem(&ls->ls_in_recovery);
	INIT_LIST_HEAD(&ls->ls_requestqueue);
	mutex_init(&ls->ls_requestqueue_mutex);
	mutex_init(&ls->ls_clear_proc_locks);

	ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
	if (!ls->ls_recover_buf)
		goto out_dirfree;

	INIT_LIST_HEAD(&ls->ls_recover_list);
	spin_lock_init(&ls->ls_recover_list_lock);
	ls->ls_recover_list_count = 0;
	ls->ls_local_handle = ls;
	init_waitqueue_head(&ls->ls_wait_general);
	INIT_LIST_HEAD(&ls->ls_root_list);
	init_rwsem(&ls->ls_root_sem);

	/* held until the first recovery completes */
	down_write(&ls->ls_in_recovery);

	spin_lock(&lslist_lock);
	list_add(&ls->ls_list, &lslist);
	spin_unlock(&lslist_lock);

	/* needs to find ls in lslist */
	error = dlm_recoverd_start(ls);
	if (error) {
		log_error(ls, "can't start dlm_recoverd %d", error);
		goto out_rcomfree;
	}

	dlm_create_debug_file(ls);

	error = kobject_setup(ls);
	if (error)
		goto out_del;

	error = kobject_register(&ls->ls_kobj);
	if (error)
		goto out_del;

	/* tell userspace we're online and wait for its acknowledgement */
	error = do_uevent(ls, 1);
	if (error)
		goto out_unreg;

	*lockspace = ls;
	return 0;

 out_unreg:
	kobject_unregister(&ls->ls_kobj);
 out_del:
	dlm_delete_debug_file(ls);
	dlm_recoverd_stop(ls);
 out_rcomfree:
	spin_lock(&lslist_lock);
	list_del(&ls->ls_list);
	spin_unlock(&lslist_lock);
	kfree(ls->ls_recover_buf);
 out_dirfree:
	kfree(ls->ls_dirtbl);
 out_lkbfree:
	kfree(ls->ls_lkbtbl);
 out_rsbfree:
	kfree(ls->ls_rsbtbl);
 out_lsfree:
	kfree(ls);
 out:
	module_put(THIS_MODULE);
	return error;
}
| 541 | |||
| 542 | int dlm_new_lockspace(char *name, int namelen, void **lockspace, | ||
| 543 | uint32_t flags, int lvblen) | ||
| 544 | { | ||
| 545 | int error = 0; | ||
| 546 | |||
| 547 | mutex_lock(&ls_lock); | ||
| 548 | if (!ls_count) | ||
| 549 | error = threads_start(); | ||
| 550 | if (error) | ||
| 551 | goto out; | ||
| 552 | |||
| 553 | error = new_lockspace(name, namelen, lockspace, flags, lvblen); | ||
| 554 | if (!error) | ||
| 555 | ls_count++; | ||
| 556 | out: | ||
| 557 | mutex_unlock(&ls_lock); | ||
| 558 | return error; | ||
| 559 | } | ||
| 560 | |||
| 561 | /* Return 1 if the lockspace still has active remote locks, | ||
| 562 | * 2 if the lockspace still has active local locks. | ||
| 563 | */ | ||
| 564 | static int lockspace_busy(struct dlm_ls *ls) | ||
| 565 | { | ||
| 566 | int i, lkb_found = 0; | ||
| 567 | struct dlm_lkb *lkb; | ||
| 568 | |||
| 569 | /* NOTE: We check the lockidtbl here rather than the resource table. | ||
| 570 | This is because there may be LKBs queued as ASTs that have been | ||
| 571 | unlinked from their RSBs and are pending deletion once the AST has | ||
| 572 | been delivered */ | ||
| 573 | |||
| 574 | for (i = 0; i < ls->ls_lkbtbl_size; i++) { | ||
| 575 | read_lock(&ls->ls_lkbtbl[i].lock); | ||
| 576 | if (!list_empty(&ls->ls_lkbtbl[i].list)) { | ||
| 577 | lkb_found = 1; | ||
| 578 | list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, | ||
| 579 | lkb_idtbl_list) { | ||
| 580 | if (!lkb->lkb_nodeid) { | ||
| 581 | read_unlock(&ls->ls_lkbtbl[i].lock); | ||
| 582 | return 2; | ||
| 583 | } | ||
| 584 | } | ||
| 585 | } | ||
| 586 | read_unlock(&ls->ls_lkbtbl[i].lock); | ||
| 587 | } | ||
| 588 | return lkb_found; | ||
| 589 | } | ||
| 590 | |||
/* Tear down a lockspace and free everything it owns.  "force" controls
 * how aggressively we proceed (see the comment above
 * dlm_release_lockspace() for the four values).  Returns -EBUSY when
 * lockspace_busy() reports more activity than "force" permits. */
static int release_lockspace(struct dlm_ls *ls, int force)
{
	struct dlm_lkb *lkb;
	struct dlm_rsb *rsb;
	struct list_head *head;
	int i;
	int busy = lockspace_busy(ls);

	/* busy: 1 = remote locks remain, 2 = local locks remain */
	if (busy > force)
		return -EBUSY;

	/* notify userspace unless this is a forced shutdown (force == 3) */
	if (force < 3)
		do_uevent(ls, 0);

	dlm_recoverd_stop(ls);

	/* unlink from lslist so no new lookups can find this lockspace */
	remove_lockspace(ls);

	dlm_delete_debug_file(ls);

	/* park the AST delivery thread while lkbs are being freed */
	dlm_astd_suspend();

	kfree(ls->ls_recover_buf);

	/*
	 * Free direntry structs.
	 */

	dlm_dir_clear(ls);
	kfree(ls->ls_dirtbl);

	/*
	 * Free all lkb's on lkbtbl[] lists.
	 */

	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
		head = &ls->ls_lkbtbl[i].list;
		while (!list_empty(head)) {
			lkb = list_entry(head->next, struct dlm_lkb,
					 lkb_idtbl_list);

			list_del(&lkb->lkb_idtbl_list);

			dlm_del_ast(lkb);

			/* only master-copy lkbs own their lvb allocation */
			if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
				free_lvb(lkb->lkb_lvbptr);

			free_lkb(lkb);
		}
	}
	dlm_astd_resume();

	kfree(ls->ls_lkbtbl);

	/*
	 * Free all rsb's on rsbtbl[] lists
	 */

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		head = &ls->ls_rsbtbl[i].list;
		while (!list_empty(head)) {
			rsb = list_entry(head->next, struct dlm_rsb,
					 res_hashchain);

			list_del(&rsb->res_hashchain);
			free_rsb(rsb);
		}

		/* the "toss" list holds unused rsbs awaiting reclaim */
		head = &ls->ls_rsbtbl[i].toss;
		while (!list_empty(head)) {
			rsb = list_entry(head->next, struct dlm_rsb,
					 res_hashchain);
			list_del(&rsb->res_hashchain);
			free_rsb(rsb);
		}
	}

	kfree(ls->ls_rsbtbl);

	/*
	 * Free structures on any other lists
	 */

	kfree(ls->ls_recover_args);
	dlm_clear_free_entries(ls);
	dlm_clear_members(ls);
	dlm_clear_members_gone(ls);
	kfree(ls->ls_node_array);
	kobject_unregister(&ls->ls_kobj);
	kfree(ls);

	/* stop the shared helper threads when the last lockspace goes */
	mutex_lock(&ls_lock);
	ls_count--;
	if (!ls_count)
		threads_stop();
	mutex_unlock(&ls_lock);

	module_put(THIS_MODULE);
	return 0;
}
| 692 | |||
| 693 | /* | ||
| 694 | * Called when a system has released all its locks and is not going to use the | ||
| 695 | * lockspace any longer. We free everything we're managing for this lockspace. | ||
| 696 | * Remaining nodes will go through the recovery process as if we'd died. The | ||
| 697 | * lockspace must continue to function as usual, participating in recoveries, | ||
| 698 | * until this returns. | ||
| 699 | * | ||
| 700 | * Force has 4 possible values: | ||
| 701 | * 0 - don't destroy lockspace if it has any LKBs | ||
| 702 | * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs | ||
| 703 | * 2 - destroy lockspace regardless of LKBs | ||
| 704 | * 3 - destroy lockspace as part of a forced shutdown | ||
| 705 | */ | ||
| 706 | |||
| 707 | int dlm_release_lockspace(void *lockspace, int force) | ||
| 708 | { | ||
| 709 | struct dlm_ls *ls; | ||
| 710 | |||
| 711 | ls = dlm_find_lockspace_local(lockspace); | ||
| 712 | if (!ls) | ||
| 713 | return -EINVAL; | ||
| 714 | dlm_put_lockspace(ls); | ||
| 715 | return release_lockspace(ls, force); | ||
| 716 | } | ||
| 717 | |||
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h new file mode 100644 index 000000000000..891eabbdd021 --- /dev/null +++ b/fs/dlm/lockspace.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
#ifndef __LOCKSPACE_DOT_H__
#define __LOCKSPACE_DOT_H__

/* Subsystem setup/teardown, called at module init/exit. */
int dlm_lockspace_init(void);
void dlm_lockspace_exit(void);
/* Lockspace lookups by global id, local handle, or device minor.
   NOTE(review): presumably each successful lookup takes a reference
   that the caller releases with dlm_put_lockspace() — confirm against
   lockspace.c. */
struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
struct dlm_ls *dlm_find_lockspace_local(void *id);
struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);

#endif				/* __LOCKSPACE_DOT_H__ */
| 25 | |||
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c new file mode 100644 index 000000000000..23f5ce12080b --- /dev/null +++ b/fs/dlm/lowcomms.c | |||
| @@ -0,0 +1,1238 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | /* | ||
| 15 | * lowcomms.c | ||
| 16 | * | ||
| 17 | * This is the "low-level" comms layer. | ||
| 18 | * | ||
| 19 | * It is responsible for sending/receiving messages | ||
| 20 | * from other nodes in the cluster. | ||
| 21 | * | ||
| 22 | * Cluster nodes are referred to by their nodeids. nodeids are | ||
| 23 | * simply 32 bit numbers to the locking module - if they need to | ||
| 24 | * be expanded for the cluster infrastructure then that is its | ||
| 25 | * responsibility. It is this layer's | ||
| 26 | * responsibility to resolve these into IP address or | ||
| 27 | * whatever it needs for inter-node communication. | ||
| 28 | * | ||
| 29 | * The comms level is two kernel threads that deal mainly with | ||
| 30 | * the receiving of messages from other nodes and passing them | ||
| 31 | * up to the mid-level comms layer (which understands the | ||
| 32 | * message format) for execution by the locking core, and | ||
| 33 | * a send thread which does all the setting up of connections | ||
| 34 | * to remote nodes and the sending of data. Threads are not allowed | ||
| 35 | * to send their own data because it may cause them to wait in times | ||
| 36 | * of high load. Also, this way, the sending thread can collect together | ||
| 37 | * messages bound for one node and send them in one block. | ||
| 38 | * | ||
| 39 | * I don't see any problem with the recv thread executing the locking | ||
| 40 | * code on behalf of remote processes as the locking code is | ||
| 41 | * short, efficient and never (well, hardly ever) waits. | ||
| 42 | * | ||
| 43 | */ | ||
| 44 | |||
| 45 | #include <asm/ioctls.h> | ||
| 46 | #include <net/sock.h> | ||
| 47 | #include <net/tcp.h> | ||
| 48 | #include <net/sctp/user.h> | ||
| 49 | #include <linux/pagemap.h> | ||
| 50 | #include <linux/socket.h> | ||
| 51 | #include <linux/idr.h> | ||
| 52 | |||
| 53 | #include "dlm_internal.h" | ||
| 54 | #include "lowcomms.h" | ||
| 55 | #include "config.h" | ||
| 56 | #include "midcomms.h" | ||
| 57 | |||
| 58 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; | ||
| 59 | static int dlm_local_count; | ||
| 60 | static int dlm_local_nodeid; | ||
| 61 | |||
| 62 | /* One of these per connected node */ | ||
| 63 | |||
| 64 | #define NI_INIT_PENDING 1 | ||
| 65 | #define NI_WRITE_PENDING 2 | ||
| 66 | |||
struct nodeinfo {
	spinlock_t lock;		/* protects assoc_id updates */
	sctp_assoc_t assoc_id;		/* 0 until the association is up */
	unsigned long flags;		/* NI_* bits above */
	struct list_head write_list;	/* nodes with pending writes */
	struct list_head writequeue;	/* outgoing writequeue_entries */
	spinlock_t writequeue_lock;	/* presumably guards writequeue —
					   confirm against the send path */
	int nodeid;			/* cluster nodeid this entry maps */
};
| 76 | |||
| 77 | static DEFINE_IDR(nodeinfo_idr); | ||
| 78 | static struct rw_semaphore nodeinfo_lock; | ||
| 79 | static int max_nodeid; | ||
| 80 | |||
/* Circular-buffer bookkeeping for the receive page.  "mask" is the
   buffer size minus one (see CBUF_INIT), so the size must be a power
   of two for the "& mask" wrap-around to work. */
struct cbuf {
	unsigned base;	/* offset of the first unconsumed byte */
	unsigned len;	/* number of bytes currently buffered */
	unsigned mask;	/* size - 1, used for wrap-around */
};
| 86 | |||
| 87 | /* Just the one of these, now. But this struct keeps | ||
| 88 | the connection-specific variables together */ | ||
| 89 | |||
| 90 | #define CF_READ_PENDING 1 | ||
| 91 | |||
struct connection {
	struct socket *sock;		/* the single SCTP socket */
	unsigned long flags;		/* CF_* bits above */
	struct page *rx_page;		/* receive staging page (see cbuf) */
	atomic_t waiting_requests;	/* data_ready events not yet consumed */
	struct cbuf cb;			/* circular-buffer state for rx_page */
	int eagain_flag;		/* NOTE(review): not referenced in this
					   part of the file — confirm use */
};
| 100 | |||
| 101 | /* An entry waiting to be sent */ | ||
| 102 | |||
struct writequeue_entry {
	struct list_head list;	/* link on nodeinfo.writequeue */
	struct page *page;	/* page holding the outgoing data */
	int offset;		/* presumably start of unsent data — confirm
				   against the send path */
	int len;		/* presumably bytes queued — confirm */
	int end;		/* presumably end of used space — confirm */
	int users;		/* presumably writers still composing — confirm */
	struct nodeinfo *ni;	/* destination node */
};
| 112 | |||
/* Circular-buffer helpers for struct cbuf.  The buffer size must be a
 * power of two so that "& mask" implements wrap-around.  All arguments
 * are parenthesised so the macros are safe for expression arguments. */

/* Account for n newly produced bytes */
#define CBUF_ADD(cb, n) do { (cb)->len += (n); } while(0)
#define CBUF_EMPTY(cb) ((cb)->len == 0)
/* True if n more bytes fit without overflowing the buffer */
#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
/* Offset where the next produced byte lands */
#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)

/* size must be a power of two */
#define CBUF_INIT(cb, size) \
do { \
	(cb)->base = (cb)->len = 0; \
	(cb)->mask = ((size)-1); \
} while(0)

/* Consume n bytes from the front, wrapping base */
#define CBUF_EAT(cb, n) \
do { \
	(cb)->len -= (n); \
	(cb)->base += (n); \
	(cb)->base &= (cb)->mask; \
} while(0)
| 130 | |||
| 131 | |||
| 132 | /* List of nodes which have writes pending */ | ||
| 133 | static struct list_head write_nodes; | ||
| 134 | static spinlock_t write_nodes_lock; | ||
| 135 | |||
| 136 | /* Maximum number of incoming messages to process before | ||
| 137 | * doing a schedule() | ||
| 138 | */ | ||
| 139 | #define MAX_RX_MSG_COUNT 25 | ||
| 140 | |||
| 141 | /* Manage daemons */ | ||
| 142 | static struct task_struct *recv_task; | ||
| 143 | static struct task_struct *send_task; | ||
| 144 | static wait_queue_head_t lowcomms_recv_wait; | ||
| 145 | static atomic_t accepting; | ||
| 146 | |||
| 147 | /* The SCTP connection */ | ||
| 148 | static struct connection sctp_con; | ||
| 149 | |||
| 150 | |||
| 151 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | ||
| 152 | { | ||
| 153 | struct sockaddr_storage addr; | ||
| 154 | int error; | ||
| 155 | |||
| 156 | if (!dlm_local_count) | ||
| 157 | return -1; | ||
| 158 | |||
| 159 | error = dlm_nodeid_to_addr(nodeid, &addr); | ||
| 160 | if (error) | ||
| 161 | return error; | ||
| 162 | |||
| 163 | if (dlm_local_addr[0]->ss_family == AF_INET) { | ||
| 164 | struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; | ||
| 165 | struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; | ||
| 166 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | ||
| 167 | } else { | ||
| 168 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | ||
| 169 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | ||
| 170 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | ||
| 171 | sizeof(in6->sin6_addr)); | ||
| 172 | } | ||
| 173 | |||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
/* Look up (and optionally create) the nodeinfo for a nodeid.
 * "alloc" doubles as the create-on-miss switch and as the GFP flags
 * for any allocation; pass 0 for a lookup-only call.
 * Returns NULL on lookup miss (alloc == 0) or on allocation/idr
 * failure. */
static struct nodeinfo *nodeid2nodeinfo(int nodeid, int alloc)
{
	struct nodeinfo *ni;
	int r;
	int n;

	down_read(&nodeinfo_lock);
	ni = idr_find(&nodeinfo_idr, nodeid);
	up_read(&nodeinfo_lock);

	if (!ni && alloc) {
		down_write(&nodeinfo_lock);

		/* re-check under the write lock: another thread may have
		   created the entry while we held only the read lock */
		ni = idr_find(&nodeinfo_idr, nodeid);
		if (ni)
			goto out_up;

		r = idr_pre_get(&nodeinfo_idr, alloc);
		if (!r)
			goto out_up;

		ni = kmalloc(sizeof(struct nodeinfo), alloc);
		if (!ni)
			goto out_up;

		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
		if (r) {
			kfree(ni);
			ni = NULL;
			goto out_up;
		}
		/* the idr may hand back an id above the one requested;
		   the slot must be exactly "nodeid", so undo and fail */
		if (n != nodeid) {
			idr_remove(&nodeinfo_idr, n);
			kfree(ni);
			ni = NULL;
			goto out_up;
		}
		memset(ni, 0, sizeof(struct nodeinfo));
		spin_lock_init(&ni->lock);
		INIT_LIST_HEAD(&ni->writequeue);
		spin_lock_init(&ni->writequeue_lock);
		ni->nodeid = nodeid;

		/* track the highest nodeid so assoc2nodeinfo() knows how
		   far to scan */
		if (nodeid > max_nodeid)
			max_nodeid = nodeid;
	out_up:
		up_write(&nodeinfo_lock);
	}

	return ni;
}
| 228 | |||
| 229 | /* Don't call this too often... */ | ||
| 230 | static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc) | ||
| 231 | { | ||
| 232 | int i; | ||
| 233 | struct nodeinfo *ni; | ||
| 234 | |||
| 235 | for (i=1; i<=max_nodeid; i++) { | ||
| 236 | ni = nodeid2nodeinfo(i, 0); | ||
| 237 | if (ni && ni->assoc_id == assoc) | ||
| 238 | return ni; | ||
| 239 | } | ||
| 240 | return NULL; | ||
| 241 | } | ||
| 242 | |||
| 243 | /* Data or notification available on socket */ | ||
| 244 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | ||
| 245 | { | ||
| 246 | atomic_inc(&sctp_con.waiting_requests); | ||
| 247 | if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) | ||
| 248 | return; | ||
| 249 | |||
| 250 | wake_up_interruptible(&lowcomms_recv_wait); | ||
| 251 | } | ||
| 252 | |||
| 253 | |||
| 254 | /* Add the port number to an IP6 or 4 sockaddr and return the address length. | ||
| 255 | Also pad out the struct with zeros to make comparisons meaningful */ | ||
| 256 | |||
static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
			  int *addr_len)
{
	struct sockaddr_in *local4_addr;
	struct sockaddr_in6 *local6_addr;

	if (!dlm_local_count)
		return;

	/* port 0 means "use the port of our primary local address" */
	if (!port) {
		if (dlm_local_addr[0]->ss_family == AF_INET) {
			local4_addr = (struct sockaddr_in *)dlm_local_addr[0];
			port = be16_to_cpu(local4_addr->sin_port);
		} else {
			local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0];
			port = be16_to_cpu(local6_addr->sin6_port);
		}
	}

	/* the address family always follows our primary local address */
	saddr->ss_family = dlm_local_addr[0]->ss_family;
	if (dlm_local_addr[0]->ss_family == AF_INET) {
		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
		in4_addr->sin_port = cpu_to_be16(port);
		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
		/* zero the storage tail beyond sockaddr_in so whole-struct
		   comparisons of sockaddr_storage are meaningful */
		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
				      sizeof(struct sockaddr_in));
		*addr_len = sizeof(struct sockaddr_in);
	} else {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
		in6_addr->sin6_port = cpu_to_be16(port);
		/* likewise zero the tail beyond sockaddr_in6 */
		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
				      sizeof(struct sockaddr_in6));
		*addr_len = sizeof(struct sockaddr_in6);
	}
}
| 292 | |||
| 293 | /* Close the connection and tidy up */ | ||
| 294 | static void close_connection(void) | ||
| 295 | { | ||
| 296 | if (sctp_con.sock) { | ||
| 297 | sock_release(sctp_con.sock); | ||
| 298 | sctp_con.sock = NULL; | ||
| 299 | } | ||
| 300 | |||
| 301 | if (sctp_con.rx_page) { | ||
| 302 | __free_page(sctp_con.rx_page); | ||
| 303 | sctp_con.rx_page = NULL; | ||
| 304 | } | ||
| 305 | } | ||
| 306 | |||
/* We only send shutdown messages to nodes that are not part of the cluster */
static void send_shutdown(sctp_assoc_t associd)
{
	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
	struct msghdr outmessage;
	struct cmsghdr *cmsg;
	struct sctp_sndrcvinfo *sinfo;
	int ret;

	/* no payload: the shutdown request is carried entirely in the
	   SCTP_SNDRCV ancillary data (MSG_EOF below) */
	outmessage.msg_name = NULL;
	outmessage.msg_namelen = 0;
	outmessage.msg_control = outcmsg;
	outmessage.msg_controllen = sizeof(outcmsg);
	outmessage.msg_flags = MSG_EOR;

	cmsg = CMSG_FIRSTHDR(&outmessage);
	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_SNDRCV;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
	/* shrink controllen to the single cmsg actually present */
	outmessage.msg_controllen = cmsg->cmsg_len;
	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));

	/* MSG_EOF shuts down just this association on the
	   one-to-many socket */
	sinfo->sinfo_flags |= MSG_EOF;
	sinfo->sinfo_assoc_id = associd;

	ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0);

	if (ret != 0)
		log_print("send EOF to node failed: %d", ret);
}
| 338 | |||
| 339 | |||
/* INIT failed but we don't know which node...
   restart INIT on all pending nodes */
static void init_failed(void)
{
	int i;
	struct nodeinfo *ni;

	for (i=1; i<=max_nodeid; i++) {
		ni = nodeid2nodeinfo(i, 0);
		if (!ni)
			continue;

		/* clearing NI_INIT_PENDING (and assoc_id) makes the send
		   path issue a fresh INIT; queue the node for dlm_sendd
		   unless it is already queued */
		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
			ni->assoc_id = 0;
			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
				spin_lock_bh(&write_nodes_lock);
				list_add_tail(&ni->write_list, &write_nodes);
				spin_unlock_bh(&write_nodes_lock);
			}
		}
	}
	wake_up_process(send_task);
}
| 363 | |||
/* Something happened to an association: handle SCTP_ASSOC_CHANGE
 * events delivered as MSG_NOTIFICATION by receive_from_sock().
 * Other notification types are ignored. */
static void process_sctp_notification(struct msghdr *msg, char *buf)
{
	union sctp_notification *sn = (union sctp_notification *)buf;

	if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
		switch (sn->sn_assoc_change.sac_state) {

		case SCTP_COMM_UP:
		case SCTP_RESTART:
		{
			/* Check that the new node is in the lockspace */
			struct sctp_prim prim;
			mm_segment_t fs;
			int nodeid;
			int prim_len, ret;
			int addr_len;
			struct nodeinfo *ni;

			/* This seems to happen when we received a connection
			 * too early... or something... anyway, it happens but
			 * we always seem to get a real message too, see
			 * receive_from_sock */

			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
				log_print("COMM_UP for invalid assoc ID %d",
					 (int)sn->sn_assoc_change.sac_assoc_id);
				init_failed();
				return;
			}
			memset(&prim, 0, sizeof(struct sctp_prim));
			prim_len = sizeof(struct sctp_prim);
			prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;

			/* getsockopt expects a user-space pointer; lift the
			   address-limit check for this kernel buffer */
			fs = get_fs();
			set_fs(get_ds());
			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
						(char*)&prim, &prim_len);
			set_fs(fs);
			if (ret < 0) {
				struct nodeinfo *ni;

				log_print("getsockopt/sctp_primary_addr on "
					  "new assoc %d failed : %d",
				    (int)sn->sn_assoc_change.sac_assoc_id, ret);

				/* Retry INIT later */
				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
				if (ni)
					clear_bit(NI_INIT_PENDING, &ni->flags);
				return;
			}
			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
				/* peer address is not a configured cluster
				   member: refuse the association */
				log_print("reject connect from unknown addr");
				send_shutdown(prim.ssp_assoc_id);
				return;
			}

			ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
			if (!ni)
				return;

			/* Save the assoc ID */
			spin_lock(&ni->lock);
			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
			spin_unlock(&ni->lock);

			log_print("got new/restarted association %d nodeid %d",
			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);

			/* Send any pending writes */
			clear_bit(NI_INIT_PENDING, &ni->flags);
			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
				spin_lock_bh(&write_nodes_lock);
				list_add_tail(&ni->write_list, &write_nodes);
				spin_unlock_bh(&write_nodes_lock);
			}
			wake_up_process(send_task);
		}
		break;

		case SCTP_COMM_LOST:
		case SCTP_SHUTDOWN_COMP:
		{
			struct nodeinfo *ni;

			/* forget the dead association; a later send will
			   trigger a fresh INIT */
			ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
			if (ni) {
				spin_lock(&ni->lock);
				ni->assoc_id = 0;
				spin_unlock(&ni->lock);
			}
		}
		break;

		/* We don't know which INIT failed, so clear the PENDING flags
		 * on them all.  if assoc_id is zero then it will then try
		 * again */

		case SCTP_CANT_STR_ASSOC:
		{
			log_print("Can't start SCTP association - retrying");
			init_failed();
		}
		break;

		default:
			log_print("unexpected SCTP assoc change id=%d state=%d",
				  (int)sn->sn_assoc_change.sac_assoc_id,
				  sn->sn_assoc_change.sac_state);
		}
	}
}
| 479 | |||
| 480 | /* Data received from remote end */ | ||
| 481 | static int receive_from_sock(void) | ||
| 482 | { | ||
| 483 | int ret = 0; | ||
| 484 | struct msghdr msg; | ||
| 485 | struct kvec iov[2]; | ||
| 486 | unsigned len; | ||
| 487 | int r; | ||
| 488 | struct sctp_sndrcvinfo *sinfo; | ||
| 489 | struct cmsghdr *cmsg; | ||
| 490 | struct nodeinfo *ni; | ||
| 491 | |||
| 492 | /* These two are marginally too big for stack allocation, but this | ||
| 493 | * function is (currently) only called by dlm_recvd so static should be | ||
| 494 | * OK. | ||
| 495 | */ | ||
| 496 | static struct sockaddr_storage msgname; | ||
| 497 | static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 498 | |||
| 499 | if (sctp_con.sock == NULL) | ||
| 500 | goto out; | ||
| 501 | |||
| 502 | if (sctp_con.rx_page == NULL) { | ||
| 503 | /* | ||
| 504 | * This doesn't need to be atomic, but I think it should | ||
| 505 | * improve performance if it is. | ||
| 506 | */ | ||
| 507 | sctp_con.rx_page = alloc_page(GFP_ATOMIC); | ||
| 508 | if (sctp_con.rx_page == NULL) | ||
| 509 | goto out_resched; | ||
| 510 | CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE); | ||
| 511 | } | ||
| 512 | |||
| 513 | memset(&incmsg, 0, sizeof(incmsg)); | ||
| 514 | memset(&msgname, 0, sizeof(msgname)); | ||
| 515 | |||
| 516 | memset(incmsg, 0, sizeof(incmsg)); | ||
| 517 | msg.msg_name = &msgname; | ||
| 518 | msg.msg_namelen = sizeof(msgname); | ||
| 519 | msg.msg_flags = 0; | ||
| 520 | msg.msg_control = incmsg; | ||
| 521 | msg.msg_controllen = sizeof(incmsg); | ||
| 522 | |||
| 523 | /* I don't see why this circular buffer stuff is necessary for SCTP | ||
| 524 | * which is a packet-based protocol, but the whole thing breaks under | ||
| 525 | * load without it! The overhead is minimal (and is in the TCP lowcomms | ||
| 526 | * anyway, of course) so I'll leave it in until I can figure out what's | ||
| 527 | * really happening. | ||
| 528 | */ | ||
| 529 | |||
| 530 | /* | ||
| 531 | * iov[0] is the bit of the circular buffer between the current end | ||
| 532 | * point (cb.base + cb.len) and the end of the buffer. | ||
| 533 | */ | ||
| 534 | iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb); | ||
| 535 | iov[0].iov_base = page_address(sctp_con.rx_page) + | ||
| 536 | CBUF_DATA(&sctp_con.cb); | ||
| 537 | iov[1].iov_len = 0; | ||
| 538 | |||
| 539 | /* | ||
| 540 | * iov[1] is the bit of the circular buffer between the start of the | ||
| 541 | * buffer and the start of the currently used section (cb.base) | ||
| 542 | */ | ||
| 543 | if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) { | ||
| 544 | iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb); | ||
| 545 | iov[1].iov_len = sctp_con.cb.base; | ||
| 546 | iov[1].iov_base = page_address(sctp_con.rx_page); | ||
| 547 | msg.msg_iovlen = 2; | ||
| 548 | } | ||
| 549 | len = iov[0].iov_len + iov[1].iov_len; | ||
| 550 | |||
| 551 | r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, 1, len, | ||
| 552 | MSG_NOSIGNAL | MSG_DONTWAIT); | ||
| 553 | if (ret <= 0) | ||
| 554 | goto out_close; | ||
| 555 | |||
| 556 | msg.msg_control = incmsg; | ||
| 557 | msg.msg_controllen = sizeof(incmsg); | ||
| 558 | cmsg = CMSG_FIRSTHDR(&msg); | ||
| 559 | sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); | ||
| 560 | |||
| 561 | if (msg.msg_flags & MSG_NOTIFICATION) { | ||
| 562 | process_sctp_notification(&msg, page_address(sctp_con.rx_page)); | ||
| 563 | return 0; | ||
| 564 | } | ||
| 565 | |||
| 566 | /* Is this a new association ? */ | ||
| 567 | ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL); | ||
| 568 | if (ni) { | ||
| 569 | ni->assoc_id = sinfo->sinfo_assoc_id; | ||
| 570 | if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) { | ||
| 571 | |||
| 572 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 573 | spin_lock_bh(&write_nodes_lock); | ||
| 574 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 575 | spin_unlock_bh(&write_nodes_lock); | ||
| 576 | } | ||
| 577 | wake_up_process(send_task); | ||
| 578 | } | ||
| 579 | } | ||
| 580 | |||
| 581 | /* INIT sends a message with length of 1 - ignore it */ | ||
| 582 | if (r == 1) | ||
| 583 | return 0; | ||
| 584 | |||
| 585 | CBUF_ADD(&sctp_con.cb, ret); | ||
| 586 | ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), | ||
| 587 | page_address(sctp_con.rx_page), | ||
| 588 | sctp_con.cb.base, sctp_con.cb.len, | ||
| 589 | PAGE_CACHE_SIZE); | ||
| 590 | if (ret < 0) | ||
| 591 | goto out_close; | ||
| 592 | CBUF_EAT(&sctp_con.cb, ret); | ||
| 593 | |||
| 594 | out: | ||
| 595 | ret = 0; | ||
| 596 | goto out_ret; | ||
| 597 | |||
| 598 | out_resched: | ||
| 599 | lowcomms_data_ready(sctp_con.sock->sk, 0); | ||
| 600 | ret = 0; | ||
| 601 | schedule(); | ||
| 602 | goto out_ret; | ||
| 603 | |||
| 604 | out_close: | ||
| 605 | if (ret != -EAGAIN) | ||
| 606 | log_print("error reading from sctp socket: %d", ret); | ||
| 607 | out_ret: | ||
| 608 | return ret; | ||
| 609 | } | ||
| 610 | |||
/* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */
static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
{
	mm_segment_t fs;
	int result = 0;

	/* the socket ops check pointers against the user address limit;
	   lift it since these buffers live in kernel space */
	fs = get_fs();
	set_fs(get_ds());
	/* the first address (num == 1) uses a plain bind; each further
	   address is attached with SCTP_SOCKOPT_BINDX_ADD */
	if (num == 1)
		result = sctp_con.sock->ops->bind(sctp_con.sock,
					(struct sockaddr *) addr, addr_len);
	else
		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
	set_fs(fs);

	if (result < 0)
		log_print("Can't bind to port %d addr number %d",
			  dlm_config.tcp_port, num);

	return result;
}
| 633 | |||
/*
 * Populate dlm_local_addr[] with kmalloc'd copies of every local address
 * configured for this node (multi-homing) and cache our own nodeid.
 * Stops at the first dlm_our_addr() miss; a kmalloc failure silently
 * truncates the list.
 */
static void init_local(void)
{
	struct sockaddr_storage sas, *addr;
	int i;

	dlm_local_nodeid = dlm_our_nodeid();

	/* NOTE(review): the "- 1" leaves the last slot of dlm_local_addr[]
	   unused - presumably deliberate headroom, but confirm against the
	   array's declared size */
	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
		if (dlm_our_addr(&sas, i))
			break;

		addr = kmalloc(sizeof(*addr), GFP_KERNEL);
		if (!addr)
			break;
		memcpy(addr, &sas, sizeof(*addr));
		dlm_local_addr[dlm_local_count++] = addr;
	}
}
| 652 | |||
/* Initialise SCTP socket and bind to all interfaces */
/*
 * Creates the single shared SOCK_SEQPACKET SCTP socket, subscribes to the
 * SCTP notifications the receive path relies on, binds every local
 * address to it and puts it into the listening state.
 * Returns 0 on success or a negative error; on any failure after socket
 * creation the socket is released and sctp_con.sock reset to NULL.
 */
static int init_sock(void)
{
	mm_segment_t fs;
	struct socket *sock = NULL;
	struct sockaddr_storage localaddr;
	struct sctp_event_subscribe subscribe;
	int result = -EINVAL, num = 1, i, addr_len;

	/* Discover our local addresses on first use */
	if (!dlm_local_count) {
		init_local();
		if (!dlm_local_count) {
			log_print("no local IP address has been set");
			goto out;
		}
	}

	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
				  IPPROTO_SCTP, &sock);
	if (result < 0) {
		log_print("Can't create comms socket, check SCTP is loaded");
		goto out;
	}

	/* Listen for events */
	memset(&subscribe, 0, sizeof(subscribe));
	subscribe.sctp_data_io_event = 1;
	subscribe.sctp_association_event = 1;
	subscribe.sctp_send_failure_event = 1;
	subscribe.sctp_shutdown_event = 1;
	subscribe.sctp_partial_delivery_event = 1;

	/* setsockopt() copies from "user" space, so point the address
	   checks at kernel space for the duration of the call */
	fs = get_fs();
	set_fs(get_ds());
	result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
				       (char *)&subscribe, sizeof(subscribe));
	set_fs(fs);

	if (result < 0) {
		log_print("Failed to set SCTP_EVENTS on socket: result=%d",
			  result);
		goto create_delsock;
	}

	/* Init con struct */
	sock->sk->sk_user_data = &sctp_con;
	sctp_con.sock = sock;
	sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready;

	/* Bind to all interfaces. */
	for (i = 0; i < dlm_local_count; i++) {
		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);

		result = add_bind_addr(&localaddr, addr_len, num);
		if (result)
			goto create_delsock;
		++num;
	}

	result = sock->ops->listen(sock, 5);
	if (result < 0) {
		log_print("Can't set socket listening");
		goto create_delsock;
	}

	return 0;

create_delsock:
	sock_release(sock);
	sctp_con.sock = NULL;
out:
	return result;
}
| 727 | |||
| 728 | |||
| 729 | static struct writequeue_entry *new_writequeue_entry(int allocation) | ||
| 730 | { | ||
| 731 | struct writequeue_entry *entry; | ||
| 732 | |||
| 733 | entry = kmalloc(sizeof(struct writequeue_entry), allocation); | ||
| 734 | if (!entry) | ||
| 735 | return NULL; | ||
| 736 | |||
| 737 | entry->page = alloc_page(allocation); | ||
| 738 | if (!entry->page) { | ||
| 739 | kfree(entry); | ||
| 740 | return NULL; | ||
| 741 | } | ||
| 742 | |||
| 743 | entry->offset = 0; | ||
| 744 | entry->len = 0; | ||
| 745 | entry->end = 0; | ||
| 746 | entry->users = 0; | ||
| 747 | |||
| 748 | return entry; | ||
| 749 | } | ||
| 750 | |||
/*
 * Reserve "len" bytes of outgoing message space for "nodeid".
 * Returns an opaque writequeue-entry handle (hand it back to
 * dlm_lowcomms_commit_buffer() once filled in) and sets *ppc to the spot
 * to write the message.  Returns NULL if comms are shut down, the
 * nodeinfo lookup fails, or a fresh page cannot be allocated.
 */
void *dlm_lowcomms_get_buffer(int nodeid, int len, int allocation, char **ppc)
{
	struct writequeue_entry *e;
	int offset = 0;
	int users = 0;
	struct nodeinfo *ni;

	if (!atomic_read(&accepting))
		return NULL;

	ni = nodeid2nodeinfo(nodeid, allocation);
	if (!ni)
		return NULL;

	/* First try to pack the request into the tail entry's page if it
	   still has "len" bytes free */
	spin_lock(&ni->writequeue_lock);
	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
	if (((struct list_head *) e == &ni->writequeue) ||
	    (PAGE_CACHE_SIZE - e->end < len)) {
		e = NULL;
	} else {
		offset = e->end;
		e->end += len;
		users = e->users++;
	}
	spin_unlock(&ni->writequeue_lock);

	if (e) {
	got_one:
		/* the first user maps the page; e->users keeps it mapped
		   until the final commit */
		if (users == 0)
			kmap(e->page);
		*ppc = page_address(e->page) + offset;
		return e;
	}

	/* No usable tail entry - allocate a fresh one and queue it */
	e = new_writequeue_entry(allocation);
	if (e) {
		spin_lock(&ni->writequeue_lock);
		offset = e->end;
		e->end += len;
		e->ni = ni;
		users = e->users++;
		list_add_tail(&e->list, &ni->writequeue);
		spin_unlock(&ni->writequeue_lock);
		goto got_one;
	}
	return NULL;
}
| 798 | |||
/*
 * Commit a buffer handed out by dlm_lowcomms_get_buffer().  When the
 * last user of the entry commits, the page is unmapped, the sendable
 * length fixed up, and the owning node queued on write_nodes so
 * dlm_sendd transmits it.
 */
void dlm_lowcomms_commit_buffer(void *arg)
{
	struct writequeue_entry *e = (struct writequeue_entry *) arg;
	int users;
	struct nodeinfo *ni = e->ni;

	if (!atomic_read(&accepting))
		return;

	spin_lock(&ni->writequeue_lock);
	users = --e->users;
	if (users)
		goto out;	/* other writers are still filling this page */
	e->len = e->end - e->offset;
	kunmap(e->page);
	spin_unlock(&ni->writequeue_lock);

	if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
		spin_lock_bh(&write_nodes_lock);
		list_add_tail(&ni->write_list, &write_nodes);
		spin_unlock_bh(&write_nodes_lock);
		wake_up_process(send_task);
	}
	return;

out:
	spin_unlock(&ni->writequeue_lock);
	return;
}
| 828 | |||
/* Release a writequeue entry and the data page it owns */
static void free_entry(struct writequeue_entry *e)
{
	__free_page(e->page);
	kfree(e);
}
| 834 | |||
| 835 | /* Initiate an SCTP association. In theory we could just use sendmsg() on | ||
| 836 | the first IP address and it should work, but this allows us to set up the | ||
| 837 | association before sending any valuable data that we can't afford to lose. | ||
| 838 | It also keeps the send path clean as it can now always use the association ID */ | ||
| 839 | static void initiate_association(int nodeid) | ||
| 840 | { | ||
| 841 | struct sockaddr_storage rem_addr; | ||
| 842 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 843 | struct msghdr outmessage; | ||
| 844 | struct cmsghdr *cmsg; | ||
| 845 | struct sctp_sndrcvinfo *sinfo; | ||
| 846 | int ret; | ||
| 847 | int addrlen; | ||
| 848 | char buf[1]; | ||
| 849 | struct kvec iov[1]; | ||
| 850 | struct nodeinfo *ni; | ||
| 851 | |||
| 852 | log_print("Initiating association with node %d", nodeid); | ||
| 853 | |||
| 854 | ni = nodeid2nodeinfo(nodeid, GFP_KERNEL); | ||
| 855 | if (!ni) | ||
| 856 | return; | ||
| 857 | |||
| 858 | if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) { | ||
| 859 | log_print("no address for nodeid %d", nodeid); | ||
| 860 | return; | ||
| 861 | } | ||
| 862 | |||
| 863 | make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen); | ||
| 864 | |||
| 865 | outmessage.msg_name = &rem_addr; | ||
| 866 | outmessage.msg_namelen = addrlen; | ||
| 867 | outmessage.msg_control = outcmsg; | ||
| 868 | outmessage.msg_controllen = sizeof(outcmsg); | ||
| 869 | outmessage.msg_flags = MSG_EOR; | ||
| 870 | |||
| 871 | iov[0].iov_base = buf; | ||
| 872 | iov[0].iov_len = 1; | ||
| 873 | |||
| 874 | /* Real INIT messages seem to cause trouble. Just send a 1 byte message | ||
| 875 | we can afford to lose */ | ||
| 876 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
| 877 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
| 878 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
| 879 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
| 880 | sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); | ||
| 881 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
| 882 | sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid); | ||
| 883 | |||
| 884 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
| 885 | ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1); | ||
| 886 | if (ret < 0) { | ||
| 887 | log_print("send INIT to node failed: %d", ret); | ||
| 888 | /* Try again later */ | ||
| 889 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
| 890 | } | ||
| 891 | } | ||
| 892 | |||
/* Send a message */
/*
 * Push the pending writequeue entries for one node out of the shared
 * SCTP socket.  If the node has no association yet, a 1-byte INIT
 * message is sent instead and the real data stays queued.  Returns 0 or
 * the last (negative) sendmsg error; -EAGAIN also sets
 * sctp_con.eagain_flag so dlm_sendd will requeue everything later.
 */
static int send_to_sock(struct nodeinfo *ni)
{
	int ret = 0;
	struct writequeue_entry *e;
	int len, offset;
	struct msghdr outmsg;
	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
	struct cmsghdr *cmsg;
	struct sctp_sndrcvinfo *sinfo;
	struct kvec iov;

	/* See if we need to init an association before we start
	   sending precious messages */
	spin_lock(&ni->lock);
	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
		spin_unlock(&ni->lock);
		initiate_association(ni->nodeid);
		return 0;
	}
	spin_unlock(&ni->lock);

	outmsg.msg_name = NULL; /* We use assoc_id */
	outmsg.msg_namelen = 0;
	outmsg.msg_control = outcmsg;
	outmsg.msg_controllen = sizeof(outcmsg);
	outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR;

	cmsg = CMSG_FIRSTHDR(&outmsg);
	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_SNDRCV;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
	sinfo->sinfo_assoc_id = ni->assoc_id;
	outmsg.msg_controllen = cmsg->cmsg_len;

	spin_lock(&ni->writequeue_lock);
	for (;;) {
		if (list_empty(&ni->writequeue))
			break;
		e = list_entry(ni->writequeue.next, struct writequeue_entry,
			       list);
		len = e->len;
		offset = e->offset;
		BUG_ON(len == 0 && e->users == 0);
		/* drop the lock around the (possibly blocking) send */
		spin_unlock(&ni->writequeue_lock);
		kmap(e->page);

		ret = 0;
		if (len) {
			iov.iov_base = page_address(e->page)+offset;
			iov.iov_len = len;

			ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1,
					     len);
			if (ret == -EAGAIN) {
				sctp_con.eagain_flag = 1;
				goto out;
			} else if (ret < 0)
				goto send_error;
		} else {
			/* Don't starve people filling buffers */
			schedule();
		}

		spin_lock(&ni->writequeue_lock);
		/* consume what was actually sent; a partial send leaves the
		   remainder at the head of the queue */
		e->offset += ret;
		e->len -= ret;

		if (e->len == 0 && e->users == 0) {
			list_del(&e->list);
			free_entry(e);
			continue;
		}
	}
	spin_unlock(&ni->writequeue_lock);
out:
	return ret;

send_error:
	log_print("Error sending to node %d %d", ni->nodeid, ret);
	/* forget the association and kick off a fresh INIT to recover */
	spin_lock(&ni->lock);
	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
		ni->assoc_id = 0;
		spin_unlock(&ni->lock);
		initiate_association(ni->nodeid);
	} else
		spin_unlock(&ni->lock);

	return ret;
}
| 986 | |||
/* Try to send any messages that are pending */
/*
 * Walk write_nodes, detaching each node (and clearing its
 * NI_WRITE_PENDING bit) before calling send_to_sock().  The spinlock is
 * dropped around the actual send, which can block; list_for_each_safe
 * keeps the traversal valid across the list_del().
 */
static void process_output_queue(void)
{
	struct list_head *list;
	struct list_head *temp;

	spin_lock_bh(&write_nodes_lock);
	list_for_each_safe(list, temp, &write_nodes) {
		struct nodeinfo *ni =
			list_entry(list, struct nodeinfo, write_list);
		clear_bit(NI_WRITE_PENDING, &ni->flags);
		list_del(&ni->write_list);

		spin_unlock_bh(&write_nodes_lock);

		send_to_sock(ni);
		spin_lock_bh(&write_nodes_lock);
	}
	spin_unlock_bh(&write_nodes_lock);
}
| 1007 | |||
| 1008 | /* Called after we've had -EAGAIN and been woken up */ | ||
| 1009 | static void refill_write_queue(void) | ||
| 1010 | { | ||
| 1011 | int i; | ||
| 1012 | |||
| 1013 | for (i=1; i<=max_nodeid; i++) { | ||
| 1014 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
| 1015 | |||
| 1016 | if (ni) { | ||
| 1017 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 1018 | spin_lock_bh(&write_nodes_lock); | ||
| 1019 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 1020 | spin_unlock_bh(&write_nodes_lock); | ||
| 1021 | } | ||
| 1022 | } | ||
| 1023 | } | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | static void clean_one_writequeue(struct nodeinfo *ni) | ||
| 1027 | { | ||
| 1028 | struct list_head *list; | ||
| 1029 | struct list_head *temp; | ||
| 1030 | |||
| 1031 | spin_lock(&ni->writequeue_lock); | ||
| 1032 | list_for_each_safe(list, temp, &ni->writequeue) { | ||
| 1033 | struct writequeue_entry *e = | ||
| 1034 | list_entry(list, struct writequeue_entry, list); | ||
| 1035 | list_del(&e->list); | ||
| 1036 | free_entry(e); | ||
| 1037 | } | ||
| 1038 | spin_unlock(&ni->writequeue_lock); | ||
| 1039 | } | ||
| 1040 | |||
| 1041 | static void clean_writequeues(void) | ||
| 1042 | { | ||
| 1043 | int i; | ||
| 1044 | |||
| 1045 | for (i=1; i<=max_nodeid; i++) { | ||
| 1046 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
| 1047 | if (ni) | ||
| 1048 | clean_one_writequeue(ni); | ||
| 1049 | } | ||
| 1050 | } | ||
| 1051 | |||
| 1052 | |||
| 1053 | static void dealloc_nodeinfo(void) | ||
| 1054 | { | ||
| 1055 | int i; | ||
| 1056 | |||
| 1057 | for (i=1; i<=max_nodeid; i++) { | ||
| 1058 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
| 1059 | if (ni) { | ||
| 1060 | idr_remove(&nodeinfo_idr, i); | ||
| 1061 | kfree(ni); | ||
| 1062 | } | ||
| 1063 | } | ||
| 1064 | } | ||
| 1065 | |||
/*
 * Close communication with a departed node: forget its association id
 * (no SCTP shutdown is sent - see the comment below), throw away any
 * queued unsent messages, and clear NI_INIT_PENDING so a fresh INIT can
 * be tried the next time the node is contacted.
 * Returns 0, or -1 if the node is unknown.
 */
int dlm_lowcomms_close(int nodeid)
{
	struct nodeinfo *ni;

	ni = nodeid2nodeinfo(nodeid, 0);
	if (!ni)
		return -1;

	spin_lock(&ni->lock);
	if (ni->assoc_id) {
		ni->assoc_id = 0;
		/* Don't send shutdown here, sctp will just queue it
		   till the node comes back up! */
	}
	spin_unlock(&ni->lock);

	clean_one_writequeue(ni);
	clear_bit(NI_INIT_PENDING, &ni->flags);
	return 0;
}
| 1086 | |||
| 1087 | static int write_list_empty(void) | ||
| 1088 | { | ||
| 1089 | int status; | ||
| 1090 | |||
| 1091 | spin_lock_bh(&write_nodes_lock); | ||
| 1092 | status = list_empty(&write_nodes); | ||
| 1093 | spin_unlock_bh(&write_nodes_lock); | ||
| 1094 | |||
| 1095 | return status; | ||
| 1096 | } | ||
| 1097 | |||
/*
 * Receive daemon.  Sleeps on lowcomms_recv_wait until
 * lowcomms_data_ready() sets CF_READ_PENDING, then drains the socket via
 * receive_from_sock(), voluntarily rescheduling every MAX_RX_MSG_COUNT
 * messages so it cannot starve other tasks.  Exits when
 * kthread_should_stop() says so.
 */
static int dlm_recvd(void *data)
{
	DECLARE_WAITQUEUE(wait, current);

	while (!kthread_should_stop()) {
		int count = 0;

		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&lowcomms_recv_wait, &wait);
		/* re-check the flag after queueing to avoid a lost wakeup */
		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
			schedule();
		remove_wait_queue(&lowcomms_recv_wait, &wait);
		set_current_state(TASK_RUNNING);

		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
			int ret;

			do {
				ret = receive_from_sock();

				/* Don't starve out everyone else */
				if (++count >= MAX_RX_MSG_COUNT) {
					schedule();
					count = 0;
				}
			} while (!kthread_should_stop() && ret >=0);
		}
		schedule();
	}

	return 0;
}
| 1130 | |||
/*
 * Send daemon.  Sleeps on the socket's wait queue until some node is on
 * the write_nodes list, then transmits via process_output_queue().
 * After an -EAGAIN (flagged by send_to_sock()) it first requeues every
 * node with refill_write_queue() so nothing pending is forgotten.
 */
static int dlm_sendd(void *data)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (write_list_empty())
			schedule();
		set_current_state(TASK_RUNNING);

		if (sctp_con.eagain_flag) {
			sctp_con.eagain_flag = 0;
			refill_write_queue();
		}
		process_output_queue();
	}

	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);

	return 0;
}
| 1154 | |||
/* Stop both comms kernel threads (blocks until each has exited) */
static void daemons_stop(void)
{
	kthread_stop(recv_task);
	kthread_stop(send_task);
}
| 1160 | |||
| 1161 | static int daemons_start(void) | ||
| 1162 | { | ||
| 1163 | struct task_struct *p; | ||
| 1164 | int error; | ||
| 1165 | |||
| 1166 | p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); | ||
| 1167 | error = IS_ERR(p); | ||
| 1168 | if (error) { | ||
| 1169 | log_print("can't start dlm_recvd %d", error); | ||
| 1170 | return error; | ||
| 1171 | } | ||
| 1172 | recv_task = p; | ||
| 1173 | |||
| 1174 | p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); | ||
| 1175 | error = IS_ERR(p); | ||
| 1176 | if (error) { | ||
| 1177 | log_print("can't start dlm_sendd %d", error); | ||
| 1178 | kthread_stop(recv_task); | ||
| 1179 | return error; | ||
| 1180 | } | ||
| 1181 | send_task = p; | ||
| 1182 | |||
| 1183 | return 0; | ||
| 1184 | } | ||
| 1185 | |||
/*
 * This is quite likely to sleep...
 */
/*
 * Bring comms up: create/bind the SCTP socket and start the recv/send
 * threads, then open the gate (accepting) so buffers may be handed out.
 * On failure everything socket-related is torn down via
 * close_connection().
 */
int dlm_lowcomms_start(void)
{
	int error;

	error = init_sock();
	if (error)
		goto fail_sock;
	error = daemons_start();
	if (error)
		goto fail_sock;
	atomic_set(&accepting, 1);
	return 0;

fail_sock:
	close_connection();
	return error;
}
| 1206 | |||
/* Set all the activity flags to prevent any socket activity. */

/*
 * Full comms shutdown: stop handing out buffers, stop both daemons,
 * drop all queued traffic, close the socket and free per-node state.
 */
void dlm_lowcomms_stop(void)
{
	atomic_set(&accepting, 0);
	/* 0x7 presumably sets every CF_* activity bit (CF_READ_PENDING is
	   bit-tested from these flags) - confirm against the flag enum */
	sctp_con.flags = 0x7;
	daemons_stop();
	clean_writequeues();
	close_connection();
	dealloc_nodeinfo();
	max_nodeid = 0;
}
| 1219 | |||
/* One-time initialisation of the static lowcomms state (locks, lists,
   wait queue); called at module load.  Always succeeds. */
int dlm_lowcomms_init(void)
{
	init_waitqueue_head(&lowcomms_recv_wait);
	spin_lock_init(&write_nodes_lock);
	INIT_LIST_HEAD(&write_nodes);
	init_rwsem(&nodeinfo_lock);
	return 0;
}
| 1228 | |||
| 1229 | void dlm_lowcomms_exit(void) | ||
| 1230 | { | ||
| 1231 | int i; | ||
| 1232 | |||
| 1233 | for (i = 0; i < dlm_local_count; i++) | ||
| 1234 | kfree(dlm_local_addr[i]); | ||
| 1235 | dlm_local_count = 0; | ||
| 1236 | dlm_local_nodeid = 0; | ||
| 1237 | } | ||
| 1238 | |||
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h new file mode 100644 index 000000000000..6c04bb09cfa8 --- /dev/null +++ b/fs/dlm/lowcomms.h | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
#ifndef __LOWCOMMS_DOT_H__
#define __LOWCOMMS_DOT_H__

/* one-time setup/teardown of lowcomms state (module load/unload) */
int dlm_lowcomms_init(void);
void dlm_lowcomms_exit(void);
/* bring the comms socket and daemon threads up/down */
int dlm_lowcomms_start(void);
void dlm_lowcomms_stop(void);
/* drop queued traffic and association state for one departed node */
int dlm_lowcomms_close(int nodeid);
/* reserve space for an outgoing message; commit sends it when filled */
void *dlm_lowcomms_get_buffer(int nodeid, int len, int allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh);

#endif				/* __LOWCOMMS_DOT_H__ */
| 26 | |||
diff --git a/fs/dlm/lvb_table.h b/fs/dlm/lvb_table.h new file mode 100644 index 000000000000..cc3e92f3feef --- /dev/null +++ b/fs/dlm/lvb_table.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
#ifndef __LVB_TABLE_DOT_H__
#define __LVB_TABLE_DOT_H__

/* 8x8 table of lock value block operations, defined elsewhere;
   presumably indexed by a pair of lock modes - confirm at the
   definition site */
extern const int dlm_lvb_operations[8][8];

#endif
diff --git a/fs/dlm/main.c b/fs/dlm/main.c new file mode 100644 index 000000000000..a8da8dc36b2e --- /dev/null +++ b/fs/dlm/main.c | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lockspace.h" | ||
| 16 | #include "lock.h" | ||
| 17 | #include "user.h" | ||
| 18 | #include "memory.h" | ||
| 19 | #include "lowcomms.h" | ||
| 20 | #include "config.h" | ||
| 21 | |||
#ifdef CONFIG_DLM_DEBUG
/* real implementations live in the debug build */
int dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
#else
/* no-op stubs so init_dlm()/exit_dlm() need no #ifdefs at call sites */
static inline int dlm_register_debugfs(void) { return 0; }
static inline void dlm_unregister_debugfs(void) { }
#endif
| 29 | |||
/*
 * Module init: bring each DLM subsystem up in dependency order and
 * unwind in exact reverse order on any failure, so nothing is left
 * half-initialised.
 */
static int __init init_dlm(void)
{
	int error;

	error = dlm_memory_init();
	if (error)
		goto out;

	error = dlm_lockspace_init();
	if (error)
		goto out_mem;

	error = dlm_config_init();
	if (error)
		goto out_lockspace;

	error = dlm_register_debugfs();
	if (error)
		goto out_config;

	error = dlm_lowcomms_init();
	if (error)
		goto out_debug;

	error = dlm_user_init();
	if (error)
		goto out_lowcomms;

	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);

	return 0;

 out_lowcomms:
	dlm_lowcomms_exit();
 out_debug:
	dlm_unregister_debugfs();
 out_config:
	dlm_config_exit();
 out_lockspace:
	dlm_lockspace_exit();
 out_mem:
	dlm_memory_exit();
 out:
	return error;
}
| 75 | |||
/* Module exit: tear the subsystems down again (user interface first) */
static void __exit exit_dlm(void)
{
	dlm_user_exit();
	dlm_lowcomms_exit();
	dlm_config_exit();
	dlm_memory_exit();
	dlm_lockspace_exit();
	dlm_unregister_debugfs();
}
| 85 | |||
| 86 | module_init(init_dlm); | ||
| 87 | module_exit(exit_dlm); | ||
| 88 | |||
| 89 | MODULE_DESCRIPTION("Distributed Lock Manager"); | ||
| 90 | MODULE_AUTHOR("Red Hat, Inc."); | ||
| 91 | MODULE_LICENSE("GPL"); | ||
| 92 | |||
| 93 | EXPORT_SYMBOL_GPL(dlm_new_lockspace); | ||
| 94 | EXPORT_SYMBOL_GPL(dlm_release_lockspace); | ||
| 95 | EXPORT_SYMBOL_GPL(dlm_lock); | ||
| 96 | EXPORT_SYMBOL_GPL(dlm_unlock); | ||
| 97 | |||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c new file mode 100644 index 000000000000..a3f7de7f3a8f --- /dev/null +++ b/fs/dlm/member.c | |||
| @@ -0,0 +1,327 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | #include "dlm_internal.h" | ||
| 14 | #include "lockspace.h" | ||
| 15 | #include "member.h" | ||
| 16 | #include "recoverd.h" | ||
| 17 | #include "recover.h" | ||
| 18 | #include "rcom.h" | ||
| 19 | #include "config.h" | ||
| 20 | |||
| 21 | /* | ||
| 22 | * Following called by dlm_recoverd thread | ||
| 23 | */ | ||
| 24 | |||
| 25 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | ||
| 26 | { | ||
| 27 | struct dlm_member *memb = NULL; | ||
| 28 | struct list_head *tmp; | ||
| 29 | struct list_head *newlist = &new->list; | ||
| 30 | struct list_head *head = &ls->ls_nodes; | ||
| 31 | |||
| 32 | list_for_each(tmp, head) { | ||
| 33 | memb = list_entry(tmp, struct dlm_member, list); | ||
| 34 | if (new->nodeid < memb->nodeid) | ||
| 35 | break; | ||
| 36 | } | ||
| 37 | |||
| 38 | if (!memb) | ||
| 39 | list_add_tail(newlist, head); | ||
| 40 | else { | ||
| 41 | /* FIXME: can use list macro here */ | ||
| 42 | newlist->prev = tmp->prev; | ||
| 43 | newlist->next = tmp; | ||
| 44 | tmp->prev->next = newlist; | ||
| 45 | tmp->prev = newlist; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | static int dlm_add_member(struct dlm_ls *ls, int nodeid) | ||
| 50 | { | ||
| 51 | struct dlm_member *memb; | ||
| 52 | int w; | ||
| 53 | |||
| 54 | memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); | ||
| 55 | if (!memb) | ||
| 56 | return -ENOMEM; | ||
| 57 | |||
| 58 | w = dlm_node_weight(ls->ls_name, nodeid); | ||
| 59 | if (w < 0) | ||
| 60 | return w; | ||
| 61 | |||
| 62 | memb->nodeid = nodeid; | ||
| 63 | memb->weight = w; | ||
| 64 | add_ordered_member(ls, memb); | ||
| 65 | ls->ls_num_nodes++; | ||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | |||
/* Move a departed member from ls_nodes onto ls_nodes_gone (kept there
   so dlm_is_removed() can consult it during recovery) */
static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
{
	list_move(&memb->list, &ls->ls_nodes_gone);
	ls->ls_num_nodes--;
}
| 74 | |||
| 75 | static int dlm_is_member(struct dlm_ls *ls, int nodeid) | ||
| 76 | { | ||
| 77 | struct dlm_member *memb; | ||
| 78 | |||
| 79 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
| 80 | if (memb->nodeid == nodeid) | ||
| 81 | return 1; | ||
| 82 | } | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | int dlm_is_removed(struct dlm_ls *ls, int nodeid) | ||
| 87 | { | ||
| 88 | struct dlm_member *memb; | ||
| 89 | |||
| 90 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | ||
| 91 | if (memb->nodeid == nodeid) | ||
| 92 | return 1; | ||
| 93 | } | ||
| 94 | return 0; | ||
| 95 | } | ||
| 96 | |||
| 97 | static void clear_memb_list(struct list_head *head) | ||
| 98 | { | ||
| 99 | struct dlm_member *memb; | ||
| 100 | |||
| 101 | while (!list_empty(head)) { | ||
| 102 | memb = list_entry(head->next, struct dlm_member, list); | ||
| 103 | list_del(&memb->list); | ||
| 104 | kfree(memb); | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
/* Free the whole current member list and reset the member count */
void dlm_clear_members(struct dlm_ls *ls)
{
	clear_memb_list(&ls->ls_nodes);
	ls->ls_num_nodes = 0;
}
| 113 | |||
/* Free the list of members that departed during recovery */
void dlm_clear_members_gone(struct dlm_ls *ls)
{
	clear_memb_list(&ls->ls_nodes_gone);
}
| 118 | |||
/*
 * Rebuild ls->ls_node_array: a flat array in which each member's nodeid
 * appears "weight" times (ls_total_weight entries in all).  If every
 * member has weight 0, all revert to weight 1 so the array is never
 * empty for a non-empty lockspace.
 * NOTE(review): on kmalloc failure this returns silently with
 * ls_node_array left NULL - callers presumably tolerate that; confirm.
 */
static void make_member_array(struct dlm_ls *ls)
{
	struct dlm_member *memb;
	int i, w, x = 0, total = 0, all_zero = 0, *array;

	kfree(ls->ls_node_array);
	ls->ls_node_array = NULL;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		if (memb->weight)
			total += memb->weight;
	}

	/* all nodes revert to weight of 1 if all have weight 0 */

	if (!total) {
		total = ls->ls_num_nodes;
		all_zero = 1;
	}

	ls->ls_total_weight = total;

	array = kmalloc(sizeof(int) * total, GFP_KERNEL);
	if (!array)
		return;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		if (!all_zero && !memb->weight)
			continue;

		if (all_zero)
			w = 1;
		else
			w = memb->weight;

		DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););

		/* repeat the nodeid once per unit of weight */
		for (i = 0; i < w; i++)
			array[x++] = memb->nodeid;
	}

	ls->ls_node_array = array;
}
| 162 | |||
/* send a status request to all members just to establish comms connections */

static int ping_members(struct dlm_ls *ls)
{
	struct dlm_member *memb;
	int error = 0;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		/* abort early if a new recovery superseded this one */
		error = dlm_recovery_stopped(ls);
		if (error)
			break;
		error = dlm_rcom_status(ls, memb->nodeid);
		if (error)
			break;
	}
	if (error)
		log_debug(ls, "ping_members aborted %d last nodeid %d",
			  error, ls->ls_recover_nodeid);
	return error;
}
| 183 | |||
/* Reconcile the lockspace member list with the new node list in rv:
   prune departed nodes, add new ones, recompute the lowest nodeid and
   the weighted node array, then verify comms to every member.
   *neg_out returns the number of departed members. */
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
{
	struct dlm_member *memb, *safe;
	int i, error, found, pos = 0, neg = 0, low = -1;

	/* move departed members from ls_nodes to ls_nodes_gone */

	list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
		found = 0;
		for (i = 0; i < rv->node_count; i++) {
			if (memb->nodeid == rv->nodeids[i]) {
				found = 1;
				break;
			}
		}

		if (!found) {
			neg++;
			dlm_remove_member(ls, memb);
			log_debug(ls, "remove member %d", memb->nodeid);
		}
	}

	/* add new members to ls_nodes */

	for (i = 0; i < rv->node_count; i++) {
		if (dlm_is_member(ls, rv->nodeids[i]))
			continue;
		dlm_add_member(ls, rv->nodeids[i]);
		pos++;
		log_debug(ls, "add member %d", rv->nodeids[i]);
	}

	/* recompute the lowest member nodeid */
	list_for_each_entry(memb, &ls->ls_nodes, list) {
		if (low == -1 || memb->nodeid < low)
			low = memb->nodeid;
	}
	ls->ls_low_nodeid = low;

	make_member_array(ls);
	dlm_set_recover_status(ls, DLM_RS_NODES);
	*neg_out = neg;

	/* establish connections to every member before continuing */
	error = ping_members(ls);
	if (error)
		goto out;

	error = dlm_recover_members_wait(ls);
 out:
	log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
	return error;
}
| 236 | |||
/*
 * Following called from lockspace.c
 */

/* Stop all lockspace activity in preparation for recovery.
   Returns 0.  The return value of test_and_clear_bit tells us whether
   this call is the one that actually transitioned the lockspace out of
   RUNNING (only that caller takes ls_in_recovery). */
int dlm_ls_stop(struct dlm_ls *ls)
{
	int new;

	/*
	 * A stop cancels any recovery that's in progress (see RECOVERY_STOP,
	 * dlm_recovery_stopped()) and prevents any new locks from being
	 * processed (see RUNNING, dlm_locking_stopped()).
	 */

	spin_lock(&ls->ls_recover_lock);
	set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
	new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
	ls->ls_recover_seq++;
	spin_unlock(&ls->ls_recover_lock);

	/*
	 * This in_recovery lock does two things:
	 *
	 * 1) Keeps this function from returning until all threads are out
	 *    of locking routines and locking is truly stopped.
	 * 2) Keeps any new requests from being processed until it's unlocked
	 *    when recovery is complete.
	 */

	if (new)
		down_write(&ls->ls_in_recovery);

	/*
	 * The recoverd suspend/resume makes sure that dlm_recoverd (if
	 * running) has noticed the clearing of RUNNING above and quit
	 * processing the previous recovery.  This will be true for all nodes
	 * before any nodes start the new recovery.
	 */

	dlm_recoverd_suspend(ls);
	ls->ls_recover_status = 0;
	dlm_recoverd_resume(ls);
	return 0;
}
| 281 | |||
/* Kick off a new recovery with a fresh node list obtained from the
   cluster configuration.  The lockspace must already be stopped
   (dlm_ls_stop).  Returns 0 on success or a negative errno; on failure
   both the dlm_recover struct and the nodeid array are freed here. */
int dlm_ls_start(struct dlm_ls *ls)
{
	struct dlm_recover *rv = NULL, *rv_old;
	int *ids = NULL;
	int error, count;

	rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
	if (!rv)
		return -ENOMEM;

	/* dlm_nodeid_list allocates ids and returns the node count
	   (or <= 0 on error/empty) */
	error = count = dlm_nodeid_list(ls->ls_name, &ids);
	if (error <= 0)
		goto fail;

	spin_lock(&ls->ls_recover_lock);

	/* the lockspace needs to be stopped before it can be started */

	if (!dlm_locking_stopped(ls)) {
		spin_unlock(&ls->ls_recover_lock);
		log_error(ls, "start ignored: lockspace running");
		error = -EINVAL;
		goto fail;
	}

	/* publish the new recovery args; any superseded, unprocessed
	   args are freed below */
	rv->nodeids = ids;
	rv->node_count = count;
	rv->seq = ++ls->ls_recover_seq;
	rv_old = ls->ls_recover_args;
	ls->ls_recover_args = rv;
	spin_unlock(&ls->ls_recover_lock);

	if (rv_old) {
		kfree(rv_old->nodeids);
		kfree(rv_old);
	}

	dlm_recoverd_kick(ls);
	return 0;

 fail:
	kfree(rv);
	kfree(ids);
	return error;
}
| 327 | |||
diff --git a/fs/dlm/member.h b/fs/dlm/member.h new file mode 100644 index 000000000000..927c08c19214 --- /dev/null +++ b/fs/dlm/member.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
#ifndef __MEMBER_DOT_H__
#define __MEMBER_DOT_H__

/* Stop/start lockspace operation around cluster membership changes. */
int dlm_ls_stop(struct dlm_ls *ls);
int dlm_ls_start(struct dlm_ls *ls);
/* Free the current and departed member lists. */
void dlm_clear_members(struct dlm_ls *ls);
void dlm_clear_members_gone(struct dlm_ls *ls);
/* Reconcile ls_nodes with rv's node list; *neg_out counts departures. */
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out);
/* Nonzero if nodeid departed in the last recovery (is on ls_nodes_gone). */
int dlm_is_removed(struct dlm_ls *ls, int nodeid);

#endif                  /* __MEMBER_DOT_H__ */
| 24 | |||
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c new file mode 100644 index 000000000000..989b608fd836 --- /dev/null +++ b/fs/dlm/memory.c | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "config.h" | ||
| 16 | #include "memory.h" | ||
| 17 | |||
| 18 | static kmem_cache_t *lkb_cache; | ||
| 19 | |||
| 20 | |||
| 21 | int dlm_memory_init(void) | ||
| 22 | { | ||
| 23 | int ret = 0; | ||
| 24 | |||
| 25 | lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), | ||
| 26 | __alignof__(struct dlm_lkb), 0, NULL, NULL); | ||
| 27 | if (!lkb_cache) | ||
| 28 | ret = -ENOMEM; | ||
| 29 | return ret; | ||
| 30 | } | ||
| 31 | |||
/* Destroy the lkb slab cache.  Guarded because init may have failed;
   NOTE(review): kmem_cache_destroy() of this era does not accept NULL. */
void dlm_memory_exit(void)
{
	if (lkb_cache)
		kmem_cache_destroy(lkb_cache);
}
| 37 | |||
| 38 | char *allocate_lvb(struct dlm_ls *ls) | ||
| 39 | { | ||
| 40 | char *p; | ||
| 41 | |||
| 42 | p = kmalloc(ls->ls_lvblen, GFP_KERNEL); | ||
| 43 | if (p) | ||
| 44 | memset(p, 0, ls->ls_lvblen); | ||
| 45 | return p; | ||
| 46 | } | ||
| 47 | |||
/* Free a lock value block; kfree(NULL) is a no-op so p may be NULL. */
void free_lvb(char *p)
{
	kfree(p);
}
| 52 | |||
| 53 | /* FIXME: have some minimal space built-in to rsb for the name and | ||
| 54 | kmalloc a separate name if needed, like dentries are done */ | ||
| 55 | |||
| 56 | struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) | ||
| 57 | { | ||
| 58 | struct dlm_rsb *r; | ||
| 59 | |||
| 60 | DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); | ||
| 61 | |||
| 62 | r = kmalloc(sizeof(*r) + namelen, GFP_KERNEL); | ||
| 63 | if (r) | ||
| 64 | memset(r, 0, sizeof(*r) + namelen); | ||
| 65 | return r; | ||
| 66 | } | ||
| 67 | |||
| 68 | void free_rsb(struct dlm_rsb *r) | ||
| 69 | { | ||
| 70 | if (r->res_lvbptr) | ||
| 71 | free_lvb(r->res_lvbptr); | ||
| 72 | kfree(r); | ||
| 73 | } | ||
| 74 | |||
| 75 | struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) | ||
| 76 | { | ||
| 77 | struct dlm_lkb *lkb; | ||
| 78 | |||
| 79 | lkb = kmem_cache_alloc(lkb_cache, GFP_KERNEL); | ||
| 80 | if (lkb) | ||
| 81 | memset(lkb, 0, sizeof(*lkb)); | ||
| 82 | return lkb; | ||
| 83 | } | ||
| 84 | |||
| 85 | void free_lkb(struct dlm_lkb *lkb) | ||
| 86 | { | ||
| 87 | if (lkb->lkb_flags & DLM_IFL_USER) { | ||
| 88 | struct dlm_user_args *ua; | ||
| 89 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | ||
| 90 | if (ua) { | ||
| 91 | if (ua->lksb.sb_lvbptr) | ||
| 92 | kfree(ua->lksb.sb_lvbptr); | ||
| 93 | kfree(ua); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | kmem_cache_free(lkb_cache, lkb); | ||
| 97 | } | ||
| 98 | |||
| 99 | struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) | ||
| 100 | { | ||
| 101 | struct dlm_direntry *de; | ||
| 102 | |||
| 103 | DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN, | ||
| 104 | printk("namelen = %d\n", namelen);); | ||
| 105 | |||
| 106 | de = kmalloc(sizeof(*de) + namelen, GFP_KERNEL); | ||
| 107 | if (de) | ||
| 108 | memset(de, 0, sizeof(*de) + namelen); | ||
| 109 | return de; | ||
| 110 | } | ||
| 111 | |||
/* Free a directory entry allocated by allocate_direntry(). */
void free_direntry(struct dlm_direntry *de)
{
	kfree(de);
}
| 116 | |||
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h new file mode 100644 index 000000000000..6ead158ccc5c --- /dev/null +++ b/fs/dlm/memory.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
#ifndef __MEMORY_DOT_H__
#define __MEMORY_DOT_H__

/* Create/destroy the slab cache backing lkb allocation. */
int dlm_memory_init(void);
void dlm_memory_exit(void);
/* rsb and direntry allocators append namelen bytes for the resource name. */
struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen);
void free_rsb(struct dlm_rsb *r);
struct dlm_lkb *allocate_lkb(struct dlm_ls *ls);
void free_lkb(struct dlm_lkb *l);
struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen);
void free_direntry(struct dlm_direntry *de);
/* Lock value blocks are sized by the lockspace's ls_lvblen. */
char *allocate_lvb(struct dlm_ls *ls);
void free_lvb(char *l);

#endif		/* __MEMORY_DOT_H__ */
| 29 | |||
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c new file mode 100644 index 000000000000..c9b1c3d535f4 --- /dev/null +++ b/fs/dlm/midcomms.c | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | /* | ||
| 15 | * midcomms.c | ||
| 16 | * | ||
| 17 | * This is the appallingly named "mid-level" comms layer. | ||
| 18 | * | ||
| 19 | * Its purpose is to take packets from the "real" comms layer, | ||
| 20 | * split them up into packets and pass them to the interested | ||
| 21 | * part of the locking mechanism. | ||
| 22 | * | ||
| 23 | * It also takes messages from the locking layer, formats them | ||
| 24 | * into packets and sends them to the comms layer. | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include "dlm_internal.h" | ||
| 28 | #include "lowcomms.h" | ||
| 29 | #include "config.h" | ||
| 30 | #include "rcom.h" | ||
| 31 | #include "lock.h" | ||
| 32 | #include "midcomms.h" | ||
| 33 | |||
| 34 | |||
/* Copy len bytes out of a circular buffer of size limit, starting at
   offset and wrapping to the start of the buffer if needed. */
static void copy_from_cb(void *dst, const void *base, unsigned offset,
			 unsigned len, unsigned limit)
{
	unsigned first = len;

	/* clamp the first chunk to the end of the ring */
	if (offset + first > limit)
		first = limit - offset;
	memcpy(dst, (const char *)base + offset, first);

	/* wrap around and copy the remainder from the start */
	if (len > first)
		memcpy((char *)dst + first, base, len - first);
}
| 47 | |||
/*
 * Called from the low-level comms layer to process a buffer of
 * commands.
 *
 * Only complete messages are processed here, any "spare" bytes from
 * the end of a buffer are saved and tacked onto the front of the next
 * message that comes in. I doubt this will happen very often but we
 * need to be able to cope with it and I don't want the task to be waiting
 * for packets to come in when there is useful work to be done.
 */

/* Returns the number of bytes consumed, 0 if no complete message was
   available, or a negative errno for a malformed message. */
int dlm_process_incoming_buffer(int nodeid, const void *base,
				unsigned offset, unsigned len, unsigned limit)
{
	unsigned char __tmp[DLM_INBUF_LEN];
	struct dlm_header *msg = (struct dlm_header *) __tmp;
	int ret = 0;
	int err = 0;
	uint16_t msglen;
	uint32_t lockspace;

	while (len > sizeof(struct dlm_header)) {

		/* Copy just the header to check the total length.  The
		   message may wrap around the end of the buffer back to the
		   start, so we need to use a temp buffer and copy_from_cb. */

		copy_from_cb(msg, base, offset, sizeof(struct dlm_header),
			     limit);

		msglen = le16_to_cpu(msg->h_length);
		lockspace = msg->h_lockspace;

		/* reject messages shorter than a header or larger than the
		   configured buffer size */
		err = -EINVAL;
		if (msglen < sizeof(struct dlm_header))
			break;
		err = -E2BIG;
		if (msglen > dlm_config.buffer_size) {
			log_print("message size %d from %d too big, buf len %d",
				  msglen, nodeid, len);
			break;
		}
		err = 0;

		/* If only part of the full message is contained in this
		   buffer, then do nothing and wait for lowcomms to call
		   us again later with more data.  We return 0 meaning
		   we've consumed none of the input buffer. */

		if (msglen > len)
			break;

		/* Allocate a larger temp buffer if the full message won't fit
		   in the buffer on the stack (which should work for most
		   ordinary messages). */

		if (msglen > sizeof(__tmp) &&
		    msg == (struct dlm_header *) __tmp) {
			/* NOTE(review): on allocation failure we return the
			   bytes consumed so far (not an error); the rest is
			   retried on the next call */
			msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
			if (msg == NULL)
				return ret;
		}

		copy_from_cb(msg, base, offset, msglen, limit);

		BUG_ON(lockspace != msg->h_lockspace);

		ret += msglen;
		offset += msglen;
		offset &= (limit - 1);	/* assumes limit is a power of two */
		len -= msglen;

		/* dispatch by command type */
		switch (msg->h_cmd) {
		case DLM_MSG:
			dlm_receive_message(msg, nodeid, 0);
			break;

		case DLM_RCOM:
			dlm_receive_rcom(msg, nodeid);
			break;

		default:
			log_print("unknown msg type %x from %u: %u %u %u %u",
				  msg->h_cmd, nodeid, msglen, len, offset, ret);
		}
	}

	/* free the heap buffer if we had to upgrade from the stack one */
	if (msg != (struct dlm_header *) __tmp)
		kfree(msg);

	return err ? err : ret;
}
| 140 | |||
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h new file mode 100644 index 000000000000..95852a5f111d --- /dev/null +++ b/fs/dlm/midcomms.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
#ifndef __MIDCOMMS_DOT_H__
#define __MIDCOMMS_DOT_H__

/* Parse complete DLM messages out of a (possibly wrapping) receive
   buffer; returns bytes consumed, 0 if more data is needed, or -errno. */
int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset,
				unsigned len, unsigned limit);

#endif				/* __MIDCOMMS_DOT_H__ */
| 21 | |||
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c new file mode 100644 index 000000000000..518239a8b1e9 --- /dev/null +++ b/fs/dlm/rcom.c | |||
| @@ -0,0 +1,472 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lockspace.h" | ||
| 16 | #include "member.h" | ||
| 17 | #include "lowcomms.h" | ||
| 18 | #include "midcomms.h" | ||
| 19 | #include "rcom.h" | ||
| 20 | #include "recover.h" | ||
| 21 | #include "dir.h" | ||
| 22 | #include "config.h" | ||
| 23 | #include "memory.h" | ||
| 24 | #include "lock.h" | ||
| 25 | #include "util.h" | ||
| 26 | |||
| 27 | |||
/* Wait predicate for dlm_wait_function(): true once a matching rcom
   reply has been copied into ls_recover_buf (see receive_sync_reply). */
static int rcom_response(struct dlm_ls *ls)
{
	return test_bit(LSFL_RCOM_READY, &ls->ls_flags);
}
| 32 | |||
/* Build a zeroed rcom message of the given type with len payload bytes
   in a lowcomms send buffer bound for to_nodeid.  On success fills in
   *rc_ret/*mh_ret and returns 0; the caller sets any payload and then
   sends with send_rcom().  Returns -ENOBUFS if no buffer is available. */
static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
		       struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	char *mb;
	int mb_len = sizeof(struct dlm_rcom) + len;

	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
	if (!mh) {
		log_print("create_rcom to %d type %d len %d ENOBUFS",
			  to_nodeid, type, len);
		return -ENOBUFS;
	}
	memset(mb, 0, mb_len);

	rc = (struct dlm_rcom *) mb;

	/* common wire header for all rcom messages */
	rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
	rc->rc_header.h_lockspace = ls->ls_global_id;
	rc->rc_header.h_nodeid = dlm_our_nodeid();
	rc->rc_header.h_length = mb_len;
	rc->rc_header.h_cmd = DLM_RCOM;

	rc->rc_type = type;

	*mh_ret = mh;
	*rc_ret = rc;
	return 0;
}
| 63 | |||
/* Convert the rcom to wire byte order and hand the buffer to lowcomms
   for transmission. */
static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
		      struct dlm_rcom *rc)
{
	dlm_rcom_out(rc);
	dlm_lowcomms_commit_buffer(mh);
}
| 70 | |||
/* When replying to a status request, a node also sends back its
   configuration values.  The requesting node then checks that the remote
   node is configured the same way as itself. */

static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
{
	rf->rf_lvblen = ls->ls_lvblen;
	rf->rf_lsflags = ls->ls_exflags;
}
| 80 | |||
/* Compare a remote node's reported lvb length and lockspace flags with
   our own; returns 0 on match, -EINVAL (with a log message) on mismatch. */
static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
{
	if (rf->rf_lvblen != ls->ls_lvblen ||
	    rf->rf_lsflags != ls->ls_exflags) {
		log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
			  ls->ls_lvblen, ls->ls_exflags,
			  nodeid, rf->rf_lvblen, rf->rf_lsflags);
		return -EINVAL;
	}
	return 0;
}
| 92 | |||
/* Send a STATUS request to nodeid and wait for the reply, which ends up
   in ls_recover_buf; the caller reads rc_result there for the remote
   recovery status.  A local nodeid is answered directly without comms.
   Also verifies the remote node's configuration matches ours. */
int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	int error = 0;

	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
	ls->ls_recover_nodeid = nodeid;

	/* asking ourselves: answer locally, skip the comms round-trip */
	if (nodeid == dlm_our_nodeid()) {
		rc = (struct dlm_rcom *) ls->ls_recover_buf;
		rc->rc_result = dlm_recover_status(ls);
		goto out;
	}

	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
	if (error)
		goto out;
	/* tag the request so a stale reply can be rejected */
	rc->rc_id = ++ls->ls_rcom_seq;

	send_rcom(ls, mh, rc);

	error = dlm_wait_function(ls, &rcom_response);
	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
	if (error)
		goto out;

	rc = (struct dlm_rcom *) ls->ls_recover_buf;

	if (rc->rc_result == -ESRCH) {
		/* we pretend the remote lockspace exists with 0 status */
		log_debug(ls, "remote node %d not ready", nodeid);
		rc->rc_result = 0;
	} else
		error = check_config(ls, (struct rcom_config *) rc->rc_buf,
				     nodeid);
	/* the caller looks at rc_result for the remote recovery status */
 out:
	return error;
}
| 133 | |||
/* Answer a STATUS request: reply with our recovery status plus our
   configuration values (see make_config) for the requester to check. */
static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	int error, nodeid = rc_in->rc_header.h_nodeid;

	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
			    sizeof(struct rcom_config), &rc, &mh);
	if (error)
		return;
	/* echo the requester's id so it can match the reply */
	rc->rc_id = rc_in->rc_id;
	rc->rc_result = dlm_recover_status(ls);
	make_config(ls, (struct rcom_config *) rc->rc_buf);

	send_rcom(ls, mh, rc);
}
| 150 | |||
| 151 | static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
| 152 | { | ||
| 153 | if (rc_in->rc_id != ls->ls_rcom_seq) { | ||
| 154 | log_debug(ls, "reject old reply %d got %llx wanted %llx", | ||
| 155 | rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq); | ||
| 156 | return; | ||
| 157 | } | ||
| 158 | memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length); | ||
| 159 | set_bit(LSFL_RCOM_READY, &ls->ls_flags); | ||
| 160 | wake_up(&ls->ls_wait_general); | ||
| 161 | } | ||
| 162 | |||
/* STATUS replies are handled as generic synchronous replies. */
static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	receive_sync_reply(ls, rc_in);
}
| 167 | |||
/* Request the next batch of resource names from nodeid's directory,
   continuing after last_name/last_len; the reply lands in
   ls_recover_buf.  A local nodeid is answered directly without comms. */
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	int error = 0, len = sizeof(struct dlm_rcom);

	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
	ls->ls_recover_nodeid = nodeid;

	/* local request: fill the recover buffer in place */
	if (nodeid == dlm_our_nodeid()) {
		dlm_copy_master_names(ls, last_name, last_len,
				      ls->ls_recover_buf + len,
				      dlm_config.buffer_size - len, nodeid);
		goto out;
	}

	error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
	if (error)
		goto out;
	memcpy(rc->rc_buf, last_name, last_len);
	/* tag the request so a stale reply can be rejected */
	rc->rc_id = ++ls->ls_rcom_seq;

	send_rcom(ls, mh, rc);

	error = dlm_wait_function(ls, &rcom_response);
	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
 out:
	return error;
}
| 197 | |||
| 198 | static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
| 199 | { | ||
| 200 | struct dlm_rcom *rc; | ||
| 201 | struct dlm_mhandle *mh; | ||
| 202 | int error, inlen, outlen; | ||
| 203 | int nodeid = rc_in->rc_header.h_nodeid; | ||
| 204 | uint32_t status = dlm_recover_status(ls); | ||
| 205 | |||
| 206 | /* | ||
| 207 | * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while | ||
| 208 | * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes). | ||
| 209 | * It could only happen in rare cases where we get a late NAMES | ||
| 210 | * message from a previous instance of recovery. | ||
| 211 | */ | ||
| 212 | |||
| 213 | if (!(status & DLM_RS_NODES)) { | ||
| 214 | log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid); | ||
| 215 | return; | ||
| 216 | } | ||
| 217 | |||
| 218 | nodeid = rc_in->rc_header.h_nodeid; | ||
| 219 | inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); | ||
| 220 | outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); | ||
| 221 | |||
| 222 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); | ||
| 223 | if (error) | ||
| 224 | return; | ||
| 225 | rc->rc_id = rc_in->rc_id; | ||
| 226 | |||
| 227 | dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, | ||
| 228 | nodeid); | ||
| 229 | send_rcom(ls, mh, rc); | ||
| 230 | } | ||
| 231 | |||
/* NAMES replies are handled as generic synchronous replies. */
static void receive_rcom_names_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	receive_sync_reply(ls, rc_in);
}
| 236 | |||
/* Ask the directory node for the master of resource r.  Asynchronous:
   the rsb pointer is carried in rc_id so the reply handler can find it
   (see receive_rcom_lookup_reply / dlm_recover_master_reply). */
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	struct dlm_ls *ls = r->res_ls;
	int error;

	error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length,
			    &rc, &mh);
	if (error)
		goto out;
	memcpy(rc->rc_buf, r->res_name, r->res_length);
	/* stash the rsb pointer so the reply can be routed back to it */
	rc->rc_id = (unsigned long) r;

	send_rcom(ls, mh, rc);
 out:
	return error;
}
| 255 | |||
/* Answer a LOOKUP request: resolve the resource name in the payload to
   its master nodeid and reply with the result (a negative errno from
   the directory lookup is returned in rc_result). */
static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
	int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);

	error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
	if (error)
		return;

	error = dlm_dir_lookup(ls, nodeid, rc_in->rc_buf, len, &ret_nodeid);
	if (error)
		ret_nodeid = error;
	rc->rc_result = ret_nodeid;
	/* echo rc_id (the requester's rsb pointer) back unchanged */
	rc->rc_id = rc_in->rc_id;

	send_rcom(ls, mh, rc);
}
| 275 | |||
/* Route a LOOKUP reply to the recovery code, which matches it to the
   waiting rsb via rc_id. */
static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	dlm_recover_master_reply(ls, rc_in);
}
| 280 | |||
/* Serialize an lkb (plus its rsb's name and, if present, its lvb) into
   the wire rcom_lock structure for sending to the new master. */
static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb,
			   struct rcom_lock *rl)
{
	memset(rl, 0, sizeof(*rl));

	rl->rl_ownpid = lkb->lkb_ownpid;
	rl->rl_lkid = lkb->lkb_id;
	rl->rl_exflags = lkb->lkb_exflags;
	rl->rl_flags = lkb->lkb_flags;
	rl->rl_lvbseq = lkb->lkb_lvbseq;
	rl->rl_rqmode = lkb->lkb_rqmode;
	rl->rl_grmode = lkb->lkb_grmode;
	rl->rl_status = lkb->lkb_status;
	rl->rl_wait_type = lkb->lkb_wait_type;

	/* record which ast callbacks are registered as flag bits */
	if (lkb->lkb_bastaddr)
		rl->rl_asts |= AST_BAST;
	if (lkb->lkb_astaddr)
		rl->rl_asts |= AST_COMP;

	rl->rl_namelen = r->res_length;
	memcpy(rl->rl_name, r->res_name, r->res_length);

	/* FIXME: might we have an lvb without DLM_LKF_VALBLK set ?
	   If so, receive_rcom_lock_args() won't take this copy. */

	if (lkb->lkb_lvbptr)
		memcpy(rl->rl_lvb, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
}
| 310 | |||
/* Send one of our locks on resource r to the new master during
   recovery.  Asynchronous: rc_id carries the rsb pointer so the reply
   can be matched back (see receive_rcom_lock_reply). */
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	struct dlm_ls *ls = r->res_ls;
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	struct rcom_lock *rl;
	int error, len = sizeof(struct rcom_lock);

	/* the lvb (if any) is appended after the fixed rcom_lock fields */
	if (lkb->lkb_lvbptr)
		len += ls->ls_lvblen;

	error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh);
	if (error)
		goto out;

	rl = (struct rcom_lock *) rc->rc_buf;
	pack_rcom_lock(r, lkb, rl);
	rc->rc_id = (unsigned long) r;

	send_rcom(ls, mh, rc);
 out:
	return error;
}
| 334 | |||
/* Handle DLM_RCOM_LOCK on the new master: install the master copy of the
   lock, then echo the (updated) rcom_lock back to the sender as
   DLM_RCOM_LOCK_REPLY.  A reply-allocation failure is silently dropped;
   the sender will not see a reply for this lock. */

static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	int error, nodeid = rc_in->rc_header.h_nodeid;

	dlm_recover_master_copy(ls, rc_in);

	error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY,
			    sizeof(struct rcom_lock), &rc, &mh);
	if (error)
		return;

	/* We send back the same rcom_lock struct we received, but
	   dlm_recover_master_copy() has filled in rl_remid and rl_result */

	memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
	rc->rc_id = rc_in->rc_id;

	send_rcom(ls, mh, rc);
}
| 356 | |||
/* Handle DLM_RCOM_LOCK_REPLY: record the remote lkid/result for the lock
   we sent.  Ignored unless directory recovery (DLM_RS_DIR) has completed,
   which guards against replies from a stale recovery pass. */

static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
	uint32_t status = dlm_recover_status(ls);

	if (!(status & DLM_RS_DIR)) {
		log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
			  rc_in->rc_header.h_nodeid);
		return;
	}

	dlm_recover_process_copy(ls, rc_in);
}
| 369 | |||
/* Reply to an rcom for a lockspace we don't have (yet): build a minimal
   DLM_RCOM_STATUS_REPLY by hand with rc_result = -ESRCH so the sender
   keeps polling instead of erroring out.  Built manually because
   create_rcom() needs a lockspace struct we don't have here. */

static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
{
	struct dlm_rcom *rc;
	struct dlm_mhandle *mh;
	char *mb;
	int mb_len = sizeof(struct dlm_rcom);

	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
	if (!mh)
		return -ENOBUFS;
	memset(mb, 0, mb_len);

	rc = (struct dlm_rcom *) mb;

	rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
	/* echo the sender's lockspace id so it can match the reply */
	rc->rc_header.h_lockspace = rc_in->rc_header.h_lockspace;
	rc->rc_header.h_nodeid = dlm_our_nodeid();
	rc->rc_header.h_length = mb_len;
	rc->rc_header.h_cmd = DLM_RCOM;

	rc->rc_type = DLM_RCOM_STATUS_REPLY;
	rc->rc_id = rc_in->rc_id;
	rc->rc_result = -ESRCH;

	dlm_rcom_out(rc);
	dlm_lowcomms_commit_buffer(mh);

	return 0;
}
| 399 | |||
| 400 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special | ||
| 401 | recovery-only comms are sent through here. */ | ||
| 402 | |||
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
   recovery-only comms are sent through here. */

/* Entry point for all incoming recovery messages: validate the source,
   find the lockspace, and dispatch on rc_type.  Holds a lockspace
   reference from dlm_find_lockspace_global() that is dropped at "out". */

void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
{
	struct dlm_rcom *rc = (struct dlm_rcom *) hd;
	struct dlm_ls *ls;

	/* endian-convert the rcom-specific fields in place */
	dlm_rcom_in(rc);

	/* If the lockspace doesn't exist then still send a status message
	   back; it's possible that it just doesn't have its global_id yet. */

	ls = dlm_find_lockspace_global(hd->h_lockspace);
	if (!ls) {
		log_print("lockspace %x from %d not found",
			  hd->h_lockspace, nodeid);
		send_ls_not_ready(nodeid, rc);
		return;
	}

	/* DLM_RCOM_STATUS is always allowed so peers can poll us */
	if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
		log_error(ls, "ignoring recovery message %x from %d",
			  rc->rc_type, nodeid);
		goto out;
	}

	/* transport-level sender must match the header's claimed sender */
	if (nodeid != rc->rc_header.h_nodeid) {
		log_error(ls, "bad rcom nodeid %d from %d",
			  rc->rc_header.h_nodeid, nodeid);
		goto out;
	}

	switch (rc->rc_type) {
	case DLM_RCOM_STATUS:
		receive_rcom_status(ls, rc);
		break;

	case DLM_RCOM_NAMES:
		receive_rcom_names(ls, rc);
		break;

	case DLM_RCOM_LOOKUP:
		receive_rcom_lookup(ls, rc);
		break;

	case DLM_RCOM_LOCK:
		receive_rcom_lock(ls, rc);
		break;

	case DLM_RCOM_STATUS_REPLY:
		receive_rcom_status_reply(ls, rc);
		break;

	case DLM_RCOM_NAMES_REPLY:
		receive_rcom_names_reply(ls, rc);
		break;

	case DLM_RCOM_LOOKUP_REPLY:
		receive_rcom_lookup_reply(ls, rc);
		break;

	case DLM_RCOM_LOCK_REPLY:
		receive_rcom_lock_reply(ls, rc);
		break;

	default:
		DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
	}
 out:
	dlm_put_lockspace(ls);
}
| 472 | |||
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h new file mode 100644 index 000000000000..d7984321ff41 --- /dev/null +++ b/fs/dlm/rcom.h | |||
| @@ -0,0 +1,24 @@ | |||
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#ifndef __RCOM_DOT_H__
#define __RCOM_DOT_H__

/* recovery communication (rcom) interface: request/receive recovery
   state, directory names, master lookups and lock copies between nodes */

int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len);
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
void dlm_receive_rcom(struct dlm_header *hd, int nodeid);

#endif
| 24 | |||
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c new file mode 100644 index 000000000000..a5e6d184872e --- /dev/null +++ b/fs/dlm/recover.c | |||
| @@ -0,0 +1,765 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lockspace.h" | ||
| 16 | #include "dir.h" | ||
| 17 | #include "config.h" | ||
| 18 | #include "ast.h" | ||
| 19 | #include "memory.h" | ||
| 20 | #include "rcom.h" | ||
| 21 | #include "lock.h" | ||
| 22 | #include "lowcomms.h" | ||
| 23 | #include "member.h" | ||
| 24 | #include "recover.h" | ||
| 25 | |||
| 26 | |||
| 27 | /* | ||
| 28 | * Recovery waiting routines: these functions wait for a particular reply from | ||
| 29 | * a remote node, or for the remote node to report a certain status. They need | ||
| 30 | * to abort if the lockspace is stopped indicating a node has failed (perhaps | ||
| 31 | * the one being waited for). | ||
| 32 | */ | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Wait until given function returns non-zero or lockspace is stopped | ||
| 36 | * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another | ||
| 37 | * function thinks it could have completed the waited-on task, they should wake | ||
| 38 | * up ls_wait_general to get an immediate response rather than waiting for the | ||
| 39 | * timer to detect the result. A timer wakes us up periodically while waiting | ||
| 40 | * to see if we should abort due to a node failure. This should only be called | ||
| 41 | * by the dlm_recoverd thread. | ||
| 42 | */ | ||
| 43 | |||
/* Periodic timer callback for dlm_wait_function(): re-arm the timer and
   wake the waiter so it can re-check for recovery abort.  (Uses the
   pre-timer_setup() API: data carries the lockspace pointer.) */

static void dlm_wait_timer_fn(unsigned long data)
{
	struct dlm_ls *ls = (struct dlm_ls *) data;
	mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ));
	wake_up(&ls->ls_wait_general);
}
| 50 | |||
/* Sleep until testfn(ls) is true or recovery is stopped.  A periodic
   timer wakes the waiter so a recovery abort is noticed even if nobody
   wakes ls_wait_general explicitly.  Only called from dlm_recoverd.
   Returns 0 on success, -EINTR if recovery was stopped. */

int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
{
	int error = 0;

	init_timer(&ls->ls_timer);
	ls->ls_timer.function = dlm_wait_timer_fn;
	ls->ls_timer.data = (long) ls;
	ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
	add_timer(&ls->ls_timer);

	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
	del_timer_sync(&ls->ls_timer);

	if (dlm_recovery_stopped(ls)) {
		log_debug(ls, "dlm_wait_function aborted");
		error = -EINTR;
	}
	return error;
}
| 70 | |||
| 71 | /* | ||
| 72 | * An efficient way for all nodes to wait for all others to have a certain | ||
| 73 | * status. The node with the lowest nodeid polls all the others for their | ||
| 74 | * status (wait_status_all) and all the others poll the node with the low id | ||
| 75 | * for its accumulated result (wait_status_low). When all nodes have set | ||
| 76 | * status flag X, then status flag X_ALL will be set on the low nodeid. | ||
| 77 | */ | ||
| 78 | |||
| 79 | uint32_t dlm_recover_status(struct dlm_ls *ls) | ||
| 80 | { | ||
| 81 | uint32_t status; | ||
| 82 | spin_lock(&ls->ls_recover_lock); | ||
| 83 | status = ls->ls_recover_status; | ||
| 84 | spin_unlock(&ls->ls_recover_lock); | ||
| 85 | return status; | ||
| 86 | } | ||
| 87 | |||
/* OR the given flag(s) into the lockspace's recovery status, under
   ls_recover_lock.  Flags accumulate; nothing here clears them. */

void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
{
	spin_lock(&ls->ls_recover_lock);
	ls->ls_recover_status |= status;
	spin_unlock(&ls->ls_recover_lock);
}
| 94 | |||
/* Low-nodeid side of the status rendezvous: poll every member until each
   reports wait_status.  dlm_rcom_status() leaves the peer's reply in
   ls_recover_buf, which rc points into.  Polling backs off from 20ms up
   to a 1s cap.  Returns 0, -EINTR on recovery stop, or an rcom error. */

static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
{
	struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf;
	struct dlm_member *memb;
	int error = 0, delay;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		delay = 0;
		for (;;) {
			if (dlm_recovery_stopped(ls)) {
				error = -EINTR;
				goto out;
			}

			/* synchronous: reply lands in ls_recover_buf */
			error = dlm_rcom_status(ls, memb->nodeid);
			if (error)
				goto out;

			if (rc->rc_result & wait_status)
				break;
			if (delay < 1000)
				delay += 20;
			msleep(delay);
		}
	}
 out:
	return error;
}
| 123 | |||
/* Other-nodes side of the status rendezvous: poll only the low nodeid
   until it reports the accumulated wait_status (the X_ALL flag).  Same
   20ms..1s backoff as wait_status_all().  Returns 0, -EINTR on recovery
   stop, or an rcom error. */

static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
{
	struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf;
	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;

	for (;;) {
		if (dlm_recovery_stopped(ls)) {
			error = -EINTR;
			goto out;
		}

		error = dlm_rcom_status(ls, nodeid);
		if (error)
			break;

		if (rc->rc_result & wait_status)
			break;
		if (delay < 1000)
			delay += 20;
		msleep(delay);
	}
 out:
	return error;
}
| 148 | |||
/* Barrier helper: the low nodeid polls everyone for `status` and then
   publishes `status << 1` (the X_ALL flag); everyone else polls the low
   nodeid for that X_ALL flag.  Relies on the DLM_RS_* flag layout where
   each X_ALL bit is X shifted left by one. */

static int wait_status(struct dlm_ls *ls, uint32_t status)
{
	uint32_t status_all = status << 1;
	int error;

	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
		error = wait_status_all(ls, status);
		if (!error)
			dlm_set_recover_status(ls, status_all);
	} else
		error = wait_status_low(ls, status_all);

	return error;
}
| 163 | |||
/* Cluster-wide barriers for the successive recovery phases: members,
   directory rebuild, lock rebuild, done.  Each blocks until all nodes
   have set the corresponding DLM_RS_* flag (see wait_status above). */

int dlm_recover_members_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_NODES);
}

int dlm_recover_directory_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_DIR);
}

int dlm_recover_locks_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_LOCKS);
}

int dlm_recover_done_wait(struct dlm_ls *ls)
{
	return wait_status(ls, DLM_RS_DONE);
}
| 183 | |||
| 184 | /* | ||
| 185 | * The recover_list contains all the rsb's for which we've requested the new | ||
| 186 | * master nodeid. As replies are returned from the resource directories the | ||
| 187 | * rsb's are removed from the list. When the list is empty we're done. | ||
| 188 | * | ||
| 189 | * The recover_list is later similarly used for all rsb's for which we've sent | ||
| 190 | * new lkb's and need to receive new corresponding lkid's. | ||
| 191 | * | ||
| 192 | * We use the address of the rsb struct as a simple local identifier for the | ||
| 193 | * rsb so we can match an rcom reply with the rsb it was sent for. | ||
| 194 | */ | ||
| 195 | |||
| 196 | static int recover_list_empty(struct dlm_ls *ls) | ||
| 197 | { | ||
| 198 | int empty; | ||
| 199 | |||
| 200 | spin_lock(&ls->ls_recover_list_lock); | ||
| 201 | empty = list_empty(&ls->ls_recover_list); | ||
| 202 | spin_unlock(&ls->ls_recover_list_lock); | ||
| 203 | |||
| 204 | return empty; | ||
| 205 | } | ||
| 206 | |||
/* Put an rsb on the recovery list (if not already there) and take a
   reference so it can't go away while a reply is outstanding; the
   reference is dropped by recover_list_del()/recover_list_clear(). */

static void recover_list_add(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;

	spin_lock(&ls->ls_recover_list_lock);
	if (list_empty(&r->res_recover_list)) {
		list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
		ls->ls_recover_list_count++;
		dlm_hold_rsb(r);
	}
	spin_unlock(&ls->ls_recover_list_lock);
}
| 219 | |||
/* Remove an rsb from the recovery list and drop the reference taken by
   recover_list_add().  dlm_put_rsb() is called outside the spinlock. */

static void recover_list_del(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;

	spin_lock(&ls->ls_recover_list_lock);
	list_del_init(&r->res_recover_list);
	ls->ls_recover_list_count--;
	spin_unlock(&ls->ls_recover_list_lock);

	dlm_put_rsb(r);
}
| 231 | |||
/* Look up a recovery-list rsb by id.  The id is the rsb's own address
   (set as rc_id in dlm_send_rcom_lookup/dlm_send_rcom_lock), so a plain
   pointer-value comparison suffices.  Returns NULL if not found. */

static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
{
	struct dlm_rsb *r = NULL;

	spin_lock(&ls->ls_recover_list_lock);

	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
		if (id == (unsigned long) r)
			goto out;
	}
	r = NULL;
 out:
	spin_unlock(&ls->ls_recover_list_lock);
	return r;
}
| 247 | |||
/* Empty the recovery list (e.g. when recovery is aborted), dropping each
   held reference.  A non-zero residual count afterwards indicates a
   bookkeeping bug and is logged, then forced to zero. */

static void recover_list_clear(struct dlm_ls *ls)
{
	struct dlm_rsb *r, *s;

	spin_lock(&ls->ls_recover_list_lock);
	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
		list_del_init(&r->res_recover_list);
		dlm_put_rsb(r);
		ls->ls_recover_list_count--;
	}

	if (ls->ls_recover_list_count != 0) {
		log_error(ls, "warning: recover_list_count %d",
			  ls->ls_recover_list_count);
		ls->ls_recover_list_count = 0;
	}
	spin_unlock(&ls->ls_recover_list_lock);
}
| 266 | |||
| 267 | |||
| 268 | /* Master recovery: find new master node for rsb's that were | ||
| 269 | mastered on nodes that have been removed. | ||
| 270 | |||
| 271 | dlm_recover_masters | ||
| 272 | recover_master | ||
| 273 | dlm_send_rcom_lookup -> receive_rcom_lookup | ||
| 274 | dlm_dir_lookup | ||
| 275 | receive_rcom_lookup_reply <- | ||
| 276 | dlm_recover_master_reply | ||
| 277 | set_new_master | ||
| 278 | set_master_lkbs | ||
| 279 | set_lock_master | ||
| 280 | */ | ||
| 281 | |||
| 282 | /* | ||
| 283 | * Set the lock master for all LKBs in a lock queue | ||
| 284 | * If we are the new master of the rsb, we may have received new | ||
| 285 | * MSTCPY locks from other nodes already which we need to ignore | ||
| 286 | * when setting the new nodeid. | ||
| 287 | */ | ||
| 288 | |||
| 289 | static void set_lock_master(struct list_head *queue, int nodeid) | ||
| 290 | { | ||
| 291 | struct dlm_lkb *lkb; | ||
| 292 | |||
| 293 | list_for_each_entry(lkb, queue, lkb_statequeue) | ||
| 294 | if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) | ||
| 295 | lkb->lkb_nodeid = nodeid; | ||
| 296 | } | ||
| 297 | |||
| 298 | static void set_master_lkbs(struct dlm_rsb *r) | ||
| 299 | { | ||
| 300 | set_lock_master(&r->res_grantqueue, r->res_nodeid); | ||
| 301 | set_lock_master(&r->res_convertqueue, r->res_nodeid); | ||
| 302 | set_lock_master(&r->res_waitqueue, r->res_nodeid); | ||
| 303 | } | ||
| 304 | |||
| 305 | /* | ||
| 306 | * Propogate the new master nodeid to locks | ||
| 307 | * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. | ||
| 308 | * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which | ||
| 309 | * rsb's to consider. | ||
| 310 | */ | ||
| 311 | |||
/* Record the new master for an rsb and tag it for the later recovery
   passes: NEW_MASTER for dlm_recover_locks(), NEW_MASTER2 for
   recover_lvb()/set_locks_purged().  nodeid 0 means we are the master. */

static void set_new_master(struct dlm_rsb *r, int nodeid)
{
	lock_rsb(r);
	r->res_nodeid = nodeid;
	set_master_lkbs(r);
	rsb_set_flag(r, RSB_NEW_MASTER);
	rsb_set_flag(r, RSB_NEW_MASTER2);
	unlock_rsb(r);
}
| 321 | |||
| 322 | /* | ||
| 323 | * We do async lookups on rsb's that need new masters. The rsb's | ||
| 324 | * waiting for a lookup reply are kept on the recover_list. | ||
| 325 | */ | ||
| 326 | |||
| 327 | static int recover_master(struct dlm_rsb *r) | ||
| 328 | { | ||
| 329 | struct dlm_ls *ls = r->res_ls; | ||
| 330 | int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); | ||
| 331 | |||
| 332 | dir_nodeid = dlm_dir_nodeid(r); | ||
| 333 | |||
| 334 | if (dir_nodeid == our_nodeid) { | ||
| 335 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, | ||
| 336 | r->res_length, &ret_nodeid); | ||
| 337 | if (error) | ||
| 338 | log_error(ls, "recover dir lookup error %d", error); | ||
| 339 | |||
| 340 | if (ret_nodeid == our_nodeid) | ||
| 341 | ret_nodeid = 0; | ||
| 342 | set_new_master(r, ret_nodeid); | ||
| 343 | } else { | ||
| 344 | recover_list_add(r); | ||
| 345 | error = dlm_send_rcom_lookup(r, dir_nodeid); | ||
| 346 | } | ||
| 347 | |||
| 348 | return error; | ||
| 349 | } | ||
| 350 | |||
| 351 | /* | ||
| 352 | * When not using a directory, most resource names will hash to a new static | ||
| 353 | * master nodeid and the resource will need to be remastered. | ||
| 354 | */ | ||
| 355 | |||
/* No-directory mode: the master is determined statically by hashing the
   name (dlm_dir_nodeid).  If the master changed, purge any MSTCPY locks
   we held as the old master and record the new one.  Returns 1 if the
   rsb was remastered, 0 otherwise (used as a count by the caller). */

static int recover_master_static(struct dlm_rsb *r)
{
	int master = dlm_dir_nodeid(r);

	if (master == dlm_our_nodeid())
		master = 0;

	if (r->res_nodeid != master) {
		if (is_master(r))
			dlm_purge_mstcpy_locks(r);
		set_new_master(r, master);
		return 1;
	}
	return 0;
}
| 371 | |||
| 372 | /* | ||
| 373 | * Go through local root resources and for each rsb which has a master which | ||
| 374 | * has departed, get the new master nodeid from the directory. The dir will | ||
| 375 | * assign mastery to the first node to look up the new master. That means | ||
| 376 | * we'll discover in this lookup if we're the new master of any rsb's. | ||
| 377 | * | ||
| 378 | * We fire off all the dir lookup requests individually and asynchronously to | ||
| 379 | * the correct dir node. | ||
| 380 | */ | ||
| 381 | |||
| 382 | int dlm_recover_masters(struct dlm_ls *ls) | ||
| 383 | { | ||
| 384 | struct dlm_rsb *r; | ||
| 385 | int error = 0, count = 0; | ||
| 386 | |||
| 387 | log_debug(ls, "dlm_recover_masters"); | ||
| 388 | |||
| 389 | down_read(&ls->ls_root_sem); | ||
| 390 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
| 391 | if (dlm_recovery_stopped(ls)) { | ||
| 392 | up_read(&ls->ls_root_sem); | ||
| 393 | error = -EINTR; | ||
| 394 | goto out; | ||
| 395 | } | ||
| 396 | |||
| 397 | if (dlm_no_directory(ls)) | ||
| 398 | count += recover_master_static(r); | ||
| 399 | else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { | ||
| 400 | recover_master(r); | ||
| 401 | count++; | ||
| 402 | } | ||
| 403 | |||
| 404 | schedule(); | ||
| 405 | } | ||
| 406 | up_read(&ls->ls_root_sem); | ||
| 407 | |||
| 408 | log_debug(ls, "dlm_recover_masters %d resources", count); | ||
| 409 | |||
| 410 | error = dlm_wait_function(ls, &recover_list_empty); | ||
| 411 | out: | ||
| 412 | if (error) | ||
| 413 | recover_list_clear(ls); | ||
| 414 | return error; | ||
| 415 | } | ||
| 416 | |||
/* Handle a directory lookup reply: rc_id is the rsb's address, rc_result
   the master nodeid.  Record the new master, take the rsb off the
   recovery list, and wake the waiter when the list drains.  Always
   returns 0 (an unmatched id is only logged). */

int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
	struct dlm_rsb *r;
	int nodeid;

	r = recover_list_find(ls, rc->rc_id);
	if (!r) {
		log_error(ls, "dlm_recover_master_reply no id %llx",
			  (unsigned long long)rc->rc_id);
		goto out;
	}

	nodeid = rc->rc_result;
	/* nodeid 0 means "we are the master" */
	if (nodeid == dlm_our_nodeid())
		nodeid = 0;

	set_new_master(r, nodeid);
	recover_list_del(r);

	if (recover_list_empty(ls))
		wake_up(&ls->ls_wait_general);
 out:
	return 0;
}
| 441 | |||
| 442 | |||
| 443 | /* Lock recovery: rebuild the process-copy locks we hold on a | ||
| 444 | remastered rsb on the new rsb master. | ||
| 445 | |||
| 446 | dlm_recover_locks | ||
| 447 | recover_locks | ||
| 448 | recover_locks_queue | ||
| 449 | dlm_send_rcom_lock -> receive_rcom_lock | ||
| 450 | dlm_recover_master_copy | ||
| 451 | receive_rcom_lock_reply <- | ||
| 452 | dlm_recover_process_copy | ||
| 453 | */ | ||
| 454 | |||
| 455 | |||
| 456 | /* | ||
| 457 | * keep a count of the number of lkb's we send to the new master; when we get | ||
| 458 | * an equal number of replies then recovery for the rsb is done | ||
| 459 | */ | ||
| 460 | |||
| 461 | static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head) | ||
| 462 | { | ||
| 463 | struct dlm_lkb *lkb; | ||
| 464 | int error = 0; | ||
| 465 | |||
| 466 | list_for_each_entry(lkb, head, lkb_statequeue) { | ||
| 467 | error = dlm_send_rcom_lock(r, lkb); | ||
| 468 | if (error) | ||
| 469 | break; | ||
| 470 | r->res_recover_locks_count++; | ||
| 471 | } | ||
| 472 | |||
| 473 | return error; | ||
| 474 | } | ||
| 475 | |||
| 476 | static int recover_locks(struct dlm_rsb *r) | ||
| 477 | { | ||
| 478 | int error = 0; | ||
| 479 | |||
| 480 | lock_rsb(r); | ||
| 481 | |||
| 482 | DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r);); | ||
| 483 | |||
| 484 | error = recover_locks_queue(r, &r->res_grantqueue); | ||
| 485 | if (error) | ||
| 486 | goto out; | ||
| 487 | error = recover_locks_queue(r, &r->res_convertqueue); | ||
| 488 | if (error) | ||
| 489 | goto out; | ||
| 490 | error = recover_locks_queue(r, &r->res_waitqueue); | ||
| 491 | if (error) | ||
| 492 | goto out; | ||
| 493 | |||
| 494 | if (r->res_recover_locks_count) | ||
| 495 | recover_list_add(r); | ||
| 496 | else | ||
| 497 | rsb_clear_flag(r, RSB_NEW_MASTER); | ||
| 498 | out: | ||
| 499 | unlock_rsb(r); | ||
| 500 | return error; | ||
| 501 | } | ||
| 502 | |||
/* Rebuild our process-copy locks on the new masters of all remastered
   rsb's (those flagged RSB_NEW_MASTER), then wait for all the lock
   replies.  On success publish DLM_RS_LOCKS; on failure drain the
   recovery list.  Returns 0, -EINTR, or a send error. */

int dlm_recover_locks(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error, count = 0;

	log_debug(ls, "dlm_recover_locks");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		/* we only rebuild locks we hold on *remote* masters */
		if (is_master(r)) {
			rsb_clear_flag(r, RSB_NEW_MASTER);
			continue;
		}

		if (!rsb_flag(r, RSB_NEW_MASTER))
			continue;

		if (dlm_recovery_stopped(ls)) {
			error = -EINTR;
			up_read(&ls->ls_root_sem);
			goto out;
		}

		error = recover_locks(r);
		if (error) {
			up_read(&ls->ls_root_sem);
			goto out;
		}

		count += r->res_recover_locks_count;
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_locks %d locks", count);

	/* block until every sent lock has been acknowledged */
	error = dlm_wait_function(ls, &recover_list_empty);
 out:
	if (error)
		recover_list_clear(ls);
	else
		dlm_set_recover_status(ls, DLM_RS_LOCKS);
	return error;
}
| 546 | |||
/* Called when a lock reply arrives for a remastered rsb: one fewer lock
   outstanding.  When the rsb's count hits zero it is finished (clear
   NEW_MASTER, leave the recovery list); when the whole list drains, wake
   dlm_recover_locks(). */

void dlm_recovered_lock(struct dlm_rsb *r)
{
	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););

	r->res_recover_locks_count--;
	if (!r->res_recover_locks_count) {
		rsb_clear_flag(r, RSB_NEW_MASTER);
		recover_list_del(r);
	}

	if (recover_list_empty(r->res_ls))
		wake_up(&r->res_ls->ls_wait_general);
}
| 560 | |||
| 561 | /* | ||
| 562 | * The lvb needs to be recovered on all master rsb's. This includes setting | ||
| 563 | * the VALNOTVALID flag if necessary, and determining the correct lvb contents | ||
| 564 | * based on the lvb's of the locks held on the rsb. | ||
| 565 | * | ||
| 566 | * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it | ||
| 567 | * was already set prior to recovery, it's not cleared, regardless of locks. | ||
| 568 | * | ||
| 569 | * The LVB contents are only considered for changing when this is a new master | ||
| 570 | * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with | ||
| 571 | * mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken | ||
| 572 | * from the lkb with the largest lvb sequence number. | ||
| 573 | */ | ||
| 574 | |||
/* Recover the rsb's lvb after remastering (see block comment above):
   set VALNOTVALID when only NL/CR lvb locks remain, and — only if we are
   the new master (NEW_MASTER2) — rebuild the lvb contents from either a
   lock with mode > CR or, failing that, the highest lvb sequence number.
   Sequence comparison uses signed subtraction so it tolerates wrap. */

static void recover_lvb(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb, *high_lkb = NULL;
	uint32_t high_seq = 0;
	int lock_lvb_exists = 0;
	int big_lock_exists = 0;
	int lvblen = r->res_ls->ls_lvblen;

	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			continue;

		lock_lvb_exists = 1;

		/* mode > CR: this lock's lvb is authoritative; note that
		   `lkb` still points at it after the goto */
		if (lkb->lkb_grmode > DLM_LOCK_CR) {
			big_lock_exists = 1;
			goto setflag;
		}

		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
			high_lkb = lkb;
			high_seq = lkb->lkb_lvbseq;
		}
	}

	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
			continue;

		lock_lvb_exists = 1;

		if (lkb->lkb_grmode > DLM_LOCK_CR) {
			big_lock_exists = 1;
			goto setflag;
		}

		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
			high_lkb = lkb;
			high_seq = lkb->lkb_lvbseq;
		}
	}

 setflag:
	/* no lvb-carrying locks at all: nothing to recover */
	if (!lock_lvb_exists)
		goto out;

	if (!big_lock_exists)
		rsb_set_flag(r, RSB_VALNOTVALID);

	/* don't mess with the lvb unless we're the new master */
	if (!rsb_flag(r, RSB_NEW_MASTER2))
		goto out;

	if (!r->res_lvbptr) {
		r->res_lvbptr = allocate_lvb(r->res_ls);
		if (!r->res_lvbptr)
			goto out;
	}

	if (big_lock_exists) {
		/* `lkb` is the > CR lock found in the loops above */
		r->res_lvbseq = lkb->lkb_lvbseq;
		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
	} else if (high_lkb) {
		r->res_lvbseq = high_lkb->lkb_lvbseq;
		memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
	} else {
		r->res_lvbseq = 0;
		memset(r->res_lvbptr, 0, lvblen);
	}
 out:
	return;
}
| 647 | |||
| 648 | /* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks | ||
| 649 | converting PR->CW or CW->PR need to have their lkb_grmode set. */ | ||
| 650 | |||
/* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
   converting PR->CW or CW->PR need to have their lkb_grmode set. */

/* Locks rebuilt mid-conversion arrive with grmode IV; restore it from a
   granted PR/CW lock on the rsb if one exists, otherwise fall back to
   the lock's requested mode. */

static void recover_conversion(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb;
	int grmode = -1;

	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
		if (lkb->lkb_grmode == DLM_LOCK_PR ||
		    lkb->lkb_grmode == DLM_LOCK_CW) {
			grmode = lkb->lkb_grmode;
			break;
		}
	}

	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
		if (lkb->lkb_grmode != DLM_LOCK_IV)
			continue;
		if (grmode == -1)
			lkb->lkb_grmode = lkb->lkb_rqmode;
		else
			lkb->lkb_grmode = grmode;
	}
}
| 673 | |||
| 674 | /* We've become the new master for this rsb and waiting/converting locks may | ||
| 675 | need to be granted in dlm_grant_after_purge() due to locks that may have | ||
| 676 | existed from a removed node. */ | ||
| 677 | |||
| 678 | static void set_locks_purged(struct dlm_rsb *r) | ||
| 679 | { | ||
| 680 | if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) | ||
| 681 | rsb_set_flag(r, RSB_LOCKS_PURGED); | ||
| 682 | } | ||
| 683 | |||
/* Final per-rsb recovery pass over all root rsb's: on rsb's we master,
   fix up half-done conversions, flag purged-lock regrant work, and
   recover the lvb.  The one-shot RECOVER_CONVERT/NEW_MASTER2 flags are
   cleared on every rsb afterwards. */

void dlm_recover_rsbs(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int count = 0;

	log_debug(ls, "dlm_recover_rsbs");

	down_read(&ls->ls_root_sem);
	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
		lock_rsb(r);
		if (is_master(r)) {
			if (rsb_flag(r, RSB_RECOVER_CONVERT))
				recover_conversion(r);
			if (rsb_flag(r, RSB_NEW_MASTER2))
				set_locks_purged(r);
			recover_lvb(r);
			count++;
		}
		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
		rsb_clear_flag(r, RSB_NEW_MASTER2);
		unlock_rsb(r);
	}
	up_read(&ls->ls_root_sem);

	log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
}
| 710 | |||
| 711 | /* Create a single list of all root rsb's to be used during recovery */ | ||
| 712 | |||
/* Create a single list of all root rsb's to be used during recovery */

/* Walk every rsb hash bucket and link all rsb's onto ls_root_list,
   taking a reference on each (dropped by dlm_release_root_list()).
   Fails with -EINVAL if a previous root list was not released. */

int dlm_create_root_list(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int i, error = 0;

	down_write(&ls->ls_root_sem);
	if (!list_empty(&ls->ls_root_list)) {
		log_error(ls, "root list not empty");
		error = -EINVAL;
		goto out;
	}

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		read_lock(&ls->ls_rsbtbl[i].lock);
		list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) {
			list_add(&r->res_root_list, &ls->ls_root_list);
			dlm_hold_rsb(r);
		}
		read_unlock(&ls->ls_rsbtbl[i].lock);
	}
 out:
	up_write(&ls->ls_root_sem);
	return error;
}
| 737 | |||
| 738 | void dlm_release_root_list(struct dlm_ls *ls) | ||
| 739 | { | ||
| 740 | struct dlm_rsb *r, *safe; | ||
| 741 | |||
| 742 | down_write(&ls->ls_root_sem); | ||
| 743 | list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) { | ||
| 744 | list_del_init(&r->res_root_list); | ||
| 745 | dlm_put_rsb(r); | ||
| 746 | } | ||
| 747 | up_write(&ls->ls_root_sem); | ||
| 748 | } | ||
| 749 | |||
| 750 | void dlm_clear_toss_list(struct dlm_ls *ls) | ||
| 751 | { | ||
| 752 | struct dlm_rsb *r, *safe; | ||
| 753 | int i; | ||
| 754 | |||
| 755 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
| 756 | write_lock(&ls->ls_rsbtbl[i].lock); | ||
| 757 | list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, | ||
| 758 | res_hashchain) { | ||
| 759 | list_del(&r->res_hashchain); | ||
| 760 | free_rsb(r); | ||
| 761 | } | ||
| 762 | write_unlock(&ls->ls_rsbtbl[i].lock); | ||
| 763 | } | ||
| 764 | } | ||
| 765 | |||
diff --git a/fs/dlm/recover.h b/fs/dlm/recover.h new file mode 100644 index 000000000000..ebd0363f1e08 --- /dev/null +++ b/fs/dlm/recover.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #ifndef __RECOVER_DOT_H__ | ||
| 15 | #define __RECOVER_DOT_H__ | ||
| 16 | |||
| 17 | int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)); | ||
| 18 | uint32_t dlm_recover_status(struct dlm_ls *ls); | ||
| 19 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status); | ||
| 20 | int dlm_recover_members_wait(struct dlm_ls *ls); | ||
| 21 | int dlm_recover_directory_wait(struct dlm_ls *ls); | ||
| 22 | int dlm_recover_locks_wait(struct dlm_ls *ls); | ||
| 23 | int dlm_recover_done_wait(struct dlm_ls *ls); | ||
| 24 | int dlm_recover_masters(struct dlm_ls *ls); | ||
| 25 | int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc); | ||
| 26 | int dlm_recover_locks(struct dlm_ls *ls); | ||
| 27 | void dlm_recovered_lock(struct dlm_rsb *r); | ||
| 28 | int dlm_create_root_list(struct dlm_ls *ls); | ||
| 29 | void dlm_release_root_list(struct dlm_ls *ls); | ||
| 30 | void dlm_clear_toss_list(struct dlm_ls *ls); | ||
| 31 | void dlm_recover_rsbs(struct dlm_ls *ls); | ||
| 32 | |||
| 33 | #endif /* __RECOVER_DOT_H__ */ | ||
| 34 | |||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c new file mode 100644 index 000000000000..362e3eff4dc9 --- /dev/null +++ b/fs/dlm/recoverd.c | |||
| @@ -0,0 +1,290 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #include "dlm_internal.h" | ||
| 15 | #include "lockspace.h" | ||
| 16 | #include "member.h" | ||
| 17 | #include "dir.h" | ||
| 18 | #include "ast.h" | ||
| 19 | #include "recover.h" | ||
| 20 | #include "lowcomms.h" | ||
| 21 | #include "lock.h" | ||
| 22 | #include "requestqueue.h" | ||
| 23 | #include "recoverd.h" | ||
| 24 | |||
| 25 | |||
| 26 | /* If the start for which we're re-enabling locking (seq) has been superseded | ||
| 27 | by a newer stop (ls_recover_seq), we need to leave locking disabled. */ | ||
| 28 | |||
| 29 | static int enable_locking(struct dlm_ls *ls, uint64_t seq) | ||
| 30 | { | ||
| 31 | int error = -EINTR; | ||
| 32 | |||
| 33 | spin_lock(&ls->ls_recover_lock); | ||
| 34 | if (ls->ls_recover_seq == seq) { | ||
| 35 | set_bit(LSFL_RUNNING, &ls->ls_flags); | ||
| 36 | up_write(&ls->ls_in_recovery); | ||
| 37 | error = 0; | ||
| 38 | } | ||
| 39 | spin_unlock(&ls->ls_recover_lock); | ||
| 40 | return error; | ||
| 41 | } | ||
| 42 | |||
| 43 | static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | ||
| 44 | { | ||
| 45 | unsigned long start; | ||
| 46 | int error, neg = 0; | ||
| 47 | |||
| 48 | log_debug(ls, "recover %llx", rv->seq); | ||
| 49 | |||
| 50 | mutex_lock(&ls->ls_recoverd_active); | ||
| 51 | |||
| 52 | /* | ||
| 53 | * Suspending and resuming dlm_astd ensures that no lkb's from this ls | ||
| 54 | * will be processed by dlm_astd during recovery. | ||
| 55 | */ | ||
| 56 | |||
| 57 | dlm_astd_suspend(); | ||
| 58 | dlm_astd_resume(); | ||
| 59 | |||
| 60 | /* | ||
| 61 | * This list of root rsb's will be the basis of most of the recovery | ||
| 62 | * routines. | ||
| 63 | */ | ||
| 64 | |||
| 65 | dlm_create_root_list(ls); | ||
| 66 | |||
| 67 | /* | ||
| 68 | * Free all the tossed rsb's so we don't have to recover them. | ||
| 69 | */ | ||
| 70 | |||
| 71 | dlm_clear_toss_list(ls); | ||
| 72 | |||
| 73 | /* | ||
| 74 | * Add or remove nodes from the lockspace's ls_nodes list. | ||
| 75 | * Also waits for all nodes to complete dlm_recover_members. | ||
| 76 | */ | ||
| 77 | |||
| 78 | error = dlm_recover_members(ls, rv, &neg); | ||
| 79 | if (error) { | ||
| 80 | log_error(ls, "recover_members failed %d", error); | ||
| 81 | goto fail; | ||
| 82 | } | ||
| 83 | start = jiffies; | ||
| 84 | |||
| 85 | /* | ||
| 86 | * Rebuild our own share of the directory by collecting from all other | ||
| 87 | * nodes their master rsb names that hash to us. | ||
| 88 | */ | ||
| 89 | |||
| 90 | error = dlm_recover_directory(ls); | ||
| 91 | if (error) { | ||
| 92 | log_error(ls, "recover_directory failed %d", error); | ||
| 93 | goto fail; | ||
| 94 | } | ||
| 95 | |||
| 96 | /* | ||
| 97 | * Purge directory-related requests that are saved in requestqueue. | ||
| 98 | * All dir requests from before recovery are invalid now due to the dir | ||
| 99 | * rebuild and will be resent by the requesting nodes. | ||
| 100 | */ | ||
| 101 | |||
| 102 | dlm_purge_requestqueue(ls); | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Wait for all nodes to complete directory rebuild. | ||
| 106 | */ | ||
| 107 | |||
| 108 | error = dlm_recover_directory_wait(ls); | ||
| 109 | if (error) { | ||
| 110 | log_error(ls, "recover_directory_wait failed %d", error); | ||
| 111 | goto fail; | ||
| 112 | } | ||
| 113 | |||
| 114 | /* | ||
| 115 | * We may have outstanding operations that are waiting for a reply from | ||
| 116 | * a failed node. Mark these to be resent after recovery. Unlock and | ||
| 117 | * cancel ops can just be completed. | ||
| 118 | */ | ||
| 119 | |||
| 120 | dlm_recover_waiters_pre(ls); | ||
| 121 | |||
| 122 | error = dlm_recovery_stopped(ls); | ||
| 123 | if (error) | ||
| 124 | goto fail; | ||
| 125 | |||
| 126 | if (neg || dlm_no_directory(ls)) { | ||
| 127 | /* | ||
| 128 | * Clear lkb's for departed nodes. | ||
| 129 | */ | ||
| 130 | |||
| 131 | dlm_purge_locks(ls); | ||
| 132 | |||
| 133 | /* | ||
| 134 | * Get new master nodeid's for rsb's that were mastered on | ||
| 135 | * departed nodes. | ||
| 136 | */ | ||
| 137 | |||
| 138 | error = dlm_recover_masters(ls); | ||
| 139 | if (error) { | ||
| 140 | log_error(ls, "recover_masters failed %d", error); | ||
| 141 | goto fail; | ||
| 142 | } | ||
| 143 | |||
| 144 | /* | ||
| 145 | * Send our locks on remastered rsb's to the new masters. | ||
| 146 | */ | ||
| 147 | |||
| 148 | error = dlm_recover_locks(ls); | ||
| 149 | if (error) { | ||
| 150 | log_error(ls, "recover_locks failed %d", error); | ||
| 151 | goto fail; | ||
| 152 | } | ||
| 153 | |||
| 154 | error = dlm_recover_locks_wait(ls); | ||
| 155 | if (error) { | ||
| 156 | log_error(ls, "recover_locks_wait failed %d", error); | ||
| 157 | goto fail; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Finalize state in master rsb's now that all locks can be | ||
| 162 | * checked. This includes conversion resolution and lvb | ||
| 163 | * settings. | ||
| 164 | */ | ||
| 165 | |||
| 166 | dlm_recover_rsbs(ls); | ||
| 167 | } | ||
| 168 | |||
| 169 | dlm_release_root_list(ls); | ||
| 170 | |||
| 171 | dlm_set_recover_status(ls, DLM_RS_DONE); | ||
| 172 | error = dlm_recover_done_wait(ls); | ||
| 173 | if (error) { | ||
| 174 | log_error(ls, "recover_done_wait failed %d", error); | ||
| 175 | goto fail; | ||
| 176 | } | ||
| 177 | |||
| 178 | dlm_clear_members_gone(ls); | ||
| 179 | |||
| 180 | error = enable_locking(ls, rv->seq); | ||
| 181 | if (error) { | ||
| 182 | log_error(ls, "enable_locking failed %d", error); | ||
| 183 | goto fail; | ||
| 184 | } | ||
| 185 | |||
| 186 | error = dlm_process_requestqueue(ls); | ||
| 187 | if (error) { | ||
| 188 | log_error(ls, "process_requestqueue failed %d", error); | ||
| 189 | goto fail; | ||
| 190 | } | ||
| 191 | |||
| 192 | error = dlm_recover_waiters_post(ls); | ||
| 193 | if (error) { | ||
| 194 | log_error(ls, "recover_waiters_post failed %d", error); | ||
| 195 | goto fail; | ||
| 196 | } | ||
| 197 | |||
| 198 | dlm_grant_after_purge(ls); | ||
| 199 | |||
| 200 | dlm_astd_wake(); | ||
| 201 | |||
| 202 | log_debug(ls, "recover %llx done: %u ms", rv->seq, | ||
| 203 | jiffies_to_msecs(jiffies - start)); | ||
| 204 | mutex_unlock(&ls->ls_recoverd_active); | ||
| 205 | |||
| 206 | return 0; | ||
| 207 | |||
| 208 | fail: | ||
| 209 | dlm_release_root_list(ls); | ||
| 210 | log_debug(ls, "recover %llx error %d", rv->seq, error); | ||
| 211 | mutex_unlock(&ls->ls_recoverd_active); | ||
| 212 | return error; | ||
| 213 | } | ||
| 214 | |||
| 215 | static void do_ls_recovery(struct dlm_ls *ls) | ||
| 216 | { | ||
| 217 | struct dlm_recover *rv = NULL; | ||
| 218 | |||
| 219 | spin_lock(&ls->ls_recover_lock); | ||
| 220 | rv = ls->ls_recover_args; | ||
| 221 | ls->ls_recover_args = NULL; | ||
| 222 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | ||
| 223 | spin_unlock(&ls->ls_recover_lock); | ||
| 224 | |||
| 225 | if (rv) { | ||
| 226 | ls_recover(ls, rv); | ||
| 227 | kfree(rv->nodeids); | ||
| 228 | kfree(rv); | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | static int dlm_recoverd(void *arg) | ||
| 233 | { | ||
| 234 | struct dlm_ls *ls; | ||
| 235 | |||
| 236 | ls = dlm_find_lockspace_local(arg); | ||
| 237 | if (!ls) { | ||
| 238 | log_print("dlm_recoverd: no lockspace %p", arg); | ||
| 239 | return -1; | ||
| 240 | } | ||
| 241 | |||
| 242 | while (!kthread_should_stop()) { | ||
| 243 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 244 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) | ||
| 245 | schedule(); | ||
| 246 | set_current_state(TASK_RUNNING); | ||
| 247 | |||
| 248 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | ||
| 249 | do_ls_recovery(ls); | ||
| 250 | } | ||
| 251 | |||
| 252 | dlm_put_lockspace(ls); | ||
| 253 | return 0; | ||
| 254 | } | ||
| 255 | |||
| 256 | void dlm_recoverd_kick(struct dlm_ls *ls) | ||
| 257 | { | ||
| 258 | set_bit(LSFL_WORK, &ls->ls_flags); | ||
| 259 | wake_up_process(ls->ls_recoverd_task); | ||
| 260 | } | ||
| 261 | |||
| 262 | int dlm_recoverd_start(struct dlm_ls *ls) | ||
| 263 | { | ||
| 264 | struct task_struct *p; | ||
| 265 | int error = 0; | ||
| 266 | |||
| 267 | p = kthread_run(dlm_recoverd, ls, "dlm_recoverd"); | ||
| 268 | if (IS_ERR(p)) | ||
| 269 | error = PTR_ERR(p); | ||
| 270 | else | ||
| 271 | ls->ls_recoverd_task = p; | ||
| 272 | return error; | ||
| 273 | } | ||
| 274 | |||
| 275 | void dlm_recoverd_stop(struct dlm_ls *ls) | ||
| 276 | { | ||
| 277 | kthread_stop(ls->ls_recoverd_task); | ||
| 278 | } | ||
| 279 | |||
| 280 | void dlm_recoverd_suspend(struct dlm_ls *ls) | ||
| 281 | { | ||
| 282 | wake_up(&ls->ls_wait_general); | ||
| 283 | mutex_lock(&ls->ls_recoverd_active); | ||
| 284 | } | ||
| 285 | |||
| 286 | void dlm_recoverd_resume(struct dlm_ls *ls) | ||
| 287 | { | ||
| 288 | mutex_unlock(&ls->ls_recoverd_active); | ||
| 289 | } | ||
| 290 | |||
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h new file mode 100644 index 000000000000..866657c5d69d --- /dev/null +++ b/fs/dlm/recoverd.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #ifndef __RECOVERD_DOT_H__ | ||
| 15 | #define __RECOVERD_DOT_H__ | ||
| 16 | |||
| 17 | void dlm_recoverd_kick(struct dlm_ls *ls); | ||
| 18 | void dlm_recoverd_stop(struct dlm_ls *ls); | ||
| 19 | int dlm_recoverd_start(struct dlm_ls *ls); | ||
| 20 | void dlm_recoverd_suspend(struct dlm_ls *ls); | ||
| 21 | void dlm_recoverd_resume(struct dlm_ls *ls); | ||
| 22 | |||
| 23 | #endif /* __RECOVERD_DOT_H__ */ | ||
| 24 | |||
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c new file mode 100644 index 000000000000..7b2b089634a2 --- /dev/null +++ b/fs/dlm/requestqueue.c | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | #include "dlm_internal.h" | ||
| 14 | #include "member.h" | ||
| 15 | #include "lock.h" | ||
| 16 | #include "dir.h" | ||
| 17 | #include "config.h" | ||
| 18 | #include "requestqueue.h" | ||
| 19 | |||
| 20 | struct rq_entry { | ||
| 21 | struct list_head list; | ||
| 22 | int nodeid; | ||
| 23 | char request[1]; | ||
| 24 | }; | ||
| 25 | |||
| 26 | /* | ||
| 27 | * Requests received while the lockspace is in recovery get added to the | ||
| 28 | * request queue and processed when recovery is complete. This happens when | ||
| 29 | * the lockspace is suspended on some nodes before it is on others, or the | ||
| 30 | * lockspace is enabled on some while still suspended on others. | ||
| 31 | */ | ||
| 32 | |||
| 33 | void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) | ||
| 34 | { | ||
| 35 | struct rq_entry *e; | ||
| 36 | int length = hd->h_length; | ||
| 37 | |||
| 38 | if (dlm_is_removed(ls, nodeid)) | ||
| 39 | return; | ||
| 40 | |||
| 41 | e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); | ||
| 42 | if (!e) { | ||
| 43 | log_print("dlm_add_requestqueue: out of memory\n"); | ||
| 44 | return; | ||
| 45 | } | ||
| 46 | |||
| 47 | e->nodeid = nodeid; | ||
| 48 | memcpy(e->request, hd, length); | ||
| 49 | |||
| 50 | mutex_lock(&ls->ls_requestqueue_mutex); | ||
| 51 | list_add_tail(&e->list, &ls->ls_requestqueue); | ||
| 52 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 53 | } | ||
| 54 | |||
| 55 | int dlm_process_requestqueue(struct dlm_ls *ls) | ||
| 56 | { | ||
| 57 | struct rq_entry *e; | ||
| 58 | struct dlm_header *hd; | ||
| 59 | int error = 0; | ||
| 60 | |||
| 61 | mutex_lock(&ls->ls_requestqueue_mutex); | ||
| 62 | |||
| 63 | for (;;) { | ||
| 64 | if (list_empty(&ls->ls_requestqueue)) { | ||
| 65 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 66 | error = 0; | ||
| 67 | break; | ||
| 68 | } | ||
| 69 | e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); | ||
| 70 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 71 | |||
| 72 | hd = (struct dlm_header *) e->request; | ||
| 73 | error = dlm_receive_message(hd, e->nodeid, 1); | ||
| 74 | |||
| 75 | if (error == -EINTR) { | ||
| 76 | /* entry is left on requestqueue */ | ||
| 77 | log_debug(ls, "process_requestqueue abort eintr"); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | |||
| 81 | mutex_lock(&ls->ls_requestqueue_mutex); | ||
| 82 | list_del(&e->list); | ||
| 83 | kfree(e); | ||
| 84 | |||
| 85 | if (dlm_locking_stopped(ls)) { | ||
| 86 | log_debug(ls, "process_requestqueue abort running"); | ||
| 87 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 88 | error = -EINTR; | ||
| 89 | break; | ||
| 90 | } | ||
| 91 | schedule(); | ||
| 92 | } | ||
| 93 | |||
| 94 | return error; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * After recovery is done, locking is resumed and dlm_recoverd takes all the | ||
| 99 | * saved requests and processes them as they would have been by dlm_recvd. At | ||
| 100 | * the same time, dlm_recvd will start receiving new requests from remote | ||
| 101 | * nodes. We want to delay dlm_recvd processing new requests until | ||
| 102 | * dlm_recoverd has finished processing the old saved requests. | ||
| 103 | */ | ||
| 104 | |||
| 105 | void dlm_wait_requestqueue(struct dlm_ls *ls) | ||
| 106 | { | ||
| 107 | for (;;) { | ||
| 108 | mutex_lock(&ls->ls_requestqueue_mutex); | ||
| 109 | if (list_empty(&ls->ls_requestqueue)) | ||
| 110 | break; | ||
| 111 | if (dlm_locking_stopped(ls)) | ||
| 112 | break; | ||
| 113 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 114 | schedule(); | ||
| 115 | } | ||
| 116 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 117 | } | ||
| 118 | |||
| 119 | static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) | ||
| 120 | { | ||
| 121 | uint32_t type = ms->m_type; | ||
| 122 | |||
| 123 | if (dlm_is_removed(ls, nodeid)) | ||
| 124 | return 1; | ||
| 125 | |||
| 126 | /* directory operations are always purged because the directory is | ||
| 127 | always rebuilt during recovery and the lookups resent */ | ||
| 128 | |||
| 129 | if (type == DLM_MSG_REMOVE || | ||
| 130 | type == DLM_MSG_LOOKUP || | ||
| 131 | type == DLM_MSG_LOOKUP_REPLY) | ||
| 132 | return 1; | ||
| 133 | |||
| 134 | if (!dlm_no_directory(ls)) | ||
| 135 | return 0; | ||
| 136 | |||
| 137 | /* with no directory, the master is likely to change as a part of | ||
| 138 | recovery; requests to/from the defunct master need to be purged */ | ||
| 139 | |||
| 140 | switch (type) { | ||
| 141 | case DLM_MSG_REQUEST: | ||
| 142 | case DLM_MSG_CONVERT: | ||
| 143 | case DLM_MSG_UNLOCK: | ||
| 144 | case DLM_MSG_CANCEL: | ||
| 145 | /* we're no longer the master of this resource, the sender | ||
| 146 | will resend to the new master (see waiter_needs_recovery) */ | ||
| 147 | |||
| 148 | if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid()) | ||
| 149 | return 1; | ||
| 150 | break; | ||
| 151 | |||
| 152 | case DLM_MSG_REQUEST_REPLY: | ||
| 153 | case DLM_MSG_CONVERT_REPLY: | ||
| 154 | case DLM_MSG_UNLOCK_REPLY: | ||
| 155 | case DLM_MSG_CANCEL_REPLY: | ||
| 156 | case DLM_MSG_GRANT: | ||
| 157 | /* this reply is from the former master of the resource, | ||
| 158 | we'll resend to the new master if needed */ | ||
| 159 | |||
| 160 | if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid) | ||
| 161 | return 1; | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | |||
| 165 | return 0; | ||
| 166 | } | ||
| 167 | |||
| 168 | void dlm_purge_requestqueue(struct dlm_ls *ls) | ||
| 169 | { | ||
| 170 | struct dlm_message *ms; | ||
| 171 | struct rq_entry *e, *safe; | ||
| 172 | |||
| 173 | mutex_lock(&ls->ls_requestqueue_mutex); | ||
| 174 | list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) { | ||
| 175 | ms = (struct dlm_message *) e->request; | ||
| 176 | |||
| 177 | if (purge_request(ls, ms, e->nodeid)) { | ||
| 178 | list_del(&e->list); | ||
| 179 | kfree(e); | ||
| 180 | } | ||
| 181 | } | ||
| 182 | mutex_unlock(&ls->ls_requestqueue_mutex); | ||
| 183 | } | ||
| 184 | |||
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h new file mode 100644 index 000000000000..349f0d292d95 --- /dev/null +++ b/fs/dlm/requestqueue.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | #ifndef __REQUESTQUEUE_DOT_H__ | ||
| 14 | #define __REQUESTQUEUE_DOT_H__ | ||
| 15 | |||
| 16 | void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); | ||
| 17 | int dlm_process_requestqueue(struct dlm_ls *ls); | ||
| 18 | void dlm_wait_requestqueue(struct dlm_ls *ls); | ||
| 19 | void dlm_purge_requestqueue(struct dlm_ls *ls); | ||
| 20 | |||
| 21 | #endif | ||
| 22 | |||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c new file mode 100644 index 000000000000..c37e93e4f2df --- /dev/null +++ b/fs/dlm/user.c | |||
| @@ -0,0 +1,788 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This copyrighted material is made available to anyone wishing to use, | ||
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 6 | * of the GNU General Public License v.2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/miscdevice.h> | ||
| 10 | #include <linux/init.h> | ||
| 11 | #include <linux/wait.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | #include <linux/file.h> | ||
| 14 | #include <linux/fs.h> | ||
| 15 | #include <linux/poll.h> | ||
| 16 | #include <linux/signal.h> | ||
| 17 | #include <linux/spinlock.h> | ||
| 18 | #include <linux/dlm.h> | ||
| 19 | #include <linux/dlm_device.h> | ||
| 20 | |||
| 21 | #include "dlm_internal.h" | ||
| 22 | #include "lockspace.h" | ||
| 23 | #include "lock.h" | ||
| 24 | #include "lvb_table.h" | ||
| 25 | |||
| 26 | static const char *name_prefix="dlm"; | ||
| 27 | static struct miscdevice ctl_device; | ||
| 28 | static struct file_operations device_fops; | ||
| 29 | |||
| 30 | #ifdef CONFIG_COMPAT | ||
| 31 | |||
| 32 | struct dlm_lock_params32 { | ||
| 33 | __u8 mode; | ||
| 34 | __u8 namelen; | ||
| 35 | __u16 flags; | ||
| 36 | __u32 lkid; | ||
| 37 | __u32 parent; | ||
| 38 | |||
| 39 | __u32 castparam; | ||
| 40 | __u32 castaddr; | ||
| 41 | __u32 bastparam; | ||
| 42 | __u32 bastaddr; | ||
| 43 | __u32 lksb; | ||
| 44 | |||
| 45 | char lvb[DLM_USER_LVB_LEN]; | ||
| 46 | char name[0]; | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct dlm_write_request32 { | ||
| 50 | __u32 version[3]; | ||
| 51 | __u8 cmd; | ||
| 52 | __u8 is64bit; | ||
| 53 | __u8 unused[2]; | ||
| 54 | |||
| 55 | union { | ||
| 56 | struct dlm_lock_params32 lock; | ||
| 57 | struct dlm_lspace_params lspace; | ||
| 58 | } i; | ||
| 59 | }; | ||
| 60 | |||
| 61 | struct dlm_lksb32 { | ||
| 62 | __u32 sb_status; | ||
| 63 | __u32 sb_lkid; | ||
| 64 | __u8 sb_flags; | ||
| 65 | __u32 sb_lvbptr; | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct dlm_lock_result32 { | ||
| 69 | __u32 length; | ||
| 70 | __u32 user_astaddr; | ||
| 71 | __u32 user_astparam; | ||
| 72 | __u32 user_lksb; | ||
| 73 | struct dlm_lksb32 lksb; | ||
| 74 | __u8 bast_mode; | ||
| 75 | __u8 unused[3]; | ||
| 76 | /* Offsets may be zero if no data is present */ | ||
| 77 | __u32 lvb_offset; | ||
| 78 | }; | ||
| 79 | |||
| 80 | static void compat_input(struct dlm_write_request *kb, | ||
| 81 | struct dlm_write_request32 *kb32) | ||
| 82 | { | ||
| 83 | kb->version[0] = kb32->version[0]; | ||
| 84 | kb->version[1] = kb32->version[1]; | ||
| 85 | kb->version[2] = kb32->version[2]; | ||
| 86 | |||
| 87 | kb->cmd = kb32->cmd; | ||
| 88 | kb->is64bit = kb32->is64bit; | ||
| 89 | if (kb->cmd == DLM_USER_CREATE_LOCKSPACE || | ||
| 90 | kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { | ||
| 91 | kb->i.lspace.flags = kb32->i.lspace.flags; | ||
| 92 | kb->i.lspace.minor = kb32->i.lspace.minor; | ||
| 93 | strcpy(kb->i.lspace.name, kb32->i.lspace.name); | ||
| 94 | } else { | ||
| 95 | kb->i.lock.mode = kb32->i.lock.mode; | ||
| 96 | kb->i.lock.namelen = kb32->i.lock.namelen; | ||
| 97 | kb->i.lock.flags = kb32->i.lock.flags; | ||
| 98 | kb->i.lock.lkid = kb32->i.lock.lkid; | ||
| 99 | kb->i.lock.parent = kb32->i.lock.parent; | ||
| 100 | kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; | ||
| 101 | kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; | ||
| 102 | kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; | ||
| 103 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; | ||
| 104 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; | ||
| 105 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); | ||
| 106 | memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | static void compat_output(struct dlm_lock_result *res, | ||
| 111 | struct dlm_lock_result32 *res32) | ||
| 112 | { | ||
| 113 | res32->length = res->length - (sizeof(struct dlm_lock_result) - | ||
| 114 | sizeof(struct dlm_lock_result32)); | ||
| 115 | res32->user_astaddr = (__u32)(long)res->user_astaddr; | ||
| 116 | res32->user_astparam = (__u32)(long)res->user_astparam; | ||
| 117 | res32->user_lksb = (__u32)(long)res->user_lksb; | ||
| 118 | res32->bast_mode = res->bast_mode; | ||
| 119 | |||
| 120 | res32->lvb_offset = res->lvb_offset; | ||
| 121 | res32->length = res->length; | ||
| 122 | |||
| 123 | res32->lksb.sb_status = res->lksb.sb_status; | ||
| 124 | res32->lksb.sb_flags = res->lksb.sb_flags; | ||
| 125 | res32->lksb.sb_lkid = res->lksb.sb_lkid; | ||
| 126 | res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr; | ||
| 127 | } | ||
| 128 | #endif | ||
| 129 | |||
| 130 | |||
| 131 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | ||
| 132 | { | ||
| 133 | struct dlm_ls *ls; | ||
| 134 | struct dlm_user_args *ua; | ||
| 135 | struct dlm_user_proc *proc; | ||
| 136 | int remove_ownqueue = 0; | ||
| 137 | |||
| 138 | /* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each | ||
| 139 | lkb before dealing with it. We need to check this | ||
| 140 | flag before taking ls_clear_proc_locks mutex because if | ||
| 141 | it's set, dlm_clear_proc_locks() holds the mutex. */ | ||
| 142 | |||
| 143 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { | ||
| 144 | /* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */ | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | |||
| 148 | ls = lkb->lkb_resource->res_ls; | ||
| 149 | mutex_lock(&ls->ls_clear_proc_locks); | ||
| 150 | |||
| 151 | /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast | ||
| 152 | can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed | ||
| 153 | lkb->ua so we can't try to use it. */ | ||
| 154 | |||
| 155 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { | ||
| 156 | /* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */ | ||
| 157 | goto out; | ||
| 158 | } | ||
| 159 | |||
| 160 | DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); | ||
| 161 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | ||
| 162 | proc = ua->proc; | ||
| 163 | |||
| 164 | if (type == AST_BAST && ua->bastaddr == NULL) | ||
| 165 | goto out; | ||
| 166 | |||
| 167 | spin_lock(&proc->asts_spin); | ||
| 168 | if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { | ||
| 169 | kref_get(&lkb->lkb_ref); | ||
| 170 | list_add_tail(&lkb->lkb_astqueue, &proc->asts); | ||
| 171 | lkb->lkb_ast_type |= type; | ||
| 172 | wake_up_interruptible(&proc->wait); | ||
| 173 | } | ||
| 174 | |||
| 175 | /* noqueue requests that fail may need to be removed from the | ||
| 176 | proc's locks list, there should be a better way of detecting | ||
| 177 | this situation than checking all these things... */ | ||
| 178 | |||
| 179 | if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV && | ||
| 180 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) | ||
| 181 | remove_ownqueue = 1; | ||
| 182 | |||
| 183 | /* We want to copy the lvb to userspace when the completion | ||
| 184 | ast is read if the status is 0, the lock has an lvb and | ||
| 185 | lvb_ops says we should. We could probably have set_lvb_lock() | ||
| 186 | set update_user_lvb instead and not need old_mode */ | ||
| 187 | |||
| 188 | if ((lkb->lkb_ast_type & AST_COMP) && | ||
| 189 | (lkb->lkb_lksb->sb_status == 0) && | ||
| 190 | lkb->lkb_lksb->sb_lvbptr && | ||
| 191 | dlm_lvb_operations[ua->old_mode + 1][lkb->lkb_grmode + 1]) | ||
| 192 | ua->update_user_lvb = 1; | ||
| 193 | else | ||
| 194 | ua->update_user_lvb = 0; | ||
| 195 | |||
| 196 | spin_unlock(&proc->asts_spin); | ||
| 197 | |||
| 198 | if (remove_ownqueue) { | ||
| 199 | spin_lock(&ua->proc->locks_spin); | ||
| 200 | list_del_init(&lkb->lkb_ownqueue); | ||
| 201 | spin_unlock(&ua->proc->locks_spin); | ||
| 202 | dlm_put_lkb(lkb); | ||
| 203 | } | ||
| 204 | out: | ||
| 205 | mutex_unlock(&ls->ls_clear_proc_locks); | ||
| 206 | } | ||
| 207 | |||
| 208 | static int device_user_lock(struct dlm_user_proc *proc, | ||
| 209 | struct dlm_lock_params *params) | ||
| 210 | { | ||
| 211 | struct dlm_ls *ls; | ||
| 212 | struct dlm_user_args *ua; | ||
| 213 | int error = -ENOMEM; | ||
| 214 | |||
| 215 | ls = dlm_find_lockspace_local(proc->lockspace); | ||
| 216 | if (!ls) | ||
| 217 | return -ENOENT; | ||
| 218 | |||
| 219 | if (!params->castaddr || !params->lksb) { | ||
| 220 | error = -EINVAL; | ||
| 221 | goto out; | ||
| 222 | } | ||
| 223 | |||
| 224 | ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); | ||
| 225 | if (!ua) | ||
| 226 | goto out; | ||
| 227 | ua->proc = proc; | ||
| 228 | ua->user_lksb = params->lksb; | ||
| 229 | ua->castparam = params->castparam; | ||
| 230 | ua->castaddr = params->castaddr; | ||
| 231 | ua->bastparam = params->bastparam; | ||
| 232 | ua->bastaddr = params->bastaddr; | ||
| 233 | |||
| 234 | if (params->flags & DLM_LKF_CONVERT) | ||
| 235 | error = dlm_user_convert(ls, ua, | ||
| 236 | params->mode, params->flags, | ||
| 237 | params->lkid, params->lvb); | ||
| 238 | else { | ||
| 239 | error = dlm_user_request(ls, ua, | ||
| 240 | params->mode, params->flags, | ||
| 241 | params->name, params->namelen, | ||
| 242 | params->parent); | ||
| 243 | if (!error) | ||
| 244 | error = ua->lksb.sb_lkid; | ||
| 245 | } | ||
| 246 | out: | ||
| 247 | dlm_put_lockspace(ls); | ||
| 248 | return error; | ||
| 249 | } | ||
| 250 | |||
| 251 | static int device_user_unlock(struct dlm_user_proc *proc, | ||
| 252 | struct dlm_lock_params *params) | ||
| 253 | { | ||
| 254 | struct dlm_ls *ls; | ||
| 255 | struct dlm_user_args *ua; | ||
| 256 | int error = -ENOMEM; | ||
| 257 | |||
| 258 | ls = dlm_find_lockspace_local(proc->lockspace); | ||
| 259 | if (!ls) | ||
| 260 | return -ENOENT; | ||
| 261 | |||
| 262 | ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); | ||
| 263 | if (!ua) | ||
| 264 | goto out; | ||
| 265 | ua->proc = proc; | ||
| 266 | ua->user_lksb = params->lksb; | ||
| 267 | ua->castparam = params->castparam; | ||
| 268 | ua->castaddr = params->castaddr; | ||
| 269 | |||
| 270 | if (params->flags & DLM_LKF_CANCEL) | ||
| 271 | error = dlm_user_cancel(ls, ua, params->flags, params->lkid); | ||
| 272 | else | ||
| 273 | error = dlm_user_unlock(ls, ua, params->flags, params->lkid, | ||
| 274 | params->lvb); | ||
| 275 | out: | ||
| 276 | dlm_put_lockspace(ls); | ||
| 277 | return error; | ||
| 278 | } | ||
| 279 | |||
| 280 | static int device_create_lockspace(struct dlm_lspace_params *params) | ||
| 281 | { | ||
| 282 | dlm_lockspace_t *lockspace; | ||
| 283 | struct dlm_ls *ls; | ||
| 284 | int error, len; | ||
| 285 | |||
| 286 | if (!capable(CAP_SYS_ADMIN)) | ||
| 287 | return -EPERM; | ||
| 288 | |||
| 289 | error = dlm_new_lockspace(params->name, strlen(params->name), | ||
| 290 | &lockspace, 0, DLM_USER_LVB_LEN); | ||
| 291 | if (error) | ||
| 292 | return error; | ||
| 293 | |||
| 294 | ls = dlm_find_lockspace_local(lockspace); | ||
| 295 | if (!ls) | ||
| 296 | return -ENOENT; | ||
| 297 | |||
| 298 | error = -ENOMEM; | ||
| 299 | len = strlen(params->name) + strlen(name_prefix) + 2; | ||
| 300 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | ||
| 301 | if (!ls->ls_device.name) | ||
| 302 | goto fail; | ||
| 303 | snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix, | ||
| 304 | params->name); | ||
| 305 | ls->ls_device.fops = &device_fops; | ||
| 306 | ls->ls_device.minor = MISC_DYNAMIC_MINOR; | ||
| 307 | |||
| 308 | error = misc_register(&ls->ls_device); | ||
| 309 | if (error) { | ||
| 310 | kfree(ls->ls_device.name); | ||
| 311 | goto fail; | ||
| 312 | } | ||
| 313 | |||
| 314 | error = ls->ls_device.minor; | ||
| 315 | dlm_put_lockspace(ls); | ||
| 316 | return error; | ||
| 317 | |||
| 318 | fail: | ||
| 319 | dlm_put_lockspace(ls); | ||
| 320 | dlm_release_lockspace(lockspace, 0); | ||
| 321 | return error; | ||
| 322 | } | ||
| 323 | |||
/* Tear down a lockspace created by device_create_lockspace(): deregister
   its misc device, then release the lockspace itself.  Requires
   CAP_SYS_ADMIN.  params->minor identifies the lockspace's device. */
static int device_remove_lockspace(struct dlm_lspace_params *params)
{
	dlm_lockspace_t *lockspace;
	struct dlm_ls *ls;
	int error, force = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ls = dlm_find_lockspace_device(params->minor);
	if (!ls)
		return -ENOENT;

	/* remove the device first so no new opens can race the release */
	error = misc_deregister(&ls->ls_device);
	if (error) {
		dlm_put_lockspace(ls);
		goto out;
	}
	kfree(ls->ls_device.name);

	/* NOTE(review): force=2 semantics are defined by
	   dlm_release_lockspace() — confirm against its implementation */
	if (params->flags & DLM_USER_LSFLG_FORCEFREE)
		force = 2;

	lockspace = ls->ls_local_handle;

	/* dlm_release_lockspace waits for references to go to zero,
	   so all processes will need to close their device for the ls
	   before the release will proceed */

	dlm_put_lockspace(ls);
	error = dlm_release_lockspace(lockspace, force);
 out:
	return error;
}
| 358 | |||
| 359 | /* Check the user's version matches ours */ | ||
| 360 | static int check_version(struct dlm_write_request *req) | ||
| 361 | { | ||
| 362 | if (req->version[0] != DLM_DEVICE_VERSION_MAJOR || | ||
| 363 | (req->version[0] == DLM_DEVICE_VERSION_MAJOR && | ||
| 364 | req->version[1] > DLM_DEVICE_VERSION_MINOR)) { | ||
| 365 | |||
| 366 | printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " | ||
| 367 | "user (%d.%d.%d) kernel (%d.%d.%d)\n", | ||
| 368 | current->comm, | ||
| 369 | current->pid, | ||
| 370 | req->version[0], | ||
| 371 | req->version[1], | ||
| 372 | req->version[2], | ||
| 373 | DLM_DEVICE_VERSION_MAJOR, | ||
| 374 | DLM_DEVICE_VERSION_MINOR, | ||
| 375 | DLM_DEVICE_VERSION_PATCH); | ||
| 376 | return -EINVAL; | ||
| 377 | } | ||
| 378 | return 0; | ||
| 379 | } | ||
| 380 | |||
| 381 | /* | ||
| 382 | * device_write | ||
| 383 | * | ||
| 384 | * device_user_lock | ||
| 385 | * dlm_user_request -> request_lock | ||
| 386 | * dlm_user_convert -> convert_lock | ||
| 387 | * | ||
| 388 | * device_user_unlock | ||
| 389 | * dlm_user_unlock -> unlock_lock | ||
| 390 | * dlm_user_cancel -> cancel_lock | ||
| 391 | * | ||
| 392 | * device_create_lockspace | ||
| 393 | * dlm_new_lockspace | ||
| 394 | * | ||
| 395 | * device_remove_lockspace | ||
| 396 | * dlm_release_lockspace | ||
| 397 | */ | ||
| 398 | |||
| 399 | /* a write to a lockspace device is a lock or unlock request, a write | ||
| 400 | to the control device is to create/remove a lockspace */ | ||
| 401 | |||
| 402 | static ssize_t device_write(struct file *file, const char __user *buf, | ||
| 403 | size_t count, loff_t *ppos) | ||
| 404 | { | ||
| 405 | struct dlm_user_proc *proc = file->private_data; | ||
| 406 | struct dlm_write_request *kbuf; | ||
| 407 | sigset_t tmpsig, allsigs; | ||
| 408 | int error; | ||
| 409 | |||
| 410 | #ifdef CONFIG_COMPAT | ||
| 411 | if (count < sizeof(struct dlm_write_request32)) | ||
| 412 | #else | ||
| 413 | if (count < sizeof(struct dlm_write_request)) | ||
| 414 | #endif | ||
| 415 | return -EINVAL; | ||
| 416 | |||
| 417 | kbuf = kmalloc(count, GFP_KERNEL); | ||
| 418 | if (!kbuf) | ||
| 419 | return -ENOMEM; | ||
| 420 | |||
| 421 | if (copy_from_user(kbuf, buf, count)) { | ||
| 422 | error = -EFAULT; | ||
| 423 | goto out_free; | ||
| 424 | } | ||
| 425 | |||
| 426 | if (check_version(kbuf)) { | ||
| 427 | error = -EBADE; | ||
| 428 | goto out_free; | ||
| 429 | } | ||
| 430 | |||
| 431 | #ifdef CONFIG_COMPAT | ||
| 432 | if (!kbuf->is64bit) { | ||
| 433 | struct dlm_write_request32 *k32buf; | ||
| 434 | k32buf = (struct dlm_write_request32 *)kbuf; | ||
| 435 | kbuf = kmalloc(count + (sizeof(struct dlm_write_request) - | ||
| 436 | sizeof(struct dlm_write_request32)), GFP_KERNEL); | ||
| 437 | if (!kbuf) | ||
| 438 | return -ENOMEM; | ||
| 439 | |||
| 440 | if (proc) | ||
| 441 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); | ||
| 442 | compat_input(kbuf, k32buf); | ||
| 443 | kfree(k32buf); | ||
| 444 | } | ||
| 445 | #endif | ||
| 446 | |||
| 447 | /* do we really need this? can a write happen after a close? */ | ||
| 448 | if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && | ||
| 449 | test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) | ||
| 450 | return -EINVAL; | ||
| 451 | |||
| 452 | sigfillset(&allsigs); | ||
| 453 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | ||
| 454 | |||
| 455 | error = -EINVAL; | ||
| 456 | |||
| 457 | switch (kbuf->cmd) | ||
| 458 | { | ||
| 459 | case DLM_USER_LOCK: | ||
| 460 | if (!proc) { | ||
| 461 | log_print("no locking on control device"); | ||
| 462 | goto out_sig; | ||
| 463 | } | ||
| 464 | error = device_user_lock(proc, &kbuf->i.lock); | ||
| 465 | break; | ||
| 466 | |||
| 467 | case DLM_USER_UNLOCK: | ||
| 468 | if (!proc) { | ||
| 469 | log_print("no locking on control device"); | ||
| 470 | goto out_sig; | ||
| 471 | } | ||
| 472 | error = device_user_unlock(proc, &kbuf->i.lock); | ||
| 473 | break; | ||
| 474 | |||
| 475 | case DLM_USER_CREATE_LOCKSPACE: | ||
| 476 | if (proc) { | ||
| 477 | log_print("create/remove only on control device"); | ||
| 478 | goto out_sig; | ||
| 479 | } | ||
| 480 | error = device_create_lockspace(&kbuf->i.lspace); | ||
| 481 | break; | ||
| 482 | |||
| 483 | case DLM_USER_REMOVE_LOCKSPACE: | ||
| 484 | if (proc) { | ||
| 485 | log_print("create/remove only on control device"); | ||
| 486 | goto out_sig; | ||
| 487 | } | ||
| 488 | error = device_remove_lockspace(&kbuf->i.lspace); | ||
| 489 | break; | ||
| 490 | |||
| 491 | default: | ||
| 492 | log_print("Unknown command passed to DLM device : %d\n", | ||
| 493 | kbuf->cmd); | ||
| 494 | } | ||
| 495 | |||
| 496 | out_sig: | ||
| 497 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | ||
| 498 | recalc_sigpending(); | ||
| 499 | out_free: | ||
| 500 | kfree(kbuf); | ||
| 501 | return error; | ||
| 502 | } | ||
| 503 | |||
| 504 | /* Every process that opens the lockspace device has its own "proc" structure | ||
| 505 | hanging off the open file that's used to keep track of locks owned by the | ||
| 506 | process and asts that need to be delivered to the process. */ | ||
| 507 | |||
/* Open a per-lockspace device: allocate the caller's proc structure and
   hang it off the file.  Takes a lockspace reference that is dropped in
   device_close(). */
static int device_open(struct inode *inode, struct file *file)
{
	struct dlm_user_proc *proc;
	struct dlm_ls *ls;

	/* the device minor identifies which lockspace this is */
	ls = dlm_find_lockspace_device(iminor(inode));
	if (!ls)
		return -ENOENT;

	proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
	if (!proc) {
		dlm_put_lockspace(ls);
		return -ENOMEM;
	}

	proc->lockspace = ls->ls_local_handle;
	INIT_LIST_HEAD(&proc->asts);
	INIT_LIST_HEAD(&proc->locks);
	spin_lock_init(&proc->asts_spin);
	spin_lock_init(&proc->locks_spin);
	init_waitqueue_head(&proc->wait);
	file->private_data = proc;

	return 0;
}
| 533 | |||
/* Close a per-lockspace device: clear all locks the process still owns
   and free its proc structure.  Signals are blocked so the cleanup
   cannot be interrupted partway through. */
static int device_close(struct inode *inode, struct file *file)
{
	struct dlm_user_proc *proc = file->private_data;
	struct dlm_ls *ls;
	sigset_t tmpsig, allsigs;

	ls = dlm_find_lockspace_local(proc->lockspace);
	if (!ls)
		return -ENOENT;

	sigfillset(&allsigs);
	sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);

	/* stop further reads/writes racing with the teardown below */
	set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags);

	dlm_clear_proc_locks(ls, proc);

	/* at this point no more lkb's should exist for this lockspace,
	   so there's no chance of dlm_user_add_ast() being called and
	   looking for lkb->ua->proc */

	kfree(proc);
	file->private_data = NULL;

	dlm_put_lockspace(ls);
	dlm_put_lockspace(ls);  /* for the find in device_open() */

	/* FIXME: AUTOFREE: if this ls is no longer used do
	   device_remove_lockspace() */

	sigprocmask(SIG_SETMASK, &tmpsig, NULL);
	recalc_sigpending();

	return 0;
}
| 569 | |||
/* Format one ast as a struct dlm_lock_result (32-bit layout when the
   opening process was a compat task) and copy it, plus the lvb when
   applicable, into the user buffer.  Returns the number of bytes
   copied or -EFAULT. */
static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
			       int bmode, char __user *buf, size_t count)
{
#ifdef CONFIG_COMPAT
	struct dlm_lock_result32 result32;
#endif
	struct dlm_lock_result result;
	void *resultptr;
	int error=0;
	int len;
	int struct_len;

	memset(&result, 0, sizeof(struct dlm_lock_result));
	memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
	result.user_lksb = ua->user_lksb;

	/* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
	   in a conversion unless the conversion is successful.  See code
	   in dlm_user_convert() for updating ua from ua_tmp.  OpenVMS, though,
	   notes that a new blocking AST address and parameter are set even if
	   the conversion fails, so maybe we should just do that. */

	/* report back whichever ast callback/param pair this event is for */
	if (type == AST_BAST) {
		result.user_astaddr = ua->bastaddr;
		result.user_astparam = ua->bastparam;
		result.bast_mode = bmode;
	} else {
		result.user_astaddr = ua->castaddr;
		result.user_astparam = ua->castparam;
	}

#ifdef CONFIG_COMPAT
	if (compat)
		len = sizeof(struct dlm_lock_result32);
	else
#endif
		len = sizeof(struct dlm_lock_result);
	/* struct_len is the fixed header size; len grows below if an
	   lvb is appended after the struct */
	struct_len = len;

	/* copy lvb to userspace if there is one, it's been updated, and
	   the user buffer has space for it */

	if (ua->update_user_lvb && ua->lksb.sb_lvbptr &&
	    count >= len + DLM_USER_LVB_LEN) {
		if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
				 DLM_USER_LVB_LEN)) {
			error = -EFAULT;
			goto out;
		}

		result.lvb_offset = len;
		len += DLM_USER_LVB_LEN;
	}

	result.length = len;
	resultptr = &result;
#ifdef CONFIG_COMPAT
	if (compat) {
		/* narrow the result to the 32-bit layout before copying */
		compat_output(&result, &result32);
		resultptr = &result32;
	}
#endif

	if (copy_to_user(buf, resultptr, struct_len))
		error = -EFAULT;
	else
		error = len;
 out:
	return error;
}
| 640 | |||
| 641 | /* a read returns a single ast described in a struct dlm_lock_result */ | ||
| 642 | |||
/* Return the next pending ast for this process as a single struct
   dlm_lock_result.  Blocks (unless O_NONBLOCK) until an ast is queued
   or a signal arrives. */
static ssize_t device_read(struct file *file, char __user *buf, size_t count,
			   loff_t *ppos)
{
	struct dlm_user_proc *proc = file->private_data;
	struct dlm_lkb *lkb;
	struct dlm_user_args *ua;
	DECLARE_WAITQUEUE(wait, current);
	int error, type=0, bmode=0, removed = 0;

	/* buffer must hold at least the fixed-size result struct; the
	   lvb is only appended when there is extra room */
#ifdef CONFIG_COMPAT
	if (count < sizeof(struct dlm_lock_result32))
#else
	if (count < sizeof(struct dlm_lock_result))
#endif
		return -EINVAL;

	/* do we really need this? can a read happen after a close? */
	if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
		return -EINVAL;

	spin_lock(&proc->asts_spin);
	if (list_empty(&proc->asts)) {
		if (file->f_flags & O_NONBLOCK) {
			spin_unlock(&proc->asts_spin);
			return -EAGAIN;
		}

		/* sleep until an ast arrives or a signal is pending; the
		   spinlock is dropped around schedule() and retaken before
		   the list is rechecked */
		add_wait_queue(&proc->wait, &wait);

	repeat:
		set_current_state(TASK_INTERRUPTIBLE);
		if (list_empty(&proc->asts) && !signal_pending(current)) {
			spin_unlock(&proc->asts_spin);
			schedule();
			spin_lock(&proc->asts_spin);
			goto repeat;
		}
		set_current_state(TASK_RUNNING);
		remove_wait_queue(&proc->wait, &wait);

		if (signal_pending(current)) {
			spin_unlock(&proc->asts_spin);
			return -ERESTARTSYS;
		}
	}

	/* NOTE(review): defensive recheck — the loop above should only
	   exit with a non-empty list or a pending signal */
	if (list_empty(&proc->asts)) {
		spin_unlock(&proc->asts_spin);
		return -EAGAIN;
	}

	/* there may be both completion and blocking asts to return for
	   the lkb, don't remove lkb from asts list unless no asts remain */

	lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);

	/* completion asts are delivered before blocking asts */
	if (lkb->lkb_ast_type & AST_COMP) {
		lkb->lkb_ast_type &= ~AST_COMP;
		type = AST_COMP;
	} else if (lkb->lkb_ast_type & AST_BAST) {
		lkb->lkb_ast_type &= ~AST_BAST;
		type = AST_BAST;
		bmode = lkb->lkb_bastmode;
	}

	if (!lkb->lkb_ast_type) {
		list_del(&lkb->lkb_astqueue);
		removed = 1;
	}
	spin_unlock(&proc->asts_spin);

	ua = (struct dlm_user_args *)lkb->lkb_astparam;
	error = copy_result_to_user(ua,
			 	test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
				type, bmode, buf, count);

	/* removes reference for the proc->asts lists added by
	   dlm_user_add_ast() and may result in the lkb being freed */
	if (removed)
		dlm_put_lkb(lkb);

	return error;
}
| 726 | |||
| 727 | static unsigned int device_poll(struct file *file, poll_table *wait) | ||
| 728 | { | ||
| 729 | struct dlm_user_proc *proc = file->private_data; | ||
| 730 | |||
| 731 | poll_wait(file, &proc->wait, wait); | ||
| 732 | |||
| 733 | spin_lock(&proc->asts_spin); | ||
| 734 | if (!list_empty(&proc->asts)) { | ||
| 735 | spin_unlock(&proc->asts_spin); | ||
| 736 | return POLLIN | POLLRDNORM; | ||
| 737 | } | ||
| 738 | spin_unlock(&proc->asts_spin); | ||
| 739 | return 0; | ||
| 740 | } | ||
| 741 | |||
/* The control device has no per-lockspace proc; a NULL private_data is
   how device_write() distinguishes it from a lockspace device. */
static int ctl_device_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;
	return 0;
}
| 747 | |||
/* Nothing to tear down for the control device. */
static int ctl_device_close(struct inode *inode, struct file *file)
{
	return 0;
}
| 752 | |||
/* fops for the per-lockspace misc devices registered by
   device_create_lockspace() */
static struct file_operations device_fops = {
	.open = device_open,
	.release = device_close,
	.read = device_read,
	.write = device_write,
	.poll = device_poll,
	.owner = THIS_MODULE,
};
| 761 | |||
/* fops for the single "dlm-control" device; write-only apart from
   open/close — lockspace create/remove requests come in via write */
static struct file_operations ctl_device_fops = {
	.open = ctl_device_open,
	.release = ctl_device_close,
	.write = device_write,
	.owner = THIS_MODULE,
};
| 768 | |||
/* Register the "dlm-control" misc device; per-lockspace devices are
   registered later by device_create_lockspace().  Returns 0 or the
   misc_register() error. */
int dlm_user_init(void)
{
	int error;

	ctl_device.name = "dlm-control";
	ctl_device.fops = &ctl_device_fops;
	ctl_device.minor = MISC_DYNAMIC_MINOR;

	error = misc_register(&ctl_device);
	if (error)
		log_print("misc_register failed for control device");

	return error;
}
| 783 | |||
/* Remove the "dlm-control" device registered by dlm_user_init(). */
void dlm_user_exit(void)
{
	misc_deregister(&ctl_device);
}
| 788 | |||
diff --git a/fs/dlm/user.h b/fs/dlm/user.h new file mode 100644 index 000000000000..d38e9f3e4151 --- /dev/null +++ b/fs/dlm/user.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This copyrighted material is made available to anyone wishing to use, | ||
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 6 | * of the GNU General Public License v.2. | ||
| 7 | */ | ||
| 8 | |||
#ifndef __USER_DOT_H__
#define __USER_DOT_H__

/* Queue an ast of the given type on the owning process's ast list for
   delivery through the lockspace device (see user.c device_read). */
void dlm_user_add_ast(struct dlm_lkb *lkb, int type);
/* Register / deregister the dlm-control misc device. */
int dlm_user_init(void);
void dlm_user_exit(void);

#endif
diff --git a/fs/dlm/util.c b/fs/dlm/util.c new file mode 100644 index 000000000000..767197db9944 --- /dev/null +++ b/fs/dlm/util.c | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
| 13 | #include "dlm_internal.h" | ||
| 14 | #include "rcom.h" | ||
| 15 | #include "util.h" | ||
| 16 | |||
/* Convert a dlm_header from host to little-endian wire byte order.
   Both messages and rcoms start with this header (see the casts in
   dlm_message_out/dlm_rcom_out below). */
static void header_out(struct dlm_header *hd)
{
	hd->h_version = cpu_to_le32(hd->h_version);
	hd->h_lockspace = cpu_to_le32(hd->h_lockspace);
	hd->h_nodeid = cpu_to_le32(hd->h_nodeid);
	hd->h_length = cpu_to_le16(hd->h_length);
}

/* Convert a dlm_header from little-endian wire to host byte order. */
static void header_in(struct dlm_header *hd)
{
	hd->h_version = le32_to_cpu(hd->h_version);
	hd->h_lockspace = le32_to_cpu(hd->h_lockspace);
	hd->h_nodeid = le32_to_cpu(hd->h_nodeid);
	hd->h_length = le16_to_cpu(hd->h_length);
}
| 32 | |||
/* Byte-swap a dlm_message (header included) from host to wire order
   before it is sent. */
void dlm_message_out(struct dlm_message *ms)
{
	struct dlm_header *hd = (struct dlm_header *) ms;

	header_out(hd);

	ms->m_type = cpu_to_le32(ms->m_type);
	ms->m_nodeid = cpu_to_le32(ms->m_nodeid);
	ms->m_pid = cpu_to_le32(ms->m_pid);
	ms->m_lkid = cpu_to_le32(ms->m_lkid);
	ms->m_remid = cpu_to_le32(ms->m_remid);
	ms->m_parent_lkid = cpu_to_le32(ms->m_parent_lkid);
	ms->m_parent_remid = cpu_to_le32(ms->m_parent_remid);
	ms->m_exflags = cpu_to_le32(ms->m_exflags);
	ms->m_sbflags = cpu_to_le32(ms->m_sbflags);
	ms->m_flags = cpu_to_le32(ms->m_flags);
	ms->m_lvbseq = cpu_to_le32(ms->m_lvbseq);
	ms->m_hash = cpu_to_le32(ms->m_hash);
	ms->m_status = cpu_to_le32(ms->m_status);
	ms->m_grmode = cpu_to_le32(ms->m_grmode);
	ms->m_rqmode = cpu_to_le32(ms->m_rqmode);
	ms->m_bastmode = cpu_to_le32(ms->m_bastmode);
	ms->m_asts = cpu_to_le32(ms->m_asts);
	ms->m_result = cpu_to_le32(ms->m_result);
}

/* Byte-swap a received dlm_message (header included) from wire to
   host order; the exact inverse of dlm_message_out(). */
void dlm_message_in(struct dlm_message *ms)
{
	struct dlm_header *hd = (struct dlm_header *) ms;

	header_in(hd);

	ms->m_type = le32_to_cpu(ms->m_type);
	ms->m_nodeid = le32_to_cpu(ms->m_nodeid);
	ms->m_pid = le32_to_cpu(ms->m_pid);
	ms->m_lkid = le32_to_cpu(ms->m_lkid);
	ms->m_remid = le32_to_cpu(ms->m_remid);
	ms->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid);
	ms->m_parent_remid = le32_to_cpu(ms->m_parent_remid);
	ms->m_exflags = le32_to_cpu(ms->m_exflags);
	ms->m_sbflags = le32_to_cpu(ms->m_sbflags);
	ms->m_flags = le32_to_cpu(ms->m_flags);
	ms->m_lvbseq = le32_to_cpu(ms->m_lvbseq);
	ms->m_hash = le32_to_cpu(ms->m_hash);
	ms->m_status = le32_to_cpu(ms->m_status);
	ms->m_grmode = le32_to_cpu(ms->m_grmode);
	ms->m_rqmode = le32_to_cpu(ms->m_rqmode);
	ms->m_bastmode = le32_to_cpu(ms->m_bastmode);
	ms->m_asts = le32_to_cpu(ms->m_asts);
	ms->m_result = le32_to_cpu(ms->m_result);
}
| 84 | |||
/* Byte-swap an rcom_lock payload from host to wire order. */
static void rcom_lock_out(struct rcom_lock *rl)
{
	rl->rl_ownpid = cpu_to_le32(rl->rl_ownpid);
	rl->rl_lkid = cpu_to_le32(rl->rl_lkid);
	rl->rl_remid = cpu_to_le32(rl->rl_remid);
	rl->rl_parent_lkid = cpu_to_le32(rl->rl_parent_lkid);
	rl->rl_parent_remid = cpu_to_le32(rl->rl_parent_remid);
	rl->rl_exflags = cpu_to_le32(rl->rl_exflags);
	rl->rl_flags = cpu_to_le32(rl->rl_flags);
	rl->rl_lvbseq = cpu_to_le32(rl->rl_lvbseq);
	rl->rl_result = cpu_to_le32(rl->rl_result);
	rl->rl_wait_type = cpu_to_le16(rl->rl_wait_type);
	rl->rl_namelen = cpu_to_le16(rl->rl_namelen);
}

/* Byte-swap an rcom_lock payload from wire to host order. */
static void rcom_lock_in(struct rcom_lock *rl)
{
	rl->rl_ownpid = le32_to_cpu(rl->rl_ownpid);
	rl->rl_lkid = le32_to_cpu(rl->rl_lkid);
	rl->rl_remid = le32_to_cpu(rl->rl_remid);
	rl->rl_parent_lkid = le32_to_cpu(rl->rl_parent_lkid);
	rl->rl_parent_remid = le32_to_cpu(rl->rl_parent_remid);
	rl->rl_exflags = le32_to_cpu(rl->rl_exflags);
	rl->rl_flags = le32_to_cpu(rl->rl_flags);
	rl->rl_lvbseq = le32_to_cpu(rl->rl_lvbseq);
	rl->rl_result = le32_to_cpu(rl->rl_result);
	rl->rl_wait_type = le16_to_cpu(rl->rl_wait_type);
	rl->rl_namelen = le16_to_cpu(rl->rl_namelen);
}

/* Byte-swap an rcom_config payload from host to wire order. */
static void rcom_config_out(struct rcom_config *rf)
{
	rf->rf_lvblen = cpu_to_le32(rf->rf_lvblen);
	rf->rf_lsflags = cpu_to_le32(rf->rf_lsflags);
}

/* Byte-swap an rcom_config payload from wire to host order. */
static void rcom_config_in(struct rcom_config *rf)
{
	rf->rf_lvblen = le32_to_cpu(rf->rf_lvblen);
	rf->rf_lsflags = le32_to_cpu(rf->rf_lsflags);
}
| 126 | |||
/* Byte-swap a dlm_rcom (header, fixed fields and type-dependent
   rc_buf payload) from host to wire order before sending. */
void dlm_rcom_out(struct dlm_rcom *rc)
{
	struct dlm_header *hd = (struct dlm_header *) rc;
	/* rc_type must be read before it is byte-swapped below, so the
	   payload dispatch still sees the host-order value */
	int type = rc->rc_type;

	header_out(hd);

	rc->rc_type = cpu_to_le32(rc->rc_type);
	rc->rc_result = cpu_to_le32(rc->rc_result);
	rc->rc_id = cpu_to_le64(rc->rc_id);

	if (type == DLM_RCOM_LOCK)
		rcom_lock_out((struct rcom_lock *) rc->rc_buf);

	else if (type == DLM_RCOM_STATUS_REPLY)
		rcom_config_out((struct rcom_config *) rc->rc_buf);
}

/* Byte-swap a received dlm_rcom from wire to host order.  Unlike
   dlm_rcom_out(), rc_type can be tested directly because it has
   already been converted back to host order. */
void dlm_rcom_in(struct dlm_rcom *rc)
{
	struct dlm_header *hd = (struct dlm_header *) rc;

	header_in(hd);

	rc->rc_type = le32_to_cpu(rc->rc_type);
	rc->rc_result = le32_to_cpu(rc->rc_result);
	rc->rc_id = le64_to_cpu(rc->rc_id);

	if (rc->rc_type == DLM_RCOM_LOCK)
		rcom_lock_in((struct rcom_lock *) rc->rc_buf);

	else if (rc->rc_type == DLM_RCOM_STATUS_REPLY)
		rcom_config_in((struct rcom_config *) rc->rc_buf);
}
| 161 | |||
diff --git a/fs/dlm/util.h b/fs/dlm/util.h new file mode 100644 index 000000000000..2b25915161c0 --- /dev/null +++ b/fs/dlm/util.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 5 | ** | ||
| 6 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 8 | ** of the GNU General Public License v.2. | ||
| 9 | ** | ||
| 10 | ******************************************************************************* | ||
| 11 | ******************************************************************************/ | ||
| 12 | |||
#ifndef __UTIL_DOT_H__
#define __UTIL_DOT_H__

/* Endianness conversion for dlm wire structures (see util.c):
   *_out converts host -> little-endian before sending,
   *_in converts little-endian -> host after receiving. */
void dlm_message_out(struct dlm_message *ms);
void dlm_message_in(struct dlm_message *ms);
void dlm_rcom_out(struct dlm_rcom *rc);
void dlm_rcom_in(struct dlm_rcom *rc);

#endif
| 22 | |||
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig new file mode 100644 index 000000000000..8c27de8b9568 --- /dev/null +++ b/fs/gfs2/Kconfig | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | config GFS2_FS | ||
| 2 | tristate "GFS2 file system support" | ||
| 3 | depends on EXPERIMENTAL | ||
| 4 | select FS_POSIX_ACL | ||
| 5 | help | ||
| 6 | A cluster filesystem. | ||
| 7 | |||
| 8 | Allows a cluster of computers to simultaneously use a block device | ||
| 9 | that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads | ||
| 10 | and writes to the block device like a local filesystem, but also uses | ||
a lock module to allow the computers to coordinate their I/O so
| 12 | filesystem consistency is maintained. One of the nifty features of | ||
| 13 | GFS is perfect consistency -- changes made to the filesystem on one | ||
| 14 | machine show up immediately on all other machines in the cluster. | ||
| 15 | |||
| 16 | To use the GFS2 filesystem, you will need to enable one or more of | ||
| 17 | the below locking modules. Documentation and utilities for GFS2 can | ||
| 18 | be found here: http://sources.redhat.com/cluster | ||
| 19 | |||
| 20 | config GFS2_FS_LOCKING_NOLOCK | ||
| 21 | tristate "GFS2 \"nolock\" locking module" | ||
| 22 | depends on GFS2_FS | ||
| 23 | help | ||
| 24 | Single node locking module for GFS2. | ||
| 25 | |||
| 26 | Use this module if you want to use GFS2 on a single node without | ||
| 27 | its clustering features. You can still take advantage of the | ||
| 28 | large file support, and upgrade to running a full cluster later on | ||
| 29 | if required. | ||
| 30 | |||
| 31 | If you will only be using GFS2 in cluster mode, you do not need this | ||
| 32 | module. | ||
| 33 | |||
| 34 | config GFS2_FS_LOCKING_DLM | ||
| 35 | tristate "GFS2 DLM locking module" | ||
| 36 | depends on GFS2_FS | ||
| 37 | select DLM | ||
| 38 | help | ||
| 39 | Multiple node locking module for GFS2 | ||
| 40 | |||
| 41 | Most users of GFS2 will require this module. It provides the locking | ||
| 42 | interface between GFS2 and the DLM, which is required to use GFS2 | ||
| 43 | in a cluster environment. | ||
| 44 | |||
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile new file mode 100644 index 000000000000..e3f1ada643ac --- /dev/null +++ b/fs/gfs2/Makefile | |||
| @@ -0,0 +1,10 @@ | |||
# Core gfs2.o module objects; the pluggable locking modules live in
# subdirectories under locking/ and build separately.
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
	glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
	mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
	ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
	recovery.o rgrp.o super.o sys.o trans.o util.o

obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
| 10 | |||
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c new file mode 100644 index 000000000000..5f959b8ce406 --- /dev/null +++ b/fs/gfs2/acl.c | |||
| @@ -0,0 +1,309 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/posix_acl.h> | ||
| 16 | #include <linux/posix_acl_xattr.h> | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "acl.h" | ||
| 23 | #include "eaops.h" | ||
| 24 | #include "eattr.h" | ||
| 25 | #include "glock.h" | ||
| 26 | #include "inode.h" | ||
| 27 | #include "meta_io.h" | ||
| 28 | #include "trans.h" | ||
| 29 | #include "util.h" | ||
| 30 | |||
| 31 | #define ACL_ACCESS 1 | ||
| 32 | #define ACL_DEFAULT 0 | ||
| 33 | |||
/**
 * gfs2_acl_validate_set - Validate a request to set an ACL xattr
 * @ip: the inode the ACL would be attached to
 * @access: non-zero for the access ACL, zero for the default ACL
 * @er: the extended-attribute request carrying the ACL payload
 * @remove: out; set to 1 when the request should remove the ACL instead
 * @mode: in/out; updated when the access ACL reduces to plain mode bits
 *
 * Checks mount options and permissions, then parses and validates the
 * posix ACL in @er.  A NULL parsed ACL means "remove the existing one".
 * For the access ACL, if it is fully representable by the file mode it
 * is folded into @mode and removal is requested rather than storing it.
 *
 * Returns: errno
 */

int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode)
{
	struct posix_acl *acl;
	int error;

	error = gfs2_acl_validate_remove(ip, access);
	if (error)
		return error;

	if (!er->er_data)
		return -EINVAL;

	acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (!acl) {
		/* Empty ACL: caller should delete the xattr */
		*remove = 1;
		return 0;
	}

	error = posix_acl_valid(acl);
	if (error)
		goto out;

	if (access) {
		/* 0: ACL is equivalent to the mode bits, so drop it;
		   > 0: extra entries remain, keep the ACL */
		error = posix_acl_equiv_mode(acl, mode);
		if (!error)
			*remove = 1;
		else if (error > 0)
			error = 0;
	}

out:
	posix_acl_release(acl);
	return error;
}
| 72 | |||
| 73 | int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) | ||
| 74 | { | ||
| 75 | if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) | ||
| 76 | return -EOPNOTSUPP; | ||
| 77 | if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER)) | ||
| 78 | return -EPERM; | ||
| 79 | if (S_ISLNK(ip->i_di.di_mode)) | ||
| 80 | return -EOPNOTSUPP; | ||
| 81 | if (!access && !S_ISDIR(ip->i_di.di_mode)) | ||
| 82 | return -EACCES; | ||
| 83 | |||
| 84 | return 0; | ||
| 85 | } | ||
| 86 | |||
/*
 * acl_get - fetch an ACL xattr from an inode
 * @ip: the inode
 * @access: non-zero for the access ACL, zero for the default ACL
 * @acl: if non-NULL, receives the parsed posix_acl (caller releases)
 * @el: if non-NULL, receives the xattr location and the caller must
 *      brelse(el->el_bh); if NULL, an on-stack location is used and
 *      released here
 * @data: if non-NULL, receives the raw xattr payload (caller kfrees)
 * @len: receives the payload length when @data is used
 *
 * Returns: errno; 0 with outputs untouched when no ACL exists
 */
static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
		   struct gfs2_ea_location *el, char **data, unsigned int *len)
{
	struct gfs2_ea_request er;
	struct gfs2_ea_location el_this;
	int error;

	/* No extended attributes at all => no ACL */
	if (!ip->i_di.di_eattr)
		return 0;

	memset(&er, 0, sizeof(struct gfs2_ea_request));
	if (access) {
		er.er_name = GFS2_POSIX_ACL_ACCESS;
		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
	} else {
		er.er_name = GFS2_POSIX_ACL_DEFAULT;
		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
	}
	er.er_type = GFS2_EATYPE_SYS;

	if (!el)
		el = &el_this;

	error = gfs2_ea_find(ip, &er, el);
	if (error)
		return error;
	if (!el->el_ea)
		return 0;
	if (!GFS2_EA_DATA_LEN(el->el_ea))
		goto out;

	er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
	er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
	error = -ENOMEM;
	if (!er.er_data)
		goto out;

	error = gfs2_ea_get_copy(ip, el, er.er_data);
	if (error)
		goto out_kfree;

	if (acl) {
		*acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
		if (IS_ERR(*acl))
			error = PTR_ERR(*acl);
	}

out_kfree:
	/* Hand the raw buffer to the caller only on success and only if
	   it was asked for; otherwise free it here */
	if (error || !data)
		kfree(er.er_data);
	else {
		*data = er.er_data;
		*len = er.er_data_len;
	}
out:
	/* Release the location buffer unless the caller keeps it */
	if (error || el == &el_this)
		brelse(el->el_bh);
	return error;
}
| 146 | |||
| 147 | /** | ||
| 148 | * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something | ||
| 149 | * @inode: the file we want to do something to | ||
| 150 | * @mask: what we want to do | ||
| 151 | * | ||
| 152 | * Returns: errno | ||
| 153 | */ | ||
| 154 | |||
| 155 | int gfs2_check_acl_locked(struct inode *inode, int mask) | ||
| 156 | { | ||
| 157 | struct posix_acl *acl = NULL; | ||
| 158 | int error; | ||
| 159 | |||
| 160 | error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL); | ||
| 161 | if (error) | ||
| 162 | return error; | ||
| 163 | |||
| 164 | if (acl) { | ||
| 165 | error = posix_acl_permission(inode, acl, mask); | ||
| 166 | posix_acl_release(acl); | ||
| 167 | return error; | ||
| 168 | } | ||
| 169 | |||
| 170 | return -EAGAIN; | ||
| 171 | } | ||
| 172 | |||
| 173 | int gfs2_check_acl(struct inode *inode, int mask) | ||
| 174 | { | ||
| 175 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 176 | struct gfs2_holder i_gh; | ||
| 177 | int error; | ||
| 178 | |||
| 179 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | ||
| 180 | if (!error) { | ||
| 181 | error = gfs2_check_acl_locked(inode, mask); | ||
| 182 | gfs2_glock_dq_uninit(&i_gh); | ||
| 183 | } | ||
| 184 | |||
| 185 | return error; | ||
| 186 | } | ||
| 187 | |||
| 188 | static int munge_mode(struct gfs2_inode *ip, mode_t mode) | ||
| 189 | { | ||
| 190 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 191 | struct buffer_head *dibh; | ||
| 192 | int error; | ||
| 193 | |||
| 194 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
| 195 | if (error) | ||
| 196 | return error; | ||
| 197 | |||
| 198 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 199 | if (!error) { | ||
| 200 | gfs2_assert_withdraw(sdp, | ||
| 201 | (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT)); | ||
| 202 | ip->i_di.di_mode = mode; | ||
| 203 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 204 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
| 205 | brelse(dibh); | ||
| 206 | } | ||
| 207 | |||
| 208 | gfs2_trans_end(sdp); | ||
| 209 | |||
| 210 | return 0; | ||
| 211 | } | ||
| 212 | |||
/**
 * gfs2_acl_create - Inherit ACLs from the parent directory at creation
 * @dip: the parent directory
 * @ip: the freshly created inode
 *
 * If the parent has a default ACL, the new inode inherits it: a new
 * directory stores a copy as its own default ACL, and the access ACL is
 * derived from the default ACL masked against the create mode.  With no
 * default ACL, the process umask is applied to the mode instead.
 *
 * Returns: errno
 */

int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct posix_acl *acl = NULL, *clone;
	struct gfs2_ea_request er;
	mode_t mode = ip->i_di.di_mode;
	int error;

	if (!sdp->sd_args.ar_posix_acl)
		return 0;
	if (S_ISLNK(ip->i_di.di_mode))
		return 0;

	memset(&er, 0, sizeof(struct gfs2_ea_request));
	er.er_type = GFS2_EATYPE_SYS;

	/* Fetch the parent's default ACL, keeping the raw xattr bytes so
	   they can be reused when writing the child's ACLs below */
	error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
			&er.er_data, &er.er_data_len);
	if (error)
		return error;
	if (!acl) {
		/* No default ACL: plain umask semantics */
		mode &= ~current->fs->umask;
		if (mode != ip->i_di.di_mode)
			error = munge_mode(ip, mode);
		return error;
	}

	/* posix_acl_create_masq() mutates the ACL, so work on a copy */
	clone = posix_acl_clone(acl, GFP_KERNEL);
	error = -ENOMEM;
	if (!clone)
		goto out;
	posix_acl_release(acl);
	acl = clone;

	if (S_ISDIR(ip->i_di.di_mode)) {
		/* Directories inherit the default ACL verbatim */
		er.er_name = GFS2_POSIX_ACL_DEFAULT;
		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
		error = gfs2_system_eaops.eo_set(ip, &er);
		if (error)
			goto out;
	}

	error = posix_acl_create_masq(acl, &mode);
	if (error < 0)
		goto out;
	if (error > 0) {
		/* Entries remain after masking: store them as the access
		   ACL and update the mode within the same request */
		er.er_name = GFS2_POSIX_ACL_ACCESS;
		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
		posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
		er.er_mode = mode;
		er.er_flags = GFS2_ERF_MODE;
		error = gfs2_system_eaops.eo_set(ip, &er);
		if (error)
			goto out;
	} else
		/* The ACL collapsed entirely into the mode bits */
		munge_mode(ip, mode);

out:
	posix_acl_release(acl);
	kfree(er.er_data);
	return error;
}
| 275 | |||
/**
 * gfs2_acl_chmod - Update the access ACL to reflect a mode change
 * @ip: the inode being chmod'ed
 * @attr: the new attributes (attr->ia_mode is consumed here)
 *
 * If the inode has an access ACL, it is rewritten in place so that its
 * entries agree with the new mode; otherwise a plain setattr is done.
 *
 * Returns: errno
 */

int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
{
	struct posix_acl *acl = NULL, *clone;
	struct gfs2_ea_location el;
	char *data;
	unsigned int len;
	int error;

	/* el/data/len are only filled in when an ACL is actually found */
	error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
	if (error)
		return error;
	if (!acl)
		return gfs2_setattr_simple(ip, attr);

	/* posix_acl_chmod_masq() mutates the ACL, so work on a copy */
	clone = posix_acl_clone(acl, GFP_KERNEL);
	error = -ENOMEM;
	if (!clone)
		goto out;
	posix_acl_release(acl);
	acl = clone;

	error = posix_acl_chmod_masq(acl, attr->ia_mode);
	if (!error) {
		/* Serialize the updated ACL back into the existing xattr */
		posix_acl_to_xattr(acl, data, len);
		error = gfs2_ea_acl_chmod(ip, &el, attr, data);
	}

out:
	posix_acl_release(acl);
	brelse(el.el_bh);
	kfree(data);
	return error;
}
| 309 | |||
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h new file mode 100644 index 000000000000..05c294fe0d78 --- /dev/null +++ b/fs/gfs2/acl.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __ACL_DOT_H__
#define __ACL_DOT_H__

#include "incore.h"

/* Names (and their string lengths) of the system-namespace xattrs that
   hold the posix access and default ACLs */
#define GFS2_POSIX_ACL_ACCESS		"posix_acl_access"
#define GFS2_POSIX_ACL_ACCESS_LEN	16
#define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
#define GFS2_POSIX_ACL_DEFAULT_LEN	17

/* Match an xattr name/length pair against the ACL xattr names */
#define GFS2_ACL_IS_ACCESS(name, len) \
	 ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
	 !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))

#define GFS2_ACL_IS_DEFAULT(name, len) \
	 ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
	 !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))

struct gfs2_ea_request;

int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode);
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
int gfs2_check_acl_locked(struct inode *inode, int mask);
int gfs2_check_acl(struct inode *inode, int mask);
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);

#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c new file mode 100644 index 000000000000..cc57f2ecd219 --- /dev/null +++ b/fs/gfs2/bmap.c | |||
| @@ -0,0 +1,1221 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/crc32.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "bmap.h" | ||
| 22 | #include "glock.h" | ||
| 23 | #include "inode.h" | ||
| 24 | #include "meta_io.h" | ||
| 25 | #include "quota.h" | ||
| 26 | #include "rgrp.h" | ||
| 27 | #include "trans.h" | ||
| 28 | #include "dir.h" | ||
| 29 | #include "util.h" | ||
| 30 | #include "ops_address.h" | ||
| 31 | |||
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	__u16 mp_list[GFS2_MAX_META_HEIGHT];	/* pointer index per tree level */
};

/* Callback invoked on each span of pointers while walking the metadata
 * tree during truncation; @top..@bottom delimit the pointers at @height. */
typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
			     struct buffer_head *bh, u64 *top,
			     u64 *bottom, unsigned int height,
			     void *data);

/* Per-pass truncation state: the tree height being stripped and whether
 * this is the first block visited in the pass. */
struct strip_mine {
	int sm_first;
	unsigned int sm_height;
};
| 49 | |||
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: a locked page held by the caller, or NULL (one is grabbed here)
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* Use the caller's page only if it covers offset 0 of the file */
	if (!page || page->index) {
		page = grab_cache_page(inode->i_mapping, 0);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);

		/* Copy the stuffed data out of the dinode, zero the rest */
		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
		       ip->i_di.di_size);
		memset(kaddr + ip->i_di.di_size, 0,
		       PAGE_CACHE_SIZE - ip->i_di.di_size);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits,
				     (1 << BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* Ordered/journaled data modes must log the buffer */
	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);
	mark_buffer_dirty(bh);

	if (release) {
		unlock_page(page);
		page_cache_release(page);
	}

	return 0;
}
| 108 | |||
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: a locked page holding the inode's data, or NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (ip->i_di.di_size) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		if (isdir) {
			block = gfs2_alloc_meta(ip);

			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			block = gfs2_alloc_data(ip);

			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (ip->i_di.di_size) {
		/* First pointer slot immediately follows the dinode header */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		ip->i_di.di_blocks++;
		di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
	}

	/* The inode is now a height-1 metadata tree */
	ip->i_di.di_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
| 178 | |||
| 179 | /** | ||
| 180 | * calc_tree_height - Calculate the height of a metadata tree | ||
| 181 | * @ip: The GFS2 inode | ||
| 182 | * @size: The proposed size of the file | ||
| 183 | * | ||
| 184 | * Work out how tall a metadata tree needs to be in order to accommodate a | ||
| 185 | * file of a particular size. If size is less than the current size of | ||
| 186 | * the inode, then the current size of the inode is used instead of the | ||
| 187 | * supplied one. | ||
| 188 | * | ||
| 189 | * Returns: the height the tree should be | ||
| 190 | */ | ||
| 191 | |||
| 192 | static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size) | ||
| 193 | { | ||
| 194 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 195 | u64 *arr; | ||
| 196 | unsigned int max, height; | ||
| 197 | |||
| 198 | if (ip->i_di.di_size > size) | ||
| 199 | size = ip->i_di.di_size; | ||
| 200 | |||
| 201 | if (gfs2_is_dir(ip)) { | ||
| 202 | arr = sdp->sd_jheightsize; | ||
| 203 | max = sdp->sd_max_jheight; | ||
| 204 | } else { | ||
| 205 | arr = sdp->sd_heightsize; | ||
| 206 | max = sdp->sd_max_height; | ||
| 207 | } | ||
| 208 | |||
| 209 | for (height = 0; height < max; height++) | ||
| 210 | if (arr[height] >= size) | ||
| 211 | break; | ||
| 212 | |||
| 213 | return height; | ||
| 214 | } | ||
| 215 | |||
/**
 * build_height - Build a metadata tree of the requested height
 * @inode: The inode
 * @height: The height to build to
 *
 * Adds new indirect-block levels at the top of the inode's metadata
 * tree until it is @height tall, pushing the existing pointers down.
 *
 * Returns: errno
 */

static int build_height(struct inode *inode, unsigned height)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned new_height = height - ip->i_di.di_height;
	struct buffer_head *dibh;
	struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
	struct gfs2_dinode *di;
	int error;
	u64 *bp;
	u64 bn;
	unsigned n;

	if (height <= ip->i_di.di_height)
		return 0;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	/* Allocate one new indirect block per added level */
	for(n = 0; n < new_height; n++) {
		bn = gfs2_alloc_meta(ip);
		blocks[n] = gfs2_meta_new(ip->i_gl, bn);
		gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
	}

	n = 0;
	bn = blocks[0]->b_blocknr;
	/* Chain the new blocks together: each block's first pointer leads
	   to the next level down */
	if (new_height > 1) {
		for(; n < new_height-1; n++) {
			gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
					  GFS2_FORMAT_IN);
			gfs2_buffer_clear_tail(blocks[n],
					       sizeof(struct gfs2_meta_header));
			bp = (u64 *)(blocks[n]->b_data +
				     sizeof(struct gfs2_meta_header));
			*bp = cpu_to_be64(blocks[n+1]->b_blocknr);
			brelse(blocks[n]);
			blocks[n] = NULL;
		}
	}
	/* The lowest new block inherits the dinode's old pointers */
	gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
			      dibh, sizeof(struct gfs2_dinode));
	brelse(blocks[n]);
	/* ... and the dinode now points only at the topmost new block */
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	*(__be64 *)(di + 1) = cpu_to_be64(bn);
	ip->i_di.di_height += new_height;
	ip->i_di.di_blocks += new_height;
	di->di_height = cpu_to_be16(ip->i_di.di_height);
	di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
	brelse(dibh);
	return error;
}
| 280 | |||
| 281 | /** | ||
| 282 | * find_metapath - Find path through the metadata tree | ||
| 283 | * @ip: The inode pointer | ||
| 284 | * @mp: The metapath to return the result in | ||
| 285 | * @block: The disk block to look up | ||
| 286 | * | ||
| 287 | * This routine returns a struct metapath structure that defines a path | ||
| 288 | * through the metadata of inode "ip" to get to block "block". | ||
| 289 | * | ||
| 290 | * Example: | ||
| 291 | * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a | ||
| 292 | * filesystem with a blocksize of 4096. | ||
| 293 | * | ||
| 294 | * find_metapath() would return a struct metapath structure set to: | ||
| 295 | * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, | ||
| 296 | * and mp_list[2] = 165. | ||
| 297 | * | ||
| 298 | * That means that in order to get to the block containing the byte at | ||
| 299 | * offset 101342453, we would load the indirect block pointed to by pointer | ||
| 300 | * 0 in the dinode. We would then load the indirect block pointed to by | ||
| 301 | * pointer 48 in that indirect block. We would then load the data block | ||
| 302 | * pointed to by pointer 165 in that indirect block. | ||
| 303 | * | ||
| 304 | * ---------------------------------------- | ||
| 305 | * | Dinode | | | ||
| 306 | * | | 4| | ||
| 307 | * | |0 1 2 3 4 5 9| | ||
| 308 | * | | 6| | ||
| 309 | * ---------------------------------------- | ||
| 310 | * | | ||
| 311 | * | | ||
| 312 | * V | ||
| 313 | * ---------------------------------------- | ||
| 314 | * | Indirect Block | | ||
| 315 | * | 5| | ||
| 316 | * | 4 4 4 4 4 5 5 1| | ||
| 317 | * |0 5 6 7 8 9 0 1 2| | ||
| 318 | * ---------------------------------------- | ||
| 319 | * | | ||
| 320 | * | | ||
| 321 | * V | ||
| 322 | * ---------------------------------------- | ||
| 323 | * | Indirect Block | | ||
| 324 | * | 1 1 1 1 1 5| | ||
| 325 | * | 6 6 6 6 6 1| | ||
| 326 | * |0 3 4 5 6 7 2| | ||
| 327 | * ---------------------------------------- | ||
| 328 | * | | ||
| 329 | * | | ||
| 330 | * V | ||
| 331 | * ---------------------------------------- | ||
| 332 | * | Data block containing offset | | ||
| 333 | * | 101342453 | | ||
| 334 | * | | | ||
| 335 | * | | | ||
| 336 | * ---------------------------------------- | ||
| 337 | * | ||
| 338 | */ | ||
| 339 | |||
| 340 | static void find_metapath(struct gfs2_inode *ip, u64 block, | ||
| 341 | struct metapath *mp) | ||
| 342 | { | ||
| 343 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 344 | u64 b = block; | ||
| 345 | unsigned int i; | ||
| 346 | |||
| 347 | for (i = ip->i_di.di_height; i--;) | ||
| 348 | mp->mp_list[i] = do_div(b, sdp->sd_inptrs); | ||
| 349 | |||
| 350 | } | ||
| 351 | |||
| 352 | /** | ||
| 353 | * metapointer - Return pointer to start of metadata in a buffer | ||
| 354 | * @bh: The buffer | ||
| 355 | * @height: The metadata height (0 = dinode) | ||
| 356 | * @mp: The metapath | ||
| 357 | * | ||
| 358 | * Return a pointer to the block number of the next height of the metadata | ||
| 359 | * tree given a buffer containing the pointer to the current height of the | ||
| 360 | * metadata tree. | ||
| 361 | */ | ||
| 362 | |||
| 363 | static inline u64 *metapointer(struct buffer_head *bh, int *boundary, | ||
| 364 | unsigned int height, const struct metapath *mp) | ||
| 365 | { | ||
| 366 | unsigned int head_size = (height > 0) ? | ||
| 367 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); | ||
| 368 | u64 *ptr; | ||
| 369 | *boundary = 0; | ||
| 370 | ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height]; | ||
| 371 | if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size)) | ||
| 372 | *boundary = 1; | ||
| 373 | return ptr; | ||
| 374 | } | ||
| 375 | |||
| 376 | /** | ||
| 377 | * lookup_block - Get the next metadata block in metadata tree | ||
| 378 | * @ip: The GFS2 inode | ||
| 379 | * @bh: Buffer containing the pointers to metadata blocks | ||
| 380 | * @height: The height of the tree (0 = dinode) | ||
| 381 | * @mp: The metapath | ||
| 382 | * @create: Non-zero if we may create a new meatdata block | ||
| 383 | * @new: Used to indicate if we did create a new metadata block | ||
| 384 | * @block: the returned disk block number | ||
| 385 | * | ||
| 386 | * Given a metatree, complete to a particular height, checks to see if the next | ||
| 387 | * height of the tree exists. If not the next height of the tree is created. | ||
| 388 | * The block number of the next height of the metadata tree is returned. | ||
| 389 | * | ||
| 390 | */ | ||
| 391 | |||
static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
			unsigned int height, struct metapath *mp, int create,
			int *new, u64 *block)
{
	int boundary;
	u64 *ptr = metapointer(bh, &boundary, height, mp);

	/* Pointer already set: return it, reporting whether it was the
	   last slot in this buffer */
	if (*ptr) {
		*block = be64_to_cpu(*ptr);
		return boundary;
	}

	*block = 0;

	if (!create)
		return 0;

	/* The bottom level of a regular file holds data blocks; all other
	   levels (and every level of a directory) hold metadata */
	if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip))
		*block = gfs2_alloc_data(ip);
	else
		*block = gfs2_alloc_meta(ip);

	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	*ptr = cpu_to_be64(*block);
	ip->i_di.di_blocks++;

	*new = 1;
	return 0;
}
| 422 | |||
| 423 | /** | ||
| 424 | * gfs2_block_pointers - Map a block from an inode to a disk block | ||
| 425 | * @inode: The inode | ||
| 426 | * @lblock: The logical block number | ||
| 427 | * @map_bh: The bh to be mapped | ||
| 428 | * @mp: metapath to use | ||
| 429 | * | ||
| 430 | * Find the block number on the current device which corresponds to an | ||
| 431 | * inode's block. If the block had to be created, "new" will be set. | ||
| 432 | * | ||
| 433 | * Returns: errno | ||
| 434 | */ | ||
| 435 | |||
| 436 | static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create, | ||
| 437 | struct buffer_head *bh_map, struct metapath *mp, | ||
| 438 | unsigned int maxlen) | ||
| 439 | { | ||
| 440 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 441 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 442 | struct buffer_head *bh; | ||
| 443 | unsigned int bsize; | ||
| 444 | unsigned int height; | ||
| 445 | unsigned int end_of_metadata; | ||
| 446 | unsigned int x; | ||
| 447 | int error = 0; | ||
| 448 | int new = 0; | ||
| 449 | u64 dblock = 0; | ||
| 450 | int boundary; | ||
| 451 | |||
| 452 | BUG_ON(maxlen == 0); | ||
| 453 | |||
| 454 | if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) | ||
| 455 | return 0; | ||
| 456 | |||
| 457 | bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; | ||
| 458 | |||
| 459 | height = calc_tree_height(ip, (lblock + 1) * bsize); | ||
| 460 | if (ip->i_di.di_height < height) { | ||
| 461 | if (!create) | ||
| 462 | return 0; | ||
| 463 | |||
| 464 | error = build_height(inode, height); | ||
| 465 | if (error) | ||
| 466 | return error; | ||
| 467 | } | ||
| 468 | |||
| 469 | find_metapath(ip, lblock, mp); | ||
| 470 | end_of_metadata = ip->i_di.di_height - 1; | ||
| 471 | |||
| 472 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
| 473 | if (error) | ||
| 474 | return error; | ||
| 475 | |||
| 476 | for (x = 0; x < end_of_metadata; x++) { | ||
| 477 | lookup_block(ip, bh, x, mp, create, &new, &dblock); | ||
| 478 | brelse(bh); | ||
| 479 | if (!dblock) | ||
| 480 | return 0; | ||
| 481 | |||
| 482 | error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh); | ||
| 483 | if (error) | ||
| 484 | return error; | ||
| 485 | } | ||
| 486 | |||
| 487 | boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock); | ||
| 488 | clear_buffer_mapped(bh_map); | ||
| 489 | clear_buffer_new(bh_map); | ||
| 490 | clear_buffer_boundary(bh_map); | ||
| 491 | |||
| 492 | if (dblock) { | ||
| 493 | map_bh(bh_map, inode->i_sb, dblock); | ||
| 494 | if (boundary) | ||
| 495 | set_buffer_boundary(bh); | ||
| 496 | if (new) { | ||
| 497 | struct buffer_head *dibh; | ||
| 498 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 499 | if (!error) { | ||
| 500 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 501 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
| 502 | brelse(dibh); | ||
| 503 | } | ||
| 504 | set_buffer_new(bh_map); | ||
| 505 | goto out_brelse; | ||
| 506 | } | ||
| 507 | while(--maxlen && !buffer_boundary(bh_map)) { | ||
| 508 | u64 eblock; | ||
| 509 | |||
| 510 | mp->mp_list[end_of_metadata]++; | ||
| 511 | boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock); | ||
| 512 | if (eblock != ++dblock) | ||
| 513 | break; | ||
| 514 | bh_map->b_size += (1 << inode->i_blkbits); | ||
| 515 | if (boundary) | ||
| 516 | set_buffer_boundary(bh_map); | ||
| 517 | } | ||
| 518 | } | ||
| 519 | out_brelse: | ||
| 520 | brelse(bh); | ||
| 521 | return 0; | ||
| 522 | } | ||
| 523 | |||
| 524 | |||
| 525 | static inline void bmap_lock(struct inode *inode, int create) | ||
| 526 | { | ||
| 527 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 528 | if (create) | ||
| 529 | down_write(&ip->i_rw_mutex); | ||
| 530 | else | ||
| 531 | down_read(&ip->i_rw_mutex); | ||
| 532 | } | ||
| 533 | |||
| 534 | static inline void bmap_unlock(struct inode *inode, int create) | ||
| 535 | { | ||
| 536 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 537 | if (create) | ||
| 538 | up_write(&ip->i_rw_mutex); | ||
| 539 | else | ||
| 540 | up_read(&ip->i_rw_mutex); | ||
| 541 | } | ||
| 542 | |||
| 543 | int gfs2_block_map(struct inode *inode, u64 lblock, int create, | ||
| 544 | struct buffer_head *bh, unsigned int maxlen) | ||
| 545 | { | ||
| 546 | struct metapath mp; | ||
| 547 | int ret; | ||
| 548 | |||
| 549 | bmap_lock(inode, create); | ||
| 550 | ret = gfs2_block_pointers(inode, lblock, create, bh, &mp, maxlen); | ||
| 551 | bmap_unlock(inode, create); | ||
| 552 | return ret; | ||
| 553 | } | ||
| 554 | |||
/**
 * gfs2_extent_map - map a run of up to 32 contiguous blocks
 * @inode: the inode
 * @lblock: first logical block of the extent
 * @new: in: non-zero to allow allocation; out: set if a block was allocated
 * @dblock: receives the first physical block number (0 if unmapped)
 * @extlen: receives the extent length, derived from the throwaway bh
 *
 * Returns: errno
 */

int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
	struct metapath mp;
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0, .b_size = 0 };
	int ret;
	/* NOTE(review): *new is dereferenced here before the BUG_ON(!new)
	   below ever runs — the check is ineffective for a NULL @new */
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

	bmap_lock(inode, create);
	ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp, 32);
	bmap_unlock(inode, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
}
| 577 | |||
| 578 | /** | ||
| 579 | * recursive_scan - recursively scan through the end of a file | ||
| 580 | * @ip: the inode | ||
| 581 | * @dibh: the dinode buffer | ||
| 582 | * @mp: the path through the metadata to the point to start | ||
| 583 | * @height: the height the recursion is at | ||
| 584 | * @block: the indirect block to look at | ||
| 585 | * @first: 1 if this is the first block | ||
| 586 | * @bc: the call to make for each piece of metadata | ||
| 587 | * @data: data opaque to this function to pass to @bc | ||
| 588 | * | ||
| 589 | * When this is first called @height and @block should be zero and | ||
| 590 | * @first should be 1. | ||
| 591 | * | ||
| 592 | * Returns: errno | ||
| 593 | */ | ||
| 594 | |||
static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
			  struct metapath *mp, unsigned int height,
			  u64 block, int first, block_call_t bc,
			  void *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *bh = NULL;
	u64 *top, *bottom;
	u64 bn;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (!height) {
		/* Height 0: the block pointers live in the dinode itself,
		   right after the on-disk dinode header */
		error = gfs2_meta_inode_buffer(ip, &bh);
		if (error)
			return error;
		dibh = bh;

		top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
		bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
	} else {
		/* Indirect block: pointers follow the meta header */
		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
		if (error)
			return error;

		/* Only the first block visited at each height starts at the
		   offset recorded in the metapath; subsequent siblings are
		   scanned from their first pointer */
		top = (u64 *)(bh->b_data + mh_size) +
				  (first ? mp->mp_list[height] : 0);

		bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
	}

	/* Let the callback process this [top, bottom) pointer range */
	error = bc(ip, dibh, bh, top, bottom, height, data);
	if (error)
		goto out;

	/* Recurse into each non-null pointer unless this is already the
	   last level of metadata (the level below holds data blocks) */
	if (height < ip->i_di.di_height - 1)
		for (; top < bottom; top++, first = 0) {
			if (!*top)
				continue;

			bn = be64_to_cpu(*top);

			error = recursive_scan(ip, dibh, mp, height + 1, bn,
					       first, bc, data);
			if (error)
				break;
		}

out:
	brelse(bh);
	return error;
}
| 647 | |||
| 648 | /** | ||
| 649 | * do_strip - Look for a layer a particular layer of the file and strip it off | ||
| 650 | * @ip: the inode | ||
| 651 | * @dibh: the dinode buffer | ||
| 652 | * @bh: A buffer of pointers | ||
| 653 | * @top: The first pointer in the buffer | ||
| 654 | * @bottom: One more than the last pointer | ||
| 655 | * @height: the height this buffer is at | ||
| 656 | * @data: a pointer to a struct strip_mine | ||
| 657 | * | ||
| 658 | * Returns: errno | ||
| 659 | */ | ||
| 660 | |||
static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
		    struct buffer_head *bh, u64 *top, u64 *bottom,
		    unsigned int height, void *data)
{
	struct strip_mine *sm = data;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrp_list rlist;
	u64 bn, bstart;	/* current contiguous run: start block ... */
	u32 blen;	/* ... and its length */
	u64 *p;
	unsigned int rg_blocks = 0;
	int metadata;
	unsigned int revokes = 0;
	int x;
	int error;

	if (!*top)
		sm->sm_first = 0;

	/* This callback only acts at the one height being stripped */
	if (height != sm->sm_height)
		return 0;

	/* The very first pointer at the truncation point is kept */
	if (sm->sm_first) {
		top++;
		sm->sm_first = 0;
	}

	/* Pointers one level above the data blocks point at metadata;
	   freeing metadata needs journal revokes for each pointer slot */
	metadata = (height != ip->i_di.di_height - 1);
	if (metadata)
		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;

	error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
	if (error)
		return error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
	bstart = 0;
	blen = 0;

	/* Pass 1: coalesce the pointers into contiguous runs and collect
	   the resource groups they belong to (no modification yet) */
	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(sdp, &rlist, bstart);

			bstart = bn;
			blen = 1;
		}
	}

	if (bstart)
		gfs2_rlist_add(sdp, &rlist, bstart);
	else
		goto out; /* Nothing to do */

	/* Lock every affected resource group exclusively */
	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);

	/* Reserve enough journal space to dirty each rgrp's bitmaps */
	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_ri.ri_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist;

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
				 revokes);
	if (error)
		goto out_rg_gunlock;

	down_write(&ip->i_rw_mutex);

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	bstart = 0;
	blen = 0;

	/* Pass 2: same coalescing walk, but now actually free the runs,
	   zero each pointer and account the dropped blocks */
	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart) {
				if (metadata)
					gfs2_free_meta(ip, bstart, blen);
				else
					gfs2_free_data(ip, bstart, blen);
			}

			bstart = bn;
			blen = 1;
		}

		*p = 0;
		if (!ip->i_di.di_blocks)
			gfs2_consist_inode(ip);
		ip->i_di.di_blocks--;
	}
	/* Free the final pending run */
	if (bstart) {
		if (metadata)
			gfs2_free_meta(ip, bstart, blen);
		else
			gfs2_free_data(ip, bstart, blen);
	}

	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();

	gfs2_dinode_out(&ip->i_di, dibh->b_data);

	up_write(&ip->i_rw_mutex);

	gfs2_trans_end(sdp);

out_rg_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
	gfs2_rlist_free(&rlist);
out:
	gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
	return error;
}
| 796 | |||
| 797 | /** | ||
| 798 | * do_grow - Make a file look bigger than it is | ||
| 799 | * @ip: the inode | ||
| 800 | * @size: the size to set the file to | ||
| 801 | * | ||
| 802 | * Called with an exclusive lock on @ip. | ||
| 803 | * | ||
| 804 | * Returns: errno | ||
| 805 | */ | ||
| 806 | |||
static int do_grow(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al;
	struct buffer_head *dibh;
	unsigned int h;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
	if (error)
		goto out_gunlock_q;

	/* Worst case: one new block per level of the metadata tree */
	al->al_requested = sdp->sd_max_height + RES_DATA;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(sdp,
			sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	/* If the new size no longer fits inside the dinode block,
	   unstuff it and make sure the tree is tall enough */
	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
		if (gfs2_is_stuffed(ip)) {
			error = gfs2_unstuff_dinode(ip, NULL);
			if (error)
				goto out_end_trans;
		}

		h = calc_tree_height(ip, size);
		if (ip->i_di.di_height < h) {
			down_write(&ip->i_rw_mutex);
			error = build_height(&ip->i_inode, h);
			up_write(&ip->i_rw_mutex);
			if (error)
				goto out_end_trans;
		}
	}

	/* Only the size changes; no data blocks are allocated here */
	ip->i_di.di_size = size;
	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out_end_trans;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

out_end_trans:
	gfs2_trans_end(sdp);
out_ipres:
	gfs2_inplace_release(ip);
out_gunlock_q:
	gfs2_quota_unlock(ip);
out:
	gfs2_alloc_put(ip);
	return error;
}
| 875 | |||
| 876 | |||
| 877 | /** | ||
| 878 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate | ||
| 879 | * | ||
| 880 | * This is partly borrowed from ext3. | ||
| 881 | */ | ||
static int gfs2_block_truncate_page(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t from = inode->i_size;	/* zero from EOF to the block end */
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct buffer_head *bh;
	struct page *page;
	void *kaddr;
	int err;

	page = grab_cache_page(mapping, index);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	/* Number of bytes to zero inside the partial block */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	/* Ordered or journaled data must go through the journal */
	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

	/* Zero the tail of the partial block */
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + offset, 0, length);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
| 951 | |||
/*
 * trunc_start - first phase of a shrinking truncate
 * @ip: the inode
 * @size: the new (smaller) size
 *
 * Updates di_size and zeroes any partial tail block.
 *
 * Returns: errno, or 1 if the inode was stuffed and the whole
 * truncate is already complete (no blocks to deallocate).
 */
static int trunc_start(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int journaled = gfs2_is_jdata(ip);
	int error;

	error = gfs2_trans_begin(sdp,
				 RES_DINODE + (journaled ? RES_JDATA : 0), 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (gfs2_is_stuffed(ip)) {
		/* Data lives in the dinode: just clear the dropped tail */
		ip->i_di.di_size = size;
		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
		error = 1;	/* tell the caller nothing more to do */

	} else {
		/* Zero the partial final block, if any */
		if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
			error = gfs2_block_truncate_page(ip->i_inode.i_mapping);

		if (!error) {
			ip->i_di.di_size = size;
			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
			/* Flag so a crash mid-truncate can be resumed */
			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			gfs2_dinode_out(&ip->i_di, dibh->b_data);
		}
	}

	brelse(dibh);

out:
	gfs2_trans_end(sdp);
	return error;
}
| 995 | |||
| 996 | static int trunc_dealloc(struct gfs2_inode *ip, u64 size) | ||
| 997 | { | ||
| 998 | unsigned int height = ip->i_di.di_height; | ||
| 999 | u64 lblock; | ||
| 1000 | struct metapath mp; | ||
| 1001 | int error; | ||
| 1002 | |||
| 1003 | if (!size) | ||
| 1004 | lblock = 0; | ||
| 1005 | else | ||
| 1006 | lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift; | ||
| 1007 | |||
| 1008 | find_metapath(ip, lblock, &mp); | ||
| 1009 | gfs2_alloc_get(ip); | ||
| 1010 | |||
| 1011 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
| 1012 | if (error) | ||
| 1013 | goto out; | ||
| 1014 | |||
| 1015 | while (height--) { | ||
| 1016 | struct strip_mine sm; | ||
| 1017 | sm.sm_first = !!size; | ||
| 1018 | sm.sm_height = height; | ||
| 1019 | |||
| 1020 | error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm); | ||
| 1021 | if (error) | ||
| 1022 | break; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | gfs2_quota_unhold(ip); | ||
| 1026 | |||
| 1027 | out: | ||
| 1028 | gfs2_alloc_put(ip); | ||
| 1029 | return error; | ||
| 1030 | } | ||
| 1031 | |||
/*
 * trunc_end - final phase of a shrinking truncate
 * @ip: the inode
 *
 * Clears the in-progress flag and, for a truncate to zero, resets the
 * tree height and allocation goals back to the dinode itself.
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!ip->i_di.di_size) {
		/* Empty file: collapse the tree and point the allocation
		   goals back at the inode's own block */
		ip->i_di.di_height = 0;
		ip->i_di.di_goal_meta =
			ip->i_di.di_goal_data =
			ip->i_num.no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	}
	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
	/* Truncate is complete; clear the resume marker */
	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
| 1067 | |||
| 1068 | /** | ||
| 1069 | * do_shrink - make a file smaller | ||
| 1070 | * @ip: the inode | ||
| 1071 | * @size: the size to make the file | ||
| 1072 | * @truncator: function to truncate the last partial block | ||
| 1073 | * | ||
| 1074 | * Called with an exclusive lock on @ip. | ||
| 1075 | * | ||
| 1076 | * Returns: errno | ||
| 1077 | */ | ||
| 1078 | |||
| 1079 | static int do_shrink(struct gfs2_inode *ip, u64 size) | ||
| 1080 | { | ||
| 1081 | int error; | ||
| 1082 | |||
| 1083 | error = trunc_start(ip, size); | ||
| 1084 | if (error < 0) | ||
| 1085 | return error; | ||
| 1086 | if (error > 0) | ||
| 1087 | return 0; | ||
| 1088 | |||
| 1089 | error = trunc_dealloc(ip, size); | ||
| 1090 | if (!error) | ||
| 1091 | error = trunc_end(ip); | ||
| 1092 | |||
| 1093 | return error; | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | /** | ||
| 1097 | * gfs2_truncatei - make a file a given size | ||
| 1098 | * @ip: the inode | ||
| 1099 | * @size: the size to make the file | ||
| 1100 | * @truncator: function to truncate the last partial block | ||
| 1101 | * | ||
| 1102 | * The file size can grow, shrink, or stay the same size. | ||
| 1103 | * | ||
| 1104 | * Returns: errno | ||
| 1105 | */ | ||
| 1106 | |||
| 1107 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size) | ||
| 1108 | { | ||
| 1109 | int error; | ||
| 1110 | |||
| 1111 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode))) | ||
| 1112 | return -EINVAL; | ||
| 1113 | |||
| 1114 | if (size > ip->i_di.di_size) | ||
| 1115 | error = do_grow(ip, size); | ||
| 1116 | else | ||
| 1117 | error = do_shrink(ip, size); | ||
| 1118 | |||
| 1119 | return error; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | int gfs2_truncatei_resume(struct gfs2_inode *ip) | ||
| 1123 | { | ||
| 1124 | int error; | ||
| 1125 | error = trunc_dealloc(ip, ip->i_di.di_size); | ||
| 1126 | if (!error) | ||
| 1127 | error = trunc_end(ip); | ||
| 1128 | return error; | ||
| 1129 | } | ||
| 1130 | |||
/* Deallocate every block of the file (truncate the tree to size zero) */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return trunc_dealloc(ip, 0);
}
| 1135 | |||
| 1136 | /** | ||
| 1137 | * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file | ||
| 1138 | * @ip: the file | ||
| 1139 | * @len: the number of bytes to be written to the file | ||
| 1140 | * @data_blocks: returns the number of data blocks required | ||
| 1141 | * @ind_blocks: returns the number of indirect blocks required | ||
| 1142 | * | ||
| 1143 | */ | ||
| 1144 | |||
| 1145 | void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, | ||
| 1146 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
| 1147 | { | ||
| 1148 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 1149 | unsigned int tmp; | ||
| 1150 | |||
| 1151 | if (gfs2_is_dir(ip)) { | ||
| 1152 | *data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2; | ||
| 1153 | *ind_blocks = 3 * (sdp->sd_max_jheight - 1); | ||
| 1154 | } else { | ||
| 1155 | *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3; | ||
| 1156 | *ind_blocks = 3 * (sdp->sd_max_height - 1); | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) { | ||
| 1160 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
| 1161 | *ind_blocks += tmp; | ||
| 1162 | } | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | /** | ||
| 1166 | * gfs2_write_alloc_required - figure out if a write will require an allocation | ||
| 1167 | * @ip: the file being written to | ||
| 1168 | * @offset: the offset to write to | ||
| 1169 | * @len: the number of bytes being written | ||
| 1170 | * @alloc_required: set to 1 if an alloc is required, 0 otherwise | ||
| 1171 | * | ||
| 1172 | * Returns: errno | ||
| 1173 | */ | ||
| 1174 | |||
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len, int *alloc_required)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 lblock, lblock_stop, dblock;
	u32 extlen;
	int new = 0;	/* 0: don't allocate while probing the mapping */
	int error = 0;

	*alloc_required = 0;

	if (!len)
		return 0;

	if (gfs2_is_stuffed(ip)) {
		/* Stuffed inode: allocation needed only if the write
		   spills past the space inside the dinode block */
		if (offset + len >
		    sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
			*alloc_required = 1;
		return 0;
	}

	if (gfs2_is_dir(ip)) {
		/* Directories are measured in journaled-data blocks;
		   note do_div() divides its first argument in place */
		unsigned int bsize = sdp->sd_jbsize;
		lblock = offset;
		do_div(lblock, bsize);
		lblock_stop = offset + len + bsize - 1;
		do_div(lblock_stop, bsize);
	} else {
		unsigned int shift = sdp->sd_sb.sb_bsize_shift;
		lblock = offset >> shift;
		lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
	}

	/* Walk the range extent by extent; any hole means we must allocate */
	for (; lblock < lblock_stop; lblock += extlen) {
		error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
		if (error)
			return error;

		if (!dblock) {
			*alloc_required = 1;
			return 0;
		}
	}

	return 0;
}
| 1221 | |||
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h new file mode 100644 index 000000000000..0fd379b4cd9e --- /dev/null +++ b/fs/gfs2/bmap.h | |||
| @@ -0,0 +1,31 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __BMAP_DOT_H__
#define __BMAP_DOT_H__

struct inode;
struct gfs2_inode;
struct page;

/* Convert a stuffed (data-in-dinode) inode to use separate data blocks */
int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
/* Map a logical block (optionally allocating); result goes into @bh */
int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh, unsigned int maxlen);
/* Map an extent starting at @lblock; returns first block and length */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);

/* Truncate (grow or shrink) a regular file to @size */
int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
/* Finish a truncate that was interrupted (GFS2_DIF_TRUNC_IN_PROG set) */
int gfs2_truncatei_resume(struct gfs2_inode *ip);
/* Free all of a file's blocks */
int gfs2_file_dealloc(struct gfs2_inode *ip);

/* Worst-case data/indirect block counts for a write of @len bytes */
void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
			    unsigned int *data_blocks,
			    unsigned int *ind_blocks);
/* Decide whether writing [offset, offset+len) needs new allocation */
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len, int *alloc_required);

#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c new file mode 100644 index 000000000000..cab1f68d4685 --- /dev/null +++ b/fs/gfs2/daemon.c | |||
| @@ -0,0 +1,196 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/kthread.h> | ||
| 16 | #include <linux/delay.h> | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "daemon.h" | ||
| 23 | #include "glock.h" | ||
| 24 | #include "log.h" | ||
| 25 | #include "quota.h" | ||
| 26 | #include "recovery.h" | ||
| 27 | #include "super.h" | ||
| 28 | #include "util.h" | ||
| 29 | |||
| 30 | /* This uses schedule_timeout() instead of msleep() because it's good for | ||
| 31 | the daemons to wake up more often than the timeout when unmounting so | ||
| 32 | the user's unmount doesn't sit there forever. | ||
| 33 | |||
| 34 | The kthread functions used to start these daemons block and flush signals. */ | ||
| 35 | |||
| 36 | /** | ||
| 37 | * gfs2_scand - Look for cached glocks and inodes to toss from memory | ||
| 38 | * @sdp: Pointer to GFS2 superblock | ||
| 39 | * | ||
| 40 | * One of these daemons runs, finding candidates to add to sd_reclaim_list. | ||
| 41 | * See gfs2_glockd() | ||
| 42 | */ | ||
| 43 | |||
| 44 | int gfs2_scand(void *data) | ||
| 45 | { | ||
| 46 | struct gfs2_sbd *sdp = data; | ||
| 47 | unsigned long t; | ||
| 48 | |||
| 49 | while (!kthread_should_stop()) { | ||
| 50 | gfs2_scand_internal(sdp); | ||
| 51 | t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; | ||
| 52 | schedule_timeout_interruptible(t); | ||
| 53 | } | ||
| 54 | |||
| 55 | return 0; | ||
| 56 | } | ||
| 57 | |||
/**
 * gfs2_glockd - Reclaim unused glock structures
 * @sdp: Pointer to GFS2 superblock
 *
 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
 * Number of daemons can be set by user, with num_glockd mount option.
 */

int gfs2_glockd(void *data)
{
	struct gfs2_sbd *sdp = data;

	while (!kthread_should_stop()) {
		/* Drain the reclaim list completely ... */
		while (atomic_read(&sdp->sd_reclaim_count))
			gfs2_reclaim_glock(sdp);

		/* ... then sleep until more work arrives or we're stopped */
		wait_event_interruptible(sdp->sd_reclaim_wq,
					 (atomic_read(&sdp->sd_reclaim_count) ||
					 kthread_should_stop()));
	}

	return 0;
}
| 81 | |||
| 82 | /** | ||
| 83 | * gfs2_recoverd - Recover dead machine's journals | ||
| 84 | * @sdp: Pointer to GFS2 superblock | ||
| 85 | * | ||
| 86 | */ | ||
| 87 | |||
| 88 | int gfs2_recoverd(void *data) | ||
| 89 | { | ||
| 90 | struct gfs2_sbd *sdp = data; | ||
| 91 | unsigned long t; | ||
| 92 | |||
| 93 | while (!kthread_should_stop()) { | ||
| 94 | gfs2_check_journals(sdp); | ||
| 95 | t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; | ||
| 96 | schedule_timeout_interruptible(t); | ||
| 97 | } | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @sdp: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */

int gfs2_logd(void *data)
{
	struct gfs2_sbd *sdp = data;
	struct gfs2_holder ji_gh;
	unsigned long t;

	while (!kthread_should_stop()) {
		/* Advance the log tail */

		/* Flush the log if gt_log_flush_secs have elapsed since
		   the last flush */
		t = sdp->sd_log_flush_time +
		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;

		gfs2_ail1_empty(sdp, DIO_ALL);

		if (time_after_eq(jiffies, t)) {
			gfs2_log_flush(sdp, NULL);
			sdp->sd_log_flush_time = jiffies;
		}

		/* Check for latest journal index */

		t = sdp->sd_jindex_refresh_time +
		    gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;

		if (time_after_eq(jiffies, t)) {
			/* Taking and dropping the jindex glock rereads it */
			if (!gfs2_jindex_hold(sdp, &ji_gh))
				gfs2_glock_dq_uninit(&ji_gh);
			sdp->sd_jindex_refresh_time = jiffies;
		}

		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
		schedule_timeout_interruptible(t);
	}

	return 0;
}
| 146 | |||
/**
 * gfs2_quotad - Write cached quota changes into the quota file
 * @sdp: Pointer to GFS2 superblock
 *
 * Also syncs the master statfs file and scans for stale quota data.
 */

int gfs2_quotad(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long t;
	int error;

	while (!kthread_should_stop()) {
		/* Update the master statfs file */

		t = sdp->sd_statfs_sync_time +
		    gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;

		if (time_after_eq(jiffies, t)) {
			error = gfs2_statfs_sync(sdp);
			/* -EROFS and shutdown are expected; don't spam logs */
			if (error &&
			    error != -EROFS &&
			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
				fs_err(sdp, "quotad: (1) error=%d\n", error);
			sdp->sd_statfs_sync_time = jiffies;
		}

		/* Update quota file */

		t = sdp->sd_quota_sync_time +
		    gfs2_tune_get(sdp, gt_quota_quantum) * HZ;

		if (time_after_eq(jiffies, t)) {
			error = gfs2_quota_sync(sdp);
			if (error &&
			    error != -EROFS &&
			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
				fs_err(sdp, "quotad: (2) error=%d\n", error);
			sdp->sd_quota_sync_time = jiffies;
		}

		gfs2_quota_scan(sdp);

		t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
		schedule_timeout_interruptible(t);
	}

	return 0;
}
| 196 | |||
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h new file mode 100644 index 000000000000..801007120fb2 --- /dev/null +++ b/fs/gfs2/daemon.h | |||
| @@ -0,0 +1,19 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__

/* kthread entry points; each takes a struct gfs2_sbd * as @data */
int gfs2_scand(void *data);	/* find reclaim candidates */
int gfs2_glockd(void *data);	/* reclaim glocks from sd_reclaim_list */
int gfs2_recoverd(void *data);	/* replay dead machines' journals */
int gfs2_logd(void *data);	/* flush log, advance tail, refresh jindex */
int gfs2_quotad(void *data);	/* sync statfs and quota files */

#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c new file mode 100644 index 000000000000..459498cac93b --- /dev/null +++ b/fs/gfs2/dir.c | |||
| @@ -0,0 +1,1961 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | /* | ||
| 11 | * Implements Extendible Hashing as described in: | ||
| 12 | * "Extendible Hashing" by Fagin, et al in | ||
| 13 | * __ACM Trans. on Database Systems__, Sept 1979. | ||
| 14 | * | ||
| 15 | * | ||
| 16 | * Here's the layout of dirents which is essentially the same as that of ext2 | ||
| 17 | * within a single block. The field de_name_len is the number of bytes | ||
| 18 | * actually required for the name (no null terminator). The field de_rec_len | ||
| 19 | * is the number of bytes allocated to the dirent. The offset of the next | ||
| 20 | * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is | ||
| 21 | * deleted, the preceding dirent inherits its allocated space, ie | ||
| 22 | * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained | ||
| 23 | * by adding de_rec_len to the current dirent, this essentially causes the | ||
| 24 | * deleted dirent to get jumped over when iterating through all the dirents. | ||
| 25 | * | ||
| 26 | * When deleting the first dirent in a block, there is no previous dirent so | ||
| 27 | * the field de_ino is set to zero to designate it as deleted. When allocating | ||
| 28 | * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the | ||
| 29 | * first dirent has (de_ino == 0) and de_rec_len is large enough, this first | ||
| 30 | * dirent is allocated. Otherwise it must go through all the 'used' dirents | ||
| 31 | * searching for one in which the amount of total space minus the amount of | ||
| 32 | * used space will provide enough space for the new dirent. | ||
| 33 | * | ||
| 34 | * There are two types of blocks in which dirents reside. In a stuffed dinode, | ||
| 35 | * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of | ||
| 36 | * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the | ||
| 37 | * beginning of the leaf block. The dirents reside in leaves when | ||
| 38 | * | ||
| 39 | * dip->i_di.di_flags & GFS2_DIF_EXHASH is true | ||
| 40 | * | ||
| 41 | * Otherwise, the dirents are "linear", within a single stuffed dinode block. | ||
| 42 | * | ||
| 43 | * When the dirents are in leaves, the actual contents of the directory file are | ||
| 44 | * used as an array of 64-bit block pointers pointing to the leaf blocks. The | ||
| 45 | * dirents are NOT in the directory file itself. There can be more than one | ||
| 46 | * block pointer in the array that points to the same leaf. In fact, when a | ||
| 47 | * directory is first converted from linear to exhash, all of the pointers | ||
| 48 | * point to the same leaf. | ||
| 49 | * | ||
| 50 | * When a leaf is completely full, the size of the hash table can be | ||
| 51 | * doubled unless it is already at the maximum size which is hard coded into | ||
| 52 | * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, | ||
| 53 | * but never before the maximum hash table size has been reached. | ||
| 54 | */ | ||
| 55 | |||
| 56 | #include <linux/sched.h> | ||
| 57 | #include <linux/slab.h> | ||
| 58 | #include <linux/spinlock.h> | ||
| 59 | #include <linux/buffer_head.h> | ||
| 60 | #include <linux/sort.h> | ||
| 61 | #include <linux/gfs2_ondisk.h> | ||
| 62 | #include <linux/crc32.h> | ||
| 63 | #include <linux/vmalloc.h> | ||
| 64 | #include <linux/lm_interface.h> | ||
| 65 | |||
| 66 | #include "gfs2.h" | ||
| 67 | #include "incore.h" | ||
| 68 | #include "dir.h" | ||
| 69 | #include "glock.h" | ||
| 70 | #include "inode.h" | ||
| 71 | #include "meta_io.h" | ||
| 72 | #include "quota.h" | ||
| 73 | #include "rgrp.h" | ||
| 74 | #include "trans.h" | ||
| 75 | #include "bmap.h" | ||
| 76 | #include "util.h" | ||
| 77 | |||
/* Block kinds returned by dirent_first(): which header precedes the dirents */
#define IS_LEAF 1 /* Hashed (leaf) directory */
#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */

/* A dirent's 32-bit hash occupies the top bits of a directory file
   offset; these macros convert between the two representations. */
#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))

/* Callback applied to each (index, len, leaf_no) run of the hash table */
typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
			    u64 leaf_no, void *data);
/* Per-dirent predicate used by gfs2_dirent_scan(); non-zero stops the scan */
typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque);
| 88 | |||
| 89 | |||
| 90 | int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | ||
| 91 | struct buffer_head **bhp) | ||
| 92 | { | ||
| 93 | struct buffer_head *bh; | ||
| 94 | |||
| 95 | bh = gfs2_meta_new(ip->i_gl, block); | ||
| 96 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
| 97 | gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); | ||
| 98 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); | ||
| 99 | *bhp = bh; | ||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, | ||
| 104 | struct buffer_head **bhp) | ||
| 105 | { | ||
| 106 | struct buffer_head *bh; | ||
| 107 | int error; | ||
| 108 | |||
| 109 | error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh); | ||
| 110 | if (error) | ||
| 111 | return error; | ||
| 112 | if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) { | ||
| 113 | brelse(bh); | ||
| 114 | return -EIO; | ||
| 115 | } | ||
| 116 | *bhp = bh; | ||
| 117 | return 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | ||
| 121 | unsigned int offset, unsigned int size) | ||
| 122 | { | ||
| 123 | struct buffer_head *dibh; | ||
| 124 | int error; | ||
| 125 | |||
| 126 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 127 | if (error) | ||
| 128 | return error; | ||
| 129 | |||
| 130 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 131 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | ||
| 132 | if (ip->i_di.di_size < offset + size) | ||
| 133 | ip->i_di.di_size = offset + size; | ||
| 134 | ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); | ||
| 135 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
| 136 | |||
| 137 | brelse(dibh); | ||
| 138 | |||
| 139 | return size; | ||
| 140 | } | ||
| 141 | |||
| 142 | |||
| 143 | |||
| 144 | /** | ||
| 145 | * gfs2_dir_write_data - Write directory information to the inode | ||
| 146 | * @ip: The GFS2 inode | ||
| 147 | * @buf: The buffer containing information to be written | ||
| 148 | * @offset: The file offset to start writing at | ||
| 149 | * @size: The amount of data to write | ||
| 150 | * | ||
| 151 | * Returns: The number of bytes correctly written or error code | ||
| 152 | */ | ||
| 153 | static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, | ||
| 154 | u64 offset, unsigned int size) | ||
| 155 | { | ||
| 156 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 157 | struct buffer_head *dibh; | ||
| 158 | u64 lblock, dblock; | ||
| 159 | u32 extlen = 0; | ||
| 160 | unsigned int o; | ||
| 161 | int copied = 0; | ||
| 162 | int error = 0; | ||
| 163 | |||
| 164 | if (!size) | ||
| 165 | return 0; | ||
| 166 | |||
| 167 | if (gfs2_is_stuffed(ip) && | ||
| 168 | offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) | ||
| 169 | return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, | ||
| 170 | size); | ||
| 171 | |||
| 172 | if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) | ||
| 173 | return -EINVAL; | ||
| 174 | |||
| 175 | if (gfs2_is_stuffed(ip)) { | ||
| 176 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 177 | if (error) | ||
| 178 | return error; | ||
| 179 | } | ||
| 180 | |||
| 181 | lblock = offset; | ||
| 182 | o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); | ||
| 183 | |||
| 184 | while (copied < size) { | ||
| 185 | unsigned int amount; | ||
| 186 | struct buffer_head *bh; | ||
| 187 | int new; | ||
| 188 | |||
| 189 | amount = size - copied; | ||
| 190 | if (amount > sdp->sd_sb.sb_bsize - o) | ||
| 191 | amount = sdp->sd_sb.sb_bsize - o; | ||
| 192 | |||
| 193 | if (!extlen) { | ||
| 194 | new = 1; | ||
| 195 | error = gfs2_extent_map(&ip->i_inode, lblock, &new, | ||
| 196 | &dblock, &extlen); | ||
| 197 | if (error) | ||
| 198 | goto fail; | ||
| 199 | error = -EIO; | ||
| 200 | if (gfs2_assert_withdraw(sdp, dblock)) | ||
| 201 | goto fail; | ||
| 202 | } | ||
| 203 | |||
| 204 | if (amount == sdp->sd_jbsize || new) | ||
| 205 | error = gfs2_dir_get_new_buffer(ip, dblock, &bh); | ||
| 206 | else | ||
| 207 | error = gfs2_dir_get_existing_buffer(ip, dblock, &bh); | ||
| 208 | |||
| 209 | if (error) | ||
| 210 | goto fail; | ||
| 211 | |||
| 212 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
| 213 | memcpy(bh->b_data + o, buf, amount); | ||
| 214 | brelse(bh); | ||
| 215 | if (error) | ||
| 216 | goto fail; | ||
| 217 | |||
| 218 | buf += amount; | ||
| 219 | copied += amount; | ||
| 220 | lblock++; | ||
| 221 | dblock++; | ||
| 222 | extlen--; | ||
| 223 | |||
| 224 | o = sizeof(struct gfs2_meta_header); | ||
| 225 | } | ||
| 226 | |||
| 227 | out: | ||
| 228 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 229 | if (error) | ||
| 230 | return error; | ||
| 231 | |||
| 232 | if (ip->i_di.di_size < offset + copied) | ||
| 233 | ip->i_di.di_size = offset + copied; | ||
| 234 | ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); | ||
| 235 | |||
| 236 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 237 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
| 238 | brelse(dibh); | ||
| 239 | |||
| 240 | return copied; | ||
| 241 | fail: | ||
| 242 | if (copied) | ||
| 243 | goto out; | ||
| 244 | return error; | ||
| 245 | } | ||
| 246 | |||
| 247 | static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf, | ||
| 248 | u64 offset, unsigned int size) | ||
| 249 | { | ||
| 250 | struct buffer_head *dibh; | ||
| 251 | int error; | ||
| 252 | |||
| 253 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 254 | if (!error) { | ||
| 255 | offset += sizeof(struct gfs2_dinode); | ||
| 256 | memcpy(buf, dibh->b_data + offset, size); | ||
| 257 | brelse(dibh); | ||
| 258 | } | ||
| 259 | |||
| 260 | return (error) ? error : size; | ||
| 261 | } | ||
| 262 | |||
| 263 | |||
| 264 | /** | ||
| 265 | * gfs2_dir_read_data - Read a data from a directory inode | ||
| 266 | * @ip: The GFS2 Inode | ||
| 267 | * @buf: The buffer to place result into | ||
| 268 | * @offset: File offset to begin jdata_readng from | ||
| 269 | * @size: Amount of data to transfer | ||
| 270 | * | ||
| 271 | * Returns: The amount of data actually copied or the error | ||
| 272 | */ | ||
| 273 | static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | ||
| 274 | unsigned int size, unsigned ra) | ||
| 275 | { | ||
| 276 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 277 | u64 lblock, dblock; | ||
| 278 | u32 extlen = 0; | ||
| 279 | unsigned int o; | ||
| 280 | int copied = 0; | ||
| 281 | int error = 0; | ||
| 282 | |||
| 283 | if (offset >= ip->i_di.di_size) | ||
| 284 | return 0; | ||
| 285 | |||
| 286 | if (offset + size > ip->i_di.di_size) | ||
| 287 | size = ip->i_di.di_size - offset; | ||
| 288 | |||
| 289 | if (!size) | ||
| 290 | return 0; | ||
| 291 | |||
| 292 | if (gfs2_is_stuffed(ip)) | ||
| 293 | return gfs2_dir_read_stuffed(ip, buf, offset, size); | ||
| 294 | |||
| 295 | if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) | ||
| 296 | return -EINVAL; | ||
| 297 | |||
| 298 | lblock = offset; | ||
| 299 | o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); | ||
| 300 | |||
| 301 | while (copied < size) { | ||
| 302 | unsigned int amount; | ||
| 303 | struct buffer_head *bh; | ||
| 304 | int new; | ||
| 305 | |||
| 306 | amount = size - copied; | ||
| 307 | if (amount > sdp->sd_sb.sb_bsize - o) | ||
| 308 | amount = sdp->sd_sb.sb_bsize - o; | ||
| 309 | |||
| 310 | if (!extlen) { | ||
| 311 | new = 0; | ||
| 312 | error = gfs2_extent_map(&ip->i_inode, lblock, &new, | ||
| 313 | &dblock, &extlen); | ||
| 314 | if (error || !dblock) | ||
| 315 | goto fail; | ||
| 316 | BUG_ON(extlen < 1); | ||
| 317 | if (!ra) | ||
| 318 | extlen = 1; | ||
| 319 | bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); | ||
| 320 | } | ||
| 321 | if (!bh) { | ||
| 322 | error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); | ||
| 323 | if (error) | ||
| 324 | goto fail; | ||
| 325 | } | ||
| 326 | error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD); | ||
| 327 | if (error) { | ||
| 328 | brelse(bh); | ||
| 329 | goto fail; | ||
| 330 | } | ||
| 331 | dblock++; | ||
| 332 | extlen--; | ||
| 333 | memcpy(buf, bh->b_data + o, amount); | ||
| 334 | brelse(bh); | ||
| 335 | bh = NULL; | ||
| 336 | buf += amount; | ||
| 337 | copied += amount; | ||
| 338 | lblock++; | ||
| 339 | o = sizeof(struct gfs2_meta_header); | ||
| 340 | } | ||
| 341 | |||
| 342 | return copied; | ||
| 343 | fail: | ||
| 344 | return (copied) ? copied : error; | ||
| 345 | } | ||
| 346 | |||
| 347 | static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent, | ||
| 348 | const struct qstr *name, int ret) | ||
| 349 | { | ||
| 350 | if (dent->de_inum.no_addr != 0 && | ||
| 351 | be32_to_cpu(dent->de_hash) == name->hash && | ||
| 352 | be16_to_cpu(dent->de_name_len) == name->len && | ||
| 353 | memcmp(dent+1, name->name, name->len) == 0) | ||
| 354 | return ret; | ||
| 355 | return 0; | ||
| 356 | } | ||
| 357 | |||
/* Scan callback: return 1 (stop, select current) at the dirent matching @name */
static int gfs2_dirent_find(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque)
{
	return __gfs2_dirent_find(dent, name, 1);
}
| 364 | |||
/* Scan callback: return 2 so the scan reports the dirent *before* the match */
static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque)
{
	return __gfs2_dirent_find(dent, name, 2);
}
| 371 | |||
| 372 | /* | ||
| 373 | * name->name holds ptr to start of block. | ||
| 374 | * name->len holds size of block. | ||
| 375 | */ | ||
| 376 | static int gfs2_dirent_last(const struct gfs2_dirent *dent, | ||
| 377 | const struct qstr *name, | ||
| 378 | void *opaque) | ||
| 379 | { | ||
| 380 | const char *start = name->name; | ||
| 381 | const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len); | ||
| 382 | if (name->len == (end - start)) | ||
| 383 | return 1; | ||
| 384 | return 0; | ||
| 385 | } | ||
| 386 | |||
| 387 | static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, | ||
| 388 | const struct qstr *name, | ||
| 389 | void *opaque) | ||
| 390 | { | ||
| 391 | unsigned required = GFS2_DIRENT_SIZE(name->len); | ||
| 392 | unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); | ||
| 393 | unsigned totlen = be16_to_cpu(dent->de_rec_len); | ||
| 394 | |||
| 395 | if (!dent->de_inum.no_addr) | ||
| 396 | actual = GFS2_DIRENT_SIZE(0); | ||
| 397 | if (totlen - actual >= required) | ||
| 398 | return 1; | ||
| 399 | return 0; | ||
| 400 | } | ||
| 401 | |||
/* Accumulator for gfs2_dirent_gather(): collects pointers to every live
   dirent seen during a gfs2_dirent_scan() pass. */
struct dirent_gather {
	const struct gfs2_dirent **pdent;	/* caller-provided pointer array */
	unsigned offset;	/* number of pointers stored so far */
};
| 406 | |||
| 407 | static int gfs2_dirent_gather(const struct gfs2_dirent *dent, | ||
| 408 | const struct qstr *name, | ||
| 409 | void *opaque) | ||
| 410 | { | ||
| 411 | struct dirent_gather *g = opaque; | ||
| 412 | if (dent->de_inum.no_addr) { | ||
| 413 | g->pdent[g->offset++] = dent; | ||
| 414 | } | ||
| 415 | return 0; | ||
| 416 | } | ||
| 417 | |||
| 418 | /* | ||
| 419 | * Other possible things to check: | ||
| 420 | * - Inode located within filesystem size (and on valid block) | ||
| 421 | * - Valid directory entry type | ||
| 422 | * Not sure how heavy-weight we want to make this... could also check | ||
| 423 | * hash is correct for example, but that would take a lot of extra time. | ||
| 424 | * For now the most important thing is to check that the various sizes | ||
| 425 | * are correct. | ||
| 426 | */ | ||
| 427 | static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, | ||
| 428 | unsigned int size, unsigned int len, int first) | ||
| 429 | { | ||
| 430 | const char *msg = "gfs2_dirent too small"; | ||
| 431 | if (unlikely(size < sizeof(struct gfs2_dirent))) | ||
| 432 | goto error; | ||
| 433 | msg = "gfs2_dirent misaligned"; | ||
| 434 | if (unlikely(offset & 0x7)) | ||
| 435 | goto error; | ||
| 436 | msg = "gfs2_dirent points beyond end of block"; | ||
| 437 | if (unlikely(offset + size > len)) | ||
| 438 | goto error; | ||
| 439 | msg = "zero inode number"; | ||
| 440 | if (unlikely(!first && !dent->de_inum.no_addr)) | ||
| 441 | goto error; | ||
| 442 | msg = "name length is greater than space in dirent"; | ||
| 443 | if (dent->de_inum.no_addr && | ||
| 444 | unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) > | ||
| 445 | size)) | ||
| 446 | goto error; | ||
| 447 | return 0; | ||
| 448 | error: | ||
| 449 | printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, | ||
| 450 | first ? "first in block" : "not first in block"); | ||
| 451 | return -EIO; | ||
| 452 | } | ||
| 453 | |||
| 454 | static int gfs2_dirent_offset(const void *buf) | ||
| 455 | { | ||
| 456 | const struct gfs2_meta_header *h = buf; | ||
| 457 | int offset; | ||
| 458 | |||
| 459 | BUG_ON(buf == NULL); | ||
| 460 | |||
| 461 | switch(be32_to_cpu(h->mh_type)) { | ||
| 462 | case GFS2_METATYPE_LF: | ||
| 463 | offset = sizeof(struct gfs2_leaf); | ||
| 464 | break; | ||
| 465 | case GFS2_METATYPE_DI: | ||
| 466 | offset = sizeof(struct gfs2_dinode); | ||
| 467 | break; | ||
| 468 | default: | ||
| 469 | goto wrong_type; | ||
| 470 | } | ||
| 471 | return offset; | ||
| 472 | wrong_type: | ||
| 473 | printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", | ||
| 474 | be32_to_cpu(h->mh_type)); | ||
| 475 | return -1; | ||
| 476 | } | ||
| 477 | |||
/*
 * gfs2_dirent_scan - apply @scan to each dirent in one block
 * @inode: the directory
 * @buf: start of the block (stuffed dinode or exhash leaf)
 * @len: size of the block in bytes
 * @scan: per-dirent predicate; its return value steers the result
 * @name: name argument passed through to @scan
 * @opaque: context pointer passed through to @scan
 *
 * Dirents are walked from just after the block header, each validated
 * with gfs2_check_dirent() before use.  @scan returning 0 continues the
 * walk, 1 selects the current dirent, 2 selects the previous one, and a
 * negative value aborts and is wrapped in ERR_PTR.
 *
 * Returns: the selected dirent, NULL if the scan finished without a
 * hit, or ERR_PTR (-EIO after marking the inode inconsistent).
 */
static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
					    unsigned int len, gfs2_dscan_t scan,
					    const struct qstr *name,
					    void *opaque)
{
	struct gfs2_dirent *dent, *prev;
	unsigned offset;
	unsigned size;
	int ret = 0;

	/* Header size depends on block type (leaf vs. dinode) */
	ret = gfs2_dirent_offset(buf);
	if (ret < 0)
		goto consist_inode;

	offset = ret;
	prev = NULL;
	dent = buf + offset;
	size = be16_to_cpu(dent->de_rec_len);
	if (gfs2_check_dirent(dent, offset, size, len, 1))
		goto consist_inode;
	do {
		ret = scan(dent, name, opaque);
		if (ret)
			break;
		offset += size;
		/* A well-formed block's last record ends exactly at len */
		if (offset == len)
			break;
		prev = dent;
		dent = buf + offset;
		size = be16_to_cpu(dent->de_rec_len);
		if (gfs2_check_dirent(dent, offset, size, len, 0))
			goto consist_inode;
	} while(1);

	switch(ret) {
	case 0:
		return NULL;
	case 1:
		return dent;
	case 2:
		/* prev is NULL when the hit was the first dirent */
		return prev ? prev : dent;
	default:
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}

consist_inode:
	gfs2_consist_inode(GFS2_I(inode));
	return ERR_PTR(-EIO);
}
| 528 | |||
| 529 | |||
| 530 | /** | ||
| 531 | * dirent_first - Return the first dirent | ||
| 532 | * @dip: the directory | ||
| 533 | * @bh: The buffer | ||
| 534 | * @dent: Pointer to list of dirents | ||
| 535 | * | ||
| 536 | * return first dirent whether bh points to leaf or stuffed dinode | ||
| 537 | * | ||
| 538 | * Returns: IS_LEAF, IS_DINODE, or -errno | ||
| 539 | */ | ||
| 540 | |||
| 541 | static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, | ||
| 542 | struct gfs2_dirent **dent) | ||
| 543 | { | ||
| 544 | struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; | ||
| 545 | |||
| 546 | if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { | ||
| 547 | if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh)) | ||
| 548 | return -EIO; | ||
| 549 | *dent = (struct gfs2_dirent *)(bh->b_data + | ||
| 550 | sizeof(struct gfs2_leaf)); | ||
| 551 | return IS_LEAF; | ||
| 552 | } else { | ||
| 553 | if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI)) | ||
| 554 | return -EIO; | ||
| 555 | *dent = (struct gfs2_dirent *)(bh->b_data + | ||
| 556 | sizeof(struct gfs2_dinode)); | ||
| 557 | return IS_DINODE; | ||
| 558 | } | ||
| 559 | } | ||
| 560 | |||
| 561 | static int dirent_check_reclen(struct gfs2_inode *dip, | ||
| 562 | const struct gfs2_dirent *d, const void *end_p) | ||
| 563 | { | ||
| 564 | const void *ptr = d; | ||
| 565 | u16 rec_len = be16_to_cpu(d->de_rec_len); | ||
| 566 | |||
| 567 | if (unlikely(rec_len < sizeof(struct gfs2_dirent))) | ||
| 568 | goto broken; | ||
| 569 | ptr += rec_len; | ||
| 570 | if (ptr < end_p) | ||
| 571 | return rec_len; | ||
| 572 | if (ptr == end_p) | ||
| 573 | return -ENOENT; | ||
| 574 | broken: | ||
| 575 | gfs2_consist_inode(dip); | ||
| 576 | return -EIO; | ||
| 577 | } | ||
| 578 | |||
/**
 * dirent_next - Next dirent
 * @dip: the directory
 * @bh: The buffer
 * @dent: Pointer to list of dirents; advanced to the next one on success
 *
 * Returns: 0 on success, -ENOENT at the end of the block, -EIO on
 * corruption (after marking the inode inconsistent)
 */
static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
		       struct gfs2_dirent **dent)
{
	struct gfs2_dirent *cur = *dent, *tmp;
	char *bh_end = bh->b_data + bh->b_size;
	int ret;

	ret = dirent_check_reclen(dip, cur, bh_end);
	if (ret < 0)
		return ret;

	tmp = (void *)cur + ret;
	/* -ENOENT here only means tmp is the block's final dirent, which
	   is still a valid entry to step to; only -EIO aborts */
	ret = dirent_check_reclen(dip, tmp, bh_end);
	if (ret == -EIO)
		return ret;

	/* Only the first dent could ever have de_inum.no_addr == 0 */
	if (!tmp->de_inum.no_addr) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	*dent = tmp;
	return 0;
}
| 613 | |||
/**
 * dirent_del - Delete a dirent
 * @dip: The GFS2 inode
 * @bh: The buffer
 * @prev: The previous dirent (NULL if @cur is first in the block)
 * @cur: The current dirent to delete
 *
 * Space is reclaimed by folding @cur's record length into @prev's, or,
 * for the first dirent of a block, by zeroing its inode number.
 */
static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
		       struct gfs2_dirent *prev, struct gfs2_dirent *cur)
{
	u16 cur_rec_len, prev_rec_len;

	/* Deleting an already-deleted entry means the directory is corrupt */
	if (!cur->de_inum.no_addr) {
		gfs2_consist_inode(dip);
		return;
	}

	gfs2_trans_add_bh(dip->i_gl, bh, 1);

	/* If there is no prev entry, this is the first entry in the block.
	   The de_rec_len is already as big as it needs to be.  Just zero
	   out the inode number and return.  */

	if (!prev) {
		cur->de_inum.no_addr = 0;	/* No endianess worries */
		return;
	}

	/* Combine this dentry with the previous one.  */

	prev_rec_len = be16_to_cpu(prev->de_rec_len);
	cur_rec_len = be16_to_cpu(cur->de_rec_len);

	/* Sanity: @prev must be directly adjacent to @cur, and @cur's
	   record must not run past the end of the buffer */
	if ((char *)prev + prev_rec_len != (char *)cur)
		gfs2_consist_inode(dip);
	if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
		gfs2_consist_inode(dip);

	prev_rec_len += cur_rec_len;
	prev->de_rec_len = cpu_to_be16(prev_rec_len);
}
| 657 | |||
/*
 * Takes a dent from which to grab space as an argument. Returns the
 * newly created dent.
 *
 * The new entry is carved out of @dent's free tail: @dent's record
 * length is trimmed down to its used size (or to zero for a deleted
 * placeholder), and the remainder becomes the new dirent, initialised
 * from @name by gfs2_qstr2dirent().
 */
static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
					    struct gfs2_dirent *dent,
					    const struct qstr *name,
					    struct buffer_head *bh)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_dirent *ndent;
	unsigned offset = 0, totlen;

	/* A deleted dent (no_addr == 0) is reused in place: offset stays 0 */
	if (dent->de_inum.no_addr)
		offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
	totlen = be16_to_cpu(dent->de_rec_len);
	/* Caller (gfs2_dirent_find_space) guarantees the space exists */
	BUG_ON(offset + name->len > totlen);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	ndent = (struct gfs2_dirent *)((char *)dent + offset);
	dent->de_rec_len = cpu_to_be16(offset);
	gfs2_qstr2dirent(name, totlen - offset, ndent);
	return ndent;
}
| 681 | |||
| 682 | static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, | ||
| 683 | struct buffer_head *bh, | ||
| 684 | const struct qstr *name) | ||
| 685 | { | ||
| 686 | struct gfs2_dirent *dent; | ||
| 687 | dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, | ||
| 688 | gfs2_dirent_find_space, name, NULL); | ||
| 689 | if (!dent || IS_ERR(dent)) | ||
| 690 | return dent; | ||
| 691 | return gfs2_init_dirent(inode, dent, name, bh); | ||
| 692 | } | ||
| 693 | |||
| 694 | static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, | ||
| 695 | struct buffer_head **bhp) | ||
| 696 | { | ||
| 697 | int error; | ||
| 698 | |||
| 699 | error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); | ||
| 700 | if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { | ||
| 701 | /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ | ||
| 702 | error = -EIO; | ||
| 703 | } | ||
| 704 | |||
| 705 | return error; | ||
| 706 | } | ||
| 707 | |||
| 708 | /** | ||
| 709 | * get_leaf_nr - Get a leaf number associated with the index | ||
| 710 | * @dip: The GFS2 inode | ||
| 711 | * @index: | ||
| 712 | * @leaf_out: | ||
| 713 | * | ||
| 714 | * Returns: 0 on success, error code otherwise | ||
| 715 | */ | ||
| 716 | |||
| 717 | static int get_leaf_nr(struct gfs2_inode *dip, u32 index, | ||
| 718 | u64 *leaf_out) | ||
| 719 | { | ||
| 720 | u64 leaf_no; | ||
| 721 | int error; | ||
| 722 | |||
| 723 | error = gfs2_dir_read_data(dip, (char *)&leaf_no, | ||
| 724 | index * sizeof(u64), | ||
| 725 | sizeof(u64), 0); | ||
| 726 | if (error != sizeof(u64)) | ||
| 727 | return (error < 0) ? error : -EIO; | ||
| 728 | |||
| 729 | *leaf_out = be64_to_cpu(leaf_no); | ||
| 730 | |||
| 731 | return 0; | ||
| 732 | } | ||
| 733 | |||
| 734 | static int get_first_leaf(struct gfs2_inode *dip, u32 index, | ||
| 735 | struct buffer_head **bh_out) | ||
| 736 | { | ||
| 737 | u64 leaf_no; | ||
| 738 | int error; | ||
| 739 | |||
| 740 | error = get_leaf_nr(dip, index, &leaf_no); | ||
| 741 | if (!error) | ||
| 742 | error = get_leaf(dip, leaf_no, bh_out); | ||
| 743 | |||
| 744 | return error; | ||
| 745 | } | ||
| 746 | |||
/*
 * gfs2_dirent_search - locate a dirent for @name using callback @scan
 *
 * For an exhash directory, the hash selects a bucket and the chain of
 * leaf blocks for that bucket is searched; for a linear directory the
 * stuffed dinode block is scanned directly.
 *
 * Returns the dirent chosen by @scan with *pbh holding a reference to
 * its buffer (caller must brelse), NULL on no match, or ERR_PTR on
 * error; in the latter two cases *pbh is set to NULL.
 */
static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
					      const struct qstr *name,
					      gfs2_dscan_t scan,
					      struct buffer_head **pbh)
{
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
		struct gfs2_leaf *leaf;
		unsigned hsize = 1 << ip->i_di.di_depth;
		unsigned index;
		u64 ln;
		/* The hash table must be exactly hsize pointers long */
		if (hsize * sizeof(u64) != ip->i_di.di_size) {
			gfs2_consist_inode(ip);
			return ERR_PTR(-EIO);
		}

		/* The top di_depth bits of the hash pick the bucket */
		index = name->hash >> (32 - ip->i_di.di_depth);
		error = get_first_leaf(ip, index, &bh);
		if (error)
			return ERR_PTR(error);
		do {
			dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
						scan, name, NULL);
			if (dent)
				goto got_dent;
			/* Not in this leaf; follow the overflow chain */
			leaf = (struct gfs2_leaf *)bh->b_data;
			ln = be64_to_cpu(leaf->lf_next);
			brelse(bh);
			if (!ln)
				break;

			error = get_leaf(ip, ln, &bh);
		} while(!error);

		return error ? ERR_PTR(error) : NULL;
	}

	/* Linear directory: all dirents live in the stuffed dinode block */
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		return ERR_PTR(error);
	dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
got_dent:
	/* On miss or error the caller gets no buffer back */
	if (unlikely(dent == NULL || IS_ERR(dent))) {
		brelse(bh);
		bh = NULL;
	}
	*pbh = bh;
	return dent;
}
| 801 | |||
| 802 | static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) | ||
| 803 | { | ||
| 804 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 805 | u64 bn = gfs2_alloc_meta(ip); | ||
| 806 | struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); | ||
| 807 | struct gfs2_leaf *leaf; | ||
| 808 | struct gfs2_dirent *dent; | ||
| 809 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; | ||
| 810 | if (!bh) | ||
| 811 | return NULL; | ||
| 812 | |||
| 813 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
| 814 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); | ||
| 815 | leaf = (struct gfs2_leaf *)bh->b_data; | ||
| 816 | leaf->lf_depth = cpu_to_be16(depth); | ||
| 817 | leaf->lf_entries = 0; | ||
| 818 | leaf->lf_dirent_format = cpu_to_be16(GFS2_FORMAT_DE); | ||
| 819 | leaf->lf_next = 0; | ||
| 820 | memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); | ||
| 821 | dent = (struct gfs2_dirent *)(leaf+1); | ||
| 822 | gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); | ||
| 823 | *pbh = bh; | ||
| 824 | return leaf; | ||
| 825 | } | ||
| 826 | |||
| 827 | /** | ||
| 828 | * dir_make_exhash - Convert a stuffed directory into an ExHash directory | ||
| 829 | * @dip: The GFS2 inode | ||
| 830 | * | ||
| 831 | * Returns: 0 on success, error code otherwise | ||
| 832 | */ | ||
| 833 | |||
| 834 | static int dir_make_exhash(struct inode *inode) | ||
| 835 | { | ||
| 836 | struct gfs2_inode *dip = GFS2_I(inode); | ||
| 837 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 838 | struct gfs2_dirent *dent; | ||
| 839 | struct qstr args; | ||
| 840 | struct buffer_head *bh, *dibh; | ||
| 841 | struct gfs2_leaf *leaf; | ||
| 842 | int y; | ||
| 843 | u32 x; | ||
| 844 | u64 *lp, bn; | ||
| 845 | int error; | ||
| 846 | |||
| 847 | error = gfs2_meta_inode_buffer(dip, &dibh); | ||
| 848 | if (error) | ||
| 849 | return error; | ||
| 850 | |||
| 851 | /* Turn over a new leaf */ | ||
| 852 | |||
| 853 | leaf = new_leaf(inode, &bh, 0); | ||
| 854 | if (!leaf) | ||
| 855 | return -ENOSPC; | ||
| 856 | bn = bh->b_blocknr; | ||
| 857 | |||
| 858 | gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16)); | ||
| 859 | leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries); | ||
| 860 | |||
| 861 | /* Copy dirents */ | ||
| 862 | |||
| 863 | gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, | ||
| 864 | sizeof(struct gfs2_dinode)); | ||
| 865 | |||
| 866 | /* Find last entry */ | ||
| 867 | |||
| 868 | x = 0; | ||
| 869 | args.len = bh->b_size - sizeof(struct gfs2_dinode) + | ||
| 870 | sizeof(struct gfs2_leaf); | ||
| 871 | args.name = bh->b_data; | ||
| 872 | dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size, | ||
| 873 | gfs2_dirent_last, &args, NULL); | ||
| 874 | if (!dent) { | ||
| 875 | brelse(bh); | ||
| 876 | brelse(dibh); | ||
| 877 | return -EIO; | ||
| 878 | } | ||
| 879 | if (IS_ERR(dent)) { | ||
| 880 | brelse(bh); | ||
| 881 | brelse(dibh); | ||
| 882 | return PTR_ERR(dent); | ||
| 883 | } | ||
| 884 | |||
| 885 | /* Adjust the last dirent's record length | ||
| 886 | (Remember that dent still points to the last entry.) */ | ||
| 887 | |||
| 888 | dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) + | ||
| 889 | sizeof(struct gfs2_dinode) - | ||
| 890 | sizeof(struct gfs2_leaf)); | ||
| 891 | |||
| 892 | brelse(bh); | ||
| 893 | |||
| 894 | /* We're done with the new leaf block, now setup the new | ||
| 895 | hash table. */ | ||
| 896 | |||
| 897 | gfs2_trans_add_bh(dip->i_gl, dibh, 1); | ||
| 898 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | ||
| 899 | |||
| 900 | lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); | ||
| 901 | |||
| 902 | for (x = sdp->sd_hash_ptrs; x--; lp++) | ||
| 903 | *lp = cpu_to_be64(bn); | ||
| 904 | |||
| 905 | dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; | ||
| 906 | dip->i_di.di_blocks++; | ||
| 907 | dip->i_di.di_flags |= GFS2_DIF_EXHASH; | ||
| 908 | dip->i_di.di_payload_format = 0; | ||
| 909 | |||
| 910 | for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; | ||
| 911 | dip->i_di.di_depth = y; | ||
| 912 | |||
| 913 | gfs2_dinode_out(&dip->i_di, dibh->b_data); | ||
| 914 | |||
| 915 | brelse(dibh); | ||
| 916 | |||
| 917 | return 0; | ||
| 918 | } | ||
| 919 | |||
/**
 * dir_split_leaf - Split a leaf block into two
 * @inode: The directory inode
 * @name: The name being inserted (its hash selects the leaf to split)
 *
 * Allocates a new leaf one level deeper than the old one, repoints the
 * lower half of the hash-table pointers that referenced the old leaf at
 * the new one, and moves the entries whose hashes fall below the new
 * divider into the new leaf.
 *
 * Returns: 0 on success, 1 if the leaf is already as deep as the hash
 *          table (so it cannot be split), negative errno on failure
 */

static int dir_split_leaf(struct inode *inode, const struct qstr *name)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	struct buffer_head *nbh, *obh, *dibh;
	struct gfs2_leaf *nleaf, *oleaf;
	struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
	u32 start, len, half_len, divider;
	u64 bn, *lp, leaf_no;
	u32 index;
	int x, moved = 0;
	int error;

	index = name->hash >> (32 - dip->i_di.di_depth);
	error = get_leaf_nr(dip, index, &leaf_no);
	if (error)
		return error;

	/* Get the old leaf block */
	error = get_leaf(dip, leaf_no, &obh);
	if (error)
		return error;

	oleaf = (struct gfs2_leaf *)obh->b_data;
	/* A leaf at full depth has only one hash-table pointer to it, so
	   splitting cannot help; the caller must double the table first. */
	if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) {
		brelse(obh);
		return 1; /* can't split */
	}

	gfs2_trans_add_bh(dip->i_gl, obh, 1);

	nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
	if (!nleaf) {
		brelse(obh);
		return -ENOSPC;
	}
	bn = nbh->b_blocknr;

	/* Compute the start and len of leaf pointers in the hash table. */
	len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
	half_len = len >> 1;
	if (!half_len) {
		printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index);
		gfs2_consist_inode(dip);
		error = -EIO;
		goto fail_brelse;
	}

	start = (index & ~(len - 1));

	/* Change the pointers.
	   Don't bother distinguishing stuffed from non-stuffed.
	   This code is complicated enough already. */
	lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL);
	/* Change the pointers */
	for (x = 0; x < half_len; x++)
		lp[x] = cpu_to_be64(bn);

	error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
				    half_len * sizeof(u64));
	if (error != half_len * sizeof(u64)) {
		if (error >= 0)
			error = -EIO;
		goto fail_lpfree;
	}

	kfree(lp);

	/* Compute the divider */
	divider = (start + half_len) << (32 - dip->i_di.di_depth);

	/* Copy the entries */
	dirent_first(dip, obh, &dent);

	do {
		next = dent;
		if (dirent_next(dip, obh, &next))
			next = NULL;

		/* Only live entries (non-zero inode addr) below the
		   divider are moved into the new leaf */
		if (dent->de_inum.no_addr &&
		    be32_to_cpu(dent->de_hash) < divider) {
			struct qstr str;
			str.name = (char*)(dent+1);
			str.len = be16_to_cpu(dent->de_name_len);
			str.hash = be32_to_cpu(dent->de_hash);
			new = gfs2_dirent_alloc(inode, nbh, &str);
			if (IS_ERR(new)) {
				error = PTR_ERR(new);
				break;
			}

			new->de_inum = dent->de_inum; /* No endian worries */
			new->de_type = dent->de_type; /* No endian worries */
			nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1);

			dirent_del(dip, obh, prev, dent);

			if (!oleaf->lf_entries)
				gfs2_consist_inode(dip);
			oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1);

			/* NOTE(review): when the deleted entry was first in
			   the block, it seems to remain as the free-space
			   record that following entries merge into — confirm
			   against dirent_del() before changing this. */
			if (!prev)
				prev = dent;

			moved = 1;
		} else {
			prev = dent;
		}
		dent = next;
	} while (dent);

	oleaf->lf_depth = nleaf->lf_depth;

	/* Account for the new leaf block in the dinode */
	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
		dip->i_di.di_blocks++;
		gfs2_dinode_out(&dip->i_di, dibh->b_data);
		brelse(dibh);
	}

	brelse(obh);
	brelse(nbh);

	return error;

fail_lpfree:
	kfree(lp);

fail_brelse:
	brelse(obh);
	brelse(nbh);
	return error;
}
| 1061 | |||
/**
 * dir_double_exhash - Double size of ExHash table
 * @dip: The GFS2 dinode
 *
 * Rewrites the hash table with each pointer duplicated, so the table
 * doubles in size while every hash value still maps to the same leaf
 * as before; only di_depth changes.
 *
 * Returns: 0 on success, error code on failure
 */

static int dir_double_exhash(struct gfs2_inode *dip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct buffer_head *dibh;
	u32 hsize;
	u64 *buf;
	u64 *from, *to;
	u64 block;
	int x;
	int error = 0;

	/* The hash table must be exactly hsize pointers long */
	hsize = 1 << dip->i_di.di_depth;
	if (hsize * sizeof(u64) != dip->i_di.di_size) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	/* Allocate both the "from" and "to" buffers in one big chunk */

	buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);

	/* Work backwards through the table one hash block at a time, so
	   doubled output never overwrites not-yet-read input */
	for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
		error = gfs2_dir_read_data(dip, (char *)buf,
					   block * sdp->sd_hash_bsize,
					   sdp->sd_hash_bsize, 1);
		if (error != sdp->sd_hash_bsize) {
			if (error >= 0)
				error = -EIO;
			goto fail;
		}

		from = buf;
		to = (u64 *)((char *)buf + sdp->sd_hash_bsize);

		/* Emit each pointer twice to double the table */
		for (x = sdp->sd_hash_ptrs; x--; from++) {
			*to++ = *from;	/* No endianess worries */
			*to++ = *from;
		}

		error = gfs2_dir_write_data(dip,
					    (char *)buf + sdp->sd_hash_bsize,
					    block * sdp->sd_sb.sb_bsize,
					    sdp->sd_sb.sb_bsize);
		if (error != sdp->sd_sb.sb_bsize) {
			if (error >= 0)
				error = -EIO;
			goto fail;
		}
	}

	kfree(buf);

	/* Record the new depth in the on-disk dinode */
	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (!gfs2_assert_withdraw(sdp, !error)) {
		dip->i_di.di_depth++;
		gfs2_dinode_out(&dip->i_di, dibh->b_data);
		brelse(dibh);
	}

	return error;

fail:
	kfree(buf);
	return error;
}
| 1134 | |||
| 1135 | /** | ||
| 1136 | * compare_dents - compare directory entries by hash value | ||
| 1137 | * @a: first dent | ||
| 1138 | * @b: second dent | ||
| 1139 | * | ||
| 1140 | * When comparing the hash entries of @a to @b: | ||
| 1141 | * gt: returns 1 | ||
| 1142 | * lt: returns -1 | ||
| 1143 | * eq: returns 0 | ||
| 1144 | */ | ||
| 1145 | |||
| 1146 | static int compare_dents(const void *a, const void *b) | ||
| 1147 | { | ||
| 1148 | const struct gfs2_dirent *dent_a, *dent_b; | ||
| 1149 | u32 hash_a, hash_b; | ||
| 1150 | int ret = 0; | ||
| 1151 | |||
| 1152 | dent_a = *(const struct gfs2_dirent **)a; | ||
| 1153 | hash_a = be32_to_cpu(dent_a->de_hash); | ||
| 1154 | |||
| 1155 | dent_b = *(const struct gfs2_dirent **)b; | ||
| 1156 | hash_b = be32_to_cpu(dent_b->de_hash); | ||
| 1157 | |||
| 1158 | if (hash_a > hash_b) | ||
| 1159 | ret = 1; | ||
| 1160 | else if (hash_a < hash_b) | ||
| 1161 | ret = -1; | ||
| 1162 | else { | ||
| 1163 | unsigned int len_a = be16_to_cpu(dent_a->de_name_len); | ||
| 1164 | unsigned int len_b = be16_to_cpu(dent_b->de_name_len); | ||
| 1165 | |||
| 1166 | if (len_a > len_b) | ||
| 1167 | ret = 1; | ||
| 1168 | else if (len_a < len_b) | ||
| 1169 | ret = -1; | ||
| 1170 | else | ||
| 1171 | ret = memcmp(dent_a + 1, dent_b + 1, len_a); | ||
| 1172 | } | ||
| 1173 | |||
| 1174 | return ret; | ||
| 1175 | } | ||
| 1176 | |||
/**
 * do_filldir_main - read out directory entries
 * @dip: The GFS2 inode
 * @offset: The offset in the file to read from
 * @opaque: opaque data to pass to filldir
 * @filldir: The function to pass entries to
 * @darr: an array of struct gfs2_dirent pointers to read
 * @entries: the number of entries in darr
 * @copied: pointer to int that's non-zero if a entry has been copied out
 *
 * Jump through some hoops to make sure that if there are hash collsions,
 * they are read out at the beginning of a buffer.  We want to minimize
 * the possibility that they will fall into different readdir buffers or
 * that someone will want to seek to that location.
 *
 * Returns: errno, >0 on exception from filldir
 */

static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
			   void *opaque, gfs2_filldir_t filldir,
			   const struct gfs2_dirent **darr, u32 entries,
			   int *copied)
{
	const struct gfs2_dirent *dent, *dent_next;
	struct gfs2_inum inum;
	u64 off, off_next;
	unsigned int x, y;
	int run = 0;	/* non-zero while inside a run of equal hashes */
	int error = 0;

	/* Sort by hash so equal-hash entries are adjacent */
	sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);

	dent_next = darr[0];
	off_next = be32_to_cpu(dent_next->de_hash);
	off_next = gfs2_disk_hash2offset(off_next);

	for (x = 0, y = 1; x < entries; x++, y++) {
		dent = dent_next;
		off = off_next;

		if (y < entries) {
			dent_next = darr[y];
			off_next = be32_to_cpu(dent_next->de_hash);
			off_next = gfs2_disk_hash2offset(off_next);

			/* Skip entries already returned on a prior call */
			if (off < *offset)
				continue;
			*offset = off;

			/* Start of a hash-collision run: stop early if this
			   buffer already holds entries, so the whole run
			   lands at the start of the next buffer */
			if (off_next == off) {
				if (*copied && !run)
					return 1;
				run = 1;
			} else
				run = 0;
		} else {
			if (off < *offset)
				continue;
			*offset = off;
		}

		gfs2_inum_in(&inum, (char *)&dent->de_inum);

		error = filldir(opaque, (const char *)(dent + 1),
				be16_to_cpu(dent->de_name_len),
				off, &inum,
				be16_to_cpu(dent->de_type));
		if (error)
			return 1;

		*copied = 1;
	}

	/* Increment the *offset by one, so the next time we come into the
	   do_filldir fxn, we get the next entry instead of the last one in the
	   current leaf */

	(*offset)++;

	return 0;
}
| 1258 | |||
/**
 * gfs2_dir_read_leaf - read out the entries of one leaf chain
 * @inode: the directory inode
 * @offset: current readdir position (updated as entries are returned)
 * @opaque: opaque data passed through to @filldir
 * @filldir: callback that receives each entry
 * @copied: set non-zero once any entry has been copied out
 * @depth: set to the lf_depth of the first leaf in the chain
 * @leaf_no: block number of the first leaf in the chain
 *
 * Returns: errno, or >0 when filldir asked to stop early
 */
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
			      gfs2_filldir_t filldir, int *copied,
			      unsigned *depth, u64 leaf_no)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *bh;
	struct gfs2_leaf *lf;
	unsigned entries = 0;
	unsigned leaves = 0;
	const struct gfs2_dirent **darr, *dent;
	struct dirent_gather g;
	struct buffer_head **larr;
	int leaf = 0;
	int error, i;
	u64 lfn = leaf_no;

	/* First pass: count leaves and entries in the chain */
	do {
		error = get_leaf(ip, lfn, &bh);
		if (error)
			goto out;
		lf = (struct gfs2_leaf *)bh->b_data;
		if (leaves == 0)
			*depth = be16_to_cpu(lf->lf_depth);
		entries += be16_to_cpu(lf->lf_entries);
		leaves++;
		lfn = be64_to_cpu(lf->lf_next);
		brelse(bh);
	} while(lfn);

	if (!entries)
		return 0;

	/* One allocation holds both the bh array (larr) and the dirent
	   pointer array (darr) that follows it */
	error = -ENOMEM;
	larr = vmalloc((leaves + entries) * sizeof(void *));
	if (!larr)
		goto out;
	darr = (const struct gfs2_dirent **)(larr + leaves);
	g.pdent = darr;
	g.offset = 0;
	lfn = leaf_no;

	/* Second pass: gather dirent pointers; each bh with entries is
	   kept pinned in larr until after do_filldir_main runs */
	do {
		error = get_leaf(ip, lfn, &bh);
		if (error)
			goto out_kfree;
		lf = (struct gfs2_leaf *)bh->b_data;
		lfn = be64_to_cpu(lf->lf_next);
		if (lf->lf_entries) {
			dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
						gfs2_dirent_gather, NULL, &g);
			error = PTR_ERR(dent);
			if (IS_ERR(dent)) {
				goto out_kfree;
			}
			error = 0;
			larr[leaf++] = bh;
		} else {
			brelse(bh);
		}
	} while(lfn);

	error = do_filldir_main(ip, offset, opaque, filldir, darr,
				entries, copied);
out_kfree:
	for(i = 0; i < leaf; i++)
		brelse(larr[i]);
	vfree(larr);
out:
	return error;
}
| 1329 | |||
/**
 * dir_e_read - Reads the entries from a directory into a filldir buffer
 * @inode: the directory inode
 * @offset: the hash of the last entry read shifted to the right once
 * @opaque: buffer for the filldir function to fill
 * @filldir: points to the filldir function to use
 *
 * Walks the exhash table, reading one hash block of leaf pointers at a
 * time and handing each distinct leaf chain to gfs2_dir_read_leaf().
 *
 * Returns: errno
 */

static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
		      gfs2_filldir_t filldir)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u32 hsize, len = 0;
	u32 ht_offset, lp_offset, ht_offset_cur = -1;
	u32 hash, index;
	u64 *lp;
	int copied = 0;
	int error = 0;
	unsigned depth = 0;

	/* Sanity check: table size must match the depth */
	hsize = 1 << dip->i_di.di_depth;
	if (hsize * sizeof(u64) != dip->i_di.di_size) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	/* Resume from the hash encoded in the readdir offset */
	hash = gfs2_dir_offset2hash(*offset);
	index = hash >> (32 - dip->i_di.di_depth);

	lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
	if (!lp)
		return -ENOMEM;

	while (index < hsize) {
		lp_offset = index & (sdp->sd_hash_ptrs - 1);
		ht_offset = index - lp_offset;

		/* Only re-read the hash block when index crosses into a
		   new one */
		if (ht_offset_cur != ht_offset) {
			error = gfs2_dir_read_data(dip, (char *)lp,
						   ht_offset * sizeof(u64),
						   sdp->sd_hash_bsize, 1);
			if (error != sdp->sd_hash_bsize) {
				if (error >= 0)
					error = -EIO;
				goto out;
			}
			ht_offset_cur = ht_offset;
		}

		error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
					   &copied, &depth,
					   be64_to_cpu(lp[lp_offset]));
		if (error)
			break;

		/* Skip the remaining pointers that alias this same leaf */
		len = 1 << (dip->i_di.di_depth - depth);
		index = (index & ~(len - 1)) + len;
	}

out:
	kfree(lp);
	/* >0 means filldir stopped us; that is not an error to callers */
	if (error > 0)
		error = 0;
	return error;
}
| 1398 | |||
/**
 * gfs2_dir_read - Read entries from a directory (stuffed or exhash)
 * @inode: the directory inode
 * @offset: the readdir position to resume from (updated)
 * @opaque: opaque data for @filldir
 * @filldir: callback that receives each entry
 *
 * Exhash directories are delegated to dir_e_read(); otherwise the
 * directory must be stuffed in the dinode and is read directly.
 *
 * Returns: errno
 */
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
		  gfs2_filldir_t filldir)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	struct dirent_gather g;
	const struct gfs2_dirent **darr, *dent;
	struct buffer_head *dibh;
	int copied = 0;
	int error;

	if (!dip->i_di.di_entries)
		return 0;

	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
		return dir_e_read(inode, offset, opaque, filldir);

	/* Non-exhash directories must be stuffed in the dinode */
	if (!gfs2_is_stuffed(dip)) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (error)
		return error;

	error = -ENOMEM;
	darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *),
		       GFP_KERNEL);
	if (darr) {
		g.pdent = darr;
		g.offset = 0;
		dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
					gfs2_dirent_gather, NULL, &g);
		if (IS_ERR(dent)) {
			error = PTR_ERR(dent);
			goto out;
		}
		error = do_filldir_main(dip, offset, opaque, filldir, darr,
					dip->i_di.di_entries, &copied);
		/* NB: label is inside the if-block; both paths free darr */
out:
		kfree(darr);
	}

	/* >0 means filldir stopped us; not an error to callers */
	if (error > 0)
		error = 0;

	brelse(dibh);

	return error;
}
| 1449 | |||
| 1450 | /** | ||
| 1451 | * gfs2_dir_search - Search a directory | ||
| 1452 | * @dip: The GFS2 inode | ||
| 1453 | * @filename: | ||
| 1454 | * @inode: | ||
| 1455 | * | ||
| 1456 | * This routine searches a directory for a file or another directory. | ||
| 1457 | * Assumes a glock is held on dip. | ||
| 1458 | * | ||
| 1459 | * Returns: errno | ||
| 1460 | */ | ||
| 1461 | |||
| 1462 | int gfs2_dir_search(struct inode *dir, const struct qstr *name, | ||
| 1463 | struct gfs2_inum *inum, unsigned int *type) | ||
| 1464 | { | ||
| 1465 | struct buffer_head *bh; | ||
| 1466 | struct gfs2_dirent *dent; | ||
| 1467 | |||
| 1468 | dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); | ||
| 1469 | if (dent) { | ||
| 1470 | if (IS_ERR(dent)) | ||
| 1471 | return PTR_ERR(dent); | ||
| 1472 | if (inum) | ||
| 1473 | gfs2_inum_in(inum, (char *)&dent->de_inum); | ||
| 1474 | if (type) | ||
| 1475 | *type = be16_to_cpu(dent->de_type); | ||
| 1476 | brelse(bh); | ||
| 1477 | return 0; | ||
| 1478 | } | ||
| 1479 | return -ENOENT; | ||
| 1480 | } | ||
| 1481 | |||
/**
 * dir_new_leaf - Append a new leaf to a full leaf chain
 * @inode: the directory inode
 * @name: the name being inserted (its hash selects the chain)
 *
 * Walks to the last leaf in the chain for @name's hash and links a new
 * leaf (at the same depth) onto the end via lf_next.
 *
 * Returns: 0 on success, errno on failure
 */
static int dir_new_leaf(struct inode *inode, const struct qstr *name)
{
	struct buffer_head *bh, *obh;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_leaf *leaf, *oleaf;
	int error;
	u32 index;
	u64 bn;

	index = name->hash >> (32 - ip->i_di.di_depth);
	error = get_first_leaf(ip, index, &obh);
	if (error)
		return error;
	/* Follow lf_next to the last leaf in the chain */
	do {
		oleaf = (struct gfs2_leaf *)obh->b_data;
		bn = be64_to_cpu(oleaf->lf_next);
		if (!bn)
			break;
		brelse(obh);
		error = get_leaf(ip, bn, &obh);
		if (error)
			return error;
	} while(1);

	gfs2_trans_add_bh(ip->i_gl, obh, 1);

	leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
	if (!leaf) {
		brelse(obh);
		return -ENOSPC;
	}
	/* Link the new leaf onto the end of the chain */
	oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
	brelse(bh);
	brelse(obh);

	/* Account for the new block in the dinode */
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		return error;
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	ip->i_di.di_blocks++;
	gfs2_dinode_out(&ip->i_di, bh->b_data);
	brelse(bh);
	return 0;
}
| 1526 | |||
/**
 * gfs2_dir_add - Add new filename into directory
 * @inode: The directory inode
 * @name: The new name
 * @inum: The inode number of the entry
 * @type: The type of the entry
 *
 * Retries until space is found for the entry, escalating each time:
 * convert a stuffed directory to exhash, then split the target leaf,
 * then double the hash table and split again, and finally chain a new
 * leaf onto the end.
 *
 * Returns: 0 on success, error code on failure
 */

int gfs2_dir_add(struct inode *inode, const struct qstr *name,
		 const struct gfs2_inum *inum, unsigned type)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	struct gfs2_leaf *leaf;
	int error;

	while(1) {
		/* Look for space; on success fill in the entry and stop */
		dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
					  &bh);
		if (dent) {
			if (IS_ERR(dent))
				return PTR_ERR(dent);
			dent = gfs2_init_dirent(inode, dent, name, bh);
			gfs2_inum_out(inum, (char *)&dent->de_inum);
			dent->de_type = cpu_to_be16(type);
			if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
				leaf = (struct gfs2_leaf *)bh->b_data;
				leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1);
			}
			brelse(bh);
			error = gfs2_meta_inode_buffer(ip, &bh);
			if (error)
				break;
			gfs2_trans_add_bh(ip->i_gl, bh, 1);
			ip->i_di.di_entries++;
			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
			gfs2_dinode_out(&ip->i_di, bh->b_data);
			brelse(bh);
			error = 0;
			break;
		}
		/* No space: escalate. Step 1 — convert stuffed to exhash */
		if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
			error = dir_make_exhash(inode);
			if (error)
				break;
			continue;
		}
		/* Step 2 — split the leaf (returns 1 if it can't split) */
		error = dir_split_leaf(inode, name);
		if (error == 0)
			continue;
		if (error < 0)
			break;
		/* Step 3 — double the hash table, then split again */
		if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
			error = dir_double_exhash(ip);
			if (error)
				break;
			error = dir_split_leaf(inode, name);
			if (error < 0)
				break;
			if (error == 0)
				continue;
		}
		/* Step 4 — chain a new leaf onto the end */
		error = dir_new_leaf(inode, name);
		if (!error)
			continue;
		error = -ENOSPC;
		break;
	}
	return error;
}
| 1600 | |||
| 1601 | |||
/**
 * gfs2_dir_del - Delete a directory entry
 * @dip: The GFS2 inode
 * @name: The filename
 *
 * Returns: 0 on success, error code on failure
 */

int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
{
	struct gfs2_dirent *dent, *prev = NULL;
	struct buffer_head *bh;
	int error;

	/* Returns _either_ the entry (if its first in block) or the
	   previous entry otherwise */
	dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
	if (!dent) {
		/* Entry should exist; its absence means corruption */
		gfs2_consist_inode(dip);
		return -EIO;
	}
	if (IS_ERR(dent)) {
		gfs2_consist_inode(dip);
		return PTR_ERR(dent);
	}
	/* If not first in block, adjust pointers accordingly */
	if (gfs2_dirent_find(dent, name, NULL) == 0) {
		prev = dent;
		dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
	}

	dirent_del(dip, bh, prev, dent);
	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
		u16 entries = be16_to_cpu(leaf->lf_entries);
		if (!entries)
			gfs2_consist_inode(dip);
		leaf->lf_entries = cpu_to_be16(--entries);
	}
	brelse(bh);

	/* Update the dinode's entry count and timestamps */
	error = gfs2_meta_inode_buffer(dip, &bh);
	if (error)
		return error;

	if (!dip->i_di.di_entries)
		gfs2_consist_inode(dip);
	gfs2_trans_add_bh(dip->i_gl, bh, 1);
	dip->i_di.di_entries--;
	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
	gfs2_dinode_out(&dip->i_di, bh->b_data);
	brelse(bh);
	mark_inode_dirty(&dip->i_inode);

	return error;
}
| 1658 | |||
/**
 * gfs2_dir_mvino - Change inode number of directory entry
 * @dip: The GFS2 inode
 * @filename: the name whose entry is updated
 * @inum: the new inum to store in the entry
 * @new_type: the new entry type
 *
 * This routine changes the inode number of a directory entry.  It's used
 * by rename to change ".." when a directory is moved.
 * Assumes a glock is held on dvp.
 *
 * Returns: errno
 */

int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
		   struct gfs2_inum *inum, unsigned int new_type)
{
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	int error;

	dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
	if (!dent) {
		/* Entry should exist; its absence means corruption */
		gfs2_consist_inode(dip);
		return -EIO;
	}
	if (IS_ERR(dent))
		return PTR_ERR(dent);

	gfs2_trans_add_bh(dip->i_gl, bh, 1);
	gfs2_inum_out(inum, (char *)&dent->de_inum);
	dent->de_type = cpu_to_be16(new_type);

	/* For exhash dirs the entry lives in a leaf, so the dinode must
	   be fetched separately to update its timestamps below */
	if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
		brelse(bh);
		error = gfs2_meta_inode_buffer(dip, &bh);
		if (error)
			return error;
		gfs2_trans_add_bh(dip->i_gl, bh, 1);
	}

	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
	gfs2_dinode_out(&dip->i_di, bh->b_data);
	brelse(bh);
	return 0;
}
| 1704 | |||
/**
 * foreach_leaf - call a function for each leaf in a directory
 * @dip: the directory
 * @lc: the function to call for each each
 * @data: private data to pass to it
 *
 * Walks the exhash table and invokes @lc once per distinct leaf chain,
 * passing the table index, the number of pointers to that leaf, and
 * the leaf's block number.
 *
 * Returns: errno
 */

static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct buffer_head *bh;
	struct gfs2_leaf *leaf;
	u32 hsize, len;
	u32 ht_offset, lp_offset, ht_offset_cur = -1;
	u32 index = 0;
	u64 *lp;
	u64 leaf_no;
	int error = 0;

	/* Sanity check: table size must match the depth */
	hsize = 1 << dip->i_di.di_depth;
	if (hsize * sizeof(u64) != dip->i_di.di_size) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
	if (!lp)
		return -ENOMEM;

	while (index < hsize) {
		lp_offset = index & (sdp->sd_hash_ptrs - 1);
		ht_offset = index - lp_offset;

		/* Only re-read the hash block when index crosses into a
		   new one */
		if (ht_offset_cur != ht_offset) {
			error = gfs2_dir_read_data(dip, (char *)lp,
						   ht_offset * sizeof(u64),
						   sdp->sd_hash_bsize, 1);
			if (error != sdp->sd_hash_bsize) {
				if (error >= 0)
					error = -EIO;
				goto out;
			}
			ht_offset_cur = ht_offset;
		}

		leaf_no = be64_to_cpu(lp[lp_offset]);
		if (leaf_no) {
			error = get_leaf(dip, leaf_no, &bh);
			if (error)
				goto out;
			leaf = (struct gfs2_leaf *)bh->b_data;
			len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth));
			brelse(bh);

			error = lc(dip, index, len, leaf_no, data);
			if (error)
				goto out;

			/* Skip the remaining pointers aliasing this leaf */
			index = (index & ~(len - 1)) + len;
		} else
			index++;
	}

	/* The loop must land exactly on hsize; overshoot means the
	   table was inconsistent */
	if (index != hsize) {
		gfs2_consist_inode(dip);
		error = -EIO;
	}

out:
	kfree(lp);

	return error;
}
| 1780 | |||
/**
 * leaf_dealloc - Deallocate a directory leaf
 * @dip: the directory
 * @index: the hash table offset in the directory
 * @len: the number of pointers to this leaf
 * @leaf_no: the leaf number
 * @data: not used
 *
 * Frees every leaf block on the lf_next chain starting at @leaf_no and
 * zeroes the @len hash-table pointers at @index that referenced it.
 *
 * Returns: errno
 */

static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
			u64 leaf_no, void *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_leaf *tmp_leaf;
	struct gfs2_rgrp_list rlist;
	struct buffer_head *bh, *dibh;
	u64 blk, nblk;
	unsigned int rg_blocks = 0, l_blocks = 0;
	char *ht;	/* zero-filled buffer used to clear the hash-table slots */
	unsigned int x, size = len * sizeof(u64);
	int error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));

	ht = kzalloc(size, GFP_KERNEL);
	if (!ht)
		return -ENOMEM;

	gfs2_alloc_get(dip);

	error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
	if (error)
		goto out_qs;

	/* First pass: walk the lf_next chain, counting the leaves and
	   collecting the resource groups they live in. */

	for (blk = leaf_no; blk; blk = nblk) {
		error = get_leaf(dip, blk, &bh);
		if (error)
			goto out_rlist;
		tmp_leaf = (struct gfs2_leaf *)bh->b_data;
		nblk = be64_to_cpu(tmp_leaf->lf_next);
		brelse(bh);

		gfs2_rlist_add(sdp, &rlist, blk);
		l_blocks++;
	}

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);

	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_ri.ri_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist;

	/* Journal space: rgrp bitmaps + the hash-table rewrite + dinode +
	   statfs/quota changes; l_blocks revokes for the freed leaves. */
	error = gfs2_trans_begin(sdp,
			rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
			RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
	if (error)
		goto out_rg_gunlock;

	/* Second pass: free each leaf block on the chain. */
	for (blk = leaf_no; blk; blk = nblk) {
		error = get_leaf(dip, blk, &bh);
		if (error)
			goto out_end_trans;
		tmp_leaf = (struct gfs2_leaf *)bh->b_data;
		nblk = be64_to_cpu(tmp_leaf->lf_next);
		brelse(bh);

		gfs2_free_meta(dip, blk, 1);

		if (!dip->i_di.di_blocks)
			gfs2_consist_inode(dip);
		dip->i_di.di_blocks--;
	}

	/* Zero the hash-table slots that pointed at this chain
	   (ht was kzalloc'ed above). */
	error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
	if (error != size) {
		if (error >= 0)
			error = -EIO;
		goto out_end_trans;
	}

	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (error)
		goto out_end_trans;

	gfs2_trans_add_bh(dip->i_gl, dibh, 1);
	gfs2_dinode_out(&dip->i_di, dibh->b_data);
	brelse(dibh);

out_end_trans:
	gfs2_trans_end(sdp);
out_rg_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
	gfs2_rlist_free(&rlist);
	gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
out_qs:
	gfs2_quota_unhold(dip);
out:
	gfs2_alloc_put(dip);
	kfree(ht);
	return error;
}
| 1897 | |||
| 1898 | /** | ||
| 1899 | * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory | ||
| 1900 | * @dip: the directory | ||
| 1901 | * | ||
| 1902 | * Dealloc all on-disk directory leaves to FREEMETA state | ||
| 1903 | * Change on-disk inode type to "regular file" | ||
| 1904 | * | ||
| 1905 | * Returns: errno | ||
| 1906 | */ | ||
| 1907 | |||
| 1908 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) | ||
| 1909 | { | ||
| 1910 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | ||
| 1911 | struct buffer_head *bh; | ||
| 1912 | int error; | ||
| 1913 | |||
| 1914 | /* Dealloc on-disk leaves to FREEMETA state */ | ||
| 1915 | error = foreach_leaf(dip, leaf_dealloc, NULL); | ||
| 1916 | if (error) | ||
| 1917 | return error; | ||
| 1918 | |||
| 1919 | /* Make this a regular file in case we crash. | ||
| 1920 | (We don't want to free these blocks a second time.) */ | ||
| 1921 | |||
| 1922 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
| 1923 | if (error) | ||
| 1924 | return error; | ||
| 1925 | |||
| 1926 | error = gfs2_meta_inode_buffer(dip, &bh); | ||
| 1927 | if (!error) { | ||
| 1928 | gfs2_trans_add_bh(dip->i_gl, bh, 1); | ||
| 1929 | ((struct gfs2_dinode *)bh->b_data)->di_mode = | ||
| 1930 | cpu_to_be32(S_IFREG); | ||
| 1931 | brelse(bh); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | gfs2_trans_end(sdp); | ||
| 1935 | |||
| 1936 | return error; | ||
| 1937 | } | ||
| 1938 | |||
| 1939 | /** | ||
| 1940 | * gfs2_diradd_alloc_required - find if adding entry will require an allocation | ||
| 1941 | * @ip: the file being written to | ||
| 1942 | * @filname: the filename that's going to be added | ||
| 1943 | * | ||
| 1944 | * Returns: 1 if alloc required, 0 if not, -ve on error | ||
| 1945 | */ | ||
| 1946 | |||
| 1947 | int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) | ||
| 1948 | { | ||
| 1949 | struct gfs2_dirent *dent; | ||
| 1950 | struct buffer_head *bh; | ||
| 1951 | |||
| 1952 | dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); | ||
| 1953 | if (!dent) { | ||
| 1954 | return 1; | ||
| 1955 | } | ||
| 1956 | if (IS_ERR(dent)) | ||
| 1957 | return PTR_ERR(dent); | ||
| 1958 | brelse(bh); | ||
| 1959 | return 0; | ||
| 1960 | } | ||
| 1961 | |||
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h new file mode 100644 index 000000000000..371233419b07 --- /dev/null +++ b/fs/gfs2/dir.h | |||
| @@ -0,0 +1,79 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__

#include <linux/dcache.h>

struct inode;
struct gfs2_inode;
struct gfs2_inum;

/**
 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
 * @opaque: opaque data used by the function
 * @name: the name of the directory entry
 * @length: the length of the name
 * @offset: the entry's offset in the directory
 * @inum: the inode number the entry points to
 * @type: the type of inode the entry points to
 *
 * Returns: 0 on success, 1 if buffer full
 */

typedef int (*gfs2_filldir_t) (void *opaque,
			      const char *name, unsigned int length,
			      u64 offset,
			      struct gfs2_inum *inum, unsigned int type);

/* Entry lookup, insertion, removal and iteration. */
int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
		    struct gfs2_inum *inum, unsigned int *type);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
		 const struct gfs2_inum *inum, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
		  gfs2_filldir_t filldir);
/* Repoint an existing entry at a new inum/type (presumably for rename —
   confirm against callers). */
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
		   struct gfs2_inum *new_inum, unsigned int new_type);

int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);

int gfs2_diradd_alloc_required(struct inode *dir,
			       const struct qstr *filename);
int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
			    struct buffer_head **bhp);
| 52 | |||
| 53 | static inline u32 gfs2_disk_hash(const char *data, int len) | ||
| 54 | { | ||
| 55 | return crc32_le((u32)~0, data, len) ^ (u32)~0; | ||
| 56 | } | ||
| 57 | |||
| 58 | |||
| 59 | static inline void gfs2_str2qstr(struct qstr *name, const char *fname) | ||
| 60 | { | ||
| 61 | name->name = fname; | ||
| 62 | name->len = strlen(fname); | ||
| 63 | name->hash = gfs2_disk_hash(name->name, name->len); | ||
| 64 | } | ||
| 65 | |||
| 66 | /* N.B. This probably ought to take inum & type as args as well */ | ||
| 67 | static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent) | ||
| 68 | { | ||
| 69 | dent->de_inum.no_addr = cpu_to_be64(0); | ||
| 70 | dent->de_inum.no_formal_ino = cpu_to_be64(0); | ||
| 71 | dent->de_hash = cpu_to_be32(name->hash); | ||
| 72 | dent->de_rec_len = cpu_to_be16(reclen); | ||
| 73 | dent->de_name_len = cpu_to_be16(name->len); | ||
| 74 | dent->de_type = cpu_to_be16(0); | ||
| 75 | memset(dent->__pad, 0, sizeof(dent->__pad)); | ||
| 76 | memcpy(dent + 1, name->name, name->len); | ||
| 77 | } | ||
| 78 | |||
| 79 | #endif /* __DIR_DOT_H__ */ | ||
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c new file mode 100644 index 000000000000..92c54e9b0dc3 --- /dev/null +++ b/fs/gfs2/eaops.c | |||
| @@ -0,0 +1,230 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/xattr.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | #include <asm/uaccess.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "acl.h" | ||
| 23 | #include "eaops.h" | ||
| 24 | #include "eattr.h" | ||
| 25 | #include "util.h" | ||
| 26 | |||
| 27 | /** | ||
| 28 | * gfs2_ea_name2type - get the type of the ea, and truncate type from the name | ||
| 29 | * @namep: ea name, possibly with type appended | ||
| 30 | * | ||
| 31 | * Returns: GFS2_EATYPE_XXX | ||
| 32 | */ | ||
| 33 | |||
| 34 | unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) | ||
| 35 | { | ||
| 36 | unsigned int type; | ||
| 37 | |||
| 38 | if (strncmp(name, "system.", 7) == 0) { | ||
| 39 | type = GFS2_EATYPE_SYS; | ||
| 40 | if (truncated_name) | ||
| 41 | *truncated_name = name + sizeof("system.") - 1; | ||
| 42 | } else if (strncmp(name, "user.", 5) == 0) { | ||
| 43 | type = GFS2_EATYPE_USR; | ||
| 44 | if (truncated_name) | ||
| 45 | *truncated_name = name + sizeof("user.") - 1; | ||
| 46 | } else if (strncmp(name, "security.", 9) == 0) { | ||
| 47 | type = GFS2_EATYPE_SECURITY; | ||
| 48 | if (truncated_name) | ||
| 49 | *truncated_name = name + sizeof("security.") - 1; | ||
| 50 | } else { | ||
| 51 | type = GFS2_EATYPE_UNUSED; | ||
| 52 | if (truncated_name) | ||
| 53 | *truncated_name = NULL; | ||
| 54 | } | ||
| 55 | |||
| 56 | return type; | ||
| 57 | } | ||
| 58 | |||
| 59 | static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 60 | { | ||
| 61 | struct inode *inode = &ip->i_inode; | ||
| 62 | int error = permission(inode, MAY_READ, NULL); | ||
| 63 | if (error) | ||
| 64 | return error; | ||
| 65 | |||
| 66 | return gfs2_ea_get_i(ip, er); | ||
| 67 | } | ||
| 68 | |||
| 69 | static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 70 | { | ||
| 71 | struct inode *inode = &ip->i_inode; | ||
| 72 | |||
| 73 | if (S_ISREG(inode->i_mode) || | ||
| 74 | (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { | ||
| 75 | int error = permission(inode, MAY_WRITE, NULL); | ||
| 76 | if (error) | ||
| 77 | return error; | ||
| 78 | } else | ||
| 79 | return -EPERM; | ||
| 80 | |||
| 81 | return gfs2_ea_set_i(ip, er); | ||
| 82 | } | ||
| 83 | |||
| 84 | static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 85 | { | ||
| 86 | struct inode *inode = &ip->i_inode; | ||
| 87 | |||
| 88 | if (S_ISREG(inode->i_mode) || | ||
| 89 | (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { | ||
| 90 | int error = permission(inode, MAY_WRITE, NULL); | ||
| 91 | if (error) | ||
| 92 | return error; | ||
| 93 | } else | ||
| 94 | return -EPERM; | ||
| 95 | |||
| 96 | return gfs2_ea_remove_i(ip, er); | ||
| 97 | } | ||
| 98 | |||
| 99 | static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 100 | { | ||
| 101 | if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && | ||
| 102 | !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) && | ||
| 103 | !capable(CAP_SYS_ADMIN)) | ||
| 104 | return -EPERM; | ||
| 105 | |||
| 106 | if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 && | ||
| 107 | (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) || | ||
| 108 | GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) | ||
| 109 | return -EOPNOTSUPP; | ||
| 110 | |||
| 111 | |||
| 112 | |||
| 113 | return gfs2_ea_get_i(ip, er); | ||
| 114 | } | ||
| 115 | |||
/* Setting a "system." xattr: only the two POSIX ACL attributes may be
   set this way; anything else is refused with -EPERM. */
static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int remove = 0;
	int error;

	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
		/* Setting the access ACL may fold permission bits into the
		   file mode; make sure the request carries a mode for
		   gfs2_acl_validate_set() to update. */
		if (!(er->er_flags & GFS2_ERF_MODE)) {
			er->er_mode = ip->i_di.di_mode;
			er->er_flags |= GFS2_ERF_MODE;
		}
		error = gfs2_acl_validate_set(ip, 1, er,
					      &remove, &er->er_mode);
		if (error)
			return error;
		error = gfs2_ea_set_i(ip, er);
		if (error)
			return error;
		/* validate_set asked for the EA itself to be dropped.
		   NOTE(review): the remove's return value is deliberately
		   ignored here. */
		if (remove)
			gfs2_ea_remove_i(ip, er);
		return 0;

	} else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
		error = gfs2_acl_validate_set(ip, 0, er,
					      &remove, NULL);
		if (error)
			return error;
		if (!remove)
			error = gfs2_ea_set_i(ip, er);
		else {
			error = gfs2_ea_remove_i(ip, er);
			/* Removing an absent default ACL is not an error. */
			if (error == -ENODATA)
				error = 0;
		}
		return error;
	}

	return -EPERM;
}
| 154 | |||
| 155 | static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 156 | { | ||
| 157 | if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) { | ||
| 158 | int error = gfs2_acl_validate_remove(ip, 1); | ||
| 159 | if (error) | ||
| 160 | return error; | ||
| 161 | |||
| 162 | } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) { | ||
| 163 | int error = gfs2_acl_validate_remove(ip, 0); | ||
| 164 | if (error) | ||
| 165 | return error; | ||
| 166 | |||
| 167 | } else | ||
| 168 | return -EPERM; | ||
| 169 | |||
| 170 | return gfs2_ea_remove_i(ip, er); | ||
| 171 | } | ||
| 172 | |||
| 173 | static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 174 | { | ||
| 175 | struct inode *inode = &ip->i_inode; | ||
| 176 | int error = permission(inode, MAY_READ, NULL); | ||
| 177 | if (error) | ||
| 178 | return error; | ||
| 179 | |||
| 180 | return gfs2_ea_get_i(ip, er); | ||
| 181 | } | ||
| 182 | |||
| 183 | static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 184 | { | ||
| 185 | struct inode *inode = &ip->i_inode; | ||
| 186 | int error = permission(inode, MAY_WRITE, NULL); | ||
| 187 | if (error) | ||
| 188 | return error; | ||
| 189 | |||
| 190 | return gfs2_ea_set_i(ip, er); | ||
| 191 | } | ||
| 192 | |||
| 193 | static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 194 | { | ||
| 195 | struct inode *inode = &ip->i_inode; | ||
| 196 | int error = permission(inode, MAY_WRITE, NULL); | ||
| 197 | if (error) | ||
| 198 | return error; | ||
| 199 | |||
| 200 | return gfs2_ea_remove_i(ip, er); | ||
| 201 | } | ||
| 202 | |||
/* Handlers for the "user." xattr namespace; access is governed by
   ordinary inode permissions (see user_eo_*). */
static struct gfs2_eattr_operations gfs2_user_eaops = {
	.eo_get = user_eo_get,
	.eo_set = user_eo_set,
	.eo_remove = user_eo_remove,
	.eo_name = "user",
};

/* Handlers for the "system." namespace (POSIX ACLs and admin-only
   attributes); non-static, so also referenced outside this file. */
struct gfs2_eattr_operations gfs2_system_eaops = {
	.eo_get = system_eo_get,
	.eo_set = system_eo_set,
	.eo_remove = system_eo_remove,
	.eo_name = "system",
};

/* Handlers for the "security." namespace. */
static struct gfs2_eattr_operations gfs2_security_eaops = {
	.eo_get = security_eo_get,
	.eo_set = security_eo_set,
	.eo_remove = security_eo_remove,
	.eo_name = "security",
};

/* Dispatch table indexed by EA type; slot 0 is NULL — presumably
   GFS2_EATYPE_UNUSED, confirm against the GFS2_EATYPE_* values. */
struct gfs2_eattr_operations *gfs2_ea_ops[] = {
	NULL,
	&gfs2_user_eaops,
	&gfs2_system_eaops,
	&gfs2_security_eaops,
};
| 230 | |||
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h new file mode 100644 index 000000000000..508b4f7a2449 --- /dev/null +++ b/fs/gfs2/eaops.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __EAOPS_DOT_H__ | ||
| 11 | #define __EAOPS_DOT_H__ | ||
| 12 | |||
| 13 | struct gfs2_ea_request; | ||
| 14 | struct gfs2_inode; | ||
| 15 | |||
| 16 | struct gfs2_eattr_operations { | ||
| 17 | int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
| 18 | int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
| 19 | int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er); | ||
| 20 | char *eo_name; | ||
| 21 | }; | ||
| 22 | |||
| 23 | unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name); | ||
| 24 | |||
| 25 | extern struct gfs2_eattr_operations gfs2_system_eaops; | ||
| 26 | |||
| 27 | extern struct gfs2_eattr_operations *gfs2_ea_ops[]; | ||
| 28 | |||
| 29 | #endif /* __EAOPS_DOT_H__ */ | ||
| 30 | |||
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c new file mode 100644 index 000000000000..a65a4ccfd4dd --- /dev/null +++ b/fs/gfs2/eattr.c | |||
| @@ -0,0 +1,1501 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/xattr.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | #include <asm/uaccess.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "acl.h" | ||
| 23 | #include "eaops.h" | ||
| 24 | #include "eattr.h" | ||
| 25 | #include "glock.h" | ||
| 26 | #include "inode.h" | ||
| 27 | #include "meta_io.h" | ||
| 28 | #include "quota.h" | ||
| 29 | #include "rgrp.h" | ||
| 30 | #include "trans.h" | ||
| 31 | #include "util.h" | ||
| 32 | |||
| 33 | /** | ||
| 34 | * ea_calc_size - returns the acutal number of bytes the request will take up | ||
| 35 | * (not counting any unstuffed data blocks) | ||
| 36 | * @sdp: | ||
| 37 | * @er: | ||
| 38 | * @size: | ||
| 39 | * | ||
| 40 | * Returns: 1 if the EA should be stuffed | ||
| 41 | */ | ||
| 42 | |||
| 43 | static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er, | ||
| 44 | unsigned int *size) | ||
| 45 | { | ||
| 46 | *size = GFS2_EAREQ_SIZE_STUFFED(er); | ||
| 47 | if (*size <= sdp->sd_jbsize) | ||
| 48 | return 1; | ||
| 49 | |||
| 50 | *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er); | ||
| 51 | |||
| 52 | return 0; | ||
| 53 | } | ||
| 54 | |||
| 55 | static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er) | ||
| 56 | { | ||
| 57 | unsigned int size; | ||
| 58 | |||
| 59 | if (er->er_data_len > GFS2_EA_MAX_DATA_LEN) | ||
| 60 | return -ERANGE; | ||
| 61 | |||
| 62 | ea_calc_size(sdp, er, &size); | ||
| 63 | |||
| 64 | /* This can only happen with 512 byte blocks */ | ||
| 65 | if (size > sdp->sd_jbsize) | ||
| 66 | return -ERANGE; | ||
| 67 | |||
| 68 | return 0; | ||
| 69 | } | ||
| 70 | |||
/* Callback invoked for each EA header during a walk; a non-zero return
   stops the walk and is propagated to the caller. */
typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
			  struct gfs2_ea_header *ea,
			  struct gfs2_ea_header *prev, void *private);

/**
 * ea_foreach_i - walk the chain of EA headers within a single EA block
 * @ip: the inode
 * @bh: the EA block
 * @ea_call: called for each header
 * @data: opaque argument passed through to @ea_call
 *
 * Each header is bounds- and type-checked before the callback runs; any
 * inconsistency marks the inode inconsistent and returns -EIO.
 *
 * Returns: errno, or the first non-zero value returned by @ea_call
 */
static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
			ea_call_t ea_call, void *data)
{
	struct gfs2_ea_header *ea, *prev = NULL;
	int error = 0;

	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_EA))
		return -EIO;

	for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
		/* A zero record length would make GFS2_EA2NEXT loop forever. */
		if (!GFS2_EA_REC_LEN(ea))
			goto fail;
		/* The whole record must lie inside the buffer. */
		if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
						  bh->b_data + bh->b_size))
			goto fail;
		if (!GFS2_EATYPE_VALID(ea->ea_type))
			goto fail;

		error = ea_call(ip, bh, ea, prev, data);
		if (error)
			return error;

		if (GFS2_EA_IS_LAST(ea)) {
			/* The final record must end exactly at the buffer end. */
			if ((char *)GFS2_EA2NEXT(ea) !=
			    bh->b_data + bh->b_size)
				goto fail;
			break;
		}
	}

	return error;

fail:
	gfs2_consist_inode(ip);
	return -EIO;
}
| 111 | |||
/* Walk every EA header of @ip, following the indirect EA block when the
   GFS2_DIF_EA_INDIRECT flag is set. */
static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
{
	struct buffer_head *bh, *eabh;
	u64 *eablk, *end;
	int error;

	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
	if (error)
		return error;

	/* Direct case: di_eattr points at a single EA block. */
	if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
		error = ea_foreach_i(ip, bh, ea_call, data);
		goto out;
	}

	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_IN)) {
		error = -EIO;
		goto out;
	}

	/* Indirect case: bh holds an array of big-endian pointers to EA
	   blocks, terminated by the first zero entry. */
	eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
	end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;

	for (; eablk < end; eablk++) {
		u64 bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh);
		if (error)
			break;
		error = ea_foreach_i(ip, eabh, ea_call, data);
		brelse(eabh);
		if (error)
			break;
	}
out:
	brelse(bh);
	return error;
}
| 154 | |||
| 155 | struct ea_find { | ||
| 156 | struct gfs2_ea_request *ef_er; | ||
| 157 | struct gfs2_ea_location *ef_el; | ||
| 158 | }; | ||
| 159 | |||
| 160 | static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, | ||
| 161 | struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, | ||
| 162 | void *private) | ||
| 163 | { | ||
| 164 | struct ea_find *ef = private; | ||
| 165 | struct gfs2_ea_request *er = ef->ef_er; | ||
| 166 | |||
| 167 | if (ea->ea_type == GFS2_EATYPE_UNUSED) | ||
| 168 | return 0; | ||
| 169 | |||
| 170 | if (ea->ea_type == er->er_type) { | ||
| 171 | if (ea->ea_name_len == er->er_name_len && | ||
| 172 | !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) { | ||
| 173 | struct gfs2_ea_location *el = ef->ef_el; | ||
| 174 | get_bh(bh); | ||
| 175 | el->el_bh = bh; | ||
| 176 | el->el_ea = ea; | ||
| 177 | el->el_prev = prev; | ||
| 178 | return 1; | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | return 0; | ||
| 183 | } | ||
| 184 | |||
| 185 | int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er, | ||
| 186 | struct gfs2_ea_location *el) | ||
| 187 | { | ||
| 188 | struct ea_find ef; | ||
| 189 | int error; | ||
| 190 | |||
| 191 | ef.ef_er = er; | ||
| 192 | ef.ef_el = el; | ||
| 193 | |||
| 194 | memset(el, 0, sizeof(struct gfs2_ea_location)); | ||
| 195 | |||
| 196 | error = ea_foreach(ip, ea_find_i, &ef); | ||
| 197 | if (error > 0) | ||
| 198 | return 0; | ||
| 199 | |||
| 200 | return error; | ||
| 201 | } | ||
| 202 | |||
/**
 * ea_dealloc_unstuffed - free the data blocks of an unstuffed EA
 * @ip: the inode
 * @bh: the buffer holding the EA header
 * @ea: the EA whose data blocks are freed
 * @prev: the previous EA header in the block, if any
 * @private: if non-NULL, the EA header itself is left in place
 *
 * Take advantage of the fact that all unstuffed blocks are
 * allocated from the same RG. But watch, this may not always
 * be true.
 *
 * Returns: errno
 */

static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
				struct gfs2_ea_header *ea,
				struct gfs2_ea_header *prev, void *private)
{
	int *leave = private;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_holder rg_gh;
	struct buffer_head *dibh;
	u64 *dataptrs, bn = 0;
	u64 bstart = 0;		/* start of the current contiguous run */
	unsigned int blen = 0;	/* length of the current contiguous run */
	unsigned int blks = 0;
	unsigned int x;
	int error;

	/* Stuffed EAs keep their data inline; nothing to free. */
	if (GFS2_EA_IS_STUFFED(ea))
		return 0;

	/* Count the data blocks; bn ends up holding the last one, which
	   is used to look up the (single, assumed) resource group. */
	dataptrs = GFS2_EA2DATAPTRS(ea);
	for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
		if (*dataptrs) {
			blks++;
			bn = be64_to_cpu(*dataptrs);
		}
	}
	if (!blks)
		return 0;

	rgd = gfs2_blk2rgrpd(sdp, bn);
	if (!rgd) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
	if (error)
		return error;

	error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
				 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
	if (error)
		goto out_gunlock;

	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	/* Free the blocks, batching contiguous runs into single calls,
	   zeroing the pointers in the EA header as we go. */
	dataptrs = GFS2_EA2DATAPTRS(ea);
	for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
		if (!*dataptrs)
			break;
		bn = be64_to_cpu(*dataptrs);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_free_meta(ip, bstart, blen);
			bstart = bn;
			blen = 1;
		}

		*dataptrs = 0;
		if (!ip->i_di.di_blocks)
			gfs2_consist_inode(ip);
		ip->i_di.di_blocks--;
	}
	/* Flush the final run. */
	if (bstart)
		gfs2_free_meta(ip, bstart, blen);

	if (prev && !leave) {
		/* Absorb this record into the previous one. */
		u32 len;

		len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
		prev->ea_rec_len = cpu_to_be32(len);

		if (GFS2_EA_IS_LAST(ea))
			prev->ea_flags |= GFS2_EAFLAG_LAST;
	} else {
		/* No previous record (or asked to leave it): mark unused. */
		ea->ea_type = GFS2_EATYPE_UNUSED;
		ea->ea_num_ptrs = 0;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

out_gunlock:
	gfs2_glock_dq_uninit(&rg_gh);
	return error;
}
| 314 | |||
| 315 | static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | ||
| 316 | struct gfs2_ea_header *ea, | ||
| 317 | struct gfs2_ea_header *prev, int leave) | ||
| 318 | { | ||
| 319 | struct gfs2_alloc *al; | ||
| 320 | int error; | ||
| 321 | |||
| 322 | al = gfs2_alloc_get(ip); | ||
| 323 | |||
| 324 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
| 325 | if (error) | ||
| 326 | goto out_alloc; | ||
| 327 | |||
| 328 | error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh); | ||
| 329 | if (error) | ||
| 330 | goto out_quota; | ||
| 331 | |||
| 332 | error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL); | ||
| 333 | |||
| 334 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
| 335 | |||
| 336 | out_quota: | ||
| 337 | gfs2_quota_unhold(ip); | ||
| 338 | out_alloc: | ||
| 339 | gfs2_alloc_put(ip); | ||
| 340 | return error; | ||
| 341 | } | ||
| 342 | |||
struct ea_list {
	struct gfs2_ea_request *ei_er;	/* the listing request */
	unsigned int ei_size;		/* bytes of output accumulated so far */
};

/* ea_foreach callback: emit "namespace.name\0" for one EA into the
   request buffer (when one was supplied) and accumulate the total size. */
static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
		     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
		     void *private)
{
	struct ea_list *ei = private;
	struct gfs2_ea_request *er = ei->ei_er;
	unsigned int ea_size = gfs2_ea_strlen(ea);

	if (ea->ea_type == GFS2_EATYPE_UNUSED)
		return 0;

	/* er_data_len == 0 means a size-only query: just accumulate. */
	if (er->er_data_len) {
		char *prefix = NULL;
		unsigned int l = 0;
		char c = 0;

		if (ei->ei_size + ea_size > er->er_data_len)
			return -ERANGE;

		switch (ea->ea_type) {
		case GFS2_EATYPE_USR:
			prefix = "user.";
			l = 5;
			break;
		case GFS2_EATYPE_SYS:
			prefix = "system.";
			l = 7;
			break;
		case GFS2_EATYPE_SECURITY:
			prefix = "security.";
			l = 9;
			break;
		}

		/* ea_foreach_i() only passes through valid types. */
		BUG_ON(l == 0);

		memcpy(er->er_data + ei->ei_size, prefix, l);
		memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
		       ea->ea_name_len);
		/* Terminating NUL; ea_size presumably includes it —
		   confirm against gfs2_ea_strlen(). */
		memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
	}

	ei->ei_size += ea_size;

	return 0;
}
| 394 | |||
/**
 * gfs2_ea_list - list the names of all extended attributes on an inode
 * @ip: the inode whose EAs are being listed
 * @er: the request; er_data/er_data_len describe the caller's buffer,
 *      or er_data_len == 0 for a pure size query
 *
 * Takes the inode glock shared and walks every EA record via
 * ea_foreach()/ea_list_i(), which fills er_data with the prefixed,
 * NUL-separated attribute names.
 *
 * Returns: actual size of data on success, -errno on error
 */

int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct gfs2_holder i_gh;
	int error;

	/* Normalize a size query: NULL buffer and zero length imply each other. */
	if (!er->er_data || !er->er_data_len) {
		er->er_data = NULL;
		er->er_data_len = 0;
	}

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (error)
		return error;

	/* No EA block at all means an empty list: fall through with error == 0. */
	if (ip->i_di.di_eattr) {
		struct ea_list ei = { .ei_er = er, .ei_size = 0 };

		error = ea_foreach(ip, ea_list_i, &ei);
		if (!error)
			error = ei.ei_size;
	}

	gfs2_glock_dq_uninit(&i_gh);

	return error;
}
| 429 | |||
/**
 * ea_get_unstuffed - actually copies the unstuffed data into the
 *                    request buffer
 * @ip: The GFS2 inode
 * @ea: The extended attribute header structure
 * @data: The buffer the value is copied into (caller guarantees it is at
 *        least GFS2_EA_DATA_LEN(ea) bytes)
 *
 * The value of an unstuffed EA lives in separate data blocks whose
 * addresses follow the name inside the EA record.  Reads are issued for
 * all blocks first (non-blocking), then waited on one at a time, so the
 * I/O overlaps.
 *
 * Returns: errno
 */

static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
			    char *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head **bh;
	unsigned int amount = GFS2_EA_DATA_LEN(ea);
	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
	u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
	unsigned int x;
	int error = 0;

	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
	if (!bh)
		return -ENOMEM;

	/* Phase 1: start an async read on every data block.  On failure,
	 * release only the buffers already obtained (indices < x). */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
				       bh + x);
		if (error) {
			while (x--)
				brelse(bh[x]);
			goto out;
		}
		dataptrs++;
	}

	/* Phase 2: wait for each read, verify the metadata type, and copy
	 * out one journaled block's worth of data (the last chunk may be
	 * shorter).  On failure, release this and all later buffers; the
	 * earlier ones have already been brelse'd below. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_wait(sdp, bh[x]);
		if (error) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			goto out;
		}
		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			error = -EIO;
			goto out;
		}

		memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header),
		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);

		/* amount may wrap below zero after the final partial chunk,
		 * but the loop terminates before it is used again. */
		amount -= sdp->sd_jbsize;
		data += sdp->sd_jbsize;

		brelse(bh[x]);
	}

out:
	kfree(bh);
	return error;
}
| 493 | |||
| 494 | int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, | ||
| 495 | char *data) | ||
| 496 | { | ||
| 497 | if (GFS2_EA_IS_STUFFED(el->el_ea)) { | ||
| 498 | memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea)); | ||
| 499 | return 0; | ||
| 500 | } else | ||
| 501 | return ea_get_unstuffed(ip, el->el_ea, data); | ||
| 502 | } | ||
| 503 | |||
/**
 * gfs2_ea_get_i - look up an EA by name and copy out its value
 * @ip: The GFS2 inode
 * @er: The request structure
 *
 * Returns: actual size of data on success, -errno on error
 */

int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct gfs2_ea_location el;
	int error;

	/* No EA block at all means no attribute can exist. */
	if (!ip->i_di.di_eattr)
		return -ENODATA;

	error = gfs2_ea_find(ip, er, &el);
	if (error)
		return error;
	if (!el.el_ea)
		return -ENODATA;

	/* er_data_len == 0 is a size query: skip the copy and just report
	 * the value's length below.  A too-small buffer yields -ERANGE. */
	if (er->er_data_len) {
		if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
			error = -ERANGE;
		else
			error = gfs2_ea_get_copy(ip, &el, er->er_data);
	}
	if (!error)
		error = GFS2_EA_DATA_LEN(el.el_ea);

	brelse(el.el_bh);

	return error;
}
| 539 | |||
| 540 | /** | ||
| 541 | * gfs2_ea_get - | ||
| 542 | * @ip: The GFS2 inode | ||
| 543 | * @er: The request structure | ||
| 544 | * | ||
| 545 | * Returns: actual size of data on success, -errno on error | ||
| 546 | */ | ||
| 547 | |||
| 548 | int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 549 | { | ||
| 550 | struct gfs2_holder i_gh; | ||
| 551 | int error; | ||
| 552 | |||
| 553 | if (!er->er_name_len || | ||
| 554 | er->er_name_len > GFS2_EA_MAX_NAME_LEN) | ||
| 555 | return -EINVAL; | ||
| 556 | if (!er->er_data || !er->er_data_len) { | ||
| 557 | er->er_data = NULL; | ||
| 558 | er->er_data_len = 0; | ||
| 559 | } | ||
| 560 | |||
| 561 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | ||
| 562 | if (error) | ||
| 563 | return error; | ||
| 564 | |||
| 565 | error = gfs2_ea_ops[er->er_type]->eo_get(ip, er); | ||
| 566 | |||
| 567 | gfs2_glock_dq_uninit(&i_gh); | ||
| 568 | |||
| 569 | return error; | ||
| 570 | } | ||
| 571 | |||
/**
 * ea_alloc_blk - allocates a new block for extended attributes.
 * @ip: A pointer to the inode that's getting extended attributes
 * @bhp: Pointer to pointer to a struct buffer_head
 *
 * Must be called inside an open transaction: the new block is added to
 * it immediately.  The block is initialized to hold a single unused EA
 * record covering the whole journaled block.  The caller owns the
 * returned buffer and must brelse() it, and is responsible for writing
 * the dinode back (di_blocks is bumped here).
 *
 * Returns: errno
 */

static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_ea_header *ea;
	u64 block;

	block = gfs2_alloc_meta(ip);

	*bhp = gfs2_meta_new(ip->i_gl, block);
	gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
	gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
	gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));

	/* One empty record spans all space after the metadata header. */
	ea = GFS2_EA_BH2FIRST(*bhp);
	ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
	ea->ea_type = GFS2_EATYPE_UNUSED;
	ea->ea_flags = GFS2_EAFLAG_LAST;
	ea->ea_num_ptrs = 0;

	ip->i_di.di_blocks++;

	return 0;
}
| 603 | |||
/**
 * ea_write - writes the request info to an ea, creating new blocks if
 *            necessary
 * @ip: inode that is being modified
 * @ea: the location of the new ea in a block
 * @er: the write request
 *
 * Must be called inside an open transaction.  The caller guarantees the
 * record at @ea is large enough for the request (see ea_calc_size()).
 *
 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
 *
 * returns : errno
 */

static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
		    struct gfs2_ea_request *er)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	/* Fill in the header fields of the record. */
	ea->ea_data_len = cpu_to_be32(er->er_data_len);
	ea->ea_name_len = er->er_name_len;
	ea->ea_type = er->er_type;
	ea->__pad = 0;

	memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);

	if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
		/* Small value: store it inline ("stuffed") after the name. */
		ea->ea_num_ptrs = 0;
		memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
	} else {
		/* Large value: spill it into freshly allocated data blocks
		 * and store big-endian block pointers after the name. */
		u64 *dataptr = GFS2_EA2DATAPTRS(ea);
		const char *data = er->er_data;
		unsigned int data_len = er->er_data_len;
		unsigned int copy;
		unsigned int x;

		ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize);
		for (x = 0; x < ea->ea_num_ptrs; x++) {
			struct buffer_head *bh;
			u64 block;
			int mh_size = sizeof(struct gfs2_meta_header);

			block = gfs2_alloc_meta(ip);

			bh = gfs2_meta_new(ip->i_gl, block);
			gfs2_trans_add_bh(ip->i_gl, bh, 1);
			gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);

			ip->i_di.di_blocks++;

			/* Copy up to one journaled block of value data and
			 * zero-fill the tail so no stale data hits disk. */
			copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
							   data_len;
			memcpy(bh->b_data + mh_size, data, copy);
			if (copy < sdp->sd_jbsize)
				memset(bh->b_data + mh_size + copy, 0,
				       sdp->sd_jbsize - copy);

			*dataptr++ = cpu_to_be64(bh->b_blocknr);
			data += copy;
			data_len -= copy;

			brelse(bh);
		}

		/* The whole value must have been consumed exactly. */
		gfs2_assert_withdraw(sdp, !data_len);
	}

	return 0;
}
| 671 | |||
/* Callback invoked by ea_alloc_skeleton() from inside the open
 * transaction to perform the actual EA modification.  Returns -errno. */
typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
				   struct gfs2_ea_request *er, void *private);
| 674 | |||
/**
 * ea_alloc_skeleton - common scaffolding for EA changes that allocate blocks
 * @ip: the inode being modified
 * @er: the EA request
 * @blks: number of new blocks the operation may allocate
 * @skeleton_call: does the real work inside the transaction
 * @private: opaque state passed through to @skeleton_call
 *
 * Performs the full allocate-and-modify dance: quota lock/check, block
 * reservation, transaction begin, the callback, then the dinode update
 * (mode if requested via GFS2_ERF_MODE, and ctime).  Teardown is in
 * strict reverse order of acquisition via the goto chain.
 *
 * Returns: errno
 */

static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
			     unsigned int blks,
			     ea_skeleton_call_t skeleton_call, void *private)
{
	struct gfs2_alloc *al;
	struct buffer_head *dibh;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
	if (error)
		goto out_gunlock_q;

	al->al_requested = blks;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	/* Budget: the new blocks, the resource group bitmaps, plus the
	 * dinode, statfs and quota changes. */
	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
				 blks + al->al_rgd->rd_ri.ri_length +
				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	error = skeleton_call(ip, er, private);
	if (error)
		goto out_end_trans;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		if (er->er_flags & GFS2_ERF_MODE) {
			/* A mode change must not change the file type bits. */
			gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
					     (ip->i_di.di_mode & S_IFMT) ==
					     (er->er_mode & S_IFMT));
			ip->i_di.di_mode = er->er_mode;
		}
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

out_end_trans:
	gfs2_trans_end(GFS2_SB(&ip->i_inode));
out_ipres:
	gfs2_inplace_release(ip);
out_gunlock_q:
	gfs2_quota_unlock(ip);
out:
	gfs2_alloc_put(ip);
	return error;
}
| 733 | |||
| 734 | static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, | ||
| 735 | void *private) | ||
| 736 | { | ||
| 737 | struct buffer_head *bh; | ||
| 738 | int error; | ||
| 739 | |||
| 740 | error = ea_alloc_blk(ip, &bh); | ||
| 741 | if (error) | ||
| 742 | return error; | ||
| 743 | |||
| 744 | ip->i_di.di_eattr = bh->b_blocknr; | ||
| 745 | error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er); | ||
| 746 | |||
| 747 | brelse(bh); | ||
| 748 | |||
| 749 | return error; | ||
| 750 | } | ||
| 751 | |||
| 752 | /** | ||
| 753 | * ea_init - initializes a new eattr block | ||
| 754 | * @ip: | ||
| 755 | * @er: | ||
| 756 | * | ||
| 757 | * Returns: errno | ||
| 758 | */ | ||
| 759 | |||
| 760 | static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 761 | { | ||
| 762 | unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; | ||
| 763 | unsigned int blks = 1; | ||
| 764 | |||
| 765 | if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize) | ||
| 766 | blks += DIV_ROUND_UP(er->er_data_len, jbsize); | ||
| 767 | |||
| 768 | return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL); | ||
| 769 | } | ||
| 770 | |||
| 771 | static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) | ||
| 772 | { | ||
| 773 | u32 ea_size = GFS2_EA_SIZE(ea); | ||
| 774 | struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea + | ||
| 775 | ea_size); | ||
| 776 | u32 new_size = GFS2_EA_REC_LEN(ea) - ea_size; | ||
| 777 | int last = ea->ea_flags & GFS2_EAFLAG_LAST; | ||
| 778 | |||
| 779 | ea->ea_rec_len = cpu_to_be32(ea_size); | ||
| 780 | ea->ea_flags ^= last; | ||
| 781 | |||
| 782 | new->ea_rec_len = cpu_to_be32(new_size); | ||
| 783 | new->ea_flags = last; | ||
| 784 | |||
| 785 | return new; | ||
| 786 | } | ||
| 787 | |||
/* Invalidate the old copy of an attribute after a new copy has been
 * written elsewhere.  If possible the record's space is merged into the
 * previous record; otherwise (first record in the block, or unstuffed)
 * it is just marked unused.  Must run inside an open transaction. */
static void ea_set_remove_stuffed(struct gfs2_inode *ip,
				  struct gfs2_ea_location *el)
{
	struct gfs2_ea_header *ea = el->el_ea;
	struct gfs2_ea_header *prev = el->el_prev;
	u32 len;

	gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);

	if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
		ea->ea_type = GFS2_EATYPE_UNUSED;
		return;
	} else if (GFS2_EA2NEXT(prev) != ea) {
		/* el_prev may lag one record behind; advance and verify. */
		prev = GFS2_EA2NEXT(prev);
		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), GFS2_EA2NEXT(prev) == ea);
	}

	/* Fold this record's space into the previous record. */
	len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
	prev->ea_rec_len = cpu_to_be32(len);

	if (GFS2_EA_IS_LAST(ea))
		prev->ea_flags |= GFS2_EAFLAG_LAST;
}
| 811 | |||
/* State shared between ea_set_i() and its ea_foreach() callbacks.
 * (Historical quirk: the first field uses an ea_ prefix rather than
 * the struct's es_ prefix.) */
struct ea_set {
	/* Nonzero when the chosen in-use record has enough slack to be
	 * split, with the request written into the second half. */
	int ea_split;

	struct gfs2_ea_request *es_er;	/* the set request */
	struct gfs2_ea_location *es_el;	/* old copy to invalidate, or NULL */

	struct buffer_head *es_bh;	/* block holding the target record */
	struct gfs2_ea_header *es_ea;	/* the target record itself */
};
| 821 | |||
/* Write the request into an already-located record in @bh inside a
 * small local transaction (the stuffed case: no block allocation).
 * Splits the record first if es->ea_split is set, invalidates the old
 * copy of the attribute (es->es_el) and updates the dinode. */
static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
				 struct gfs2_ea_header *ea, struct ea_set *es)
{
	struct gfs2_ea_request *er = es->es_er;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + 2 * RES_EATTR, 0);
	if (error)
		return error;

	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	if (es->ea_split)
		ea = ea_split_ea(ea);

	/* Cannot fail here: the stuffed path of ea_write() allocates nothing. */
	ea_write(ip, ea, er);

	if (es->es_el)
		ea_set_remove_stuffed(ip, es->es_el);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (er->er_flags & GFS2_ERF_MODE) {
		/* A mode change must not change the file type bits. */
		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
			(ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
		ip->i_di.di_mode = er->er_mode;
	}
	ip->i_di.di_ctime = get_seconds();
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);
out:
	gfs2_trans_end(GFS2_SB(&ip->i_inode));
	return error;
}
| 860 | |||
/* Skeleton callback used when the value will not fit stuffed: write the
 * request into the record saved in the ea_set (@private), splitting it
 * first if needed.  Data blocks are allocated by ea_write() inside the
 * transaction that ea_alloc_skeleton() already opened. */
static int ea_set_simple_alloc(struct gfs2_inode *ip,
			       struct gfs2_ea_request *er, void *private)
{
	struct ea_set *es = private;
	struct gfs2_ea_header *ea = es->es_ea;
	int error;

	gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);

	if (es->ea_split)
		ea = ea_split_ea(ea);

	error = ea_write(ip, ea, er);
	if (error)
		return error;

	/* Invalidate the previous copy of this attribute, if any. */
	if (es->es_el)
		ea_set_remove_stuffed(ip, es->es_el);

	return 0;
}
| 882 | |||
/* ea_foreach() callback: try to place the set request into the record
 * currently being visited.  Returns 0 to keep scanning (record too
 * small), 1 when the request has been written, or -errno on failure. */
static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
			 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
			 void *private)
{
	struct ea_set *es = private;
	unsigned int size;
	int stuffed;
	int error;

	/* size = space the request needs; stuffed = fits without data blocks. */
	stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size);

	if (ea->ea_type == GFS2_EATYPE_UNUSED) {
		if (GFS2_EA_REC_LEN(ea) < size)
			return 0;
		/* A dead unstuffed record still owns data blocks; free them
		 * (leave_unstuffed=1 keeps the record itself in place). */
		if (!GFS2_EA_IS_STUFFED(ea)) {
			error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
			if (error)
				return error;
		}
		es->ea_split = 0;
	} else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
		/* In-use record with enough slack: split it. */
		es->ea_split = 1;
	else
		return 0;

	if (stuffed) {
		error = ea_set_simple_noalloc(ip, bh, ea, es);
		if (error)
			return error;
	} else {
		unsigned int blks;

		es->es_bh = bh;
		es->es_ea = ea;
		blks = 2 + DIV_ROUND_UP(es->es_er->er_data_len,
					GFS2_SB(&ip->i_inode)->sd_jbsize);

		error = ea_alloc_skeleton(ip, es->es_er, blks,
					  ea_set_simple_alloc, es);
		if (error)
			return error;
	}

	return 1;
}
| 928 | |||
/* Skeleton callback for the no-free-space case: allocate a brand new EA
 * block, hang it off the indirect EA block (creating the indirect block
 * first if the inode does not have one yet) and write the request into
 * it.  @private is the old copy of the attribute to invalidate, or NULL.
 * Runs inside the transaction opened by ea_alloc_skeleton(). */
static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
			void *private)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *indbh, *newbh;
	u64 *eablk;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
		u64 *end;

		/* Find the first free pointer slot in the existing
		 * indirect block (a zero is zero in any endianness). */
		error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
				       &indbh);
		if (error)
			return error;

		if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
			error = -EIO;
			goto out;
		}

		eablk = (u64 *)(indbh->b_data + mh_size);
		end = eablk + sdp->sd_inptrs;

		for (; eablk < end; eablk++)
			if (!*eablk)
				break;

		if (eablk == end) {
			error = -ENOSPC;
			goto out;
		}

		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
	} else {
		/* No indirect block yet: create one, demote the current
		 * direct EA block into its first slot. */
		u64 blk;

		blk = gfs2_alloc_meta(ip);

		indbh = gfs2_meta_new(ip->i_gl, blk);
		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
		gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
		gfs2_buffer_clear_tail(indbh, mh_size);

		eablk = (u64 *)(indbh->b_data + mh_size);
		*eablk = cpu_to_be64(ip->i_di.di_eattr);
		ip->i_di.di_eattr = blk;
		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
		ip->i_di.di_blocks++;

		eablk++;
	}

	error = ea_alloc_blk(ip, &newbh);
	if (error)
		goto out;

	*eablk = cpu_to_be64((u64)newbh->b_blocknr);
	error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
	brelse(newbh);
	if (error)
		goto out;

	if (private)
		ea_set_remove_stuffed(ip, private);

out:
	brelse(indbh);
	return error;
}
| 1000 | |||
| 1001 | static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, | ||
| 1002 | struct gfs2_ea_location *el) | ||
| 1003 | { | ||
| 1004 | struct ea_set es; | ||
| 1005 | unsigned int blks = 2; | ||
| 1006 | int error; | ||
| 1007 | |||
| 1008 | memset(&es, 0, sizeof(struct ea_set)); | ||
| 1009 | es.es_er = er; | ||
| 1010 | es.es_el = el; | ||
| 1011 | |||
| 1012 | error = ea_foreach(ip, ea_set_simple, &es); | ||
| 1013 | if (error > 0) | ||
| 1014 | return 0; | ||
| 1015 | if (error) | ||
| 1016 | return error; | ||
| 1017 | |||
| 1018 | if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) | ||
| 1019 | blks++; | ||
| 1020 | if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize) | ||
| 1021 | blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); | ||
| 1022 | |||
| 1023 | return ea_alloc_skeleton(ip, er, blks, ea_set_block, el); | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | static int ea_set_remove_unstuffed(struct gfs2_inode *ip, | ||
| 1027 | struct gfs2_ea_location *el) | ||
| 1028 | { | ||
| 1029 | if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) { | ||
| 1030 | el->el_prev = GFS2_EA2NEXT(el->el_prev); | ||
| 1031 | gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), | ||
| 1032 | GFS2_EA2NEXT(el->el_prev) == el->el_ea); | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0); | ||
| 1036 | } | ||
| 1037 | |||
/* Core of the set operation, called with the inode glock held
 * exclusively.  Honours XATTR_CREATE (fail with -EEXIST if the
 * attribute already exists) and XATTR_REPLACE (fail with -ENODATA if it
 * does not) in er_flags. */
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct gfs2_ea_location el;
	int error;

	if (!ip->i_di.di_eattr) {
		if (er->er_flags & XATTR_REPLACE)
			return -ENODATA;
		/* First EA on this inode: create the EA block. */
		return ea_init(ip, er);
	}

	error = gfs2_ea_find(ip, er, &el);
	if (error)
		return error;

	if (el.el_ea) {
		/* Replacing: refuse on append-only inodes. */
		if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
			brelse(el.el_bh);
			return -EPERM;
		}

		error = -EEXIST;
		if (!(er->er_flags & XATTR_CREATE)) {
			/* Write the new copy first; only then free the old
			 * copy's data blocks if it was unstuffed. */
			int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
			error = ea_set_i(ip, er, &el);
			if (!error && unstuffed)
				ea_set_remove_unstuffed(ip, &el);
		}

		brelse(el.el_bh);
	} else {
		error = -ENODATA;
		if (!(er->er_flags & XATTR_REPLACE))
			error = ea_set_i(ip, er, NULL);
	}

	return error;
}
| 1076 | |||
| 1077 | int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 1078 | { | ||
| 1079 | struct gfs2_holder i_gh; | ||
| 1080 | int error; | ||
| 1081 | |||
| 1082 | if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN) | ||
| 1083 | return -EINVAL; | ||
| 1084 | if (!er->er_data || !er->er_data_len) { | ||
| 1085 | er->er_data = NULL; | ||
| 1086 | er->er_data_len = 0; | ||
| 1087 | } | ||
| 1088 | error = ea_check_size(GFS2_SB(&ip->i_inode), er); | ||
| 1089 | if (error) | ||
| 1090 | return error; | ||
| 1091 | |||
| 1092 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | ||
| 1093 | if (error) | ||
| 1094 | return error; | ||
| 1095 | |||
| 1096 | if (IS_IMMUTABLE(&ip->i_inode)) | ||
| 1097 | error = -EPERM; | ||
| 1098 | else | ||
| 1099 | error = gfs2_ea_ops[er->er_type]->eo_set(ip, er); | ||
| 1100 | |||
| 1101 | gfs2_glock_dq_uninit(&i_gh); | ||
| 1102 | |||
| 1103 | return error; | ||
| 1104 | } | ||
| 1105 | |||
/* Remove a stuffed EA record in place, inside a small local
 * transaction: merge its space into the previous record, or mark it
 * unused if it is the first record in the block.  Updates ctime. */
static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
{
	struct gfs2_ea_header *ea = el->el_ea;
	struct gfs2_ea_header *prev = el->el_prev;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
	if (error)
		return error;

	gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);

	if (prev) {
		u32 len;

		/* Fold this record's space into the previous record. */
		len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
		prev->ea_rec_len = cpu_to_be32(len);

		if (GFS2_EA_IS_LAST(ea))
			prev->ea_flags |= GFS2_EAFLAG_LAST;
	} else
		ea->ea_type = GFS2_EATYPE_UNUSED;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(GFS2_SB(&ip->i_inode));

	return error;
}
| 1142 | |||
| 1143 | int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
| 1144 | { | ||
| 1145 | struct gfs2_ea_location el; | ||
| 1146 | int error; | ||
| 1147 | |||
| 1148 | if (!ip->i_di.di_eattr) | ||
| 1149 | return -ENODATA; | ||
| 1150 | |||
| 1151 | error = gfs2_ea_find(ip, er, &el); | ||
| 1152 | if (error) | ||
| 1153 | return error; | ||
| 1154 | if (!el.el_ea) | ||
| 1155 | return -ENODATA; | ||
| 1156 | |||
| 1157 | if (GFS2_EA_IS_STUFFED(el.el_ea)) | ||
| 1158 | error = ea_remove_stuffed(ip, &el); | ||
| 1159 | else | ||
| 1160 | error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, | ||
| 1161 | 0); | ||
| 1162 | |||
| 1163 | brelse(el.el_bh); | ||
| 1164 | |||
| 1165 | return error; | ||
| 1166 | } | ||
| 1167 | |||
/**
 * gfs2_ea_remove - removes an extended attribute
 * @ip: pointer to the inode of the target file
 * @er: request information
 *
 * Validates the name, takes the inode glock exclusively, refuses on
 * immutable or append-only inodes, and dispatches on the namespace.
 *
 * Returns: errno
 */

int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct gfs2_holder i_gh;
	int error;

	if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
		return -EINVAL;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return error;

	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
		error = -EPERM;
	else
		error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);

	gfs2_glock_dq_uninit(&i_gh);

	return error;
}
| 1197 | |||
/* Overwrite the value of an unstuffed EA with @data (used for ACL
 * updates on chmod).  Mirrors ea_get_unstuffed()'s two-phase read, but
 * journals each block before writing into it.
 *
 * Transaction ownership is asymmetric: on success the transaction begun
 * here is left OPEN for the caller (gfs2_ea_acl_chmod) to finish after
 * it updates the dinode; on failure it is ended here ("fail" label). */
static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
				  struct gfs2_ea_header *ea, char *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head **bh;
	unsigned int amount = GFS2_EA_DATA_LEN(ea);
	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
	u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
	unsigned int x;
	int error;

	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
	if (!bh)
		return -ENOMEM;

	error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
	if (error)
		goto out;

	/* Phase 1: start async reads on every data block. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
				       bh + x);
		if (error) {
			while (x--)
				brelse(bh[x]);
			goto fail;
		}
		dataptrs++;
	}

	/* Phase 2: wait, verify metadata type, journal and overwrite. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_wait(sdp, bh[x]);
		if (error) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			goto fail;
		}
		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			error = -EIO;
			goto fail;
		}

		gfs2_trans_add_bh(ip->i_gl, bh[x], 1);

		memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);

		/* amount may wrap after the final partial chunk; the loop
		 * terminates before it is used again. */
		amount -= sdp->sd_jbsize;
		data += sdp->sd_jbsize;

		brelse(bh[x]);
	}

out:
	kfree(bh);
	return error;

fail:
	gfs2_trans_end(sdp);
	kfree(bh);
	return error;
}
| 1262 | |||
/* Rewrite an ACL EA's value and apply the new inode attributes (chmod
 * path).  For a stuffed value, a small transaction is begun here; for
 * an unstuffed one, ea_acl_chmod_unstuffed() begins its own transaction
 * and, on success, leaves it open.  Either way the transaction is ended
 * at the bottom of this function; on helper failure it was already
 * ended there, so returning early is safe. */
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		      struct iattr *attr, char *data)
{
	struct buffer_head *dibh;
	int error;

	if (GFS2_EA_IS_STUFFED(el->el_ea)) {
		error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
		if (error)
			return error;

		gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
		memcpy(GFS2_EA2DATA(el->el_ea), data,
		       GFS2_EA_DATA_LEN(el->el_ea));
	} else
		error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);

	if (error)
		return error;

	/* Apply the attribute change and write the dinode back. */
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		error = inode_setattr(&ip->i_inode, attr);
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
		gfs2_inode_attr_out(ip);
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(GFS2_SB(&ip->i_inode));

	return error;
}
| 1297 | |||
/* Free every EA block referenced by the inode's indirect EA block, then
 * clear the indirect flag.  Two passes over the pointer array: pass 1
 * builds the resource-group list and counts blocks (coalescing
 * physically contiguous runs); pass 2, inside the transaction with all
 * rgrp glocks held, zeroes the pointers and frees the extents. */
static int ea_dealloc_indirect(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrp_list rlist;
	struct buffer_head *indbh, *dibh;
	u64 *eablk, *end;
	unsigned int rg_blocks = 0;
	u64 bstart = 0;
	unsigned int blen = 0;
	unsigned int blks = 0;
	unsigned int x;
	int error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));

	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &indbh);
	if (error)
		return error;

	if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
		error = -EIO;
		goto out;
	}

	eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
	end = eablk + sdp->sd_inptrs;

	/* Pass 1: collect resource groups, coalescing contiguous runs. */
	for (; eablk < end; eablk++) {
		u64 bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(sdp, &rlist, bstart);
			bstart = bn;
			blen = 1;
		}
		blks++;
	}
	if (bstart)
		gfs2_rlist_add(sdp, &rlist, bstart);
	else
		goto out;	/* indirect block holds no pointers at all */

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);

	/* Budget for every affected resource group's bitmap blocks. */
	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_ri.ri_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist_free;

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT +
				 RES_STATFS + RES_QUOTA, blks);
	if (error)
		goto out_gunlock;

	gfs2_trans_add_bh(ip->i_gl, indbh, 1);

	/* Pass 2: zero the pointers and free the coalesced extents. */
	eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
	bstart = 0;
	blen = 0;

	for (; eablk < end; eablk++) {
		u64 bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_free_meta(ip, bstart, blen);
			bstart = bn;
			blen = 1;
		}

		*eablk = 0;
		if (!ip->i_di.di_blocks)
			gfs2_consist_inode(ip);
		ip->i_di.di_blocks--;
	}
	if (bstart)
		gfs2_free_meta(ip, bstart, blen);

	ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

out_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist_free:
	gfs2_rlist_free(&rlist);
out:
	brelse(indbh);
	return error;
}
| 1413 | |||
/**
 * ea_dealloc_block - free the inode's (now empty) extended attribute block
 * @ip: the inode whose di_eattr block is to be released
 *
 * Takes an exclusive lock on the resource group containing the EA block,
 * frees the block inside a transaction, clears di_eattr in the dinode and
 * writes the dinode back.
 *
 * Returns: errno
 */

static int ea_dealloc_block(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_rgrpd *rgd;
	struct buffer_head *dibh;
	int error;

	/* Locate the resource group that contains the EA block */
	rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
	if (!rgd) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
				   &al->al_rgd_gh);
	if (error)
		return error;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE + RES_STATFS +
				 RES_QUOTA, 1);
	if (error)
		goto out_gunlock;

	gfs2_free_meta(ip, ip->i_di.di_eattr, 1);

	ip->i_di.di_eattr = 0;
	/* The freed block must have been accounted in di_blocks */
	if (!ip->i_di.di_blocks)
		gfs2_consist_inode(ip);
	ip->i_di.di_blocks--;

	/* Write the updated dinode back within the same transaction */
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

out_gunlock:
	gfs2_glock_dq_uninit(&al->al_rgd_gh);
	return error;
}
| 1458 | |||
/**
 * gfs2_ea_dealloc - deallocate the extended attribute fork
 * @ip: the inode
 *
 * Frees everything hanging off di_eattr: first the data blocks of each
 * unstuffed EA, then (if present) the indirect pointer block, and
 * finally the EA block itself.
 *
 * Returns: errno
 */

int gfs2_ea_dealloc(struct gfs2_inode *ip)
{
	struct gfs2_alloc *al;
	int error;

	al = gfs2_alloc_get(ip);

	/* Hold quota locks for the block-count updates below */
	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out_alloc;

	/* Hold the rindex so resource groups can be looked up */
	error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
	if (error)
		goto out_quota;

	/* Free the data blocks of every unstuffed EA */
	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
	if (error)
		goto out_rindex;

	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
		error = ea_dealloc_indirect(ip);
		if (error)
			goto out_rindex;
	}

	error = ea_dealloc_block(ip);

out_rindex:
	gfs2_glock_dq_uninit(&al->al_ri_gh);
out_quota:
	gfs2_quota_unhold(ip);
out_alloc:
	gfs2_alloc_put(ip);
	return error;
}
| 1501 | |||
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h new file mode 100644 index 000000000000..ffa65947d686 --- /dev/null +++ b/fs/gfs2/eattr.h | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __EATTR_DOT_H__
#define __EATTR_DOT_H__

struct gfs2_inode;
struct iattr;

/* Endian-converting accessors for on-disk EA header fields */
#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)

/*
 * In-block footprint of one EA: header + name, followed by either the
 * stuffed data itself or the array of data block pointers, rounded up
 * to an 8-byte boundary.
 */
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
      ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
      (sizeof(u64) * (ea)->ea_num_ptrs)), 8)

/* A "stuffed" EA has no data block pointers: data lives in the EA block */
#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)

/* Space a request needs if its data can be stuffed into the EA block */
#define GFS2_EAREQ_SIZE_STUFFED(er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)

/* Space for an unstuffed request: one u64 per journaled-size data block */
#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
      sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)

/* The name immediately follows the header; the data follows the name */
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)

/* Data block pointers start at the next 8-byte boundary after the name */
#define GFS2_EA2DATAPTRS(ea) \
((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))

/* Step to the next EA header using the on-disk record length */
#define GFS2_EA2NEXT(ea) \
((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))

/* First EA header in a buffer, just past the metadata header */
#define GFS2_EA_BH2FIRST(bh) \
((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))

/* er_flags bit: presumably means er_mode is valid and should be applied
   alongside the EA change — confirm at the ea_set callers */
#define GFS2_ERF_MODE 0x80000000

/* A decoded extended-attribute get/set/remove request */
struct gfs2_ea_request {
	const char *er_name;		/* attribute name (no prefix) */
	char *er_data;			/* attribute value buffer */
	unsigned int er_name_len;	/* length of er_name, no NUL */
	unsigned int er_data_len;	/* length of er_data in bytes */
	unsigned int er_type;		/* GFS2_EATYPE_... */
	int er_flags;
	mode_t er_mode;
};

/* Where an EA lives on disk: its buffer, its header, and the header
   immediately preceding it (needed when unlinking a record) */
struct gfs2_ea_location {
	struct buffer_head *el_bh;
	struct gfs2_ea_header *el_ea;
	struct gfs2_ea_header *el_prev;
};

int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);

int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);

int gfs2_ea_dealloc(struct gfs2_inode *ip);

/* Exported to acl.c */

int gfs2_ea_find(struct gfs2_inode *ip,
		 struct gfs2_ea_request *er,
		 struct gfs2_ea_location *el);
int gfs2_ea_get_copy(struct gfs2_inode *ip,
		     struct gfs2_ea_location *el,
		     char *data);
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		      struct iattr *attr, char *data);

/*
 * User-visible length of an EA name: namespace prefix ("user.",
 * "system." or "security." — 5, 7 and 9 chars respectively) plus the
 * stored name plus a terminating NUL.  Unknown types yield 0.
 */
static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
{
	switch (ea->ea_type) {
	case GFS2_EATYPE_USR:
		return 5 + ea->ea_name_len + 1;
	case GFS2_EATYPE_SYS:
		return 7 + ea->ea_name_len + 1;
	case GFS2_EATYPE_SECURITY:
		return 9 + ea->ea_name_len + 1;
	default:
		return 0;
	}
}

#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h new file mode 100644 index 000000000000..3bb11c0f8b56 --- /dev/null +++ b/fs/gfs2/gfs2.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __GFS2_DOT_H__
#define __GFS2_DOT_H__

/*
 * Named constants for boolean-style function arguments, so call sites
 * read as e.g. foo(..., CREATE) instead of a bare 0 or 1.
 */
enum {
	NO_CREATE = 0,
	CREATE = 1,
};

enum {
	NO_WAIT = 0,
	WAIT = 1,
};

enum {
	NO_FORCE = 0,
	FORCE = 1,
};

/* Presumably a stack-buffer fast path for short names — confirm at callers */
#define GFS2_FAST_NAME_SIZE 8

#endif /* __GFS2_DOT_H__ */
| 31 | |||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c new file mode 100644 index 000000000000..78fe0fae23ff --- /dev/null +++ b/fs/gfs2/glock.c | |||
| @@ -0,0 +1,2231 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/delay.h> | ||
| 16 | #include <linux/sort.h> | ||
| 17 | #include <linux/jhash.h> | ||
| 18 | #include <linux/kallsyms.h> | ||
| 19 | #include <linux/gfs2_ondisk.h> | ||
| 20 | #include <linux/list.h> | ||
| 21 | #include <linux/lm_interface.h> | ||
| 22 | #include <asm/uaccess.h> | ||
| 23 | |||
| 24 | #include "gfs2.h" | ||
| 25 | #include "incore.h" | ||
| 26 | #include "glock.h" | ||
| 27 | #include "glops.h" | ||
| 28 | #include "inode.h" | ||
| 29 | #include "lm.h" | ||
| 30 | #include "lops.h" | ||
| 31 | #include "meta_io.h" | ||
| 32 | #include "quota.h" | ||
| 33 | #include "super.h" | ||
| 34 | #include "util.h" | ||
| 35 | |||
/*
 * A "greedy" hold on a glock: the holder itself plus associated
 * deferred work (freed together in rq_greedy via container_of).
 */
struct greedy {
	struct gfs2_holder gr_gh;
	struct work_struct gr_work;
};

/* One chain head of the global glock hash table */
struct gfs2_gl_hash_bucket {
	struct hlist_head hb_list;
};

/* Callback type used when examining glocks in the hash table */
typedef void (*glock_examiner) (struct gfs2_glock * gl);

static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int dump_glock(struct gfs2_glock *gl);
static int dump_inode(struct gfs2_inode *ip);

/* 2^15 buckets; the table is shared by every mounted gfs2 filesystem
   (gl_hash() mixes the superblock pointer into the hash) */
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)

static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
| 56 | |||
| 57 | /* | ||
| 58 | * Despite what you might think, the numbers below are not arbitrary :-) | ||
| 59 | * They are taken from the ipv4 routing hash code, which is well tested | ||
| 60 | * and thus should be nearly optimal. Later on we might tweek the numbers | ||
| 61 | * but for now this should be fine. | ||
| 62 | * | ||
| 63 | * The reason for putting the locks in a separate array from the list heads | ||
| 64 | * is that we can have fewer locks than list heads and save memory. We use | ||
| 65 | * the same hash function for both, but with a different hash mask. | ||
| 66 | */ | ||
| 67 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ | ||
| 68 | defined(CONFIG_PROVE_LOCKING) | ||
| 69 | |||
| 70 | #ifdef CONFIG_LOCKDEP | ||
| 71 | # define GL_HASH_LOCK_SZ 256 | ||
| 72 | #else | ||
| 73 | # if NR_CPUS >= 32 | ||
| 74 | # define GL_HASH_LOCK_SZ 4096 | ||
| 75 | # elif NR_CPUS >= 16 | ||
| 76 | # define GL_HASH_LOCK_SZ 2048 | ||
| 77 | # elif NR_CPUS >= 8 | ||
| 78 | # define GL_HASH_LOCK_SZ 1024 | ||
| 79 | # elif NR_CPUS >= 4 | ||
| 80 | # define GL_HASH_LOCK_SZ 512 | ||
| 81 | # else | ||
| 82 | # define GL_HASH_LOCK_SZ 256 | ||
| 83 | # endif | ||
| 84 | #endif | ||
| 85 | |||
| 86 | /* We never want more locks than chains */ | ||
| 87 | #if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ | ||
| 88 | # undef GL_HASH_LOCK_SZ | ||
| 89 | # define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE | ||
| 90 | #endif | ||
| 91 | |||
| 92 | static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ]; | ||
| 93 | |||
| 94 | static inline rwlock_t *gl_lock_addr(unsigned int x) | ||
| 95 | { | ||
| 96 | return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)]; | ||
| 97 | } | ||
| 98 | #else /* not SMP, so no spinlocks required */ | ||
| 99 | static inline rwlock_t *gl_lock_addr(x) | ||
| 100 | { | ||
| 101 | return NULL; | ||
| 102 | } | ||
| 103 | #endif | ||
| 104 | |||
| 105 | /** | ||
| 106 | * relaxed_state_ok - is a requested lock compatible with the current lock mode? | ||
| 107 | * @actual: the current state of the lock | ||
| 108 | * @requested: the lock state that was requested by the caller | ||
| 109 | * @flags: the modifier flags passed in by the caller | ||
| 110 | * | ||
| 111 | * Returns: 1 if the locks are compatible, 0 otherwise | ||
| 112 | */ | ||
| 113 | |||
| 114 | static inline int relaxed_state_ok(unsigned int actual, unsigned requested, | ||
| 115 | int flags) | ||
| 116 | { | ||
| 117 | if (actual == requested) | ||
| 118 | return 1; | ||
| 119 | |||
| 120 | if (flags & GL_EXACT) | ||
| 121 | return 0; | ||
| 122 | |||
| 123 | if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED) | ||
| 124 | return 1; | ||
| 125 | |||
| 126 | if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY)) | ||
| 127 | return 1; | ||
| 128 | |||
| 129 | return 0; | ||
| 130 | } | ||
| 131 | |||
/**
 * gl_hash() - Turn glock number into hash bucket number
 * @sdp: the filesystem the lock belongs to
 * @name: the lock name (number and type)
 *
 * Returns: The number of the corresponding hash bucket
 */

static unsigned int gl_hash(const struct gfs2_sbd *sdp,
			    const struct lm_lockname *name)
{
	unsigned int h;

	/* Mix number, type, and the superblock pointer so identical lock
	   numbers from different filesystems land in different buckets */
	h = jhash(&name->ln_number, sizeof(u64), 0);
	h = jhash(&name->ln_type, sizeof(unsigned int), h);
	h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
	h &= GFS2_GL_HASH_MASK;

	return h;
}
| 151 | |||
/**
 * glock_free() - Perform a few checks and then release struct gfs2_glock
 * @gl: The glock to release
 *
 * Also calls lock module to release its internal structure for this glock.
 *
 */

static void glock_free(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct inode *aspace = gl->gl_aspace;

	/* Let the lock module drop its per-lock state first */
	gfs2_lm_put_lock(sdp, gl->gl_lock);

	/* Only inode/rgrp glocks have an address space attached
	   (see gfs2_glock_get) */
	if (aspace)
		gfs2_aspace_put(aspace);

	kmem_cache_free(gfs2_glock_cachep, gl);
}
| 172 | |||
/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 * Paired with gfs2_glock_put().
 */

void gfs2_glock_hold(struct gfs2_glock *gl)
{
	atomic_inc(&gl->gl_ref);
}
| 183 | |||
| 184 | /** | ||
| 185 | * gfs2_glock_put() - Decrement reference count on glock | ||
| 186 | * @gl: The glock to put | ||
| 187 | * | ||
| 188 | */ | ||
| 189 | |||
| 190 | int gfs2_glock_put(struct gfs2_glock *gl) | ||
| 191 | { | ||
| 192 | int rv = 0; | ||
| 193 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 194 | |||
| 195 | write_lock(gl_lock_addr(gl->gl_hash)); | ||
| 196 | if (atomic_dec_and_test(&gl->gl_ref)) { | ||
| 197 | hlist_del(&gl->gl_list); | ||
| 198 | write_unlock(gl_lock_addr(gl->gl_hash)); | ||
| 199 | BUG_ON(spin_is_locked(&gl->gl_spin)); | ||
| 200 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); | ||
| 201 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | ||
| 202 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | ||
| 203 | gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); | ||
| 204 | gfs2_assert(sdp, list_empty(&gl->gl_waiters2)); | ||
| 205 | gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); | ||
| 206 | glock_free(gl); | ||
| 207 | rv = 1; | ||
| 208 | goto out; | ||
| 209 | } | ||
| 210 | write_unlock(gl_lock_addr(gl->gl_hash)); | ||
| 211 | out: | ||
| 212 | return rv; | ||
| 213 | } | ||
| 214 | |||
| 215 | /** | ||
| 216 | * queue_empty - check to see if a glock's queue is empty | ||
| 217 | * @gl: the glock | ||
| 218 | * @head: the head of the queue to check | ||
| 219 | * | ||
| 220 | * This function protects the list in the event that a process already | ||
| 221 | * has a holder on the list and is adding a second holder for itself. | ||
| 222 | * The glmutex lock is what generally prevents processes from working | ||
| 223 | * on the same glock at once, but the special case of adding a second | ||
| 224 | * holder for yourself ("recursive" locking) doesn't involve locking | ||
| 225 | * glmutex, making the spin lock necessary. | ||
| 226 | * | ||
| 227 | * Returns: 1 if the queue is empty | ||
| 228 | */ | ||
| 229 | |||
| 230 | static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head) | ||
| 231 | { | ||
| 232 | int empty; | ||
| 233 | spin_lock(&gl->gl_spin); | ||
| 234 | empty = list_empty(head); | ||
| 235 | spin_unlock(&gl->gl_spin); | ||
| 236 | return empty; | ||
| 237 | } | ||
| 238 | |||
/**
 * search_bucket() - Find struct gfs2_glock by lock number
 * @hash: the bucket to search
 * @sdp: the filesystem the glock must belong to
 * @name: The lock name
 *
 * Caller holds the bucket's read or write lock.  On success the glock's
 * reference count is bumped before it is returned.
 *
 * Returns: NULL, or the struct gfs2_glock with the requested number
 */

static struct gfs2_glock *search_bucket(unsigned int hash,
					const struct gfs2_sbd *sdp,
					const struct lm_lockname *name)
{
	struct gfs2_glock *gl;
	struct hlist_node *h;

	hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
		if (!lm_name_equal(&gl->gl_name, name))
			continue;
		/* Same name on a different filesystem is a different lock */
		if (gl->gl_sbd != sdp)
			continue;

		atomic_inc(&gl->gl_ref);

		return gl;
	}

	return NULL;
}
| 267 | |||
/**
 * gfs2_glock_find() - Find glock by lock number
 * @sdp: The GFS2 superblock
 * @name: The lock name
 *
 * On success a reference is taken on the returned glock (inside
 * search_bucket); the caller must drop it with gfs2_glock_put().
 *
 * Returns: NULL, or the struct gfs2_glock with the requested number
 */

static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
					  const struct lm_lockname *name)
{
	unsigned int bucket = gl_hash(sdp, name);
	struct gfs2_glock *gl;

	read_lock(gl_lock_addr(bucket));
	gl = search_bucket(bucket, sdp, name);
	read_unlock(gl_lock_addr(bucket));
	return gl;
}
| 288 | |||
/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type };
	struct gfs2_glock *gl, *tmp;
	unsigned int hash = gl_hash(sdp, &name);
	int error;

	/* Fast path: the glock may already be in the hash table
	   (search_bucket takes a reference on it for us) */
	read_lock(gl_lock_addr(hash));
	gl = search_bucket(hash, sdp, &name);
	read_unlock(gl_lock_addr(hash));

	if (gl || !create) {
		*glp = gl;
		return 0;
	}

	gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
	if (!gl)
		return -ENOMEM;

	gl->gl_flags = 0;
	gl->gl_name = name;
	atomic_set(&gl->gl_ref, 1);	/* the reference handed to the caller */
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_hash = hash;
	gl->gl_owner = NULL;
	gl->gl_ip = 0;
	gl->gl_ops = glops;
	gl->gl_req_gh = NULL;
	gl->gl_req_bh = NULL;
	gl->gl_vn = 0;
	gl->gl_stamp = jiffies;
	gl->gl_object = NULL;
	gl->gl_sbd = sdp;
	gl->gl_aspace = NULL;
	lops_init_le(&gl->gl_le, &gfs2_glock_lops);

	/* If this glock protects actual on-disk data or metadata blocks,
	   create a VFS inode to manage the pages/buffers holding them. */
	if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) {
		gl->gl_aspace = gfs2_aspace_get(sdp);
		if (!gl->gl_aspace) {
			error = -ENOMEM;
			goto fail;
		}
	}

	error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
	if (error)
		goto fail_aspace;

	/* Re-check under the write lock: another task may have created and
	   inserted the same glock while we were allocating.  If so, use
	   theirs (search_bucket already took a reference) and free ours. */
	write_lock(gl_lock_addr(hash));
	tmp = search_bucket(hash, sdp, &name);
	if (tmp) {
		write_unlock(gl_lock_addr(hash));
		glock_free(gl);
		gl = tmp;
	} else {
		hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
		write_unlock(gl_lock_addr(hash));
	}

	*glp = gl;

	return 0;

fail_aspace:
	if (gl->gl_aspace)
		gfs2_aspace_put(gl->gl_aspace);
fail:
	kmem_cache_free(gfs2_glock_cachep, gl);
	return error;
}
| 377 | |||
/**
 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Takes a reference on @gl; released again by gfs2_holder_uninit().
 */

void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
		      struct gfs2_holder *gh)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gl;
	/* Record the caller's return address for debugging */
	gh->gh_ip = (unsigned long)__builtin_return_address(0);
	gh->gh_owner = current;
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_error = 0;
	gh->gh_iflags = 0;
	init_completion(&gh->gh_wait);

	/* An exclusive inter-node request is implicitly exclusive
	   among local holders as well */
	if (gh->gh_state == LM_ST_EXCLUSIVE)
		gh->gh_flags |= GL_LOCAL_EXCL;

	gfs2_glock_hold(gl);
}
| 405 | |||
/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */

void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	/* Same implicit local exclusivity as gfs2_holder_init() */
	if (gh->gh_state == LM_ST_EXCLUSIVE)
		gh->gh_flags |= GL_LOCAL_EXCL;

	/* Keep only the "was kmalloc'd" bit; clear all other iflags */
	gh->gh_iflags &= 1 << HIF_ALLOCED;
	gh->gh_ip = (unsigned long)__builtin_return_address(0);
}
| 426 | |||
/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 * Releases the glock reference taken by gfs2_holder_init() and clears
 * the pointer so stale use of the holder is easier to catch.
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_put(gh->gh_gl);
	gh->gh_gl = NULL;
	gh->gh_ip = 0;
}
| 439 | |||
/**
 * gfs2_holder_get - get a struct gfs2_holder structure
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gfp_flags: allocation flags for the kmalloc
 *
 * Figure out how big an impact this function has. Either:
 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
 * 2) Leave it like it is
 *
 * Returns: the holder structure, NULL on ENOMEM
 */

static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl,
					   unsigned int state,
					   int flags, gfp_t gfp_flags)
{
	struct gfs2_holder *gh;

	gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
	if (!gh)
		return NULL;

	gfs2_holder_init(gl, state, flags, gh);
	/* HIF_ALLOCED marks the holder as kmalloc'd so gfs2_holder_put()
	   (and HIF_DEALLOC handling) knows to kfree it */
	set_bit(HIF_ALLOCED, &gh->gh_iflags);
	/* Overwrite the gh_ip set by gfs2_holder_init() so it records our
	   caller rather than this helper */
	gh->gh_ip = (unsigned long)__builtin_return_address(0);
	return gh;
}
| 469 | |||
/**
 * gfs2_holder_put - get rid of a struct gfs2_holder structure
 * @gh: the holder structure
 *
 * Counterpart of gfs2_holder_get(): drops the glock reference and
 * frees the kmalloc'd holder.
 */

static void gfs2_holder_put(struct gfs2_holder *gh)
{
	gfs2_holder_uninit(gh);
	kfree(gh);
}
| 481 | |||
/**
 * rq_mutex - process a mutex request in the queue
 * @gh: the glock holder
 *
 * Grants the glmutex (GLF_LOCK) to the waiter and wakes it; the queue
 * is then blocked until that task releases GLF_LOCK.
 *
 * Returns: 1 if the queue is blocked
 */

static int rq_mutex(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;

	list_del_init(&gh->gh_list);
	/* gh->gh_error never examined. */
	set_bit(GLF_LOCK, &gl->gl_flags);
	complete(&gh->gh_wait);

	return 1;
}
| 500 | |||
/**
 * rq_promote - process a promote request in the queue
 * @gh: the glock holder
 *
 * Acquire a new inter-node lock, or change a lock state to more restrictive.
 *
 * Called and returns with gl_spin held; the spinlock is dropped and
 * reacquired around the lock-module calls.
 *
 * Returns: 1 if the queue is blocked
 */

static int rq_promote(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	/* Current state doesn't satisfy the request: start a state change
	   via the lock module, but only once no one else holds the glock */
	if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
		if (list_empty(&gl->gl_holders)) {
			gl->gl_req_gh = gh;
			set_bit(GLF_LOCK, &gl->gl_flags);
			spin_unlock(&gl->gl_spin);

			/* Opportunistically trim the reclaim list if it has
			   grown past the tunable limit, unless this request
			   is marked priority */
			if (atomic_read(&sdp->sd_reclaim_count) >
			    gfs2_tune_get(sdp, gt_reclaim_limit) &&
			    !(gh->gh_flags & LM_FLAG_PRIORITY)) {
				gfs2_reclaim_glock(sdp);
				gfs2_reclaim_glock(sdp);
			}

			glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
			spin_lock(&gl->gl_spin);
		}
		return 1;
	}

	/* State is compatible: grant the holder unless local exclusivity
	   (ours, or the current first holder's) forbids sharing */
	if (list_empty(&gl->gl_holders)) {
		set_bit(HIF_FIRST, &gh->gh_iflags);
		set_bit(GLF_LOCK, &gl->gl_flags);
	} else {
		struct gfs2_holder *next_gh;
		if (gh->gh_flags & GL_LOCAL_EXCL)
			return 1;
		next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
				     gh_list);
		if (next_gh->gh_flags & GL_LOCAL_EXCL)
			return 1;
	}

	list_move_tail(&gh->gh_list, &gl->gl_holders);
	gh->gh_error = 0;
	set_bit(HIF_HOLDER, &gh->gh_iflags);

	complete(&gh->gh_wait);

	return 0;
}
| 556 | |||
/**
 * rq_demote - process a demote request in the queue
 * @gh: the glock holder
 *
 * Called and returns with gl_spin held; dropped and reacquired around
 * the completion/free and the lock-module calls.
 *
 * Returns: 1 if the queue is blocked
 */

static int rq_demote(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	/* Can't demote while anyone still holds the glock */
	if (!list_empty(&gl->gl_holders))
		return 1;

	if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
		/* Already at the wanted state (or fully unlocked):
		   just retire the request */
		list_del_init(&gh->gh_list);
		gh->gh_error = 0;
		spin_unlock(&gl->gl_spin);
		/* HIF_DEALLOC holders are kmalloc'd and owned by us */
		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
			gfs2_holder_put(gh);
		else
			complete(&gh->gh_wait);
		spin_lock(&gl->gl_spin);
	} else {
		gl->gl_req_gh = gh;
		set_bit(GLF_LOCK, &gl->gl_flags);
		spin_unlock(&gl->gl_spin);

		/* Full drop if unlocking, or if the current state is not
		   exclusive; otherwise convert in place */
		if (gh->gh_state == LM_ST_UNLOCKED ||
		    gl->gl_state != LM_ST_EXCLUSIVE)
			glops->go_drop_th(gl);
		else
			glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);

		spin_lock(&gl->gl_spin);
	}

	return 0;
}
| 597 | |||
/**
 * rq_greedy - process a queued request to drop greedy status
 * @gh: the glock holder
 *
 * Returns: 1 if the queue is blocked
 */

static int rq_greedy(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;

	list_del_init(&gh->gh_list);
	/* gh->gh_error never examined. */
	clear_bit(GLF_GREEDY, &gl->gl_flags);
	/* Drop the spinlock: gfs2_holder_uninit() takes other locks
	   (gfs2_glock_put acquires the hash bucket lock) */
	spin_unlock(&gl->gl_spin);

	/* The holder is embedded in a struct greedy; free the whole thing */
	gfs2_holder_uninit(gh);
	kfree(container_of(gh, struct greedy, gr_gh));

	spin_lock(&gl->gl_spin);

	return 0;
}
| 621 | |||
/**
 * run_queue - process holder structures on a glock
 * @gl: the glock
 *
 * Apparently called with gl_spin held (the rq_* helpers drop and retake
 * it) — confirm at call sites.  Dispatches waiters in priority order:
 * waiters1 (glmutex) first, then waiters2 (demote/greedy), then
 * waiters3 (promote), until a request blocks the queue, GLF_LOCK is
 * set, or no waiters remain.
 */
static void run_queue(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;
	int blocked = 1;

	for (;;) {
		/* Someone owns the glmutex; nothing can run until released */
		if (test_bit(GLF_LOCK, &gl->gl_flags))
			break;

		if (!list_empty(&gl->gl_waiters1)) {
			gh = list_entry(gl->gl_waiters1.next,
					struct gfs2_holder, gh_list);

			if (test_bit(HIF_MUTEX, &gh->gh_iflags))
				blocked = rq_mutex(gh);
			else
				gfs2_assert_warn(gl->gl_sbd, 0);

		} else if (!list_empty(&gl->gl_waiters2) &&
			   !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
			gh = list_entry(gl->gl_waiters2.next,
					struct gfs2_holder, gh_list);

			if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
				blocked = rq_demote(gh);
			else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
				blocked = rq_greedy(gh);
			else
				gfs2_assert_warn(gl->gl_sbd, 0);

		} else if (!list_empty(&gl->gl_waiters3)) {
			gh = list_entry(gl->gl_waiters3.next,
					struct gfs2_holder, gh_list);

			if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
				blocked = rq_promote(gh);
			else
				gfs2_assert_warn(gl->gl_sbd, 0);

		} else
			break;

		if (blocked)
			break;
	}
}
| 673 | |||
/**
 * gfs2_glmutex_lock - acquire a local lock on a glock
 * @gl: the glock
 *
 * Gives caller exclusive access to manipulate a glock structure.
 * If GLF_LOCK is already held, the (stack-allocated) holder is queued
 * on gl_waiters1 and we sleep until rq_mutex() completes gh_wait;
 * otherwise ownership is taken immediately and the completion is
 * satisfied inline so wait_for_completion() returns at once.
 */

static void gfs2_glmutex_lock(struct gfs2_glock *gl)
{
	struct gfs2_holder gh;

	gfs2_holder_init(gl, 0, 0, &gh);
	set_bit(HIF_MUTEX, &gh.gh_iflags);

	spin_lock(&gl->gl_spin);
	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
		list_add_tail(&gh.gh_list, &gl->gl_waiters1);
	} else {
		/* Record owner and caller address for debugging/dumps */
		gl->gl_owner = current;
		gl->gl_ip = (unsigned long)__builtin_return_address(0);
		complete(&gh.gh_wait);
	}
	spin_unlock(&gl->gl_spin);

	wait_for_completion(&gh.gh_wait);
	gfs2_holder_uninit(&gh);
}
| 701 | |||
| 702 | /** | ||
| 703 | * gfs2_glmutex_trylock - try to acquire a local lock on a glock | ||
| 704 | * @gl: the glock | ||
| 705 | * | ||
| 706 | * Returns: 1 if the glock is acquired | ||
| 707 | */ | ||
| 708 | |||
| 709 | static int gfs2_glmutex_trylock(struct gfs2_glock *gl) | ||
| 710 | { | ||
| 711 | int acquired = 1; | ||
| 712 | |||
| 713 | spin_lock(&gl->gl_spin); | ||
| 714 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | ||
| 715 | acquired = 0; | ||
| 716 | } else { | ||
| 717 | gl->gl_owner = current; | ||
| 718 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | ||
| 719 | } | ||
| 720 | spin_unlock(&gl->gl_spin); | ||
| 721 | |||
| 722 | return acquired; | ||
| 723 | } | ||
| 724 | |||
/**
 * gfs2_glmutex_unlock - release a local lock on a glock
 * @gl: the glock
 *
 * Clears GLF_LOCK and ownership, then lets run_queue() hand the glock
 * to the next waiter while still holding gl_spin.
 */

static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
{
	spin_lock(&gl->gl_spin);
	clear_bit(GLF_LOCK, &gl->gl_flags);
	gl->gl_owner = NULL;
	gl->gl_ip = 0;
	run_queue(gl);
	/* NOTE(review): debug assertion that run_queue() returned with
	   gl_spin still held — looks like a temporary sanity check */
	BUG_ON(!spin_is_locked(&gl->gl_spin));
	spin_unlock(&gl->gl_spin);
}
| 741 | |||
/**
 * handle_callback - add a demote request to a lock's queue
 * @gl: the glock
 * @state: the state the caller wants us to change to
 *
 * If a pending demote request (not currently being serviced) already
 * exists on gl_waiters2, it is reused; two conflicting target states
 * are collapsed to LM_ST_UNLOCKED. Otherwise a new self-deallocating
 * (HIF_DEALLOC) holder is allocated with the spinlock dropped and the
 * scan is restarted, since the list may have changed meanwhile.
 *
 * Note: This may fail silently if we are out of memory.
 */

static void handle_callback(struct gfs2_glock *gl, unsigned int state)
{
	struct gfs2_holder *gh, *new_gh = NULL;

restart:
	spin_lock(&gl->gl_spin);

	list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
		if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
		    gl->gl_req_gh != gh) {
			/* Merge with the queued demote; differing targets
			   degrade to a full unlock */
			if (gh->gh_state != state)
				gh->gh_state = LM_ST_UNLOCKED;
			goto out;
		}
	}

	if (new_gh) {
		list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
		new_gh = NULL;
	} else {
		spin_unlock(&gl->gl_spin);

		/* Allocation done without the spinlock; restart the scan */
		new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL);
		if (!new_gh)
			return;
		set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
		set_bit(HIF_DEALLOC, &new_gh->gh_iflags);

		goto restart;
	}

out:
	spin_unlock(&gl->gl_spin);

	/* An existing request was found after we allocated; drop ours */
	if (new_gh)
		gfs2_holder_put(new_gh);
}
| 787 | |||
/*
 * gfs2_glock_inode_squish - force an inode's glock to LM_ST_UNLOCKED
 * @inode: the inode whose i_gl is to be demoted
 *
 * Queues a synchronous on-stack demote holder on gl_waiters2 and waits
 * for run_queue() to process it. Asserts there are no active holders.
 */
void gfs2_glock_inode_squish(struct inode *inode)
{
	struct gfs2_holder gh;
	struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
	gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
	set_bit(HIF_DEMOTE, &gh.gh_iflags);
	spin_lock(&gl->gl_spin);
	gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
	list_add_tail(&gh.gh_list, &gl->gl_waiters2);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);
	wait_for_completion(&gh.gh_wait);
	gfs2_holder_uninit(&gh);
}
| 802 | |||
| 803 | /** | ||
| 804 | * state_change - record that the glock is now in a different state | ||
| 805 | * @gl: the glock | ||
| 806 | * @new_state the new state | ||
| 807 | * | ||
| 808 | */ | ||
| 809 | |||
| 810 | static void state_change(struct gfs2_glock *gl, unsigned int new_state) | ||
| 811 | { | ||
| 812 | int held1, held2; | ||
| 813 | |||
| 814 | held1 = (gl->gl_state != LM_ST_UNLOCKED); | ||
| 815 | held2 = (new_state != LM_ST_UNLOCKED); | ||
| 816 | |||
| 817 | if (held1 != held2) { | ||
| 818 | if (held2) | ||
| 819 | gfs2_glock_hold(gl); | ||
| 820 | else | ||
| 821 | gfs2_glock_put(gl); | ||
| 822 | } | ||
| 823 | |||
| 824 | gl->gl_state = new_state; | ||
| 825 | } | ||
| 826 | |||
/**
 * xmote_bh - Called after the lock module is done acquiring a lock
 * @gl: The glock in question
 * @ret: the int returned from the lock module (new state + LM_OUT_* flags)
 *
 * Records the new state, invalidates cached data as required, then
 * resolves the pending request holder (gl_req_gh) according to the
 * outcome: shutdown, demote, cancellation, success, or try-failure.
 * Drops the reference taken by the top half and wakes (or frees) the
 * waiting holder.
 */

static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh = gl->gl_req_gh;
	int prev_state = gl->gl_state;
	int op_done = 1;

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));

	state_change(gl, ret & LM_OUT_ST_MASK);

	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
		/* Lost the lock without cacheability: drop everything */
		if (glops->go_inval)
			glops->go_inval(gl, DIO_METADATA | DIO_DATA);
	} else if (gl->gl_state == LM_ST_DEFERRED) {
		/* We might not want to do this here.
		   Look at moving to the inode glops. */
		if (glops->go_inval)
			glops->go_inval(gl, DIO_DATA);
	}

	/* Deal with each possible exit condition */

	if (!gh)
		gl->gl_stamp = jiffies;
	else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
		/* Filesystem is being withdrawn: fail the request */
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = -EIO;
		spin_unlock(&gl->gl_spin);
	} else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		if (gl->gl_state == gh->gh_state ||
		    gl->gl_state == LM_ST_UNLOCKED) {
			gh->gh_error = 0;
		} else {
			/* Only TRY-flagged demotes may legitimately miss */
			if (gfs2_assert_warn(sdp, gh->gh_flags &
					(LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
				fs_warn(sdp, "ret = 0x%.8X\n", ret);
			gh->gh_error = GLR_TRYFAILED;
		}
		spin_unlock(&gl->gl_spin);

		/* A cancelled demote is re-queued as a full unlock */
		if (ret & LM_OUT_CANCELED)
			handle_callback(gl, LM_ST_UNLOCKED);

	} else if (ret & LM_OUT_CANCELED) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = GLR_CANCELED;
		spin_unlock(&gl->gl_spin);

	} else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
		/* Success: promote the request holder to an active holder */
		spin_lock(&gl->gl_spin);
		list_move_tail(&gh->gh_list, &gl->gl_holders);
		gh->gh_error = 0;
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		spin_unlock(&gl->gl_spin);

		set_bit(HIF_FIRST, &gh->gh_iflags);

		/* glock_wait_internal() finishes the op for HIF_FIRST */
		op_done = 0;

	} else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = GLR_TRYFAILED;
		spin_unlock(&gl->gl_spin);

	} else {
		/* Should be impossible: withdraw the filesystem */
		if (gfs2_assert_withdraw(sdp, 0) == -1)
			fs_err(sdp, "ret = 0x%.8X\n", ret);
	}

	if (glops->go_xmote_bh)
		glops->go_xmote_bh(gl);

	if (op_done) {
		spin_lock(&gl->gl_spin);
		gl->gl_req_gh = NULL;
		gl->gl_req_bh = NULL;
		clear_bit(GLF_LOCK, &gl->gl_flags);
		run_queue(gl);
		spin_unlock(&gl->gl_spin);
	}

	/* Drop the reference taken by gfs2_glock_xmote_th() */
	gfs2_glock_put(gl);

	if (gh) {
		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
			gfs2_holder_put(gh);
		else
			complete(&gh->gh_wait);
	}
}
| 933 | |||
/**
 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
 * @gl: The glock in question
 * @state: the requested state
 * @flags: modifier flags to the lock call
 *
 * Syncs out dirty data when leaving LM_ST_EXCLUSIVE, takes a glock
 * reference for the duration of the operation, and issues the lock
 * request. Completion runs via xmote_bh(), either synchronously here
 * or later from the lock module's async callback.
 */

void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	/* Only these modifier flags are passed through to the lock module */
	int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
				 LM_FLAG_NOEXP | LM_FLAG_ANY |
				 LM_FLAG_PRIORITY);
	unsigned int lck_ret;

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
	gfs2_assert_warn(sdp, state != gl->gl_state);

	if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
		glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);

	/* Reference dropped by xmote_bh() */
	gfs2_glock_hold(gl);
	gl->gl_req_bh = xmote_bh;

	lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags);

	if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
		return;

	if (lck_ret & LM_OUT_ASYNC)
		gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
	else
		xmote_bh(gl, lck_ret);
}
| 972 | |||
/**
 * drop_bh - Called after a lock module unlock completes
 * @gl: the glock
 * @ret: the return status (expected to be 0)
 *
 * Transitions the glock to LM_ST_UNLOCKED, invalidates all cached
 * metadata/data, resolves any pending request holder and restarts the
 * queue.
 *
 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
 * Doesn't drop the reference on the glock the top half took out
 *
 */

static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh = gl->gl_req_gh;

	clear_bit(GLF_PREFETCH, &gl->gl_flags);

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, !ret);

	state_change(gl, LM_ST_UNLOCKED);

	/* Unlocked: all cached state must go */
	if (glops->go_inval)
		glops->go_inval(gl, DIO_METADATA | DIO_DATA);

	if (gh) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = 0;
		spin_unlock(&gl->gl_spin);
	}

	if (glops->go_drop_bh)
		glops->go_drop_bh(gl);

	spin_lock(&gl->gl_spin);
	gl->gl_req_gh = NULL;
	gl->gl_req_bh = NULL;
	clear_bit(GLF_LOCK, &gl->gl_flags);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);

	/* Drops the reference taken by gfs2_glock_drop_th() */
	gfs2_glock_put(gl);

	if (gh) {
		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
			gfs2_holder_put(gh);
		else
			complete(&gh->gh_wait);
	}
}
| 1026 | |||
/**
 * gfs2_glock_drop_th - call into the lock module to unlock a lock
 * @gl: the glock
 *
 * Mirror of gfs2_glock_xmote_th() for the unlock path: syncs dirty
 * data out of EXCLUSIVE, holds a reference for the operation, and
 * completes via drop_bh() either inline or asynchronously.
 */

void gfs2_glock_drop_th(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned int ret;

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);

	if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
		glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);

	/* Reference dropped by drop_bh() */
	gfs2_glock_hold(gl);
	gl->gl_req_bh = drop_bh;

	ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);

	if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
		return;

	if (!ret)
		drop_bh(gl, ret);
	else
		gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
}
| 1059 | |||
| 1060 | /** | ||
| 1061 | * do_cancels - cancel requests for locks stuck waiting on an expire flag | ||
| 1062 | * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock | ||
| 1063 | * | ||
| 1064 | * Don't cancel GL_NOCANCEL requests. | ||
| 1065 | */ | ||
| 1066 | |||
| 1067 | static void do_cancels(struct gfs2_holder *gh) | ||
| 1068 | { | ||
| 1069 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 1070 | |||
| 1071 | spin_lock(&gl->gl_spin); | ||
| 1072 | |||
| 1073 | while (gl->gl_req_gh != gh && | ||
| 1074 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | ||
| 1075 | !list_empty(&gh->gh_list)) { | ||
| 1076 | if (gl->gl_req_bh && !(gl->gl_req_gh && | ||
| 1077 | (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { | ||
| 1078 | spin_unlock(&gl->gl_spin); | ||
| 1079 | gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock); | ||
| 1080 | msleep(100); | ||
| 1081 | spin_lock(&gl->gl_spin); | ||
| 1082 | } else { | ||
| 1083 | spin_unlock(&gl->gl_spin); | ||
| 1084 | msleep(100); | ||
| 1085 | spin_lock(&gl->gl_spin); | ||
| 1086 | } | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | spin_unlock(&gl->gl_spin); | ||
| 1090 | } | ||
| 1091 | |||
/**
 * glock_wait_internal - wait on a glock acquisition
 * @gh: the glock holder
 *
 * For TRY requests that have already been dequeued unserviced, fails
 * fast with GLR_TRYFAILED. PRIORITY requests actively cancel whatever
 * is in their way via do_cancels(). Otherwise blocks on gh_wait until
 * the bottom half completes the holder. The first granted holder
 * (HIF_FIRST) additionally runs glops->go_lock() and finishes the
 * operation by clearing GLF_LOCK and restarting the queue.
 *
 * Returns: 0 on success
 */

static int glock_wait_internal(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	if (test_bit(HIF_ABORTED, &gh->gh_iflags))
		return -EIO;

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		spin_lock(&gl->gl_spin);
		/* Still queued, not being serviced and not a holder:
		   a TRY request gives up immediately */
		if (gl->gl_req_gh != gh &&
		    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
		    !list_empty(&gh->gh_list)) {
			list_del_init(&gh->gh_list);
			gh->gh_error = GLR_TRYFAILED;
			run_queue(gl);
			spin_unlock(&gl->gl_spin);
			return gh->gh_error;
		}
		spin_unlock(&gl->gl_spin);
	}

	if (gh->gh_flags & LM_FLAG_PRIORITY)
		do_cancels(gh);

	wait_for_completion(&gh->gh_wait);

	if (gh->gh_error)
		return gh->gh_error;

	gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
	gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state,
						   gh->gh_flags));

	if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
		gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));

		if (glops->go_lock) {
			gh->gh_error = glops->go_lock(gh);
			if (gh->gh_error) {
				/* go_lock failed: back out of the holders list */
				spin_lock(&gl->gl_spin);
				list_del_init(&gh->gh_list);
				spin_unlock(&gl->gl_spin);
			}
		}

		/* Finish the operation xmote_bh() left open (op_done == 0) */
		spin_lock(&gl->gl_spin);
		gl->gl_req_gh = NULL;
		gl->gl_req_bh = NULL;
		clear_bit(GLF_LOCK, &gl->gl_flags);
		run_queue(gl);
		spin_unlock(&gl->gl_spin);
	}

	return gh->gh_error;
}
| 1156 | |||
| 1157 | static inline struct gfs2_holder * | ||
| 1158 | find_holder_by_owner(struct list_head *head, struct task_struct *owner) | ||
| 1159 | { | ||
| 1160 | struct gfs2_holder *gh; | ||
| 1161 | |||
| 1162 | list_for_each_entry(gh, head, gh_list) { | ||
| 1163 | if (gh->gh_owner == owner) | ||
| 1164 | return gh; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | return NULL; | ||
| 1168 | } | ||
| 1169 | |||
/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * BUGs with a diagnostic dump if the current task already holds this
 * glock or is already queued for it — recursive acquisition is not
 * allowed. PRIORITY holders jump to the head of gl_waiters3, others go
 * to the tail. Caller holds gl->gl_spin.
 */

static void add_to_queue(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_holder *existing;

	BUG_ON(!gh->gh_owner);

	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
	if (existing) {
		/* Dump both acquisition sites before dying */
		print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
		printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid);
		printk(KERN_INFO "lock type : %d lock state : %d\n",
		       existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
		print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
		printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid);
		printk(KERN_INFO "lock type : %d lock state : %d\n",
		       gl->gl_name.ln_type, gl->gl_state);
		BUG();
	}

	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
	if (existing) {
		print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
		print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
		BUG();
	}

	if (gh->gh_flags & LM_FLAG_PRIORITY)
		list_add(&gh->gh_list, &gl->gl_waiters3);
	else
		list_add_tail(&gh->gh_list, &gl->gl_waiters3);
}
| 1208 | |||
/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * Queues the holder as a promotion request and, unless GL_ASYNC,
 * waits for the result. A GLR_CANCELED outcome is retried after a
 * short sleep.
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */

int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	int error = 0;

restart:
	/* Refuse new lock requests once the fs is shutting down */
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
		set_bit(HIF_ABORTED, &gh->gh_iflags);
		return -EIO;
	}

	set_bit(HIF_PROMOTE, &gh->gh_iflags);

	spin_lock(&gl->gl_spin);
	add_to_queue(gh);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);

	if (!(gh->gh_flags & GL_ASYNC)) {
		error = glock_wait_internal(gh);
		if (error == GLR_CANCELED) {
			/* Back off briefly, then requeue */
			msleep(100);
			goto restart;
		}
	}

	clear_bit(GLF_PREFETCH, &gl->gl_flags);

	if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
		dump_glock(gl);

	return error;
}
| 1252 | |||
/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Ready when the holder has been granted, or has left the queue with
 * any result other than GLR_CANCELED. A cancelled request is requeued
 * here (after a short sleep); if the requeue itself fails, report
 * ready so the caller picks up the error via gfs2_glock_wait().
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */

int gfs2_glock_poll(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	int ready = 0;

	spin_lock(&gl->gl_spin);

	if (test_bit(HIF_HOLDER, &gh->gh_iflags))
		ready = 1;
	else if (list_empty(&gh->gh_list)) {
		if (gh->gh_error == GLR_CANCELED) {
			spin_unlock(&gl->gl_spin);
			msleep(100);
			if (gfs2_glock_nq(gh))
				return 1;
			return 0;
		} else
			ready = 1;
	}

	spin_unlock(&gl->gl_spin);

	return ready;
}
| 1284 | |||
| 1285 | /** | ||
| 1286 | * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC | ||
| 1287 | * @gh: the holder structure | ||
| 1288 | * | ||
| 1289 | * Returns: 0, GLR_TRYFAILED, or errno on failure | ||
| 1290 | */ | ||
| 1291 | |||
| 1292 | int gfs2_glock_wait(struct gfs2_holder *gh) | ||
| 1293 | { | ||
| 1294 | int error; | ||
| 1295 | |||
| 1296 | error = glock_wait_internal(gh); | ||
| 1297 | if (error == GLR_CANCELED) { | ||
| 1298 | msleep(100); | ||
| 1299 | gh->gh_flags &= ~GL_ASYNC; | ||
| 1300 | error = gfs2_glock_nq(gh); | ||
| 1301 | } | ||
| 1302 | |||
| 1303 | return error; | ||
| 1304 | } | ||
| 1305 | |||
/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 * GL_NOCACHE holders queue a demote-to-unlocked before releasing.
 * Takes the glmutex, removes the holder, and if it was the last one
 * runs glops->go_unlock() and refreshes gl_stamp, then restarts the
 * queue.
 */

void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	if (gh->gh_flags & GL_NOCACHE)
		handle_callback(gl, LM_ST_UNLOCKED);

	gfs2_glmutex_lock(gl);

	spin_lock(&gl->gl_spin);
	list_del_init(&gh->gh_list);

	if (list_empty(&gl->gl_holders)) {
		/* Last holder gone: per-type unlock work runs unlocked */
		spin_unlock(&gl->gl_spin);

		if (glops->go_unlock)
			glops->go_unlock(gh);

		gl->gl_stamp = jiffies;

		spin_lock(&gl->gl_spin);
	}

	/* Release the glmutex taken above (GLF_LOCK) and resume the queue */
	clear_bit(GLF_LOCK, &gl->gl_flags);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);
}
| 1340 | |||
/**
 * gfs2_glock_prefetch - Try to prefetch a glock
 * @gl: the glock
 * @state: the state to prefetch in
 * @flags: flags passed to go_xmote_th()
 *
 * Purely opportunistic: bails out if the glock is busy, has holders or
 * waiters, or is already in a compatible state. Otherwise takes
 * GLF_LOCK itself and fires the state change.
 */

static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
				int flags)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	spin_lock(&gl->gl_spin);

	if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
	    !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
	    !list_empty(&gl->gl_waiters3) ||
	    relaxed_state_ok(gl->gl_state, state, flags)) {
		spin_unlock(&gl->gl_spin);
		return;
	}

	set_bit(GLF_PREFETCH, &gl->gl_flags);
	set_bit(GLF_LOCK, &gl->gl_flags);
	spin_unlock(&gl->gl_spin);

	glops->go_xmote_th(gl, state, flags);
}
| 1370 | |||
/*
 * greedy_work - delayed-work handler that ends a glock's greedy period
 * @data: the struct greedy queued by gfs2_glock_be_greedy()
 *
 * Re-enables waiters2 processing and either frees the greedy holder
 * (no pending demotes) or queues it on gl_waiters2 so rq_greedy()
 * clears GLF_GREEDY and frees it from the queue-run path.
 */
static void greedy_work(void *data)
{
	struct greedy *gr = data;
	struct gfs2_holder *gh = &gr->gr_gh;
	struct gfs2_glock *gl = gh->gh_gl;
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);

	if (glops->go_greedy)
		glops->go_greedy(gl);

	spin_lock(&gl->gl_spin);

	if (list_empty(&gl->gl_waiters2)) {
		/* Nothing queued: finish here */
		clear_bit(GLF_GREEDY, &gl->gl_flags);
		spin_unlock(&gl->gl_spin);
		gfs2_holder_uninit(gh);
		kfree(gr);
	} else {
		/* Let rq_greedy() consume the holder; hold a temporary
		   glock ref across run_queue() */
		gfs2_glock_hold(gl);
		list_add_tail(&gh->gh_list, &gl->gl_waiters2);
		run_queue(gl);
		spin_unlock(&gl->gl_spin);
		gfs2_glock_put(gl);
	}
}
| 1398 | |||
/**
 * gfs2_glock_be_greedy - hold onto a glock greedily for a while
 * @gl: the glock
 * @time: how long (in jiffies) to defer remote demote requests
 *
 * Sets GLF_GREEDY/GLF_SKIP_WAITERS2 so demote callbacks on waiters2
 * are deferred, and schedules greedy_work() to end the period. No-op
 * when @time is zero, local caching is enabled, or the glock is
 * already greedy.
 *
 * Returns: 0 if go_greedy will be called, 1 otherwise
 */

int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
{
	struct greedy *gr;
	struct gfs2_holder *gh;

	if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
	    test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
		return 1;

	gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
	if (!gr) {
		clear_bit(GLF_GREEDY, &gl->gl_flags);
		return 1;
	}
	gh = &gr->gr_gh;

	/* The holder carries the glock ref until greedy_work()/rq_greedy()
	   frees the struct greedy */
	gfs2_holder_init(gl, 0, 0, gh);
	set_bit(HIF_GREEDY, &gh->gh_iflags);
	INIT_WORK(&gr->gr_work, greedy_work, gr);

	set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
	schedule_delayed_work(&gr->gr_work, time);

	return 0;
}
| 1432 | |||
/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
 *
 * Convenience wrapper: releases the glock and tears down the holder
 * (dropping its glock reference) in one call.
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}
| 1444 | |||
| 1445 | /** | ||
| 1446 | * gfs2_glock_nq_num - acquire a glock based on lock number | ||
| 1447 | * @sdp: the filesystem | ||
| 1448 | * @number: the lock number | ||
| 1449 | * @glops: the glock operations for the type of glock | ||
| 1450 | * @state: the state to acquire the glock in | ||
| 1451 | * @flags: modifier flags for the aquisition | ||
| 1452 | * @gh: the struct gfs2_holder | ||
| 1453 | * | ||
| 1454 | * Returns: errno | ||
| 1455 | */ | ||
| 1456 | |||
| 1457 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, | ||
| 1458 | const struct gfs2_glock_operations *glops, | ||
| 1459 | unsigned int state, int flags, struct gfs2_holder *gh) | ||
| 1460 | { | ||
| 1461 | struct gfs2_glock *gl; | ||
| 1462 | int error; | ||
| 1463 | |||
| 1464 | error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); | ||
| 1465 | if (!error) { | ||
| 1466 | error = gfs2_glock_nq_init(gl, state, flags, gh); | ||
| 1467 | gfs2_glock_put(gl); | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | return error; | ||
| 1471 | } | ||
| 1472 | |||
| 1473 | /** | ||
| 1474 | * glock_compare - Compare two struct gfs2_glock structures for sorting | ||
| 1475 | * @arg_a: the first structure | ||
| 1476 | * @arg_b: the second structure | ||
| 1477 | * | ||
| 1478 | */ | ||
| 1479 | |||
| 1480 | static int glock_compare(const void *arg_a, const void *arg_b) | ||
| 1481 | { | ||
| 1482 | const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a; | ||
| 1483 | const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b; | ||
| 1484 | const struct lm_lockname *a = &gh_a->gh_gl->gl_name; | ||
| 1485 | const struct lm_lockname *b = &gh_b->gh_gl->gl_name; | ||
| 1486 | |||
| 1487 | if (a->ln_number > b->ln_number) | ||
| 1488 | return 1; | ||
| 1489 | if (a->ln_number < b->ln_number) | ||
| 1490 | return -1; | ||
| 1491 | if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) | ||
| 1492 | return 1; | ||
| 1493 | if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL)) | ||
| 1494 | return 1; | ||
| 1495 | return 0; | ||
| 1496 | } | ||
| 1497 | |||
/**
 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 * @p: caller-supplied scratch array of @num_gh holder pointers
 *
 * Sorts the holders with glock_compare() so every caller acquires in
 * the same global order, then enqueues each synchronously. On failure,
 * everything acquired so far is released.
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		/* Force each acquisition to be blocking and synchronous */
		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);

		error = gfs2_glock_nq(p[x]);
		if (error) {
			/* Unwind the ones already granted */
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}
| 1531 | |||
/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Tries all the requested locks asynchronously with LM_FLAG_TRY first;
 * if any of them fail in a retryable way, everything is dropped and the
 * whole set is re-acquired synchronously in deadlock-free order.
 *
 * Figure out how big an impact this function has. Either:
 * 1) Replace this code with code that calls gfs2_glock_prefetch()
 * 2) Forget async stuff and just call nq_m_sync()
 * 3) Leave it like it is
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	int *e;
	unsigned int x;
	int borked = 0, serious = 0;
	int error = 0;

	if (!num_gh)
		return 0;

	/* A single glock needs no ordering games */
	if (num_gh == 1) {
		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
		return gfs2_glock_nq(ghs);
	}

	/* e does double duty: it holds the per-holder wait results (int)
	   now, and is later reused as the holder-pointer array passed to
	   nq_m_sync().  It is therefore sized for the larger of the two
	   element types (the pointer). */
	e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	/* Optimistic pass: queue every request asynchronously */
	for (x = 0; x < num_gh; x++) {
		ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
		error = gfs2_glock_nq(&ghs[x]);
		if (error) {
			borked = 1;
			serious = error;
			num_gh = x; /* only wait on the ones actually queued */
			break;
		}
	}

	/* Wait for all the requests that were queued */
	for (x = 0; x < num_gh; x++) {
		error = e[x] = glock_wait_internal(&ghs[x]);
		if (error) {
			borked = 1;
			/* TRY/CANCEL failures are retryable, not fatal */
			if (error != GLR_TRYFAILED && error != GLR_CANCELED)
				serious = error;
		}
	}

	if (!borked) {
		kfree(e);
		return 0;
	}

	/* Drop whatever we did manage to acquire (e[x] == 0) */
	for (x = 0; x < num_gh; x++)
		if (!e[x])
			gfs2_glock_dq(&ghs[x]);

	if (serious)
		error = serious;
	else {
		/* Nothing fatal happened: retry the whole set
		   synchronously in sorted, deadlock-free order */
		for (x = 0; x < num_gh; x++)
			gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
					   &ghs[x]);
		error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
	}

	kfree(e);

	return error;
}
| 1607 | |||
| 1608 | /** | ||
| 1609 | * gfs2_glock_dq_m - release multiple glocks | ||
| 1610 | * @num_gh: the number of structures | ||
| 1611 | * @ghs: an array of struct gfs2_holder structures | ||
| 1612 | * | ||
| 1613 | */ | ||
| 1614 | |||
| 1615 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) | ||
| 1616 | { | ||
| 1617 | unsigned int x; | ||
| 1618 | |||
| 1619 | for (x = 0; x < num_gh; x++) | ||
| 1620 | gfs2_glock_dq(&ghs[x]); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | /** | ||
| 1624 | * gfs2_glock_dq_uninit_m - release multiple glocks | ||
| 1625 | * @num_gh: the number of structures | ||
| 1626 | * @ghs: an array of struct gfs2_holder structures | ||
| 1627 | * | ||
| 1628 | */ | ||
| 1629 | |||
| 1630 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | ||
| 1631 | { | ||
| 1632 | unsigned int x; | ||
| 1633 | |||
| 1634 | for (x = 0; x < num_gh; x++) | ||
| 1635 | gfs2_glock_dq_uninit(&ghs[x]); | ||
| 1636 | } | ||
| 1637 | |||
/**
 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 *
 * Best effort: errors from gfs2_glock_get() are ignored, and nothing is
 * done at all when the reclaim list is already over the tunable limit.
 */

void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
			     const struct gfs2_glock_operations *glops,
			     unsigned int state, int flags)
{
	struct gfs2_glock *gl;
	int error;

	/* Don't create yet more glocks when reclaim is already backed up */
	if (atomic_read(&sdp->sd_reclaim_count) <
	    gfs2_tune_get(sdp, gt_reclaim_limit)) {
		error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
		if (!error) {
			gfs2_glock_prefetch(gl, state, flags);
			/* Drop the reference gfs2_glock_get() took */
			gfs2_glock_put(gl);
		}
	}
}
| 1665 | |||
/**
 * gfs2_lvb_hold - attach a LVB to a glock
 * @gl: The glock in question
 *
 * Returns: errno
 */

int gfs2_lvb_hold(struct gfs2_glock *gl)
{
	int error;

	gfs2_glmutex_lock(gl);

	/* First holder: fetch the LVB from the lock module and take an
	   extra glock reference that lives as long as the LVB does */
	if (!atomic_read(&gl->gl_lvb_count)) {
		error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
		if (error) {
			gfs2_glmutex_unlock(gl);
			return error;
		}
		gfs2_glock_hold(gl);
	}
	atomic_inc(&gl->gl_lvb_count);

	gfs2_glmutex_unlock(gl);

	return 0;
}
| 1692 | |||
/**
 * gfs2_lvb_unhold - detach a LVB from a glock
 * @gl: The glock in question
 *
 */

void gfs2_lvb_unhold(struct gfs2_glock *gl)
{
	/* Temporary reference so the glock can't go away under us */
	gfs2_glock_hold(gl);
	gfs2_glmutex_lock(gl);

	gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
	if (atomic_dec_and_test(&gl->gl_lvb_count)) {
		/* Last user: release the LVB and drop the reference
		   taken by gfs2_lvb_hold() */
		gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
		gl->gl_lvb = NULL;
		gfs2_glock_put(gl);
	}

	gfs2_glmutex_unlock(gl);
	/* Drop the temporary reference taken above */
	gfs2_glock_put(gl);
}
| 1714 | |||
/**
 * blocking_cb - handle a blocking callback from the lock module
 * @sdp: the filesystem
 * @name: the name of the glock another node wants demoted
 * @state: the state the glock should be demoted to
 *
 * Looks up the glock (if we still have it), lets the glops layer react,
 * records the demote request and kicks the glock's work queue.
 */

static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
			unsigned int state)
{
	struct gfs2_glock *gl;

	/* gfs2_glock_find() returns a referenced glock or NULL */
	gl = gfs2_glock_find(sdp, name);
	if (!gl)
		return;

	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, state);
	handle_callback(gl, state);

	spin_lock(&gl->gl_spin);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);

	gfs2_glock_put(gl);
}
| 1734 | |||
| 1735 | /** | ||
| 1736 | * gfs2_glock_cb - Callback used by locking module | ||
| 1737 | * @sdp: Pointer to the superblock | ||
| 1738 | * @type: Type of callback | ||
| 1739 | * @data: Type dependent data pointer | ||
| 1740 | * | ||
| 1741 | * Called by the locking module when it wants to tell us something. | ||
| 1742 | * Either we need to drop a lock, one of our ASYNC requests completed, or | ||
| 1743 | * a journal from another client needs to be recovered. | ||
| 1744 | */ | ||
| 1745 | |||
| 1746 | void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | ||
| 1747 | { | ||
| 1748 | struct gfs2_sbd *sdp = cb_data; | ||
| 1749 | |||
| 1750 | switch (type) { | ||
| 1751 | case LM_CB_NEED_E: | ||
| 1752 | blocking_cb(sdp, data, LM_ST_UNLOCKED); | ||
| 1753 | return; | ||
| 1754 | |||
| 1755 | case LM_CB_NEED_D: | ||
| 1756 | blocking_cb(sdp, data, LM_ST_DEFERRED); | ||
| 1757 | return; | ||
| 1758 | |||
| 1759 | case LM_CB_NEED_S: | ||
| 1760 | blocking_cb(sdp, data, LM_ST_SHARED); | ||
| 1761 | return; | ||
| 1762 | |||
| 1763 | case LM_CB_ASYNC: { | ||
| 1764 | struct lm_async_cb *async = data; | ||
| 1765 | struct gfs2_glock *gl; | ||
| 1766 | |||
| 1767 | gl = gfs2_glock_find(sdp, &async->lc_name); | ||
| 1768 | if (gfs2_assert_warn(sdp, gl)) | ||
| 1769 | return; | ||
| 1770 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) | ||
| 1771 | gl->gl_req_bh(gl, async->lc_ret); | ||
| 1772 | gfs2_glock_put(gl); | ||
| 1773 | return; | ||
| 1774 | } | ||
| 1775 | |||
| 1776 | case LM_CB_NEED_RECOVERY: | ||
| 1777 | gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data); | ||
| 1778 | if (sdp->sd_recoverd_process) | ||
| 1779 | wake_up_process(sdp->sd_recoverd_process); | ||
| 1780 | return; | ||
| 1781 | |||
| 1782 | case LM_CB_DROPLOCKS: | ||
| 1783 | gfs2_gl_hash_clear(sdp, NO_WAIT); | ||
| 1784 | gfs2_quota_scan(sdp); | ||
| 1785 | return; | ||
| 1786 | |||
| 1787 | default: | ||
| 1788 | gfs2_assert_warn(sdp, 0); | ||
| 1789 | return; | ||
| 1790 | } | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | /** | ||
| 1794 | * demote_ok - Check to see if it's ok to unlock a glock | ||
| 1795 | * @gl: the glock | ||
| 1796 | * | ||
| 1797 | * Returns: 1 if it's ok | ||
| 1798 | */ | ||
| 1799 | |||
| 1800 | static int demote_ok(struct gfs2_glock *gl) | ||
| 1801 | { | ||
| 1802 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 1803 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
| 1804 | int demote = 1; | ||
| 1805 | |||
| 1806 | if (test_bit(GLF_STICKY, &gl->gl_flags)) | ||
| 1807 | demote = 0; | ||
| 1808 | else if (test_bit(GLF_PREFETCH, &gl->gl_flags)) | ||
| 1809 | demote = time_after_eq(jiffies, gl->gl_stamp + | ||
| 1810 | gfs2_tune_get(sdp, gt_prefetch_secs) * HZ); | ||
| 1811 | else if (glops->go_demote_ok) | ||
| 1812 | demote = glops->go_demote_ok(gl); | ||
| 1813 | |||
| 1814 | return demote; | ||
| 1815 | } | ||
| 1816 | |||
/**
 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
 * @gl: the glock
 *
 */

void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	spin_lock(&sdp->sd_reclaim_lock);
	/* Only add it once; the reclaim list holds its own reference */
	if (list_empty(&gl->gl_reclaim)) {
		gfs2_glock_hold(gl);
		list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
		atomic_inc(&sdp->sd_reclaim_count);
	}
	spin_unlock(&sdp->sd_reclaim_lock);

	/* Kick the reclaim daemon */
	wake_up(&sdp->sd_reclaim_wq);
}
| 1837 | |||
/**
 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
 * @sdp: the filesystem
 *
 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
 * different glock and we notice that there are a lot of glocks in the
 * reclaim list.
 *
 */

void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
{
	struct gfs2_glock *gl;

	spin_lock(&sdp->sd_reclaim_lock);
	if (list_empty(&sdp->sd_reclaim_list)) {
		spin_unlock(&sdp->sd_reclaim_lock);
		return;
	}
	/* Pop the head of the reclaim list */
	gl = list_entry(sdp->sd_reclaim_list.next,
			struct gfs2_glock, gl_reclaim);
	list_del_init(&gl->gl_reclaim);
	spin_unlock(&sdp->sd_reclaim_lock);

	atomic_dec(&sdp->sd_reclaim_count);
	atomic_inc(&sdp->sd_reclaimed);

	/* Demote it if it's idle; skip quietly if the glock is busy */
	if (gfs2_glmutex_trylock(gl)) {
		if (queue_empty(gl, &gl->gl_holders) &&
		    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
			handle_callback(gl, LM_ST_UNLOCKED);
		gfs2_glmutex_unlock(gl);
	}

	/* Drop the reference the reclaim list was holding */
	gfs2_glock_put(gl);
}
| 1874 | |||
/**
 * examine_bucket - Call a function for each glock in a hash bucket
 * @examiner: the function
 * @sdp: the filesystem
 * @hash: the index of the hash bucket to walk
 *
 * The bucket lock is dropped while @examiner runs, so the walk keeps a
 * reference on the current glock (tracked in @prev) to stay safely
 * attached to the list.
 *
 * Returns: 1 if the bucket has entries
 */

static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
			  unsigned int hash)
{
	struct gfs2_glock *gl, *prev = NULL;
	int has_entries = 0;
	struct hlist_head *head = &gl_hash_table[hash].hb_list;

	read_lock(gl_lock_addr(hash));
	/* Can't use hlist_for_each_entry - don't want prefetch here */
	if (hlist_empty(head))
		goto out;
	gl = list_entry(head->first, struct gfs2_glock, gl_list);
	while(1) {
		if (gl->gl_sbd == sdp) {
			gfs2_glock_hold(gl);
			read_unlock(gl_lock_addr(hash));
			/* Only release the previous glock once we have
			   moved past it on the list */
			if (prev)
				gfs2_glock_put(prev);
			prev = gl;
			examiner(gl);
			has_entries = 1;
			read_lock(gl_lock_addr(hash));
		}
		if (gl->gl_list.next == NULL)
			break;
		gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
	}
out:
	read_unlock(gl_lock_addr(hash));
	if (prev)
		gfs2_glock_put(prev);
	return has_entries;
}
| 1917 | |||
| 1918 | /** | ||
| 1919 | * scan_glock - look at a glock and see if we can reclaim it | ||
| 1920 | * @gl: the glock to look at | ||
| 1921 | * | ||
| 1922 | */ | ||
| 1923 | |||
| 1924 | static void scan_glock(struct gfs2_glock *gl) | ||
| 1925 | { | ||
| 1926 | if (gl->gl_ops == &gfs2_inode_glops) | ||
| 1927 | return; | ||
| 1928 | |||
| 1929 | if (gfs2_glmutex_trylock(gl)) { | ||
| 1930 | if (queue_empty(gl, &gl->gl_holders) && | ||
| 1931 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | ||
| 1932 | goto out_schedule; | ||
| 1933 | gfs2_glmutex_unlock(gl); | ||
| 1934 | } | ||
| 1935 | return; | ||
| 1936 | |||
| 1937 | out_schedule: | ||
| 1938 | gfs2_glmutex_unlock(gl); | ||
| 1939 | gfs2_glock_schedule_for_reclaim(gl); | ||
| 1940 | } | ||
| 1941 | |||
| 1942 | /** | ||
| 1943 | * gfs2_scand_internal - Look for glocks and inodes to toss from memory | ||
| 1944 | * @sdp: the filesystem | ||
| 1945 | * | ||
| 1946 | */ | ||
| 1947 | |||
| 1948 | void gfs2_scand_internal(struct gfs2_sbd *sdp) | ||
| 1949 | { | ||
| 1950 | unsigned int x; | ||
| 1951 | |||
| 1952 | for (x = 0; x < GFS2_GL_HASH_SIZE; x++) | ||
| 1953 | examine_bucket(scan_glock, sdp, x); | ||
| 1954 | } | ||
| 1955 | |||
/**
 * clear_glock - look at a glock and see if we can free it from glock cache
 * @gl: the glock to look at
 *
 */

static void clear_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	int released;

	/* Pull it off the reclaim list first; the list's reference must
	   not be the last one, since the caller still holds one */
	spin_lock(&sdp->sd_reclaim_lock);
	if (!list_empty(&gl->gl_reclaim)) {
		list_del_init(&gl->gl_reclaim);
		atomic_dec(&sdp->sd_reclaim_count);
		spin_unlock(&sdp->sd_reclaim_lock);
		released = gfs2_glock_put(gl);
		gfs2_assert(sdp, !released);
	} else {
		spin_unlock(&sdp->sd_reclaim_lock);
	}

	/* Demote to unlocked if the glock is idle */
	if (gfs2_glmutex_trylock(gl)) {
		if (queue_empty(gl, &gl->gl_holders) &&
		    gl->gl_state != LM_ST_UNLOCKED)
			handle_callback(gl, LM_ST_UNLOCKED);
		gfs2_glmutex_unlock(gl);
	}
}
| 1985 | |||
/**
 * gfs2_gl_hash_clear - Empty out the glock hash table
 * @sdp: the filesystem
 * @wait: wait until it's all gone
 *
 * Called when unmounting the filesystem, or when inter-node lock manager
 * requests DROPLOCKS because it is running out of capacity.
 */

void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
{
	unsigned long t;
	unsigned int x;
	int cont;

	t = jiffies;

	for (;;) {
		/* One sweep over every bucket; cont is set if anything
		   belonging to this sb is still hashed */
		cont = 0;
		for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
			if (examine_bucket(clear_glock, sdp, x))
				cont = 1;
		}

		if (!wait || !cont)
			break;

		/* Periodically warn and dump state if this stalls */
		if (time_after_eq(jiffies,
				  t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
			fs_warn(sdp, "Unmount seems to be stalled. "
				     "Dumping lock state...\n");
			gfs2_dump_lockstate(sdp);
			t = jiffies;
		}

		invalidate_inodes(sdp->sd_vfs);
		msleep(10);
	}
}
| 2025 | |||
| 2026 | /* | ||
| 2027 | * Diagnostic routines to help debug distributed deadlock | ||
| 2028 | */ | ||
| 2029 | |||
| 2030 | /** | ||
| 2031 | * dump_holder - print information about a glock holder | ||
| 2032 | * @str: a string naming the type of holder | ||
| 2033 | * @gh: the glock holder | ||
| 2034 | * | ||
| 2035 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
| 2036 | */ | ||
| 2037 | |||
| 2038 | static int dump_holder(char *str, struct gfs2_holder *gh) | ||
| 2039 | { | ||
| 2040 | unsigned int x; | ||
| 2041 | int error = -ENOBUFS; | ||
| 2042 | |||
| 2043 | printk(KERN_INFO " %s\n", str); | ||
| 2044 | printk(KERN_INFO " owner = %ld\n", | ||
| 2045 | (gh->gh_owner) ? (long)gh->gh_owner->pid : -1); | ||
| 2046 | printk(KERN_INFO " gh_state = %u\n", gh->gh_state); | ||
| 2047 | printk(KERN_INFO " gh_flags ="); | ||
| 2048 | for (x = 0; x < 32; x++) | ||
| 2049 | if (gh->gh_flags & (1 << x)) | ||
| 2050 | printk(" %u", x); | ||
| 2051 | printk(" \n"); | ||
| 2052 | printk(KERN_INFO " error = %d\n", gh->gh_error); | ||
| 2053 | printk(KERN_INFO " gh_iflags ="); | ||
| 2054 | for (x = 0; x < 32; x++) | ||
| 2055 | if (test_bit(x, &gh->gh_iflags)) | ||
| 2056 | printk(" %u", x); | ||
| 2057 | printk(" \n"); | ||
| 2058 | print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); | ||
| 2059 | |||
| 2060 | error = 0; | ||
| 2061 | |||
| 2062 | return error; | ||
| 2063 | } | ||
| 2064 | |||
| 2065 | /** | ||
| 2066 | * dump_inode - print information about an inode | ||
| 2067 | * @ip: the inode | ||
| 2068 | * | ||
| 2069 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
| 2070 | */ | ||
| 2071 | |||
| 2072 | static int dump_inode(struct gfs2_inode *ip) | ||
| 2073 | { | ||
| 2074 | unsigned int x; | ||
| 2075 | int error = -ENOBUFS; | ||
| 2076 | |||
| 2077 | printk(KERN_INFO " Inode:\n"); | ||
| 2078 | printk(KERN_INFO " num = %llu %llu\n", | ||
| 2079 | (unsigned long long)ip->i_num.no_formal_ino, | ||
| 2080 | (unsigned long long)ip->i_num.no_addr); | ||
| 2081 | printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode)); | ||
| 2082 | printk(KERN_INFO " i_flags ="); | ||
| 2083 | for (x = 0; x < 32; x++) | ||
| 2084 | if (test_bit(x, &ip->i_flags)) | ||
| 2085 | printk(" %u", x); | ||
| 2086 | printk(" \n"); | ||
| 2087 | |||
| 2088 | error = 0; | ||
| 2089 | |||
| 2090 | return error; | ||
| 2091 | } | ||
| 2092 | |||
| 2093 | /** | ||
| 2094 | * dump_glock - print information about a glock | ||
| 2095 | * @gl: the glock | ||
| 2096 | * @count: where we are in the buffer | ||
| 2097 | * | ||
| 2098 | * Returns: 0 on success, -ENOBUFS when we run out of space | ||
| 2099 | */ | ||
| 2100 | |||
| 2101 | static int dump_glock(struct gfs2_glock *gl) | ||
| 2102 | { | ||
| 2103 | struct gfs2_holder *gh; | ||
| 2104 | unsigned int x; | ||
| 2105 | int error = -ENOBUFS; | ||
| 2106 | |||
| 2107 | spin_lock(&gl->gl_spin); | ||
| 2108 | |||
| 2109 | printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, | ||
| 2110 | (unsigned long long)gl->gl_name.ln_number); | ||
| 2111 | printk(KERN_INFO " gl_flags ="); | ||
| 2112 | for (x = 0; x < 32; x++) { | ||
| 2113 | if (test_bit(x, &gl->gl_flags)) | ||
| 2114 | printk(" %u", x); | ||
| 2115 | } | ||
| 2116 | printk(" \n"); | ||
| 2117 | printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref)); | ||
| 2118 | printk(KERN_INFO " gl_state = %u\n", gl->gl_state); | ||
| 2119 | printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm); | ||
| 2120 | print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip); | ||
| 2121 | printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | ||
| 2122 | printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | ||
| 2123 | printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | ||
| 2124 | printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no"); | ||
| 2125 | printk(KERN_INFO " le = %s\n", | ||
| 2126 | (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); | ||
| 2127 | printk(KERN_INFO " reclaim = %s\n", | ||
| 2128 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | ||
| 2129 | if (gl->gl_aspace) | ||
| 2130 | printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, | ||
| 2131 | gl->gl_aspace->i_mapping->nrpages); | ||
| 2132 | else | ||
| 2133 | printk(KERN_INFO " aspace = no\n"); | ||
| 2134 | printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count)); | ||
| 2135 | if (gl->gl_req_gh) { | ||
| 2136 | error = dump_holder("Request", gl->gl_req_gh); | ||
| 2137 | if (error) | ||
| 2138 | goto out; | ||
| 2139 | } | ||
| 2140 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | ||
| 2141 | error = dump_holder("Holder", gh); | ||
| 2142 | if (error) | ||
| 2143 | goto out; | ||
| 2144 | } | ||
| 2145 | list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { | ||
| 2146 | error = dump_holder("Waiter1", gh); | ||
| 2147 | if (error) | ||
| 2148 | goto out; | ||
| 2149 | } | ||
| 2150 | list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { | ||
| 2151 | error = dump_holder("Waiter2", gh); | ||
| 2152 | if (error) | ||
| 2153 | goto out; | ||
| 2154 | } | ||
| 2155 | list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { | ||
| 2156 | error = dump_holder("Waiter3", gh); | ||
| 2157 | if (error) | ||
| 2158 | goto out; | ||
| 2159 | } | ||
| 2160 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { | ||
| 2161 | if (!test_bit(GLF_LOCK, &gl->gl_flags) && | ||
| 2162 | list_empty(&gl->gl_holders)) { | ||
| 2163 | error = dump_inode(gl->gl_object); | ||
| 2164 | if (error) | ||
| 2165 | goto out; | ||
| 2166 | } else { | ||
| 2167 | error = -ENOBUFS; | ||
| 2168 | printk(KERN_INFO " Inode: busy\n"); | ||
| 2169 | } | ||
| 2170 | } | ||
| 2171 | |||
| 2172 | error = 0; | ||
| 2173 | |||
| 2174 | out: | ||
| 2175 | spin_unlock(&gl->gl_spin); | ||
| 2176 | return error; | ||
| 2177 | } | ||
| 2178 | |||
/**
 * gfs2_dump_lockstate - print out the current lockstate
 * @sdp: the filesystem
 *
 * Dumps every glock belonging to @sdp to the console, stopping at the
 * first error reported by dump_glock().
 *
 * Returns: 0 on success, or the error from dump_glock()
 */

static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
{
	struct gfs2_glock *gl;
	struct hlist_node *h;
	unsigned int x;
	int error = 0;

	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {

		read_lock(gl_lock_addr(x));

		hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
			/* Skip glocks belonging to other filesystems */
			if (gl->gl_sbd != sdp)
				continue;

			error = dump_glock(gl);
			if (error)
				break;
		}

		read_unlock(gl_lock_addr(x));

		if (error)
			break;
	}


	return error;
}
| 2217 | |||
| 2218 | int __init gfs2_glock_init(void) | ||
| 2219 | { | ||
| 2220 | unsigned i; | ||
| 2221 | for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { | ||
| 2222 | INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); | ||
| 2223 | } | ||
| 2224 | #ifdef GL_HASH_LOCK_SZ | ||
| 2225 | for(i = 0; i < GL_HASH_LOCK_SZ; i++) { | ||
| 2226 | rwlock_init(&gl_hash_locks[i]); | ||
| 2227 | } | ||
| 2228 | #endif | ||
| 2229 | return 0; | ||
| 2230 | } | ||
| 2231 | |||
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h new file mode 100644 index 000000000000..2b2a889ee2cc --- /dev/null +++ b/fs/gfs2/glock.h | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __GLOCK_DOT_H__ | ||
| 11 | #define __GLOCK_DOT_H__ | ||
| 12 | |||
| 13 | #include "incore.h" | ||
| 14 | |||
| 15 | /* Flags for lock requests; used in gfs2_holder gh_flag field. | ||
| 16 | From lm_interface.h: | ||
| 17 | #define LM_FLAG_TRY 0x00000001 | ||
| 18 | #define LM_FLAG_TRY_1CB 0x00000002 | ||
| 19 | #define LM_FLAG_NOEXP 0x00000004 | ||
| 20 | #define LM_FLAG_ANY 0x00000008 | ||
| 21 | #define LM_FLAG_PRIORITY 0x00000010 */ | ||
| 22 | |||
| 23 | #define GL_LOCAL_EXCL 0x00000020 | ||
| 24 | #define GL_ASYNC 0x00000040 | ||
| 25 | #define GL_EXACT 0x00000080 | ||
| 26 | #define GL_SKIP 0x00000100 | ||
| 27 | #define GL_ATIME 0x00000200 | ||
| 28 | #define GL_NOCACHE 0x00000400 | ||
| 29 | #define GL_NOCANCEL 0x00001000 | ||
| 30 | #define GL_AOP 0x00004000 | ||
| 31 | #define GL_DUMP 0x00008000 | ||
| 32 | |||
| 33 | #define GLR_TRYFAILED 13 | ||
| 34 | #define GLR_CANCELED 14 | ||
| 35 | |||
| 36 | static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | ||
| 37 | { | ||
| 38 | struct gfs2_holder *gh; | ||
| 39 | int locked = 0; | ||
| 40 | |||
| 41 | /* Look in glock's list of holders for one with current task as owner */ | ||
| 42 | spin_lock(&gl->gl_spin); | ||
| 43 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | ||
| 44 | if (gh->gh_owner == current) { | ||
| 45 | locked = 1; | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | } | ||
| 49 | spin_unlock(&gl->gl_spin); | ||
| 50 | |||
| 51 | return locked; | ||
| 52 | } | ||
| 53 | |||
/* True if the glock is currently in the exclusive state */
static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
{
	return gl->gl_state == LM_ST_EXCLUSIVE;
}
| 58 | |||
/* True if the glock is currently in the deferred state */
static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
{
	return gl->gl_state == LM_ST_DEFERRED;
}
| 63 | |||
/* True if the glock is currently in the shared state */
static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
{
	return gl->gl_state == LM_ST_SHARED;
}
| 68 | |||
/* True if either of the glock's waiters2/waiters3 lists is non-empty,
 * sampled under gl_spin */
static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
{
	int ret;
	spin_lock(&gl->gl_spin);
	ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
	spin_unlock(&gl->gl_spin);
	return ret;
}
| 77 | |||
| 78 | int gfs2_glock_get(struct gfs2_sbd *sdp, | ||
| 79 | u64 number, const struct gfs2_glock_operations *glops, | ||
| 80 | int create, struct gfs2_glock **glp); | ||
| 81 | void gfs2_glock_hold(struct gfs2_glock *gl); | ||
| 82 | int gfs2_glock_put(struct gfs2_glock *gl); | ||
| 83 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | ||
| 84 | struct gfs2_holder *gh); | ||
| 85 | void gfs2_holder_reinit(unsigned int state, unsigned flags, | ||
| 86 | struct gfs2_holder *gh); | ||
| 87 | void gfs2_holder_uninit(struct gfs2_holder *gh); | ||
| 88 | |||
| 89 | void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags); | ||
| 90 | void gfs2_glock_drop_th(struct gfs2_glock *gl); | ||
| 91 | |||
| 92 | int gfs2_glock_nq(struct gfs2_holder *gh); | ||
| 93 | int gfs2_glock_poll(struct gfs2_holder *gh); | ||
| 94 | int gfs2_glock_wait(struct gfs2_holder *gh); | ||
| 95 | void gfs2_glock_dq(struct gfs2_holder *gh); | ||
| 96 | |||
| 97 | int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time); | ||
| 98 | |||
| 99 | void gfs2_glock_dq_uninit(struct gfs2_holder *gh); | ||
| 100 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | ||
| 101 | u64 number, const struct gfs2_glock_operations *glops, | ||
| 102 | unsigned int state, int flags, struct gfs2_holder *gh); | ||
| 103 | |||
| 104 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | ||
| 105 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | ||
| 106 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | ||
| 107 | |||
| 108 | void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, | ||
| 109 | const struct gfs2_glock_operations *glops, | ||
| 110 | unsigned int state, int flags); | ||
| 111 | void gfs2_glock_inode_squish(struct inode *inode); | ||
| 112 | |||
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * On failure the holder is uninitialized again, so the caller owns an
 * initialized @gh only when 0 is returned.
 *
 * Returns: 0, GLR_*, or errno
 */

static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
				     unsigned int state, int flags,
				     struct gfs2_holder *gh)
{
	int ret;

	gfs2_holder_init(gl, state, flags, gh);
	ret = gfs2_glock_nq(gh);
	if (ret)
		gfs2_holder_uninit(gh);
	return ret;
}
| 137 | |||
| 138 | /* Lock Value Block functions */ | ||
| 139 | |||
| 140 | int gfs2_lvb_hold(struct gfs2_glock *gl); | ||
| 141 | void gfs2_lvb_unhold(struct gfs2_glock *gl); | ||
| 142 | |||
| 143 | void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); | ||
| 144 | |||
| 145 | void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); | ||
| 146 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp); | ||
| 147 | |||
| 148 | void gfs2_scand_internal(struct gfs2_sbd *sdp); | ||
| 149 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); | ||
| 150 | |||
| 151 | int __init gfs2_glock_init(void); | ||
| 152 | |||
| 153 | #endif /* __GLOCK_DOT_H__ */ | ||
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c new file mode 100644 index 000000000000..41a6b6818a50 --- /dev/null +++ b/fs/gfs2/glops.c | |||
| @@ -0,0 +1,615 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/lm_interface.h> | ||
| 17 | |||
| 18 | #include "gfs2.h" | ||
| 19 | #include "incore.h" | ||
| 20 | #include "bmap.h" | ||
| 21 | #include "glock.h" | ||
| 22 | #include "glops.h" | ||
| 23 | #include "inode.h" | ||
| 24 | #include "log.h" | ||
| 25 | #include "meta_io.h" | ||
| 26 | #include "recovery.h" | ||
| 27 | #include "rgrp.h" | ||
| 28 | #include "util.h" | ||
| 29 | #include "trans.h" | ||
| 30 | |||
/**
 * gfs2_ail_empty_gl - remove all buffers for a given lock from the AIL
 * @gl: the glock
 *
 * None of the buffers should be dirty, locked, or pinned.
 */

static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	unsigned int blocks;
	struct list_head *head = &gl->gl_ail_list;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;
	u64 blkno;
	int error;

	blocks = atomic_read(&gl->gl_ail_count);
	if (!blocks)
		return;

	/* One transaction big enough to revoke every AIL block */
	error = gfs2_trans_begin(sdp, 0, blocks);
	if (gfs2_assert_withdraw(sdp, !error))
		return;

	gfs2_log_lock(sdp);
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata,
				bd_ail_gl_list);
		bh = bd->bd_bh;
		/* Remember the block number before dropping the buffer */
		blkno = bh->b_blocknr;
		gfs2_assert_withdraw(sdp, !buffer_busy(bh));

		bd->bd_ail = NULL;
		list_del(&bd->bd_ail_st_list);
		list_del(&bd->bd_ail_gl_list);
		atomic_dec(&gl->gl_ail_count);
		brelse(bh);
		/* The revoke is added without the log lock held; it is
		   dropped and retaken around each iteration */
		gfs2_log_unlock(sdp);

		gfs2_trans_add_revoke(sdp, blkno);

		gfs2_log_lock(sdp);
	}
	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
	gfs2_log_unlock(sdp);

	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp, NULL);
}
| 81 | |||
| 82 | /** | ||
| 83 | * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock | ||
| 84 | * @gl: the glock | ||
| 85 | * | ||
| 86 | */ | ||
| 87 | |||
| 88 | static void gfs2_pte_inval(struct gfs2_glock *gl) | ||
| 89 | { | ||
| 90 | struct gfs2_inode *ip; | ||
| 91 | struct inode *inode; | ||
| 92 | |||
| 93 | ip = gl->gl_object; | ||
| 94 | inode = &ip->i_inode; | ||
| 95 | if (!ip || !S_ISREG(ip->i_di.di_mode)) | ||
| 96 | return; | ||
| 97 | |||
| 98 | if (!test_bit(GIF_PAGED, &ip->i_flags)) | ||
| 99 | return; | ||
| 100 | |||
| 101 | unmap_shared_mapping_range(inode->i_mapping, 0, 0); | ||
| 102 | |||
| 103 | if (test_bit(GIF_SW_PAGED, &ip->i_flags)) | ||
| 104 | set_bit(GLF_DIRTY, &gl->gl_flags); | ||
| 105 | |||
| 106 | clear_bit(GIF_SW_PAGED, &ip->i_flags); | ||
| 107 | } | ||
| 108 | |||
| 109 | /** | ||
| 110 | * gfs2_page_inval - Invalidate all pages associated with a glock | ||
| 111 | * @gl: the glock | ||
| 112 | * | ||
| 113 | */ | ||
| 114 | |||
| 115 | static void gfs2_page_inval(struct gfs2_glock *gl) | ||
| 116 | { | ||
| 117 | struct gfs2_inode *ip; | ||
| 118 | struct inode *inode; | ||
| 119 | |||
| 120 | ip = gl->gl_object; | ||
| 121 | inode = &ip->i_inode; | ||
| 122 | if (!ip || !S_ISREG(ip->i_di.di_mode)) | ||
| 123 | return; | ||
| 124 | |||
| 125 | truncate_inode_pages(inode->i_mapping, 0); | ||
| 126 | gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages); | ||
| 127 | clear_bit(GIF_PAGED, &ip->i_flags); | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 131 | * gfs2_page_wait - Wait for writeback of data | ||
| 132 | * @gl: the glock | ||
| 133 | * | ||
| 134 | * Syncs data (not metadata) for a regular file. | ||
| 135 | * No-op for all other types. | ||
| 136 | */ | ||
| 137 | |||
| 138 | static void gfs2_page_wait(struct gfs2_glock *gl) | ||
| 139 | { | ||
| 140 | struct gfs2_inode *ip = gl->gl_object; | ||
| 141 | struct inode *inode = &ip->i_inode; | ||
| 142 | struct address_space *mapping = inode->i_mapping; | ||
| 143 | int error; | ||
| 144 | |||
| 145 | if (!S_ISREG(ip->i_di.di_mode)) | ||
| 146 | return; | ||
| 147 | |||
| 148 | error = filemap_fdatawait(mapping); | ||
| 149 | |||
| 150 | /* Put back any errors cleared by filemap_fdatawait() | ||
| 151 | so they can be caught by someone who can pass them | ||
| 152 | up to user space. */ | ||
| 153 | |||
| 154 | if (error == -ENOSPC) | ||
| 155 | set_bit(AS_ENOSPC, &mapping->flags); | ||
| 156 | else if (error) | ||
| 157 | set_bit(AS_EIO, &mapping->flags); | ||
| 158 | |||
| 159 | } | ||
| 160 | |||
| 161 | static void gfs2_page_writeback(struct gfs2_glock *gl) | ||
| 162 | { | ||
| 163 | struct gfs2_inode *ip = gl->gl_object; | ||
| 164 | struct inode *inode = &ip->i_inode; | ||
| 165 | struct address_space *mapping = inode->i_mapping; | ||
| 166 | |||
| 167 | if (!S_ISREG(ip->i_di.di_mode)) | ||
| 168 | return; | ||
| 169 | |||
| 170 | filemap_fdatawrite(mapping); | ||
| 171 | } | ||
| 172 | |||
| 173 | /** | ||
| 174 | * meta_go_sync - sync out the metadata for this glock | ||
| 175 | * @gl: the glock | ||
| 176 | * @flags: DIO_* | ||
| 177 | * | ||
| 178 | * Called when demoting or unlocking an EX glock. We must flush | ||
| 179 | * to disk all dirty buffers/pages relating to this glock, and must not | ||
| 180 | * not return to caller to demote/unlock the glock until I/O is complete. | ||
| 181 | */ | ||
| 182 | |||
| 183 | static void meta_go_sync(struct gfs2_glock *gl, int flags) | ||
| 184 | { | ||
| 185 | if (!(flags & DIO_METADATA)) | ||
| 186 | return; | ||
| 187 | |||
| 188 | if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { | ||
| 189 | gfs2_log_flush(gl->gl_sbd, gl); | ||
| 190 | gfs2_meta_sync(gl); | ||
| 191 | if (flags & DIO_RELEASE) | ||
| 192 | gfs2_ail_empty_gl(gl); | ||
| 193 | } | ||
| 194 | |||
| 195 | } | ||
| 196 | |||
| 197 | /** | ||
| 198 | * meta_go_inval - invalidate the metadata for this glock | ||
| 199 | * @gl: the glock | ||
| 200 | * @flags: | ||
| 201 | * | ||
| 202 | */ | ||
| 203 | |||
| 204 | static void meta_go_inval(struct gfs2_glock *gl, int flags) | ||
| 205 | { | ||
| 206 | if (!(flags & DIO_METADATA)) | ||
| 207 | return; | ||
| 208 | |||
| 209 | gfs2_meta_inval(gl); | ||
| 210 | gl->gl_vn++; | ||
| 211 | } | ||
| 212 | |||
/**
 * inode_go_xmote_th - promote/demote a glock
 * @gl: the glock
 * @state: the requested state
 * @flags: modifier flags passed through to gfs2_glock_xmote_th()
 *
 * Tears down user PTEs first (unless the lock is currently unlocked,
 * in which case there can be nothing mapped), then performs the
 * default state transition.
 */

static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
			      int flags)
{
	if (gl->gl_state != LM_ST_UNLOCKED)
		gfs2_pte_inval(gl);
	gfs2_glock_xmote_th(gl, state, flags);
}
| 228 | |||
| 229 | /** | ||
| 230 | * inode_go_xmote_bh - After promoting/demoting a glock | ||
| 231 | * @gl: the glock | ||
| 232 | * | ||
| 233 | */ | ||
| 234 | |||
| 235 | static void inode_go_xmote_bh(struct gfs2_glock *gl) | ||
| 236 | { | ||
| 237 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 238 | struct buffer_head *bh; | ||
| 239 | int error; | ||
| 240 | |||
| 241 | if (gl->gl_state != LM_ST_UNLOCKED && | ||
| 242 | (!gh || !(gh->gh_flags & GL_SKIP))) { | ||
| 243 | error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh); | ||
| 244 | if (!error) | ||
| 245 | brelse(bh); | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
 * is being purged from our node's glock cache; we're dropping lock.
 *
 * PTEs must be invalidated before the underlying lock is dropped.
 */

static void inode_go_drop_th(struct gfs2_glock *gl)
{
	gfs2_pte_inval(gl);
	gfs2_glock_drop_th(gl);
}
| 263 | |||
| 264 | /** | ||
| 265 | * inode_go_sync - Sync the dirty data and/or metadata for an inode glock | ||
| 266 | * @gl: the glock protecting the inode | ||
| 267 | * @flags: | ||
| 268 | * | ||
| 269 | */ | ||
| 270 | |||
| 271 | static void inode_go_sync(struct gfs2_glock *gl, int flags) | ||
| 272 | { | ||
| 273 | int meta = (flags & DIO_METADATA); | ||
| 274 | int data = (flags & DIO_DATA); | ||
| 275 | |||
| 276 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | ||
| 277 | if (meta && data) { | ||
| 278 | gfs2_page_writeback(gl); | ||
| 279 | gfs2_log_flush(gl->gl_sbd, gl); | ||
| 280 | gfs2_meta_sync(gl); | ||
| 281 | gfs2_page_wait(gl); | ||
| 282 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
| 283 | } else if (meta) { | ||
| 284 | gfs2_log_flush(gl->gl_sbd, gl); | ||
| 285 | gfs2_meta_sync(gl); | ||
| 286 | } else if (data) { | ||
| 287 | gfs2_page_writeback(gl); | ||
| 288 | gfs2_page_wait(gl); | ||
| 289 | } | ||
| 290 | if (flags & DIO_RELEASE) | ||
| 291 | gfs2_ail_empty_gl(gl); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | /** | ||
| 296 | * inode_go_inval - prepare a inode glock to be released | ||
| 297 | * @gl: the glock | ||
| 298 | * @flags: | ||
| 299 | * | ||
| 300 | */ | ||
| 301 | |||
| 302 | static void inode_go_inval(struct gfs2_glock *gl, int flags) | ||
| 303 | { | ||
| 304 | int meta = (flags & DIO_METADATA); | ||
| 305 | int data = (flags & DIO_DATA); | ||
| 306 | |||
| 307 | if (meta) { | ||
| 308 | gfs2_meta_inval(gl); | ||
| 309 | gl->gl_vn++; | ||
| 310 | } | ||
| 311 | if (data) | ||
| 312 | gfs2_page_inval(gl); | ||
| 313 | } | ||
| 314 | |||
| 315 | /** | ||
| 316 | * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock | ||
| 317 | * @gl: the glock | ||
| 318 | * | ||
| 319 | * Returns: 1 if it's ok | ||
| 320 | */ | ||
| 321 | |||
| 322 | static int inode_go_demote_ok(struct gfs2_glock *gl) | ||
| 323 | { | ||
| 324 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 325 | int demote = 0; | ||
| 326 | |||
| 327 | if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages) | ||
| 328 | demote = 1; | ||
| 329 | else if (!sdp->sd_args.ar_localcaching && | ||
| 330 | time_after_eq(jiffies, gl->gl_stamp + | ||
| 331 | gfs2_tune_get(sdp, gt_demote_secs) * HZ)) | ||
| 332 | demote = 1; | ||
| 333 | |||
| 334 | return demote; | ||
| 335 | } | ||
| 336 | |||
| 337 | /** | ||
| 338 | * inode_go_lock - operation done after an inode lock is locked by a process | ||
| 339 | * @gl: the glock | ||
| 340 | * @flags: | ||
| 341 | * | ||
| 342 | * Returns: errno | ||
| 343 | */ | ||
| 344 | |||
| 345 | static int inode_go_lock(struct gfs2_holder *gh) | ||
| 346 | { | ||
| 347 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 348 | struct gfs2_inode *ip = gl->gl_object; | ||
| 349 | int error = 0; | ||
| 350 | |||
| 351 | if (!ip) | ||
| 352 | return 0; | ||
| 353 | |||
| 354 | if (ip->i_vn != gl->gl_vn) { | ||
| 355 | error = gfs2_inode_refresh(ip); | ||
| 356 | if (error) | ||
| 357 | return error; | ||
| 358 | gfs2_inode_attr_in(ip); | ||
| 359 | } | ||
| 360 | |||
| 361 | if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && | ||
| 362 | (gl->gl_state == LM_ST_EXCLUSIVE) && | ||
| 363 | (gh->gh_flags & GL_LOCAL_EXCL)) | ||
| 364 | error = gfs2_truncatei_resume(ip); | ||
| 365 | |||
| 366 | return error; | ||
| 367 | } | ||
| 368 | |||
| 369 | /** | ||
| 370 | * inode_go_unlock - operation done before an inode lock is unlocked by a | ||
| 371 | * process | ||
| 372 | * @gl: the glock | ||
| 373 | * @flags: | ||
| 374 | * | ||
| 375 | */ | ||
| 376 | |||
| 377 | static void inode_go_unlock(struct gfs2_holder *gh) | ||
| 378 | { | ||
| 379 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 380 | struct gfs2_inode *ip = gl->gl_object; | ||
| 381 | |||
| 382 | if (ip == NULL) | ||
| 383 | return; | ||
| 384 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) | ||
| 385 | gfs2_inode_attr_in(ip); | ||
| 386 | gfs2_meta_cache_flush(ip); | ||
| 387 | } | ||
| 388 | |||
/**
 * inode_greedy - adapt the inode's "greedy" hold interval
 * @gl: the glock
 *
 * NOTE(review): appears to be driven by page-fault timing (i_last_pfault):
 * if the last fault was within one quantum, the interval grows toward
 * gt_greedy_max; otherwise it shrinks toward 1 — confirm against caller.
 * Drops the inode reference the caller took.
 */

static void inode_greedy(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
	unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
	unsigned int new_time;

	spin_lock(&ip->i_spin);

	if (time_after(ip->i_last_pfault + quantum, jiffies)) {
		/* Recent fault: lengthen, capped at max. */
		new_time = ip->i_greedy + quantum;
		if (new_time > max)
			new_time = max;
	} else {
		/* No recent fault: shorten.  The subtraction is unsigned,
		   so i_greedy < quantum wraps to a huge value; the
		   "> max" test catches that wrap and clamps to 1. */
		new_time = ip->i_greedy - quantum;
		if (!new_time || new_time > max)
			new_time = 1;
	}

	ip->i_greedy = new_time;

	spin_unlock(&ip->i_spin);

	iput(&ip->i_inode);
}
| 421 | |||
| 422 | /** | ||
| 423 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | ||
| 424 | * @gl: the glock | ||
| 425 | * | ||
| 426 | * Returns: 1 if it's ok | ||
| 427 | */ | ||
| 428 | |||
| 429 | static int rgrp_go_demote_ok(struct gfs2_glock *gl) | ||
| 430 | { | ||
| 431 | return !gl->gl_aspace->i_mapping->nrpages; | ||
| 432 | } | ||
| 433 | |||
/**
 * rgrp_go_lock - operation done after an rgrp lock is locked by
 * a first holder on this node.
 * @gh: the holder that just acquired the lock
 *
 * Reads the resource group's bitmap buffers into memory.
 *
 * Returns: errno
 */

static int rgrp_go_lock(struct gfs2_holder *gh)
{
	return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
}
| 447 | |||
/**
 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
 * a last holder on this node.
 * @gh: the holder being released
 *
 * Releases the bitmap buffers acquired in rgrp_go_lock().
 */

static void rgrp_go_unlock(struct gfs2_holder *gh)
{
	gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
}
| 460 | |||
| 461 | /** | ||
| 462 | * trans_go_xmote_th - promote/demote the transaction glock | ||
| 463 | * @gl: the glock | ||
| 464 | * @state: the requested state | ||
| 465 | * @flags: | ||
| 466 | * | ||
| 467 | */ | ||
| 468 | |||
| 469 | static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | ||
| 470 | int flags) | ||
| 471 | { | ||
| 472 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 473 | |||
| 474 | if (gl->gl_state != LM_ST_UNLOCKED && | ||
| 475 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
| 476 | gfs2_meta_syncfs(sdp); | ||
| 477 | gfs2_log_shutdown(sdp); | ||
| 478 | } | ||
| 479 | |||
| 480 | gfs2_glock_xmote_th(gl, state, flags); | ||
| 481 | } | ||
| 482 | |||
/**
 * trans_go_xmote_bh - After promoting/demoting the transaction glock
 * @gl: the glock
 *
 * After (re)acquiring the transaction lock with a live journal, the
 * journal's cached metadata is invalidated and the log head located.
 * The head must carry the UNMOUNT flag (the log was cleanly shut down
 * in trans_go_xmote_th()); otherwise the fs is inconsistent.
 */

static void trans_go_xmote_bh(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header head;
	int error;

	if (gl->gl_state != LM_ST_UNLOCKED &&
	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);

		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
		if (error)
			gfs2_consist(sdp);
		if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
			gfs2_consist(sdp);

		/* Initialize some head of the log stuff */
		if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
			sdp->sd_log_sequence = head.lh_sequence + 1;
			gfs2_log_pointers_init(sdp, head.lh_blkno);
		}
	}
}
| 515 | |||
| 516 | /** | ||
| 517 | * trans_go_drop_th - unlock the transaction glock | ||
| 518 | * @gl: the glock | ||
| 519 | * | ||
| 520 | * We want to sync the device even with localcaching. Remember | ||
| 521 | * that localcaching journal replay only marks buffers dirty. | ||
| 522 | */ | ||
| 523 | |||
| 524 | static void trans_go_drop_th(struct gfs2_glock *gl) | ||
| 525 | { | ||
| 526 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 527 | |||
| 528 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
| 529 | gfs2_meta_syncfs(sdp); | ||
| 530 | gfs2_log_shutdown(sdp); | ||
| 531 | } | ||
| 532 | |||
| 533 | gfs2_glock_drop_th(gl); | ||
| 534 | } | ||
| 535 | |||
| 536 | /** | ||
| 537 | * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock | ||
| 538 | * @gl: the glock | ||
| 539 | * | ||
| 540 | * Returns: 1 if it's ok | ||
| 541 | */ | ||
| 542 | |||
| 543 | static int quota_go_demote_ok(struct gfs2_glock *gl) | ||
| 544 | { | ||
| 545 | return !atomic_read(&gl->gl_lvb_count); | ||
| 546 | } | ||
| 547 | |||
/* Plain metadata glocks: default transitions only, no extra hooks. */
const struct gfs2_glock_operations gfs2_meta_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_META,
};

/* Inode glocks: PTE/page invalidation, data+metadata sync, and inode
   refresh on lock acquisition. */
const struct gfs2_glock_operations gfs2_inode_glops = {
	.go_xmote_th = inode_go_xmote_th,
	.go_xmote_bh = inode_go_xmote_bh,
	.go_drop_th = inode_go_drop_th,
	.go_sync = inode_go_sync,
	.go_inval = inode_go_inval,
	.go_demote_ok = inode_go_demote_ok,
	.go_lock = inode_go_lock,
	.go_unlock = inode_go_unlock,
	.go_greedy = inode_greedy,
	.go_type = LM_TYPE_INODE,
};

/* Resource-group glocks: metadata sync/inval plus bitmap buffer
   management on lock/unlock. */
const struct gfs2_glock_operations gfs2_rgrp_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_sync = meta_go_sync,
	.go_inval = meta_go_inval,
	.go_demote_ok = rgrp_go_demote_ok,
	.go_lock = rgrp_go_lock,
	.go_unlock = rgrp_go_unlock,
	.go_type = LM_TYPE_RGRP,
};

/* Transaction glock: journal shutdown/recovery around transitions.
   NOTE(review): go_type is LM_TYPE_NONDISK, same as gfs2_nondisk_glops. */
const struct gfs2_glock_operations gfs2_trans_glops = {
	.go_xmote_th = trans_go_xmote_th,
	.go_xmote_bh = trans_go_xmote_bh,
	.go_drop_th = trans_go_drop_th,
	.go_type = LM_TYPE_NONDISK,
};

/* iopen glocks: default transitions only. */
const struct gfs2_glock_operations gfs2_iopen_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_IOPEN,
};

/* flock glocks: default transitions only. */
const struct gfs2_glock_operations gfs2_flock_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_FLOCK,
};

/* Non-disk glocks: default transitions only. */
const struct gfs2_glock_operations gfs2_nondisk_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_NONDISK,
};

/* Quota glocks: demotable only once all LVB references are gone. */
const struct gfs2_glock_operations gfs2_quota_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_demote_ok = quota_go_demote_ok,
	.go_type = LM_TYPE_QUOTA,
};

/* Journal glocks: default transitions only. */
const struct gfs2_glock_operations gfs2_journal_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_JOURNAL,
};
| 615 | |||
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h new file mode 100644 index 000000000000..a1d9b5b024e6 --- /dev/null +++ b/fs/gfs2/glops.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __GLOPS_DOT_H__
#define __GLOPS_DOT_H__

#include "incore.h"

/* Glock operation vectors, one per lock type; defined in glops.c. */
extern const struct gfs2_glock_operations gfs2_meta_glops;
extern const struct gfs2_glock_operations gfs2_inode_glops;
extern const struct gfs2_glock_operations gfs2_rgrp_glops;
extern const struct gfs2_glock_operations gfs2_trans_glops;
extern const struct gfs2_glock_operations gfs2_iopen_glops;
extern const struct gfs2_glock_operations gfs2_flock_glops;
extern const struct gfs2_glock_operations gfs2_nondisk_glops;
extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;

#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h new file mode 100644 index 000000000000..118dc693d111 --- /dev/null +++ b/fs/gfs2/incore.h | |||
| @@ -0,0 +1,634 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __INCORE_DOT_H__
#define __INCORE_DOT_H__

#include <linux/fs.h>

/* DIO_* flags passed to the go_sync/go_inval glock operations to select
   what gets synced/invalidated. */
#define DIO_WAIT	0x00000010
#define DIO_METADATA	0x00000020
#define DIO_DATA	0x00000040
#define DIO_RELEASE	0x00000080
#define DIO_ALL		0x00000100

/* Forward declarations so the structs below can refer to each other
   by pointer. */
struct gfs2_log_operations;
struct gfs2_log_element;
struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
struct gfs2_trans;
struct gfs2_ail;
struct gfs2_jdesc;
struct gfs2_sbd;
/* Completion callback stored in gfs2_glock.gl_req_bh.
   NOTE(review): ret appears to be the lock module's status — confirm. */
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);

/*
 * Structure of operations that are associated with each
 * type of element in the log.
 */

struct gfs2_log_operations {
	void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
	void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
	void (*lo_before_commit) (struct gfs2_sbd *sdp);
	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
	void (*lo_before_scan) (struct gfs2_jdesc *jd,
				struct gfs2_log_header *head, int pass);
	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
				 struct gfs2_log_descriptor *ld, __be64 *ptr,
				 int pass);
	void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
	const char *lo_name;
};

/* Links an object onto a log list together with its element operations. */
struct gfs2_log_element {
	struct list_head le_list;
	const struct gfs2_log_operations *le_ops;
};
| 56 | |||
/* One window onto a resource group's block-allocation bitmap. */
struct gfs2_bitmap {
	struct buffer_head *bi_bh;
	char *bi_clone;
	u32 bi_offset;
	u32 bi_start;
	u32 bi_len;
};

/* In-core resource group descriptor. */
struct gfs2_rgrpd {
	struct list_head rd_list;	/* Link with superblock */
	struct list_head rd_list_mru;
	struct list_head rd_recent;	/* Recently used rgrps */
	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
	struct gfs2_rindex rd_ri;
	struct gfs2_rgrp rd_rg;
	u64 rd_rg_vn;
	struct gfs2_bitmap *rd_bits;
	unsigned int rd_bh_count;
	struct mutex rd_mutex;
	u32 rd_free_clone;
	struct gfs2_log_element rd_le;
	u32 rd_last_alloc_data;
	u32 rd_last_alloc_meta;
	struct gfs2_sbd *rd_sbd;
};

/* Extra buffer_head state bits used by GFS2, taken from the private
   range so they don't clash with core bits. */
enum gfs2_state_bits {
	BH_Pinned = BH_PrivateStart,
	BH_Escaped = BH_PrivateStart + 1,
};

BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)

/* Per-buffer journaling state: ties a buffer_head to its glock, its
   transaction list and the AIL lists. */
struct gfs2_bufdata {
	struct buffer_head *bd_bh;
	struct gfs2_glock *bd_gl;

	struct list_head bd_list_tr;
	struct gfs2_log_element bd_le;

	struct gfs2_ail *bd_ail;
	struct list_head bd_ail_st_list;
	struct list_head bd_ail_gl_list;
};
| 104 | |||
/* Per-lock-type operation vector invoked around glock state transitions
   (see glops.c for the concrete tables). */
struct gfs2_glock_operations {
	void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
			     int flags);
	void (*go_xmote_bh) (struct gfs2_glock * gl);
	void (*go_drop_th) (struct gfs2_glock * gl);
	void (*go_drop_bh) (struct gfs2_glock * gl);
	void (*go_sync) (struct gfs2_glock * gl, int flags);
	void (*go_inval) (struct gfs2_glock * gl, int flags);
	int (*go_demote_ok) (struct gfs2_glock * gl);
	int (*go_lock) (struct gfs2_holder * gh);
	void (*go_unlock) (struct gfs2_holder * gh);
	void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
	void (*go_greedy) (struct gfs2_glock * gl);
	const int go_type;
};
| 120 | |||
/* Bits stored in gfs2_holder.gh_iflags. */
enum {
	/* Actions */
	HIF_MUTEX = 0,
	HIF_PROMOTE = 1,
	HIF_DEMOTE = 2,
	HIF_GREEDY = 3,

	/* States */
	HIF_ALLOCED = 4,
	HIF_DEALLOC = 5,
	HIF_HOLDER = 6,
	HIF_FIRST = 7,
	HIF_ABORTED = 9,
};

/* A request to hold (or the act of holding) a glock in a given state. */
struct gfs2_holder {
	struct list_head gh_list;

	struct gfs2_glock *gh_gl;
	struct task_struct *gh_owner;
	unsigned int gh_state;
	unsigned gh_flags;

	int gh_error;
	unsigned long gh_iflags;
	struct completion gh_wait;
	unsigned long gh_ip;
};

/* Bits stored in gfs2_glock.gl_flags. */
enum {
	GLF_LOCK = 1,
	GLF_STICKY = 2,
	GLF_PREFETCH = 3,
	GLF_DIRTY = 5,
	GLF_SKIP_WAITERS2 = 6,
	GLF_GREEDY = 7,
};
| 158 | |||
/* The core cluster lock object: current state, waiter queues, and the
   metadata/AIL caches attached to the lock. */
struct gfs2_glock {
	struct hlist_node gl_list;
	unsigned long gl_flags;		/* GLF_... */
	struct lm_lockname gl_name;
	atomic_t gl_ref;

	spinlock_t gl_spin;

	unsigned int gl_state;
	unsigned int gl_hash;
	struct task_struct *gl_owner;
	unsigned long gl_ip;
	struct list_head gl_holders;
	struct list_head gl_waiters1;	/* HIF_MUTEX */
	struct list_head gl_waiters2;	/* HIF_DEMOTE, HIF_GREEDY */
	struct list_head gl_waiters3;	/* HIF_PROMOTE */

	const struct gfs2_glock_operations *gl_ops;

	/* Pending state-change request and its completion callback. */
	struct gfs2_holder *gl_req_gh;
	gfs2_glop_bh_t gl_req_bh;

	void *gl_lock;
	char *gl_lvb;
	atomic_t gl_lvb_count;

	u64 gl_vn;
	unsigned long gl_stamp;
	void *gl_object;

	struct list_head gl_reclaim;

	struct gfs2_sbd *gl_sbd;

	struct inode *gl_aspace;
	struct gfs2_log_element gl_le;
	struct list_head gl_ail_list;
	atomic_t gl_ail_count;
};
| 198 | |||
/* Per-inode allocation context: quota handles plus the in-place
   reservation state filled in by gfs2_inplace_reserve(). */
struct gfs2_alloc {
	/* Quota stuff */

	struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
	struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
	unsigned int al_qd_num;

	u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
	u32 al_alloced; /* Filled in by gfs2_alloc_*() */

	/* Filled in by gfs2_inplace_reserve() */

	unsigned int al_line;
	char *al_file;
	struct gfs2_holder al_ri_gh;
	struct gfs2_holder al_rgd_gh;
	struct gfs2_rgrpd *al_rgd;

};

/* Bits stored in gfs2_inode.i_flags. */
enum {
	GIF_QD_LOCKED = 1,
	GIF_PAGED = 2,
	GIF_SW_PAGED = 3,
};
| 224 | |||
/* GFS2 in-core inode.  i_inode is the FIRST member, so a struct inode
   pointer can be converted back with GFS2_I() below. */
struct gfs2_inode {
	struct inode i_inode;
	struct gfs2_inum i_num;

	unsigned long i_flags;		/* GIF_... */

	u64 i_vn;
	struct gfs2_dinode i_di;	/* To be replaced by ref to block */

	struct gfs2_glock *i_gl;	/* Move into i_gh? */
	struct gfs2_holder i_iopen_gh;
	struct gfs2_holder i_gh;	/* for prepare/commit_write only */
	struct gfs2_alloc i_alloc;
	u64 i_last_rg_alloc;

	spinlock_t i_spin;
	struct rw_semaphore i_rw_mutex;
	unsigned int i_greedy;
	unsigned long i_last_pfault;

	struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
};

/*
 * Since i_inode is the first element of struct gfs2_inode,
 * this is effectively a cast.
 */
static inline struct gfs2_inode *GFS2_I(struct inode *inode)
{
	return container_of(inode, struct gfs2_inode, i_inode);
}

/* To be removed? */
static inline struct gfs2_sbd *GFS2_SB(struct inode *inode)
{
	return inode->i_sb->s_fs_info;
}
| 262 | |||
/* Bits stored in gfs2_file.f_flags. */
enum {
	GFF_DID_DIRECT_ALLOC = 0,
	GFF_EXLOCK = 1,
};

/* Per-open-file state. */
struct gfs2_file {
	unsigned long f_flags;		/* GFF_... */
	struct mutex f_fl_mutex;
	struct gfs2_holder f_fl_gh;
};

/* A revoke queued for the log, identifying block rv_blkno. */
struct gfs2_revoke {
	struct gfs2_log_element rv_le;
	u64 rv_blkno;
};

/* A revoke encountered during journal replay, with where it was found. */
struct gfs2_revoke_replay {
	struct list_head rr_list;
	u64 rr_blkno;
	unsigned int rr_where;
};

/* Bits stored in gfs2_quota_data.qd_flags. */
enum {
	QDF_USER = 0,
	QDF_CHANGE = 1,
	QDF_LOCKED = 2,
};

/* Quota values carried in the glock's LVB (big-endian fields). */
struct gfs2_quota_lvb {
	__be32 qb_magic;
	u32 __pad;
	__be64 qb_limit;	/* Hard limit of # blocks to alloc */
	__be64 qb_warn;		/* Warn user when alloc is above this # */
	__be64 qb_value;	/* Current # blocks allocated */
};

/* In-core quota state for one id (user or group, per QDF_USER). */
struct gfs2_quota_data {
	struct list_head qd_list;
	unsigned int qd_count;

	u32 qd_id;
	unsigned long qd_flags;		/* QDF_... */

	s64 qd_change;
	s64 qd_change_sync;

	unsigned int qd_slot;
	unsigned int qd_slot_count;

	struct buffer_head *qd_bh;
	struct gfs2_quota_change *qd_bh_qc;
	unsigned int qd_bh_count;

	struct gfs2_glock *qd_gl;
	struct gfs2_quota_lvb qd_qb;

	u64 qd_sync_gen;
	unsigned long qd_last_warn;
	unsigned long qd_last_touched;
};

/* A buffer queued for the log.
   NOTE(review): lb_real looks like the "real" buffer an escaped/shadow
   log buffer stands in for — confirm in log.c. */
struct gfs2_log_buf {
	struct list_head lb_list;
	struct buffer_head *lb_bh;
	struct buffer_head *lb_real;
};
| 329 | |||
/* An in-progress filesystem transaction (see gfs2_trans_begin/end) */
struct gfs2_trans {
	unsigned long tr_ip;		/* caller's return address, for debug */

	unsigned int tr_blocks;		/* log blocks reserved for buffers */
	unsigned int tr_revokes;	/* revokes reserved */
	unsigned int tr_reserved;	/* total log blocks reserved */

	struct gfs2_holder tr_t_gh;	/* holder on the transaction glock */

	int tr_touched;			/* nonzero once anything was modified */

	/* counts of buffers added/removed during the transaction */
	unsigned int tr_num_buf;
	unsigned int tr_num_buf_new;
	unsigned int tr_num_buf_rm;
	struct list_head tr_list_buf;

	unsigned int tr_num_revoke;
	unsigned int tr_num_revoke_rm;
};
| 349 | |||
/* One active-items-list entry: buffers belonging to a log flush that
   still need writing back to their in-place locations. */
struct gfs2_ail {
	struct list_head ai_list;

	unsigned int ai_first;		/* first log block of this flush */
	struct list_head ai_ail1_list;	/* buffers not yet written in place */
	struct list_head ai_ail2_list;	/* buffers written, awaiting removal */

	u64 ai_sync_gen;
};
| 359 | |||
/* Descriptor for one journal in the jindex */
struct gfs2_jdesc {
	struct list_head jd_list;	/* on sd_jindex_list */

	struct inode *jd_inode;		/* the journal file itself */
	unsigned int jd_jid;		/* journal id */
	int jd_dirty;			/* needs recovery */

	unsigned int jd_blocks;		/* size of the journal in fs blocks */
};
| 369 | |||
/* Number of glockd daemon threads: default and upper bound */
#define GFS2_GLOCKD_DEFAULT	1
#define GFS2_GLOCKD_MAX		16

/* Values for the "quota" mount option (gfs2_args.ar_quota) */
#define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
#define GFS2_QUOTA_OFF		0
#define GFS2_QUOTA_ACCOUNT	1
#define GFS2_QUOTA_ON		2

/* Values for the "data" mount option (gfs2_args.ar_data) */
#define GFS2_DATA_DEFAULT	GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK	1
#define GFS2_DATA_ORDERED	2
/* Parsed mount arguments, filled in at mount time */
struct gfs2_args {
	char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
	char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
	char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
	int ar_spectator; /* Don't get a journal because we're always RO */
	int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
	int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
	int ar_localcaching; /* Local-style caching (dangerous on multihost) */
	int ar_debug; /* Oops on errors instead of trying to be graceful */
	int ar_upgrade; /* Upgrade ondisk/multihost format */
	unsigned int ar_num_glockd; /* Number of glockd threads */
	int ar_posix_acl; /* Enable posix acls */
	int ar_quota; /* off/account/on (GFS2_QUOTA_...) */
	int ar_suiddir; /* suiddir support */
	int ar_data; /* ordered/writeback (GFS2_DATA_...) */
};
| 398 | |||
/* Runtime tunables, adjustable via sysfs; gt_spin guards all fields */
struct gfs2_tune {
	spinlock_t gt_spin;

	unsigned int gt_ilimit;		/* inode reclaim trigger level */
	unsigned int gt_ilimit_tries;
	unsigned int gt_ilimit_min;
	unsigned int gt_demote_secs; /* Cache retention for unheld glock */
	unsigned int gt_incore_log_blocks;
	unsigned int gt_log_flush_secs;
	unsigned int gt_jindex_refresh_secs; /* Check for new journal index */

	/* wakeup intervals for the daemon threads */
	unsigned int gt_scand_secs;
	unsigned int gt_recoverd_secs;
	unsigned int gt_logd_secs;
	unsigned int gt_quotad_secs;

	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
	unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
	unsigned int gt_quota_scale_num; /* Numerator */
	unsigned int gt_quota_scale_den; /* Denominator */
	unsigned int gt_quota_cache_secs;
	unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
	unsigned int gt_atime_quantum; /* Min secs between atime updates */
	unsigned int gt_new_files_jdata;
	unsigned int gt_new_files_directio;
	unsigned int gt_max_atomic_write; /* Split big writes into this size */
	unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
	unsigned int gt_lockdump_size;
	unsigned int gt_stall_secs; /* Detects trouble! */
	unsigned int gt_complain_secs;
	unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
	unsigned int gt_entries_per_readdir;
	unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
	unsigned int gt_greedy_default;
	unsigned int gt_greedy_quantum;
	unsigned int gt_greedy_max;
	unsigned int gt_statfs_quantum;
	unsigned int gt_statfs_slow;
};
| 438 | |||
/* Bit numbers for gfs2_sbd.sd_flags */
enum {
	SDF_JOURNAL_CHECKED = 0,	/* journal has been checked on mount */
	SDF_JOURNAL_LIVE = 1,		/* journal is in active use */
	SDF_SHUTDOWN = 2,		/* fs has been withdrawn */
	SDF_NOATIME = 3,		/* suppress atime updates */
};
| 445 | |||
#define GFS2_FSNAME_LEN		256

/* The GFS2 superblock data: one per mounted filesystem (sb->s_fs_info) */
struct gfs2_sbd {
	struct super_block *sd_vfs;
	struct super_block *sd_vfs_meta;
	struct kobject sd_kobj;		/* sysfs representation */
	unsigned long sd_flags;	/* SDF_... */
	struct gfs2_sb sd_sb;		/* copy of the on-disk superblock */

	/* Constants computed on mount */

	u32 sd_fsb2bb;			/* fs blocks per basic (512B) block */
	u32 sd_fsb2bb_shift;
	u32 sd_diptrs;	/* Number of pointers in a dinode */
	u32 sd_inptrs;	/* Number of pointers in a indirect block */
	u32 sd_jbsize;	/* Size of a journaled data block */
	u32 sd_hash_bsize;	/* sizeof(exhash block) */
	u32 sd_hash_bsize_shift;
	u32 sd_hash_ptrs;	/* Number of pointers in a hash block */
	u32 sd_qc_per_block;
	u32 sd_max_dirres;	/* Max blocks needed to add a directory entry */
	u32 sd_max_height;	/* Max height of a file's metadata tree */
	u64 sd_heightsize[GFS2_MAX_META_HEIGHT];
	u32 sd_max_jheight; /* Max height of journaled file's meta tree */
	u64 sd_jheightsize[GFS2_MAX_META_HEIGHT];

	struct gfs2_args sd_args;	/* Mount arguments */
	struct gfs2_tune sd_tune;	/* Filesystem tuning structure */

	/* Lock Stuff */

	struct lm_lockstruct sd_lockstruct;
	struct list_head sd_reclaim_list;	/* glocks awaiting reclaim */
	spinlock_t sd_reclaim_lock;
	wait_queue_head_t sd_reclaim_wq;
	atomic_t sd_reclaim_count;
	struct gfs2_holder sd_live_gh;
	struct gfs2_glock *sd_rename_gl;	/* cluster-wide rename lock */
	struct gfs2_glock *sd_trans_gl;		/* transaction lock */

	/* Inode Stuff */

	struct inode *sd_master_dir;
	struct inode *sd_jindex;	/* journal index */
	struct inode *sd_inum_inode;	/* global inum allocation file */
	struct inode *sd_statfs_inode;
	struct inode *sd_ir_inode;	/* per-node inum range file */
	struct inode *sd_sc_inode;	/* per-node statfs change file */
	struct inode *sd_qc_inode;	/* per-node quota change file */
	struct inode *sd_rindex;	/* resource group index */
	struct inode *sd_quota_inode;

	/* Inum stuff */

	struct mutex sd_inum_mutex;	/* serializes inum allocation */

	/* StatFS stuff */

	spinlock_t sd_statfs_spin;
	struct mutex sd_statfs_mutex;
	struct gfs2_statfs_change sd_statfs_master;
	struct gfs2_statfs_change sd_statfs_local;
	unsigned long sd_statfs_sync_time;

	/* Resource group stuff */

	u64 sd_rindex_vn;	/* version of the rindex we have cached */
	spinlock_t sd_rindex_spin;
	struct mutex sd_rindex_mutex;
	struct list_head sd_rindex_list;
	struct list_head sd_rindex_mru_list;
	struct list_head sd_rindex_recent_list;
	struct gfs2_rgrpd *sd_rindex_forward;
	unsigned int sd_rgrps;

	/* Journal index stuff */

	struct list_head sd_jindex_list;
	spinlock_t sd_jindex_spin;
	struct mutex sd_jindex_mutex;
	unsigned int sd_journals;
	unsigned long sd_jindex_refresh_time;

	struct gfs2_jdesc *sd_jdesc;	/* this node's journal */
	struct gfs2_holder sd_journal_gh;
	struct gfs2_holder sd_jinode_gh;

	struct gfs2_holder sd_ir_gh;
	struct gfs2_holder sd_sc_gh;
	struct gfs2_holder sd_qc_gh;

	/* Daemon stuff */

	struct task_struct *sd_scand_process;
	struct task_struct *sd_recoverd_process;
	struct task_struct *sd_logd_process;
	struct task_struct *sd_quotad_process;
	struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
	unsigned int sd_glockd_num;

	/* Quota stuff */

	struct list_head sd_quota_list;
	atomic_t sd_quota_count;
	spinlock_t sd_quota_spin;
	struct mutex sd_quota_mutex;

	unsigned int sd_quota_slots;	/* slots in the quota change file */
	unsigned int sd_quota_chunks;
	unsigned char **sd_quota_bitmap;	/* which slots are in use */

	u64 sd_quota_sync_gen;
	unsigned long sd_quota_sync_time;

	/* Log stuff */

	spinlock_t sd_log_lock;

	unsigned int sd_log_blks_reserved;
	unsigned int sd_log_commited_buf;
	unsigned int sd_log_commited_revoke;

	/* counts of each kind of log element awaiting flush */
	unsigned int sd_log_num_gl;
	unsigned int sd_log_num_buf;
	unsigned int sd_log_num_revoke;
	unsigned int sd_log_num_rg;
	unsigned int sd_log_num_databuf;
	unsigned int sd_log_num_jdata;
	unsigned int sd_log_num_hdrs;

	struct list_head sd_log_le_gl;
	struct list_head sd_log_le_buf;
	struct list_head sd_log_le_revoke;
	struct list_head sd_log_le_rg;
	struct list_head sd_log_le_databuf;

	unsigned int sd_log_blks_free;
	struct mutex sd_log_reserve_mutex;

	u64 sd_log_sequence;
	unsigned int sd_log_head;	/* log block numbers, wrap around */
	unsigned int sd_log_tail;
	int sd_log_idle;

	unsigned long sd_log_flush_time;
	struct rw_semaphore sd_log_flush_lock;
	struct list_head sd_log_flush_list;

	unsigned int sd_log_flush_head;
	u64 sd_log_flush_wrapped;

	struct list_head sd_ail1_list;
	struct list_head sd_ail2_list;
	u64 sd_ail_sync_gen;

	/* Replay stuff */

	struct list_head sd_revoke_list;
	unsigned int sd_replay_tail;

	unsigned int sd_found_blocks;
	unsigned int sd_found_revokes;
	unsigned int sd_replayed_blocks;

	/* For quiescing the filesystem */

	struct gfs2_holder sd_freeze_gh;
	struct mutex sd_freeze_lock;
	unsigned int sd_freeze_count;

	/* Counters */

	atomic_t sd_glock_count;
	atomic_t sd_glock_held_count;
	atomic_t sd_inode_count;
	atomic_t sd_reclaimed;

	char sd_fsname[GFS2_FSNAME_LEN];	/* "table.jid" identity */
	char sd_table_name[GFS2_FSNAME_LEN];
	char sd_proto_name[GFS2_FSNAME_LEN];

	/* Debugging crud */

	unsigned long sd_last_warning;
	struct vfsmount *sd_gfs2mnt;
};
| 632 | |||
| 633 | #endif /* __INCORE_DOT_H__ */ | ||
| 634 | |||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c new file mode 100644 index 000000000000..57c43ac47925 --- /dev/null +++ b/fs/gfs2/inode.c | |||
| @@ -0,0 +1,1379 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/posix_acl.h> | ||
| 16 | #include <linux/sort.h> | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | #include <linux/crc32.h> | ||
| 19 | #include <linux/lm_interface.h> | ||
| 20 | #include <linux/security.h> | ||
| 21 | |||
| 22 | #include "gfs2.h" | ||
| 23 | #include "incore.h" | ||
| 24 | #include "acl.h" | ||
| 25 | #include "bmap.h" | ||
| 26 | #include "dir.h" | ||
| 27 | #include "eattr.h" | ||
| 28 | #include "glock.h" | ||
| 29 | #include "glops.h" | ||
| 30 | #include "inode.h" | ||
| 31 | #include "log.h" | ||
| 32 | #include "meta_io.h" | ||
| 33 | #include "ops_address.h" | ||
| 34 | #include "ops_file.h" | ||
| 35 | #include "ops_inode.h" | ||
| 36 | #include "quota.h" | ||
| 37 | #include "rgrp.h" | ||
| 38 | #include "trans.h" | ||
| 39 | #include "util.h" | ||
| 40 | |||
| 41 | /** | ||
| 42 | * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode | ||
| 43 | * @ip: The GFS2 inode (with embedded disk inode data) | ||
| 44 | * @inode: The Linux VFS inode | ||
| 45 | * | ||
| 46 | */ | ||
| 47 | |||
| 48 | void gfs2_inode_attr_in(struct gfs2_inode *ip) | ||
| 49 | { | ||
| 50 | struct inode *inode = &ip->i_inode; | ||
| 51 | struct gfs2_dinode *di = &ip->i_di; | ||
| 52 | |||
| 53 | inode->i_ino = ip->i_num.no_addr; | ||
| 54 | |||
| 55 | switch (di->di_mode & S_IFMT) { | ||
| 56 | case S_IFBLK: | ||
| 57 | case S_IFCHR: | ||
| 58 | inode->i_rdev = MKDEV(di->di_major, di->di_minor); | ||
| 59 | break; | ||
| 60 | default: | ||
| 61 | inode->i_rdev = 0; | ||
| 62 | break; | ||
| 63 | }; | ||
| 64 | |||
| 65 | inode->i_mode = di->di_mode; | ||
| 66 | inode->i_nlink = di->di_nlink; | ||
| 67 | inode->i_uid = di->di_uid; | ||
| 68 | inode->i_gid = di->di_gid; | ||
| 69 | i_size_write(inode, di->di_size); | ||
| 70 | inode->i_atime.tv_sec = di->di_atime; | ||
| 71 | inode->i_mtime.tv_sec = di->di_mtime; | ||
| 72 | inode->i_ctime.tv_sec = di->di_ctime; | ||
| 73 | inode->i_atime.tv_nsec = 0; | ||
| 74 | inode->i_mtime.tv_nsec = 0; | ||
| 75 | inode->i_ctime.tv_nsec = 0; | ||
| 76 | inode->i_blocks = di->di_blocks << | ||
| 77 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); | ||
| 78 | |||
| 79 | if (di->di_flags & GFS2_DIF_IMMUTABLE) | ||
| 80 | inode->i_flags |= S_IMMUTABLE; | ||
| 81 | else | ||
| 82 | inode->i_flags &= ~S_IMMUTABLE; | ||
| 83 | |||
| 84 | if (di->di_flags & GFS2_DIF_APPENDONLY) | ||
| 85 | inode->i_flags |= S_APPEND; | ||
| 86 | else | ||
| 87 | inode->i_flags &= ~S_APPEND; | ||
| 88 | } | ||
| 89 | |||
/**
 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
 * @ip: The GFS2 inode
 *
 * Only copy out the attributes that we want the VFS layer
 * to be able to modify.
 */

void gfs2_inode_attr_out(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct gfs2_dinode *di = &ip->i_di;
	/* The file type bits must never diverge between the two copies;
	   withdraw the fs if they have */
	gfs2_assert_withdraw(GFS2_SB(inode),
		(di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
	di->di_mode = inode->i_mode;
	di->di_uid = inode->i_uid;
	di->di_gid = inode->i_gid;
	di->di_atime = inode->i_atime.tv_sec;
	di->di_mtime = inode->i_mtime.tv_sec;
	di->di_ctime = inode->i_ctime.tv_sec;
}
| 111 | |||
| 112 | static int iget_test(struct inode *inode, void *opaque) | ||
| 113 | { | ||
| 114 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 115 | struct gfs2_inum *inum = opaque; | ||
| 116 | |||
| 117 | if (ip && ip->i_num.no_addr == inum->no_addr) | ||
| 118 | return 1; | ||
| 119 | |||
| 120 | return 0; | ||
| 121 | } | ||
| 122 | |||
| 123 | static int iget_set(struct inode *inode, void *opaque) | ||
| 124 | { | ||
| 125 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 126 | struct gfs2_inum *inum = opaque; | ||
| 127 | |||
| 128 | ip->i_num = *inum; | ||
| 129 | return 0; | ||
| 130 | } | ||
| 131 | |||
/* Look up an inode in the inode cache without creating it; returns the
   inode with an extra reference, or NULL if not cached. */
struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum)
{
	return ilookup5(sb, (unsigned long)inum->no_formal_ino,
			iget_test, inum);
}
| 137 | |||
/* Find or allocate an inode for @inum; a new inode comes back locked
   with I_NEW set and must be finished with unlock_new_inode(). */
static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
{
	return iget5_locked(sb, (unsigned long)inum->no_formal_ino,
		     iget_test, iget_set, inum);
}
| 143 | |||
/**
 * gfs2_inode_lookup - Lookup an inode
 * @sb: The super block
 * @inum: The inode number
 * @type: The type of the inode (a DT_* directory-entry type)
 *
 * Finds the inode in the cache or allocates a new one. For a new inode
 * this sets up the inode/file/address-space operations based on @type
 * and acquires the inode and iopen glocks before unlocking it.
 *
 * Returns: A VFS inode, or an error
 */

struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type)
{
	struct inode *inode = gfs2_iget(sb, inum);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_glock *io_gl;
	int error;

	/* Only a freshly created inode needs initialization */
	if (inode->i_state & I_NEW) {
		struct gfs2_sbd *sdp = GFS2_SB(inode);
		umode_t mode = DT2IF(type);	/* DT_* -> S_IF* */
		inode->i_private = ip;
		inode->i_mode = mode;

		if (S_ISREG(mode)) {
			inode->i_op = &gfs2_file_iops;
			inode->i_fop = &gfs2_file_fops;
			inode->i_mapping->a_ops = &gfs2_file_aops;
		} else if (S_ISDIR(mode)) {
			inode->i_op = &gfs2_dir_iops;
			inode->i_fop = &gfs2_dir_fops;
		} else if (S_ISLNK(mode)) {
			inode->i_op = &gfs2_symlink_iops;
		} else {
			inode->i_op = &gfs2_dev_iops;
		}

		/* The main inode glock, keyed by disk address */
		error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
		if (unlikely(error))
			goto fail;
		ip->i_gl->gl_object = ip;

		/* The iopen glock, held shared for the inode's lifetime */
		error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
		if (unlikely(error))
			goto fail_put;

		/* Force a refresh on first lock acquisition */
		ip->i_vn = ip->i_gl->gl_vn - 1;
		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
		if (unlikely(error))
			goto fail_iopen;

		/* The holder keeps its own ref; drop ours */
		gfs2_glock_put(io_gl);
		unlock_new_inode(inode);
	}

	return inode;
fail_iopen:
	gfs2_glock_put(io_gl);
fail_put:
	ip->i_gl->gl_object = NULL;
	gfs2_glock_put(ip->i_gl);
fail:
	iput(inode);
	return ERR_PTR(error);
}
| 207 | |||
/**
 * gfs2_inode_refresh - Refresh the incore copy of the dinode
 * @ip: The GFS2 inode
 *
 * Re-reads the dinode block and updates ip->i_di, then records the
 * glock version number so the copy can be detected as stale later.
 *
 * Returns: errno (-EIO on metadata corruption, -ESTALE if the inode
 *	    number has been reused)
 */

int gfs2_inode_refresh(struct gfs2_inode *ip)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
		brelse(dibh);
		return -EIO;
	}

	gfs2_dinode_in(&ip->i_di, dibh->b_data);

	brelse(dibh);

	/* The block must claim to be the dinode we asked for */
	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}
	/* Same block, different generation: the inode was deleted and
	   the block reused */
	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
		return -ESTALE;

	ip->i_vn = ip->i_gl->gl_vn;

	return 0;
}
| 245 | |||
/* Free the dinode block itself (the last block of a deleted inode).
   Takes the rindex and resource-group locks, runs a small transaction
   and returns the block to its resource group. Returns: errno. */
int gfs2_dinode_dealloc(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al;
	struct gfs2_rgrpd *rgd;
	int error;

	/* All data/metadata must already be freed: only the dinode left */
	if (ip->i_di.di_blocks != 1) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
	if (error)
		goto out_qs;

	/* Find the resource group containing the dinode block */
	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
	if (!rgd) {
		gfs2_consist_inode(ip);
		error = -EIO;
		goto out_rindex_relse;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
				   &al->al_rgd_gh);
	if (error)
		goto out_rindex_relse;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
	if (error)
		goto out_rg_gunlock;

	gfs2_trans_add_gl(ip->i_gl);

	gfs2_free_di(rgd, ip);

	gfs2_trans_end(sdp);
	clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);

out_rg_gunlock:
	gfs2_glock_dq_uninit(&al->al_rgd_gh);
out_rindex_relse:
	gfs2_glock_dq_uninit(&al->al_ri_gh);
out_qs:
	gfs2_quota_unhold(ip);
out:
	gfs2_alloc_put(ip);
	return error;
}
| 302 | |||
/**
 * gfs2_change_nlink - Change nlink count on inode
 * @ip: The GFS2 inode
 * @diff: The change in the nlink count required
 *
 * Updates both the dinode and VFS link counts (which must agree on
 * entry). When the count reaches zero, the inode is marked unlinked
 * in its resource group so another node can later deallocate it.
 *
 * Returns: errno
 */

int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
{
	struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
	struct buffer_head *dibh;
	u32 nlink;
	int error;

	BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink);
	nlink = ip->i_di.di_nlink + diff;

	/* If we are reducing the nlink count, but the new value ends up being
	   bigger than the old one, we must have underflowed. */
	if (diff < 0 && nlink > ip->i_di.di_nlink) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	ip->i_di.di_nlink = nlink;
	ip->i_di.di_ctime = get_seconds();
	ip->i_inode.i_nlink = nlink;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);
	mark_inode_dirty(&ip->i_inode);

	if (ip->i_di.di_nlink == 0) {
		struct gfs2_rgrpd *rgd;
		struct gfs2_holder ri_gh, rg_gh;

		/* Need the rgrp lock to flag the dinode as unlinked */
		error = gfs2_rindex_hold(sdp, &ri_gh);
		if (error)
			goto out;
		error = -EIO;
		rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
		if (!rgd)
			goto out_norgrp;
		error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
		if (error)
			goto out_norgrp;

		clear_nlink(&ip->i_inode);
		gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
		gfs2_glock_dq_uninit(&rg_gh);
out_norgrp:
		gfs2_glock_dq_uninit(&ri_gh);
	}
out:
	return error;
}
| 366 | |||
/* Convenience wrapper around gfs2_lookupi() for a C-string name with
   root (permission-check-free) semantics. */
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
{
	struct qstr qstr;
	gfs2_str2qstr(&qstr, name);
	return gfs2_lookupi(dip, &qstr, 1, NULL);
}
| 373 | |||
| 374 | |||
| 375 | /** | ||
| 376 | * gfs2_lookupi - Look up a filename in a directory and return its inode | ||
| 377 | * @d_gh: An initialized holder for the directory glock | ||
| 378 | * @name: The name of the inode to look for | ||
| 379 | * @is_root: If 1, ignore the caller's permissions | ||
| 380 | * @i_gh: An uninitialized holder for the new inode glock | ||
| 381 | * | ||
| 382 | * There will always be a vnode (Linux VFS inode) for the d_gh inode unless | ||
| 383 | * @is_root is true. | ||
| 384 | * | ||
| 385 | * Returns: errno | ||
| 386 | */ | ||
| 387 | |||
| 388 | struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | ||
| 389 | int is_root, struct nameidata *nd) | ||
| 390 | { | ||
| 391 | struct super_block *sb = dir->i_sb; | ||
| 392 | struct gfs2_inode *dip = GFS2_I(dir); | ||
| 393 | struct gfs2_holder d_gh; | ||
| 394 | struct gfs2_inum inum; | ||
| 395 | unsigned int type; | ||
| 396 | int error = 0; | ||
| 397 | struct inode *inode = NULL; | ||
| 398 | |||
| 399 | if (!name->len || name->len > GFS2_FNAMESIZE) | ||
| 400 | return ERR_PTR(-ENAMETOOLONG); | ||
| 401 | |||
| 402 | if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || | ||
| 403 | (name->len == 2 && memcmp(name->name, "..", 2) == 0 && | ||
| 404 | dir == sb->s_root->d_inode)) { | ||
| 405 | igrab(dir); | ||
| 406 | return dir; | ||
| 407 | } | ||
| 408 | |||
| 409 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | ||
| 410 | if (error) | ||
| 411 | return ERR_PTR(error); | ||
| 412 | |||
| 413 | if (!is_root) { | ||
| 414 | error = permission(dir, MAY_EXEC, NULL); | ||
| 415 | if (error) | ||
| 416 | goto out; | ||
| 417 | } | ||
| 418 | |||
| 419 | error = gfs2_dir_search(dir, name, &inum, &type); | ||
| 420 | if (error) | ||
| 421 | goto out; | ||
| 422 | |||
| 423 | inode = gfs2_inode_lookup(sb, &inum, type); | ||
| 424 | |||
| 425 | out: | ||
| 426 | gfs2_glock_dq_uninit(&d_gh); | ||
| 427 | if (error == -ENOENT) | ||
| 428 | return NULL; | ||
| 429 | return inode; | ||
| 430 | } | ||
| 431 | |||
/* Fast path for allocating a formal inode number: take one from this
   node's private inum-range file. Returns 0 on success, 1 if the local
   range is exhausted (caller must refill via pick_formal_ino_2), or a
   negative errno. */
static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
	struct buffer_head *bh;
	struct gfs2_inum_range ir;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;
	mutex_lock(&sdp->sd_inum_mutex);

	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error) {
		mutex_unlock(&sdp->sd_inum_mutex);
		gfs2_trans_end(sdp);
		return error;
	}

	/* The range lives just past the dinode header */
	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));

	if (ir.ir_length) {
		/* Consume one number from the front of the range */
		*formal_ino = ir.ir_start++;
		ir.ir_length--;
		gfs2_trans_add_bh(ip->i_gl, bh, 1);
		gfs2_inum_range_out(&ir,
				    bh->b_data + sizeof(struct gfs2_dinode));
		brelse(bh);
		mutex_unlock(&sdp->sd_inum_mutex);
		gfs2_trans_end(sdp);
		return 0;
	}

	brelse(bh);

	mutex_unlock(&sdp->sd_inum_mutex);
	gfs2_trans_end(sdp);

	/* Range empty: tell the caller to take the slow path */
	return 1;
}
| 472 | |||
/* Slow path for allocating a formal inode number: lock the cluster-wide
   inum file, refill this node's range from it if still empty, then take
   one number. Returns 0 on success or a negative errno. */
static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
	struct gfs2_holder gh;
	struct buffer_head *bh;
	struct gfs2_inum_range ir;
	int error;

	/* Exclusive lock on the global inum file serializes refills
	   across the cluster */
	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		return error;

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out;
	mutex_lock(&sdp->sd_inum_mutex);

	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out_end_trans;

	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));

	/* Another task may have refilled while we waited for the glock */
	if (!ir.ir_length) {
		struct buffer_head *m_bh;
		u64 x, y;

		error = gfs2_meta_inode_buffer(m_ip, &m_bh);
		if (error)
			goto out_brelse;

		/* Global file holds a single big-endian next-free counter */
		x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
		x = y = be64_to_cpu(x);
		ir.ir_start = x;
		ir.ir_length = GFS2_INUM_QUANTUM;
		x += GFS2_INUM_QUANTUM;
		if (x < y)	/* 64-bit counter wrapped: corruption */
			gfs2_consist_inode(m_ip);
		x = cpu_to_be64(x);
		gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
		*(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;

		brelse(m_bh);
	}

	*formal_ino = ir.ir_start++;
	ir.ir_length--;

	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));

out_brelse:
	brelse(bh);
out_end_trans:
	mutex_unlock(&sdp->sd_inum_mutex);
	gfs2_trans_end(sdp);
out:
	gfs2_glock_dq_uninit(&gh);
	return error;
}
| 534 | |||
| 535 | static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) | ||
| 536 | { | ||
| 537 | int error; | ||
| 538 | |||
| 539 | error = pick_formal_ino_1(sdp, inum); | ||
| 540 | if (error <= 0) | ||
| 541 | return error; | ||
| 542 | |||
| 543 | error = pick_formal_ino_2(sdp, inum); | ||
| 544 | |||
| 545 | return error; | ||
| 546 | } | ||
| 547 | |||
| 548 | /** | ||
| 549 | * create_ok - OK to create a new on-disk inode here? | ||
| 550 | * @dip: Directory in which dinode is to be created | ||
| 551 | * @name: Name of new dinode | ||
| 552 | * @mode: | ||
| 553 | * | ||
| 554 | * Returns: errno | ||
| 555 | */ | ||
| 556 | |||
| 557 | static int create_ok(struct gfs2_inode *dip, const struct qstr *name, | ||
| 558 | unsigned int mode) | ||
| 559 | { | ||
| 560 | int error; | ||
| 561 | |||
| 562 | error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); | ||
| 563 | if (error) | ||
| 564 | return error; | ||
| 565 | |||
| 566 | /* Don't create entries in an unlinked directory */ | ||
| 567 | if (!dip->i_di.di_nlink) | ||
| 568 | return -EPERM; | ||
| 569 | |||
| 570 | error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL); | ||
| 571 | switch (error) { | ||
| 572 | case -ENOENT: | ||
| 573 | error = 0; | ||
| 574 | break; | ||
| 575 | case 0: | ||
| 576 | return -EEXIST; | ||
| 577 | default: | ||
| 578 | return error; | ||
| 579 | } | ||
| 580 | |||
| 581 | if (dip->i_di.di_entries == (u32)-1) | ||
| 582 | return -EFBIG; | ||
| 583 | if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1) | ||
| 584 | return -EMLINK; | ||
| 585 | |||
| 586 | return 0; | ||
| 587 | } | ||
| 588 | |||
/* Apply setuid-directory ("suiddir") and setgid-directory inheritance
   rules to the mode/uid/gid of an inode about to be created in @dip. */
static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
			       unsigned int *uid, unsigned int *gid)
{
	if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
	    (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) {
		if (S_ISDIR(*mode))
			*mode |= S_ISUID;	/* subdirs inherit suiddir */
		else if (dip->i_di.di_uid != current->fsuid)
			/* strip setuid/setgid and all execute bits */
			*mode &= ~07111;
		*uid = dip->i_di.di_uid;	/* inherit owner from dir */
	} else
		*uid = current->fsuid;

	if (dip->i_di.di_mode & S_ISGID) {
		if (S_ISDIR(*mode))
			*mode |= S_ISGID;	/* BSD group semantics */
		*gid = dip->i_di.di_gid;
	} else
		*gid = current->fsgid;
}
| 609 | |||
/*
 * alloc_dinode - allocate an on-disk block for a new dinode
 * @dip: the directory the inode is being created in (supplies the
 *       allocation context and goal)
 * @inum: on success, inum->no_addr is set to the new dinode's block number
 * @generation: filled in with the new inode's generation number
 *
 * Reserves space, opens a small transaction covering the resource-group
 * bitmap and statfs changes, and allocates the dinode block.
 * Returns: errno
 */
static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum,
			u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	int error;

	gfs2_alloc_get(dip);

	/* Only the single dinode block is needed from the rgrp. */
	dip->i_alloc.al_requested = RES_DINODE;
	error = gfs2_inplace_reserve(dip);
	if (error)
		goto out;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0);
	if (error)
		goto out_ipreserv;

	inum->no_addr = gfs2_alloc_di(dip, generation);

	gfs2_trans_end(sdp);

	/* Teardown mirrors setup order: release reservation, then context. */
out_ipreserv:
	gfs2_inplace_release(dip);
out:
	gfs2_alloc_put(dip);
	return error;
}
| 637 | |||
/**
 * init_dinode - Fill in a new dinode structure
 * @dip: the directory this inode is being created in
 * @gl: The glock covering the new inode
 * @inum: the inode number
 * @mode: the file permissions
 * @uid: owner uid for the new dinode
 * @gid: owner gid for the new dinode
 * @generation: generation number to stamp into the dinode
 *
 * Creates the buffer for the new dinode block, adds it to the current
 * transaction, and writes every on-disk field (big-endian) explicitly,
 * including the padding/reserved areas, so no stale data leaks to disk.
 */

static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
			const struct gfs2_inum *inum, unsigned int mode,
			unsigned int uid, unsigned int gid,
			const u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_dinode *di;
	struct buffer_head *dibh;

	dibh = gfs2_meta_new(gl, inum->no_addr);
	gfs2_trans_add_bh(gl, dibh, 1);
	gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	di = (struct gfs2_dinode *)dibh->b_data;

	di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
	di->di_num.no_addr = cpu_to_be64(inum->no_addr);
	di->di_mode = cpu_to_be32(mode);
	di->di_uid = cpu_to_be32(uid);
	di->di_gid = cpu_to_be32(gid);
	/* nlink stays 0 until link_dinode() adds the directory entry. */
	di->di_nlink = cpu_to_be32(0);
	di->di_size = cpu_to_be64(0);
	/* 1 block: the dinode itself. */
	di->di_blocks = cpu_to_be64(1);
	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
	di->di_major = di->di_minor = cpu_to_be32(0);
	/* Both allocation goals start at the dinode's own block. */
	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
	di->di_generation = cpu_to_be64(*generation);
	di->di_flags = cpu_to_be32(0);

	/* Inherit jdata/directio behavior from the parent directory or the
	   filesystem-wide tunables. */
	if (S_ISREG(mode)) {
		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
		    gfs2_tune_get(sdp, gt_new_files_jdata))
			di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
		    gfs2_tune_get(sdp, gt_new_files_directio))
			di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
	} else if (S_ISDIR(mode)) {
		/* Subdirectories carry the INHERIT_* flags forward. */
		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
					    GFS2_DIF_INHERIT_DIRECTIO);
		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
					    GFS2_DIF_INHERIT_JDATA);
	}

	di->__pad1 = 0;
	di->di_payload_format = cpu_to_be32(0);
	di->di_height = cpu_to_be32(0);
	di->__pad2 = 0;
	di->__pad3 = 0;
	di->di_depth = cpu_to_be16(0);
	di->di_entries = cpu_to_be32(0);
	memset(&di->__pad4, 0, sizeof(di->__pad4));
	di->di_eattr = cpu_to_be64(0);
	memset(&di->di_reserved, 0, sizeof(di->di_reserved));

	brelse(dibh);
}
| 705 | |||
/*
 * make_dinode - write a new dinode to disk under quota control
 * @dip: the parent directory
 * @gl: glock protecting the new inode
 * @mode: requested file mode (may be munged for suiddir/setgid parents)
 * @inum: inode number of the new dinode
 * @generation: generation number for the new dinode
 *
 * Determines the effective mode/uid/gid, checks and charges quota for
 * the new inode, and writes the dinode inside a transaction.
 * Returns: errno
 */
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
		       unsigned int mode, const struct gfs2_inum *inum,
		       const u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	unsigned int uid, gid;
	int error;

	munge_mode_uid_gid(dip, &mode, &uid, &gid);
	gfs2_alloc_get(dip);

	error = gfs2_quota_lock(dip, uid, gid);
	if (error)
		goto out;

	/* Verify the new owner is under quota before allocating. */
	error = gfs2_quota_check(dip, uid, gid);
	if (error)
		goto out_quota;

	error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0);
	if (error)
		goto out_quota;

	init_dinode(dip, gl, inum, mode, uid, gid, generation);
	/* Charge one inode's worth of quota to the new owner. */
	gfs2_quota_change(dip, +1, uid, gid);
	gfs2_trans_end(sdp);

out_quota:
	gfs2_quota_unlock(dip);
out:
	gfs2_alloc_put(dip);
	return error;
}
| 739 | |||
| 740 | static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | ||
| 741 | struct gfs2_inode *ip) | ||
| 742 | { | ||
| 743 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | ||
| 744 | struct gfs2_alloc *al; | ||
| 745 | int alloc_required; | ||
| 746 | struct buffer_head *dibh; | ||
| 747 | int error; | ||
| 748 | |||
| 749 | al = gfs2_alloc_get(dip); | ||
| 750 | |||
| 751 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
| 752 | if (error) | ||
| 753 | goto fail; | ||
| 754 | |||
| 755 | error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); | ||
| 756 | if (alloc_required < 0) | ||
| 757 | goto fail; | ||
| 758 | if (alloc_required) { | ||
| 759 | error = gfs2_quota_check(dip, dip->i_di.di_uid, | ||
| 760 | dip->i_di.di_gid); | ||
| 761 | if (error) | ||
| 762 | goto fail_quota_locks; | ||
| 763 | |||
| 764 | al->al_requested = sdp->sd_max_dirres; | ||
| 765 | |||
| 766 | error = gfs2_inplace_reserve(dip); | ||
| 767 | if (error) | ||
| 768 | goto fail_quota_locks; | ||
| 769 | |||
| 770 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | ||
| 771 | al->al_rgd->rd_ri.ri_length + | ||
| 772 | 2 * RES_DINODE + | ||
| 773 | RES_STATFS + RES_QUOTA, 0); | ||
| 774 | if (error) | ||
| 775 | goto fail_ipreserv; | ||
| 776 | } else { | ||
| 777 | error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); | ||
| 778 | if (error) | ||
| 779 | goto fail_quota_locks; | ||
| 780 | } | ||
| 781 | |||
| 782 | error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode)); | ||
| 783 | if (error) | ||
| 784 | goto fail_end_trans; | ||
| 785 | |||
| 786 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 787 | if (error) | ||
| 788 | goto fail_end_trans; | ||
| 789 | ip->i_di.di_nlink = 1; | ||
| 790 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 791 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
| 792 | brelse(dibh); | ||
| 793 | return 0; | ||
| 794 | |||
| 795 | fail_end_trans: | ||
| 796 | gfs2_trans_end(sdp); | ||
| 797 | |||
| 798 | fail_ipreserv: | ||
| 799 | if (dip->i_alloc.al_rgd) | ||
| 800 | gfs2_inplace_release(dip); | ||
| 801 | |||
| 802 | fail_quota_locks: | ||
| 803 | gfs2_quota_unlock(dip); | ||
| 804 | |||
| 805 | fail: | ||
| 806 | gfs2_alloc_put(dip); | ||
| 807 | return error; | ||
| 808 | } | ||
| 809 | |||
| 810 | static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) | ||
| 811 | { | ||
| 812 | int err; | ||
| 813 | size_t len; | ||
| 814 | void *value; | ||
| 815 | char *name; | ||
| 816 | struct gfs2_ea_request er; | ||
| 817 | |||
| 818 | err = security_inode_init_security(&ip->i_inode, &dip->i_inode, | ||
| 819 | &name, &value, &len); | ||
| 820 | |||
| 821 | if (err) { | ||
| 822 | if (err == -EOPNOTSUPP) | ||
| 823 | return 0; | ||
| 824 | return err; | ||
| 825 | } | ||
| 826 | |||
| 827 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | ||
| 828 | |||
| 829 | er.er_type = GFS2_EATYPE_SECURITY; | ||
| 830 | er.er_name = name; | ||
| 831 | er.er_data = value; | ||
| 832 | er.er_name_len = strlen(name); | ||
| 833 | er.er_data_len = len; | ||
| 834 | |||
| 835 | err = gfs2_ea_set_i(ip, &er); | ||
| 836 | |||
| 837 | kfree(value); | ||
| 838 | kfree(name); | ||
| 839 | |||
| 840 | return err; | ||
| 841 | } | ||
| 842 | |||
| 843 | /** | ||
| 844 | * gfs2_createi - Create a new inode | ||
| 845 | * @ghs: An array of two holders | ||
| 846 | * @name: The name of the new file | ||
| 847 | * @mode: the permissions on the new inode | ||
| 848 | * | ||
| 849 | * @ghs[0] is an initialized holder for the directory | ||
| 850 | * @ghs[1] is the holder for the inode lock | ||
| 851 | * | ||
| 852 | * If the return value is not NULL, the glocks on both the directory and the new | ||
| 853 | * file are held. A transaction has been started and an inplace reservation | ||
| 854 | * is held, as well. | ||
| 855 | * | ||
| 856 | * Returns: An inode | ||
| 857 | */ | ||
| 858 | |||
| 859 | struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | ||
| 860 | unsigned int mode) | ||
| 861 | { | ||
| 862 | struct inode *inode; | ||
| 863 | struct gfs2_inode *dip = ghs->gh_gl->gl_object; | ||
| 864 | struct inode *dir = &dip->i_inode; | ||
| 865 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | ||
| 866 | struct gfs2_inum inum; | ||
| 867 | int error; | ||
| 868 | u64 generation; | ||
| 869 | |||
| 870 | if (!name->len || name->len > GFS2_FNAMESIZE) | ||
| 871 | return ERR_PTR(-ENAMETOOLONG); | ||
| 872 | |||
| 873 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); | ||
| 874 | error = gfs2_glock_nq(ghs); | ||
| 875 | if (error) | ||
| 876 | goto fail; | ||
| 877 | |||
| 878 | error = create_ok(dip, name, mode); | ||
| 879 | if (error) | ||
| 880 | goto fail_gunlock; | ||
| 881 | |||
| 882 | error = pick_formal_ino(sdp, &inum.no_formal_ino); | ||
| 883 | if (error) | ||
| 884 | goto fail_gunlock; | ||
| 885 | |||
| 886 | error = alloc_dinode(dip, &inum, &generation); | ||
| 887 | if (error) | ||
| 888 | goto fail_gunlock; | ||
| 889 | |||
| 890 | if (inum.no_addr < dip->i_num.no_addr) { | ||
| 891 | gfs2_glock_dq(ghs); | ||
| 892 | |||
| 893 | error = gfs2_glock_nq_num(sdp, inum.no_addr, | ||
| 894 | &gfs2_inode_glops, LM_ST_EXCLUSIVE, | ||
| 895 | GL_SKIP, ghs + 1); | ||
| 896 | if (error) { | ||
| 897 | return ERR_PTR(error); | ||
| 898 | } | ||
| 899 | |||
| 900 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); | ||
| 901 | error = gfs2_glock_nq(ghs); | ||
| 902 | if (error) { | ||
| 903 | gfs2_glock_dq_uninit(ghs + 1); | ||
| 904 | return ERR_PTR(error); | ||
| 905 | } | ||
| 906 | |||
| 907 | error = create_ok(dip, name, mode); | ||
| 908 | if (error) | ||
| 909 | goto fail_gunlock2; | ||
| 910 | } else { | ||
| 911 | error = gfs2_glock_nq_num(sdp, inum.no_addr, | ||
| 912 | &gfs2_inode_glops, LM_ST_EXCLUSIVE, | ||
| 913 | GL_SKIP, ghs + 1); | ||
| 914 | if (error) | ||
| 915 | goto fail_gunlock; | ||
| 916 | } | ||
| 917 | |||
| 918 | error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation); | ||
| 919 | if (error) | ||
| 920 | goto fail_gunlock2; | ||
| 921 | |||
| 922 | inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode)); | ||
| 923 | if (IS_ERR(inode)) | ||
| 924 | goto fail_gunlock2; | ||
| 925 | |||
| 926 | error = gfs2_inode_refresh(GFS2_I(inode)); | ||
| 927 | if (error) | ||
| 928 | goto fail_iput; | ||
| 929 | |||
| 930 | error = gfs2_acl_create(dip, GFS2_I(inode)); | ||
| 931 | if (error) | ||
| 932 | goto fail_iput; | ||
| 933 | |||
| 934 | error = gfs2_security_init(dip, GFS2_I(inode)); | ||
| 935 | if (error) | ||
| 936 | goto fail_iput; | ||
| 937 | |||
| 938 | error = link_dinode(dip, name, GFS2_I(inode)); | ||
| 939 | if (error) | ||
| 940 | goto fail_iput; | ||
| 941 | |||
| 942 | if (!inode) | ||
| 943 | return ERR_PTR(-ENOMEM); | ||
| 944 | return inode; | ||
| 945 | |||
| 946 | fail_iput: | ||
| 947 | iput(inode); | ||
| 948 | fail_gunlock2: | ||
| 949 | gfs2_glock_dq_uninit(ghs + 1); | ||
| 950 | fail_gunlock: | ||
| 951 | gfs2_glock_dq(ghs); | ||
| 952 | fail: | ||
| 953 | return ERR_PTR(error); | ||
| 954 | } | ||
| 955 | |||
/**
 * gfs2_rmdiri - Remove a directory
 * @dip: The parent directory of the directory to be removed
 * @name: The name of the directory to be removed
 * @ip: The GFS2 inode of the directory to be removed
 *
 * Assumes Glocks on dip and ip are held
 *
 * The removal order below (entry in parent, parent nlink, ".", "..",
 * child nlink) is deliberate and must not be reordered.
 *
 * Returns: errno
 */

int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
		struct gfs2_inode *ip)
{
	struct qstr dotname;
	int error;

	/* An empty directory holds exactly "." and ".."; anything else
	   means on-disk corruption. */
	if (ip->i_di.di_entries != 2) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}

	error = gfs2_dir_del(dip, name);
	if (error)
		return error;

	/* Parent loses the link contributed by the child's "..". */
	error = gfs2_change_nlink(dip, -1);
	if (error)
		return error;

	gfs2_str2qstr(&dotname, ".");
	error = gfs2_dir_del(ip, &dotname);
	if (error)
		return error;

	gfs2_str2qstr(&dotname, "..");
	error = gfs2_dir_del(ip, &dotname);
	if (error)
		return error;

	/* Drop both remaining links (parent entry and "."). */
	error = gfs2_change_nlink(ip, -2);
	if (error)
		return error;

	return error;
}
| 1003 | |||
/*
 * gfs2_unlink_ok - check to see that a inode is still in a directory
 * @dip: the directory
 * @name: the name of the file
 * @ip: the inode
 *
 * Assumes that the lock on (at least) @dip is held.
 *
 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
 */

int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
		   struct gfs2_inode *ip)
{
	struct gfs2_inum inum;
	unsigned int type;
	int error;

	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
		return -EPERM;

	/* Sticky-bit directory: only the directory owner, the file owner,
	   or CAP_FOWNER may unlink. */
	if ((dip->i_di.di_mode & S_ISVTX) &&
	    dip->i_di.di_uid != current->fsuid &&
	    ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
		return -EPERM;

	if (IS_APPEND(&dip->i_inode))
		return -EPERM;

	error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL);
	if (error)
		return error;

	/* Re-lookup the name and make sure it still maps to @ip — another
	   node may have changed the directory. */
	error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
	if (error)
		return error;

	if (!gfs2_inum_equal(&inum, &ip->i_num))
		return -ENOENT;

	/* Entry points at the right inode but has the wrong type:
	   filesystem inconsistency. */
	if (IF2DT(ip->i_di.di_mode) != type) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	return 0;
}
| 1051 | |||
/*
 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
 * @this: move this
 * @to: to here
 *
 * Follow @to back to the root and make sure we don't encounter @this
 * Assumes we already hold the rename lock.
 *
 * Returns: errno (0 means the move is allowed, -EINVAL means @to is a
 * descendant of @this)
 */

int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
{
	struct inode *dir = &to->i_inode;
	struct super_block *sb = dir->i_sb;
	struct inode *tmp;
	struct qstr dotdot;
	int error = 0;

	gfs2_str2qstr(&dotdot, "..");

	/* Take a reference so the iput() at each step of the walk (and at
	   the end) is balanced.  NOTE(review): igrab()'s return value is
	   ignored here — assumes @dir cannot be mid-eviction while its
	   glock is held; verify. */
	igrab(dir);

	for (;;) {
		/* Walked onto @this: the move would create a loop. */
		if (dir == &this->i_inode) {
			error = -EINVAL;
			break;
		}
		/* Reached the root without meeting @this: move is fine. */
		if (dir == sb->s_root->d_inode) {
			error = 0;
			break;
		}

		tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
		if (IS_ERR(tmp)) {
			error = PTR_ERR(tmp);
			break;
		}

		iput(dir);
		dir = tmp;
	}

	iput(dir);

	return error;
}
| 1099 | |||
/**
 * gfs2_readlinki - return the contents of a symlink
 * @ip: the symlink's inode
 * @buf: a pointer to the buffer to be filled
 * @len: a pointer to the length of @buf
 *
 * If @buf is too small, a piece of memory is kmalloc()ed and needs
 * to be freed by the caller.
 *
 * Returns: errno
 */

int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
{
	struct gfs2_holder i_gh;
	struct buffer_head *dibh;
	unsigned int x;
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
	error = gfs2_glock_nq_atime(&i_gh);
	if (error) {
		gfs2_holder_uninit(&i_gh);
		return error;
	}

	/* A symlink always has a non-zero target length on disk. */
	if (!ip->i_di.di_size) {
		gfs2_consist_inode(ip);
		error = -EIO;
		goto out;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* +1 for the terminating NUL.  NOTE(review): assumes the on-disk
	   symlink data inside the dinode block is NUL-terminated — confirm
	   against the code that writes symlinks. */
	x = ip->i_di.di_size + 1;
	if (x > *len) {
		*buf = kmalloc(x, GFP_KERNEL);
		if (!*buf) {
			error = -ENOMEM;
			goto out_brelse;
		}
	}

	/* Symlink target is stored inline, right after the dinode header. */
	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
	*len = x;

out_brelse:
	brelse(dibh);
out:
	gfs2_glock_dq_uninit(&i_gh);
	return error;
}
| 1154 | |||
/**
 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
 *                       conditionally update the inode's atime
 * @gh: the holder to acquire
 *
 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
 * Update if the difference between the current time and the inode's current
 * atime is greater than an interval specified at mount.
 *
 * If an update is needed the glock is dropped and re-acquired EXCLUSIVE;
 * afterwards, if another node is waiting, the lock is released and
 * re-taken in the originally requested state.
 *
 * Returns: errno
 */

int gfs2_glock_nq_atime(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
	unsigned int state;
	int flags;
	int error;

	/* Only valid for ATIME, synchronous, inode-glock holders. */
	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
	    gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
		return -EINVAL;

	/* Remember what the caller asked for, so we can restore it after a
	   possible EXCLUSIVE upgrade. */
	state = gh->gh_state;
	flags = gh->gh_flags;

	error = gfs2_glock_nq(gh);
	if (error)
		return error;

	/* noatime mount or read-only fs: no atime maintenance needed. */
	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
	    (sdp->sd_vfs->s_flags & MS_RDONLY))
		return 0;

	curtime = get_seconds();
	if (curtime - ip->i_di.di_atime >= quantum) {
		/* Upgrade to EXCLUSIVE to write the dinode.  LM_FLAG_ANY is
		   stripped so we get exactly EXCLUSIVE. */
		gfs2_glock_dq(gh);
		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
				   gh);
		error = gfs2_glock_nq(gh);
		if (error)
			return error;

		/* Verify that atime hasn't been updated while we were
		   trying to get exclusive lock. */

		curtime = get_seconds();
		if (curtime - ip->i_di.di_atime >= quantum) {
			struct buffer_head *dibh;
			struct gfs2_dinode *di;

			error = gfs2_trans_begin(sdp, RES_DINODE, 0);
			if (error == -EROFS)
				return 0;
			if (error)
				goto fail;

			error = gfs2_meta_inode_buffer(ip, &dibh);
			if (error)
				goto fail_end_trans;

			ip->i_di.di_atime = curtime;

			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			di = (struct gfs2_dinode *)dibh->b_data;
			di->di_atime = cpu_to_be64(ip->i_di.di_atime);
			brelse(dibh);

			gfs2_trans_end(sdp);
		}

		/* If someone else has asked for the glock,
		   unlock and let them have it. Then reacquire
		   in the original state. */
		if (gfs2_glock_is_blocking(gl)) {
			gfs2_glock_dq(gh);
			gfs2_holder_reinit(state, flags, gh);
			return gfs2_glock_nq(gh);
		}
	}

	return 0;

fail_end_trans:
	gfs2_trans_end(sdp);
fail:
	gfs2_glock_dq(gh);
	return error;
}
| 1248 | |||
/**
 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 * Orders primarily by lock number; among equal numbers, SHARED holders
 * sort after EXCLUSIVE or ATIME holders.
 *
 * NOTE(review): the state/flag comparisons only ever return 1 (never the
 * symmetric -1), so this is not a strict total order — presumably relying
 * on sort() tolerating that; verify.
 *
 * Returns: 1 if A > B
 *         -1 if A < B
 *          0 if A == B
 */

static int glock_compare_atime(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
		return 1;
	if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME))
		return 1;

	return 0;
}
| 1277 | |||
/**
 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
 *                         atime update
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Acquires the holders in sorted order (see glock_compare_atime) to
 * avoid deadlock; on any failure, every lock already taken is dropped.
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder **p;
	unsigned int x;
	int error = 0;

	if (!num_gh)
		return 0;

	/* Single holder: no sorting needed, take it directly. */
	if (num_gh == 1) {
		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
		if (ghs->gh_flags & GL_ATIME)
			error = gfs2_glock_nq_atime(ghs);
		else
			error = gfs2_glock_nq(ghs);
		return error;
	}

	p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	/* Sort pointers, not the holders themselves. */
	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);

	for (x = 0; x < num_gh; x++) {
		/* TRY/ASYNC are meaningless when taking a whole batch. */
		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);

		if (p[x]->gh_flags & GL_ATIME)
			error = gfs2_glock_nq_atime(p[x]);
		else
			error = gfs2_glock_nq(p[x]);

		/* Roll back everything acquired so far on failure. */
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	kfree(p);
	return error;
}
| 1333 | |||
| 1334 | |||
/*
 * __gfs2_setattr_simple - apply attribute changes and write the dinode
 * @ip: the inode being changed
 * @attr: the validated attribute set
 *
 * Must be called inside a transaction (see gfs2_setattr_simple).
 * Applies @attr to the VFS inode, syncs the in-core dinode from it, and
 * writes the dinode block.
 * Returns: errno
 */
static int
__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		error = inode_setattr(&ip->i_inode, attr);
		/* attr was pre-validated, so inode_setattr() shouldn't fail. */
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
		gfs2_inode_attr_out(ip);

		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}
	return error;
}
| 1353 | |||
| 1354 | /** | ||
| 1355 | * gfs2_setattr_simple - | ||
| 1356 | * @ip: | ||
| 1357 | * @attr: | ||
| 1358 | * | ||
| 1359 | * Called with a reference on the vnode. | ||
| 1360 | * | ||
| 1361 | * Returns: errno | ||
| 1362 | */ | ||
| 1363 | |||
| 1364 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | ||
| 1365 | { | ||
| 1366 | int error; | ||
| 1367 | |||
| 1368 | if (current->journal_info) | ||
| 1369 | return __gfs2_setattr_simple(ip, attr); | ||
| 1370 | |||
| 1371 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); | ||
| 1372 | if (error) | ||
| 1373 | return error; | ||
| 1374 | |||
| 1375 | error = __gfs2_setattr_simple(ip, attr); | ||
| 1376 | gfs2_trans_end(GFS2_SB(&ip->i_inode)); | ||
| 1377 | return error; | ||
| 1378 | } | ||
| 1379 | |||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h new file mode 100644 index 000000000000..f5d861760579 --- /dev/null +++ b/fs/gfs2/inode.h | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __INODE_DOT_H__ | ||
| 11 | #define __INODE_DOT_H__ | ||
| 12 | |||
/* True if the inode's data is "stuffed" — stored inline in the dinode
   block itself (metadata tree height of zero). */
static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
{
	return !ip->i_di.di_height;
}

/* True if the inode journals its data as well as its metadata. */
static inline int gfs2_is_jdata(struct gfs2_inode *ip)
{
	return ip->i_di.di_flags & GFS2_DIF_JDATA;
}

/* True if the inode is a directory (per the on-disk mode). */
static inline int gfs2_is_dir(struct gfs2_inode *ip)
{
	return S_ISDIR(ip->i_di.di_mode);
}
| 27 | |||
| 28 | void gfs2_inode_attr_in(struct gfs2_inode *ip); | ||
| 29 | void gfs2_inode_attr_out(struct gfs2_inode *ip); | ||
| 30 | struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type); | ||
| 31 | struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum); | ||
| 32 | |||
| 33 | int gfs2_inode_refresh(struct gfs2_inode *ip); | ||
| 34 | |||
| 35 | int gfs2_dinode_dealloc(struct gfs2_inode *inode); | ||
| 36 | int gfs2_change_nlink(struct gfs2_inode *ip, int diff); | ||
| 37 | struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | ||
| 38 | int is_root, struct nameidata *nd); | ||
| 39 | struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | ||
| 40 | unsigned int mode); | ||
| 41 | int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | ||
| 42 | struct gfs2_inode *ip); | ||
| 43 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | ||
| 44 | struct gfs2_inode *ip); | ||
| 45 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | ||
| 46 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | ||
| 47 | |||
| 48 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | ||
| 49 | int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs); | ||
| 50 | |||
| 51 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | ||
| 52 | |||
| 53 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | ||
| 54 | |||
| 55 | #endif /* __INODE_DOT_H__ */ | ||
| 56 | |||
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c new file mode 100644 index 000000000000..effe4a337c1d --- /dev/null +++ b/fs/gfs2/lm.c | |||
| @@ -0,0 +1,217 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/delay.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "glock.h" | ||
| 22 | #include "lm.h" | ||
| 23 | #include "super.h" | ||
| 24 | #include "util.h" | ||
| 25 | |||
| 26 | /** | ||
| 27 | * gfs2_lm_mount - mount a locking protocol | ||
| 28 | * @sdp: the filesystem | ||
| 29 | * @args: mount arguements | ||
| 30 | * @silent: if 1, don't complain if the FS isn't a GFS2 fs | ||
| 31 | * | ||
| 32 | * Returns: errno | ||
| 33 | */ | ||
| 34 | |||
| 35 | int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | ||
| 36 | { | ||
| 37 | char *proto = sdp->sd_proto_name; | ||
| 38 | char *table = sdp->sd_table_name; | ||
| 39 | int flags = 0; | ||
| 40 | int error; | ||
| 41 | |||
| 42 | if (sdp->sd_args.ar_spectator) | ||
| 43 | flags |= LM_MFLAG_SPECTATOR; | ||
| 44 | |||
| 45 | fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); | ||
| 46 | |||
| 47 | error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, | ||
| 48 | gfs2_glock_cb, sdp, | ||
| 49 | GFS2_MIN_LVB_SIZE, flags, | ||
| 50 | &sdp->sd_lockstruct, &sdp->sd_kobj); | ||
| 51 | if (error) { | ||
| 52 | fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n", | ||
| 53 | proto, table, sdp->sd_args.ar_hostdata); | ||
| 54 | goto out; | ||
| 55 | } | ||
| 56 | |||
| 57 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | ||
| 58 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
| 59 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | ||
| 60 | GFS2_MIN_LVB_SIZE)) { | ||
| 61 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
| 62 | goto out; | ||
| 63 | } | ||
| 64 | |||
| 65 | if (sdp->sd_args.ar_spectator) | ||
| 66 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); | ||
| 67 | else | ||
| 68 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, | ||
| 69 | sdp->sd_lockstruct.ls_jid); | ||
| 70 | |||
| 71 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | ||
| 72 | |||
| 73 | if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && | ||
| 74 | !sdp->sd_args.ar_ignore_local_fs) { | ||
| 75 | sdp->sd_args.ar_localflocks = 1; | ||
| 76 | sdp->sd_args.ar_localcaching = 1; | ||
| 77 | } | ||
| 78 | |||
| 79 | out: | ||
| 80 | return error; | ||
| 81 | } | ||
| 82 | |||
| 83 | void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | ||
| 84 | { | ||
| 85 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 86 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | ||
| 87 | sdp->sd_lockstruct.ls_lockspace); | ||
| 88 | } | ||
| 89 | |||
| 90 | void gfs2_lm_unmount(struct gfs2_sbd *sdp) | ||
| 91 | { | ||
| 92 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 93 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
| 94 | } | ||
| 95 | |||
/**
 * gfs2_lm_withdraw - withdraw this node from the cluster after a fatal error
 * @sdp: the filesystem
 * @fmt: printf-style message describing the reason
 *
 * Idempotent: the first caller to set SDF_SHUTDOWN performs the withdraw;
 * later callers return 0 immediately.
 *
 * Returns: 0 if already withdrawn, -1 after performing the withdraw
 */
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
{
	va_list args;

	/* Only the first failure triggers the withdraw sequence */
	if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return 0;

	/* Log the caller-supplied reason */
	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	fs_err(sdp, "about to withdraw from the cluster\n");
	/* With the debug mount option set, crash instead of withdrawing */
	BUG_ON(sdp->sd_args.ar_debug);


	fs_err(sdp, "waiting for outstanding I/O\n");

	/* FIXME: suspend dm device so oustanding bio's complete
	   and all further io requests fail */

	fs_err(sdp, "telling LM to withdraw\n");
	gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
	fs_err(sdp, "withdrawn\n");
	dump_stack();

	return -1;
}
| 123 | |||
| 124 | int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 125 | void **lockp) | ||
| 126 | { | ||
| 127 | int error = -EIO; | ||
| 128 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 129 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | ||
| 130 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | ||
| 131 | return error; | ||
| 132 | } | ||
| 133 | |||
| 134 | void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock) | ||
| 135 | { | ||
| 136 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 137 | sdp->sd_lockstruct.ls_ops->lm_put_lock(lock); | ||
| 138 | } | ||
| 139 | |||
| 140 | unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 141 | unsigned int cur_state, unsigned int req_state, | ||
| 142 | unsigned int flags) | ||
| 143 | { | ||
| 144 | int ret = 0; | ||
| 145 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 146 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 147 | req_state, flags); | ||
| 148 | return ret; | ||
| 149 | } | ||
| 150 | |||
| 151 | unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
| 152 | unsigned int cur_state) | ||
| 153 | { | ||
| 154 | int ret = 0; | ||
| 155 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 156 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); | ||
| 157 | return ret; | ||
| 158 | } | ||
| 159 | |||
| 160 | void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock) | ||
| 161 | { | ||
| 162 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 163 | sdp->sd_lockstruct.ls_ops->lm_cancel(lock); | ||
| 164 | } | ||
| 165 | |||
| 166 | int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | ||
| 167 | { | ||
| 168 | int error = -EIO; | ||
| 169 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 170 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | ||
| 171 | return error; | ||
| 172 | } | ||
| 173 | |||
| 174 | void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb) | ||
| 175 | { | ||
| 176 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 177 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb); | ||
| 178 | } | ||
| 179 | |||
| 180 | int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 181 | struct file *file, struct file_lock *fl) | ||
| 182 | { | ||
| 183 | int error = -EIO; | ||
| 184 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 185 | error = sdp->sd_lockstruct.ls_ops->lm_plock_get( | ||
| 186 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
| 187 | return error; | ||
| 188 | } | ||
| 189 | |||
| 190 | int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 191 | struct file *file, int cmd, struct file_lock *fl) | ||
| 192 | { | ||
| 193 | int error = -EIO; | ||
| 194 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 195 | error = sdp->sd_lockstruct.ls_ops->lm_plock( | ||
| 196 | sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl); | ||
| 197 | return error; | ||
| 198 | } | ||
| 199 | |||
| 200 | int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 201 | struct file *file, struct file_lock *fl) | ||
| 202 | { | ||
| 203 | int error = -EIO; | ||
| 204 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 205 | error = sdp->sd_lockstruct.ls_ops->lm_punlock( | ||
| 206 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
| 207 | return error; | ||
| 208 | } | ||
| 209 | |||
| 210 | void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
| 211 | unsigned int message) | ||
| 212 | { | ||
| 213 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 214 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | ||
| 215 | sdp->sd_lockstruct.ls_lockspace, jid, message); | ||
| 216 | } | ||
| 217 | |||
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h new file mode 100644 index 000000000000..21cdc30ee08c --- /dev/null +++ b/fs/gfs2/lm.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __LM_DOT_H__
#define __LM_DOT_H__

struct gfs2_sbd;

/* Smallest lock value block size the filesystem will accept from a
   lock module (checked in gfs2_lm_mount) */
#define GFS2_MIN_LVB_SIZE 32

/* Mount-time attach/detach of the lock protocol */
int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
void gfs2_lm_unmount(struct gfs2_sbd *sdp);
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
    __attribute__ ((format(printf, 2, 3)));
/* Lock object lifetime and state changes; these become no-ops (or return
   -EIO) once the filesystem has withdrawn */
int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
		     void **lockp);
void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock);
unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
			  unsigned int cur_state, unsigned int req_state,
			  unsigned int flags);
unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
			    unsigned int cur_state);
void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock);
/* Lock value blocks */
int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp);
void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb);
/* POSIX (fcntl) locks, forwarded to the lock module */
int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
		      struct file *file, struct file_lock *fl);
int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
		  struct file *file, int cmd, struct file_lock *fl);
int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
		    struct file *file, struct file_lock *fl);
void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
			   unsigned int message);

#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c new file mode 100644 index 000000000000..663fee728783 --- /dev/null +++ b/fs/gfs2/locking.c | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/init.h> | ||
| 12 | #include <linux/string.h> | ||
| 13 | #include <linux/slab.h> | ||
| 14 | #include <linux/wait.h> | ||
| 15 | #include <linux/sched.h> | ||
| 16 | #include <linux/kmod.h> | ||
| 17 | #include <linux/fs.h> | ||
| 18 | #include <linux/delay.h> | ||
| 19 | #include <linux/lm_interface.h> | ||
| 20 | |||
/* One registered lock protocol */
struct lmh_wrapper {
	struct list_head lw_list;	/* entry on lmh_list */
	const struct lm_lockops *lw_ops;	/* the protocol's operations */
};
| 25 | |||
/* List of registered low-level locking protocols.  A file system selects one
   of them by name at mount time, e.g. lock_nolock, lock_dlm. */

static LIST_HEAD(lmh_list);
/* Protects lmh_list and serializes mount/unmount against (un)registration */
static DEFINE_MUTEX(lmh_lock);
| 31 | |||
| 32 | /** | ||
| 33 | * gfs2_register_lockproto - Register a low-level locking protocol | ||
| 34 | * @proto: the protocol definition | ||
| 35 | * | ||
| 36 | * Returns: 0 on success, -EXXX on failure | ||
| 37 | */ | ||
| 38 | |||
| 39 | int gfs2_register_lockproto(const struct lm_lockops *proto) | ||
| 40 | { | ||
| 41 | struct lmh_wrapper *lw; | ||
| 42 | |||
| 43 | mutex_lock(&lmh_lock); | ||
| 44 | |||
| 45 | list_for_each_entry(lw, &lmh_list, lw_list) { | ||
| 46 | if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) { | ||
| 47 | mutex_unlock(&lmh_lock); | ||
| 48 | printk(KERN_INFO "GFS2: protocol %s already exists\n", | ||
| 49 | proto->lm_proto_name); | ||
| 50 | return -EEXIST; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL); | ||
| 55 | if (!lw) { | ||
| 56 | mutex_unlock(&lmh_lock); | ||
| 57 | return -ENOMEM; | ||
| 58 | } | ||
| 59 | |||
| 60 | lw->lw_ops = proto; | ||
| 61 | list_add(&lw->lw_list, &lmh_list); | ||
| 62 | |||
| 63 | mutex_unlock(&lmh_lock); | ||
| 64 | |||
| 65 | return 0; | ||
| 66 | } | ||
| 67 | |||
| 68 | /** | ||
| 69 | * gfs2_unregister_lockproto - Unregister a low-level locking protocol | ||
| 70 | * @proto: the protocol definition | ||
| 71 | * | ||
| 72 | */ | ||
| 73 | |||
| 74 | void gfs2_unregister_lockproto(const struct lm_lockops *proto) | ||
| 75 | { | ||
| 76 | struct lmh_wrapper *lw; | ||
| 77 | |||
| 78 | mutex_lock(&lmh_lock); | ||
| 79 | |||
| 80 | list_for_each_entry(lw, &lmh_list, lw_list) { | ||
| 81 | if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) { | ||
| 82 | list_del(&lw->lw_list); | ||
| 83 | mutex_unlock(&lmh_lock); | ||
| 84 | kfree(lw); | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | mutex_unlock(&lmh_lock); | ||
| 90 | |||
| 91 | printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n", | ||
| 92 | proto->lm_proto_name); | ||
| 93 | } | ||
| 94 | |||
/**
 * gfs2_mount_lockproto - Mount a lock protocol
 * @proto_name - the name of the protocol
 * @table_name - the name of the lock space
 * @host_data - data specific to this host
 * @cb - the callback to the code using the lock module
 * @cb_data - opaque pointer passed back to @cb
 * @min_lvb_size - the minimum LVB size that the caller can deal with
 * @flags - LM_MFLAG_*
 * @lockstruct - a structure returned describing the mount
 * @fskobj - the filesystem's kobject, given to the lock module
 *
 * If the protocol is not registered, one attempt is made to load its
 * module (requires CAP_SYS_MODULE).  If the module is registered but
 * currently being unloaded, we sleep and retry until the get succeeds.
 *
 * Returns: 0 on success, -EXXX on failure
 */

int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
			 lm_callback_t cb, void *cb_data,
			 unsigned int min_lvb_size, int flags,
			 struct lm_lockstruct *lockstruct,
			 struct kobject *fskobj)
{
	struct lmh_wrapper *lw = NULL;
	int try = 0;	/* set after the one request_module() attempt */
	int error, found;

retry:
	mutex_lock(&lmh_lock);

	/* Look the protocol up by name */
	found = 0;
	list_for_each_entry(lw, &lmh_list, lw_list) {
		if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
			found = 1;
			break;
		}
	}

	if (!found) {
		/* Try loading the module once, then give up */
		if (!try && capable(CAP_SYS_MODULE)) {
			try = 1;
			mutex_unlock(&lmh_lock);
			request_module(proto_name);
			goto retry;
		}
		printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
		error = -ENOENT;
		goto out;
	}

	/* The module may be on its way out; drop the lock, wait, retry */
	if (!try_module_get(lw->lw_ops->lm_owner)) {
		try = 0;
		mutex_unlock(&lmh_lock);
		msleep(1000);
		goto retry;
	}

	error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
				     min_lvb_size, flags, lockstruct, fskobj);
	if (error)
		module_put(lw->lw_ops->lm_owner);
out:
	mutex_unlock(&lmh_lock);
	return error;
}
| 157 | |||
| 158 | void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) | ||
| 159 | { | ||
| 160 | mutex_lock(&lmh_lock); | ||
| 161 | lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); | ||
| 162 | if (lockstruct->ls_ops->lm_owner) | ||
| 163 | module_put(lockstruct->ls_ops->lm_owner); | ||
| 164 | mutex_unlock(&lmh_lock); | ||
| 165 | } | ||
| 166 | |||
| 167 | /** | ||
| 168 | * gfs2_withdraw_lockproto - abnormally unmount a lock module | ||
| 169 | * @lockstruct: the lockstruct passed into mount | ||
| 170 | * | ||
| 171 | */ | ||
| 172 | |||
| 173 | void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct) | ||
| 174 | { | ||
| 175 | mutex_lock(&lmh_lock); | ||
| 176 | lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace); | ||
| 177 | if (lockstruct->ls_ops->lm_owner) | ||
| 178 | module_put(lockstruct->ls_ops->lm_owner); | ||
| 179 | mutex_unlock(&lmh_lock); | ||
| 180 | } | ||
| 181 | |||
| 182 | EXPORT_SYMBOL_GPL(gfs2_register_lockproto); | ||
| 183 | EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto); | ||
| 184 | |||
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile new file mode 100644 index 000000000000..89b93b6b45cf --- /dev/null +++ b/fs/gfs2/locking/dlm/Makefile | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o | ||
| 2 | lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o | ||
| 3 | |||
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c new file mode 100644 index 000000000000..b167addf9fd1 --- /dev/null +++ b/fs/gfs2/locking/dlm/lock.c | |||
| @@ -0,0 +1,524 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include "lock_dlm.h" | ||
| 11 | |||
| 12 | static char junk_lvb[GDLM_LVB_SIZE]; | ||
| 13 | |||
/* Move a lock whose dlm operation has finished onto the lockspace's
   completion list and wake the lock_dlm thread to process it. */
static void queue_complete(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	/* The request is no longer in flight in the dlm */
	clear_bit(LFL_ACTIVE, &lp->flags);

	spin_lock(&ls->async_lock);
	list_add_tail(&lp->clist, &ls->complete);
	spin_unlock(&ls->async_lock);
	wake_up(&ls->thread_wait);
}
| 25 | |||
/* dlm completion AST callback; astarg is the struct gdlm_lock * passed
   to dlm_lock() */
static inline void gdlm_ast(void *astarg)
{
	queue_complete(astarg);
}
| 30 | |||
/* dlm blocking AST callback: another node wants this lock in @mode.
   Queue the lock for the lock_dlm thread; if basts pile up before the
   thread runs, only the highest requested mode is kept. */
static inline void gdlm_bast(void *astarg, int mode)
{
	struct gdlm_lock *lp = astarg;
	struct gdlm_ls *ls = lp->ls;

	if (!mode) {
		printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
			lp->lockname.ln_type,
			(unsigned long long)lp->lockname.ln_number);
		return;
	}

	spin_lock(&ls->async_lock);
	if (!lp->bast_mode) {
		/* bast_mode == 0 means not yet queued on ls->blocking */
		list_add_tail(&lp->blist, &ls->blocking);
		lp->bast_mode = mode;
	} else if (lp->bast_mode < mode)
		lp->bast_mode = mode;
	spin_unlock(&ls->async_lock);
	wake_up(&ls->thread_wait);
}
| 52 | |||
/* Park a lock on the lockspace's delayed list; gdlm_submit_delayed()
   moves it to the submit list once requests may proceed again. */
void gdlm_queue_delayed(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	spin_lock(&ls->async_lock);
	list_add_tail(&lp->delay_list, &ls->delayed);
	spin_unlock(&ls->async_lock);
}
| 61 | |||
| 62 | /* convert gfs lock-state to dlm lock-mode */ | ||
| 63 | |||
| 64 | static s16 make_mode(s16 lmstate) | ||
| 65 | { | ||
| 66 | switch (lmstate) { | ||
| 67 | case LM_ST_UNLOCKED: | ||
| 68 | return DLM_LOCK_NL; | ||
| 69 | case LM_ST_EXCLUSIVE: | ||
| 70 | return DLM_LOCK_EX; | ||
| 71 | case LM_ST_DEFERRED: | ||
| 72 | return DLM_LOCK_CW; | ||
| 73 | case LM_ST_SHARED: | ||
| 74 | return DLM_LOCK_PR; | ||
| 75 | } | ||
| 76 | gdlm_assert(0, "unknown LM state %d", lmstate); | ||
| 77 | return -1; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* convert dlm lock-mode to gfs lock-state */ | ||
| 81 | |||
| 82 | s16 gdlm_make_lmstate(s16 dlmmode) | ||
| 83 | { | ||
| 84 | switch (dlmmode) { | ||
| 85 | case DLM_LOCK_IV: | ||
| 86 | case DLM_LOCK_NL: | ||
| 87 | return LM_ST_UNLOCKED; | ||
| 88 | case DLM_LOCK_EX: | ||
| 89 | return LM_ST_EXCLUSIVE; | ||
| 90 | case DLM_LOCK_CW: | ||
| 91 | return LM_ST_DEFERRED; | ||
| 92 | case DLM_LOCK_PR: | ||
| 93 | return LM_ST_SHARED; | ||
| 94 | } | ||
| 95 | gdlm_assert(0, "unknown DLM mode %d", dlmmode); | ||
| 96 | return -1; | ||
| 97 | } | ||
| 98 | |||
/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
   DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */

static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
{
	s16 cur = make_mode(cur_state);
	/* IV is skipped because GFS reports it as UNLOCKED (-> NL), which
	   would trip the assert spuriously */
	if (lp->cur != DLM_LOCK_IV)
		gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
}
| 108 | |||
/* Translate GFS LM_FLAG_* request flags (plus the lock's own state) into
   dlm DLM_LKF_* flags for dlm_lock(). */
static inline unsigned int make_flags(struct gdlm_lock *lp,
				      unsigned int gfs_flags,
				      s16 cur, s16 req)
{
	unsigned int lkf = 0;

	/* TRY: fail instead of queueing behind other holders */
	if (gfs_flags & LM_FLAG_TRY)
		lkf |= DLM_LKF_NOQUEUE;

	/* TRY_1CB: as TRY, but also suppress blocking callbacks to holders */
	if (gfs_flags & LM_FLAG_TRY_1CB) {
		lkf |= DLM_LKF_NOQUEUE;
		lkf |= DLM_LKF_NOQUEUEBAST;
	}

	/* PRIORITY: jump to the head of the queue, ignore fair ordering */
	if (gfs_flags & LM_FLAG_PRIORITY) {
		lkf |= DLM_LKF_NOORDER;
		lkf |= DLM_LKF_HEADQUE;
	}

	/* ANY: accept an alternate compatible mode if the requested one
	   can't be granted */
	if (gfs_flags & LM_FLAG_ANY) {
		if (req == DLM_LOCK_PR)
			lkf |= DLM_LKF_ALTCW;
		else if (req == DLM_LOCK_CW)
			lkf |= DLM_LKF_ALTPR;
	}

	/* A nonzero lkid means this is a conversion of an existing lock */
	if (lp->lksb.sb_lkid != 0) {
		lkf |= DLM_LKF_CONVERT;

		/* Conversion deadlock avoidance by DLM */

		if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
		    !(lkf & DLM_LKF_NOQUEUE) &&
		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
			lkf |= DLM_LKF_CONVDEADLK;
	}

	if (lp->lvb)
		lkf |= DLM_LKF_VALBLK;

	return lkf;
}
| 151 | |||
/* make_strname - convert GFS lock numbers to a string */

static inline void make_strname(struct lm_lockname *lockname,
				struct gdlm_strname *str)
{
	/* 8 hex chars of type + 16 hex chars of number, space-padded by
	   sprintf; presumably GDLM_STRNAME_BYTES is 24 — confirm in
	   lock_dlm.h */
	sprintf(str->name, "%8x%16llx", lockname->ln_type,
		(unsigned long long)lockname->ln_number);
	str->namelen = GDLM_STRNAME_BYTES;
}
| 161 | |||
/* Allocate and initialize a gdlm_lock for @name and link it onto the
   lockspace's all_locks list.  On success *lpp is set; on -ENOMEM it is
   left untouched. */
static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
			  struct gdlm_lock **lpp)
{
	struct gdlm_lock *lp;

	lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
	if (!lp)
		return -ENOMEM;

	lp->lockname = *name;
	lp->ls = ls;
	lp->cur = DLM_LOCK_IV;	/* no dlm lock exists yet */
	lp->lvb = NULL;
	lp->hold_null = NULL;
	init_completion(&lp->ast_wait);
	INIT_LIST_HEAD(&lp->clist);
	INIT_LIST_HEAD(&lp->blist);
	INIT_LIST_HEAD(&lp->delay_list);

	spin_lock(&ls->async_lock);
	list_add(&lp->all_list, &ls->all_locks);
	ls->all_locks_count++;
	spin_unlock(&ls->async_lock);

	*lpp = lp;
	return 0;
}
| 189 | |||
/* Unlink a gdlm_lock from every per-lockspace list it may be queued on
   and free it.  The lock must still be on all_locks (asserted). */
void gdlm_delete_lp(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	spin_lock(&ls->async_lock);
	/* It may or may not be pending on the complete/blocking/delayed
	   lists; remove it from whichever it is on */
	if (!list_empty(&lp->clist))
		list_del_init(&lp->clist);
	if (!list_empty(&lp->blist))
		list_del_init(&lp->blist);
	if (!list_empty(&lp->delay_list))
		list_del_init(&lp->delay_list);
	gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
		    (unsigned long long)lp->lockname.ln_number);
	list_del_init(&lp->all_list);
	ls->all_locks_count--;
	spin_unlock(&ls->async_lock);

	kfree(lp);
}
| 209 | |||
| 210 | int gdlm_get_lock(void *lockspace, struct lm_lockname *name, | ||
| 211 | void **lockp) | ||
| 212 | { | ||
| 213 | struct gdlm_lock *lp; | ||
| 214 | int error; | ||
| 215 | |||
| 216 | error = gdlm_create_lp(lockspace, name, &lp); | ||
| 217 | |||
| 218 | *lockp = lp; | ||
| 219 | return error; | ||
| 220 | } | ||
| 221 | |||
/* lock module interface: free a lock object obtained via gdlm_get_lock */
void gdlm_put_lock(void *lock)
{
	gdlm_delete_lp(lock);
}
| 226 | |||
/* Submit lp's pending request (lp->req/lp->lkf already set) to the dlm.
   Returns LM_OUT_ASYNC when the result will arrive via AST, or
   LM_OUT_ERROR on immediate failure. */
unsigned int gdlm_do_lock(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	struct gdlm_strname str;
	int error, bast = 1;

	/*
	 * When recovery is in progress, delay lock requests for submission
	 * once recovery is done.  Requests for recovery (NOEXP) and unlocks
	 * can pass.
	 */

	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
	    !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
		gdlm_queue_delayed(lp);
		return LM_OUT_ASYNC;
	}

	/*
	 * Submit the actual lock request.
	 */

	if (test_bit(LFL_NOBAST, &lp->flags))
		bast = 0;

	make_strname(&lp->lockname, &str);

	set_bit(LFL_ACTIVE, &lp->flags);

	log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
		  (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
		  lp->cur, lp->req, lp->lkf);

	error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
			 str.name, str.namelen, 0, gdlm_ast, lp,
			 bast ? gdlm_bast : NULL);

	/* A synchronous -EAGAIN on a NOQUEUE ("try") request is reported
	   to gfs through the normal completion path, not as an error */
	if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
		lp->lksb.sb_status = -EAGAIN;
		queue_complete(lp);
		error = 0;
	}

	if (error) {
		log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
			  "flags=%lx", ls->fsname, lp->lockname.ln_type,
			  (unsigned long long)lp->lockname.ln_number, error,
			  lp->cur, lp->req, lp->lkf, lp->flags);
		return LM_OUT_ERROR;
	}
	return LM_OUT_ASYNC;
}
| 279 | |||
/* Submit an unlock of lp to the dlm.  The result arrives via AST;
   returns LM_OUT_ASYNC, or LM_OUT_ERROR on immediate failure. */
static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	unsigned int lkf = 0;
	int error;

	set_bit(LFL_DLM_UNLOCK, &lp->flags);
	set_bit(LFL_ACTIVE, &lp->flags);

	/* Return the lock value block along with the unlock */
	if (lp->lvb)
		lkf = DLM_LKF_VALBLK;

	log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
		  (unsigned long long)lp->lockname.ln_number,
		  lp->lksb.sb_lkid, lp->cur, lkf);

	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);

	if (error) {
		log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
			  "flags=%lx", ls->fsname, lp->lockname.ln_type,
			  (unsigned long long)lp->lockname.ln_number, error,
			  lp->cur, lp->req, lp->lkf, lp->flags);
		return LM_OUT_ERROR;
	}
	return LM_OUT_ASYNC;
}
| 307 | |||
/* lock module interface: change @lock from cur_state to req_state.
   Translates GFS states/flags to dlm equivalents and submits the request. */
unsigned int gdlm_lock(void *lock, unsigned int cur_state,
		       unsigned int req_state, unsigned int flags)
{
	struct gdlm_lock *lp = lock;

	clear_bit(LFL_DLM_CANCEL, &lp->flags);
	/* NOEXP requests (recovery) must not be parked by DFL_BLOCK_LOCKS */
	if (flags & LM_FLAG_NOEXP)
		set_bit(LFL_NOBLOCK, &lp->flags);

	check_cur_state(lp, cur_state);
	lp->req = make_mode(req_state);
	lp->lkf = make_flags(lp, flags, lp->cur, lp->req);

	return gdlm_do_lock(lp);
}
| 323 | |||
/* lock module interface: release @lock.  If no dlm lock was ever
   granted (cur == IV) there is nothing to unlock. */
unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
{
	struct gdlm_lock *lp = lock;

	clear_bit(LFL_DLM_CANCEL, &lp->flags);
	if (lp->cur == DLM_LOCK_IV)
		return 0;
	return gdlm_do_unlock(lp);
}
| 333 | |||
/* lock module interface: cancel an in-flight request on @lock.  Three
   cases: the request is still on our delayed list (complete it locally
   with LFL_CANCEL), it is not actually in flight (skip), or it is
   blocked inside the dlm (issue DLM_LKF_CANCEL). */
void gdlm_cancel(void *lock)
{
	struct gdlm_lock *lp = lock;
	struct gdlm_ls *ls = lp->ls;
	int error, delay_list = 0;

	/* A cancel is already outstanding */
	if (test_bit(LFL_DLM_CANCEL, &lp->flags))
		return;

	log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
		 (unsigned long long)lp->lockname.ln_number, lp->flags);

	spin_lock(&ls->async_lock);
	if (!list_empty(&lp->delay_list)) {
		list_del_init(&lp->delay_list);
		delay_list = 1;
	}
	spin_unlock(&ls->async_lock);

	/* Never reached the dlm; fabricate a canceled completion */
	if (delay_list) {
		set_bit(LFL_CANCEL, &lp->flags);
		set_bit(LFL_ACTIVE, &lp->flags);
		queue_complete(lp);
		return;
	}

	if (!test_bit(LFL_ACTIVE, &lp->flags) ||
	    test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
		log_info("gdlm_cancel skip %x,%llx flags %lx",
			 lp->lockname.ln_type,
			 (unsigned long long)lp->lockname.ln_number, lp->flags);
		return;
	}

	/* the lock is blocked in the dlm */

	set_bit(LFL_DLM_CANCEL, &lp->flags);
	set_bit(LFL_ACTIVE, &lp->flags);

	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
			   NULL, lp);

	log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
		 lp->lockname.ln_type,
		 (unsigned long long)lp->lockname.ln_number, lp->flags);

	/* -EBUSY: too late to cancel; the original request will complete */
	if (error == -EBUSY)
		clear_bit(LFL_DLM_CANCEL, &lp->flags);
}
| 383 | |||
| 384 | static int gdlm_add_lvb(struct gdlm_lock *lp) | ||
| 385 | { | ||
| 386 | char *lvb; | ||
| 387 | |||
| 388 | lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL); | ||
| 389 | if (!lvb) | ||
| 390 | return -ENOMEM; | ||
| 391 | |||
| 392 | lp->lksb.sb_lvbptr = lvb; | ||
| 393 | lp->lvb = lvb; | ||
| 394 | return 0; | ||
| 395 | } | ||
| 396 | |||
| 397 | static void gdlm_del_lvb(struct gdlm_lock *lp) | ||
| 398 | { | ||
| 399 | kfree(lp->lvb); | ||
| 400 | lp->lvb = NULL; | ||
| 401 | lp->lksb.sb_lvbptr = NULL; | ||
| 402 | } | ||
| 403 | |||
/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs won't call hold_lvb() during a callback (from
   the context of a lock_dlm thread). */

static int hold_null_lock(struct gdlm_lock *lp)
{
	struct gdlm_lock *lpn = NULL;
	int error;

	if (lp->hold_null) {
		printk(KERN_INFO "lock_dlm: lvb already held\n");
		return 0;
	}

	/* A sibling lock on the same resource, held at NL purely to pin
	   the resource (and thus its LVB) in the dlm */
	error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
	if (error)
		goto out;

	/* We don't care about this lock's LVB contents; point it at a
	   shared scratch buffer */
	lpn->lksb.sb_lvbptr = junk_lvb;
	lpn->lvb = junk_lvb;

	lpn->req = DLM_LOCK_NL;
	lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
	set_bit(LFL_NOBAST, &lpn->flags);
	set_bit(LFL_INLOCK, &lpn->flags);

	/* Wait synchronously for the lock_dlm thread to signal completion */
	init_completion(&lpn->ast_wait);
	gdlm_do_lock(lpn);
	wait_for_completion(&lpn->ast_wait);
	error = lpn->lksb.sb_status;
	if (error) {
		printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
		       error);
		gdlm_delete_lp(lpn);
		lpn = NULL;
	}
out:
	lp->hold_null = lpn;
	return error;
}
| 444 | |||
/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs may call unhold_lvb() during a callback (from
   the context of a lock_dlm thread) which could cause a deadlock since the
   other lock_dlm thread could be engaged in recovery. */

static void unhold_null_lock(struct gdlm_lock *lp)
{
	struct gdlm_lock *lpn = lp->hold_null;

	gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
		    (unsigned long long)lp->lockname.ln_number);
	/* Detach the scratch LVB before unlocking */
	lpn->lksb.sb_lvbptr = NULL;
	lpn->lvb = NULL;
	/* Ask the completion path to delete lpn once the unlock finishes,
	   since we cannot wait for it here */
	set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
	gdlm_do_unlock(lpn);
	lp->hold_null = NULL;
}
| 462 | |||
| 463 | /* Acquire a NL lock because gfs requires the value block to remain | ||
| 464 | intact on the resource while the lvb is "held" even if it's holding no locks | ||
| 465 | on the resource. */ | ||
| 466 | |||
| 467 | int gdlm_hold_lvb(void *lock, char **lvbp) | ||
| 468 | { | ||
| 469 | struct gdlm_lock *lp = lock; | ||
| 470 | int error; | ||
| 471 | |||
| 472 | error = gdlm_add_lvb(lp); | ||
| 473 | if (error) | ||
| 474 | return error; | ||
| 475 | |||
| 476 | *lvbp = lp->lvb; | ||
| 477 | |||
| 478 | error = hold_null_lock(lp); | ||
| 479 | if (error) | ||
| 480 | gdlm_del_lvb(lp); | ||
| 481 | |||
| 482 | return error; | ||
| 483 | } | ||
| 484 | |||
/* lock module interface: release a held lock value block.  @lvb is the
   buffer returned by gdlm_hold_lvb (unused here; lp->lvb is the same
   pointer). */
void gdlm_unhold_lvb(void *lock, char *lvb)
{
	struct gdlm_lock *lp = lock;

	unhold_null_lock(lp);
	gdlm_del_lvb(lp);
}
| 492 | |||
| 493 | void gdlm_submit_delayed(struct gdlm_ls *ls) | ||
| 494 | { | ||
| 495 | struct gdlm_lock *lp, *safe; | ||
| 496 | |||
| 497 | spin_lock(&ls->async_lock); | ||
| 498 | list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) { | ||
| 499 | list_del_init(&lp->delay_list); | ||
| 500 | list_add_tail(&lp->delay_list, &ls->submit); | ||
| 501 | } | ||
| 502 | spin_unlock(&ls->async_lock); | ||
| 503 | wake_up(&ls->thread_wait); | ||
| 504 | } | ||
| 505 | |||
/* Free every gdlm_lock still known to the lockspace; called after the
   dlm lockspace itself has been released (see gdlm_unmount/gdlm_withdraw).
   Returns the number of stray locks freed so the caller can log it. */
int gdlm_release_all_locks(struct gdlm_ls *ls)
{
	struct gdlm_lock *lp, *safe;
	int count = 0;

	spin_lock(&ls->async_lock);
	list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
		list_del_init(&lp->all_list);

		/* junk_lvb is skipped — presumably a shared placeholder
		   buffer not owned by this lock; TODO confirm at its
		   definition */
		if (lp->lvb && lp->lvb != junk_lvb)
			kfree(lp->lvb);
		kfree(lp);
		count++;
	}
	spin_unlock(&ls->async_lock);

	return count;
}
| 524 | |||
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h new file mode 100644 index 000000000000..33af707a4d3f --- /dev/null +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
| @@ -0,0 +1,187 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef LOCK_DLM_DOT_H | ||
| 11 | #define LOCK_DLM_DOT_H | ||
| 12 | |||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/slab.h> | ||
| 15 | #include <linux/spinlock.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/types.h> | ||
| 18 | #include <linux/string.h> | ||
| 19 | #include <linux/list.h> | ||
| 20 | #include <linux/socket.h> | ||
| 21 | #include <linux/delay.h> | ||
| 22 | #include <linux/kthread.h> | ||
| 23 | #include <linux/kobject.h> | ||
| 24 | #include <linux/fcntl.h> | ||
| 25 | #include <linux/wait.h> | ||
| 26 | #include <net/sock.h> | ||
| 27 | |||
| 28 | #include <linux/dlm.h> | ||
| 29 | #include <linux/lm_interface.h> | ||
| 30 | |||
| 31 | /* | ||
| 32 | * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a | ||
| 33 | * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module | ||
| 34 | * as "lock_dlm". | ||
| 35 | */ | ||
| 36 | |||
| 37 | #define GDLM_STRNAME_BYTES 24 | ||
| 38 | #define GDLM_LVB_SIZE 32 | ||
| 39 | #define GDLM_DROP_COUNT 50000 | ||
| 40 | #define GDLM_DROP_PERIOD 60 | ||
| 41 | #define GDLM_NAME_LEN 128 | ||
| 42 | |||
| 43 | /* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number). | ||
| 44 | We sprintf these numbers into a 24 byte string of hex values to make them | ||
| 45 | human-readable (to make debugging simpler.) */ | ||
| 46 | |||
/* dlm resource name for a gfs2 lock: the 32-bit type and 64-bit number
   rendered as hex text (see comment above); namelen is the used length. */
struct gdlm_strname {
	unsigned char name[GDLM_STRNAME_BYTES];
	unsigned short namelen;
};

/* gdlm_ls.flags bits */
enum {
	DFL_BLOCK_LOCKS = 0,	/* toggled through the sysfs "block" attribute */
	DFL_SPECTATOR = 1,
	DFL_WITHDRAW = 2,	/* set via sysfs "withdraw"; checked on unmount */
};

/* Per-filesystem lockspace state for the lock_dlm module. */
struct gdlm_ls {
	u32 id;				/* fs id from the "id=" mount option */
	int jid;			/* journal id from "jid="; -1 until set */
	int first;			/* "first=" mount option */
	int first_done;			/* set in gdlm_others_may_mount() */
	unsigned long flags;		/* DFL_ bits above */
	struct kobject kobj;
	char clustername[GDLM_NAME_LEN];
	char fsname[GDLM_NAME_LEN];
	int fsflags;
	dlm_lockspace_t *dlm_lockspace;
	lm_callback_t fscb;		/* callback into the fs (e.g. recovery) */
	struct gfs2_sbd *sdp;
	int recover_jid;		/* written via sysfs recover_store() */
	int recover_jid_done;		/* set by gdlm_recovery_done() */
	int recover_jid_status;		/* set by gdlm_recovery_done() */
	spinlock_t async_lock;		/* protects the lock lists below */
	struct list_head complete;
	struct list_head blocking;
	struct list_head delayed;	/* drained by gdlm_submit_delayed() */
	struct list_head submit;
	struct list_head all_locks;
	u32 all_locks_count;
	wait_queue_head_t wait_control;	/* withdraw handshake (gdlm_withdraw) */
	struct task_struct *thread1;
	struct task_struct *thread2;
	wait_queue_head_t thread_wait;	/* lock_dlm threads sleep/wake here */
	unsigned long drop_time;
	int drop_locks_count;
	int drop_locks_period;
};

/* gdlm_lock.flags bits */
enum {
	LFL_NOBLOCK = 0,
	LFL_NOCACHE = 1,
	LFL_DLM_UNLOCK = 2,
	LFL_DLM_CANCEL = 3,
	LFL_SYNC_LVB = 4,
	LFL_FORCE_PROMOTE = 5,
	LFL_REREQUEST = 6,
	LFL_ACTIVE = 7,
	LFL_INLOCK = 8,
	LFL_CANCEL = 9,
	LFL_NOBAST = 10,
	LFL_HEADQUE = 11,
	LFL_UNLOCK_DELETE = 12,	/* free the lock once its unlock completes
				   (see unhold_null_lock) */
};

/* One lock managed by this module: the dlm lksb plus the list linkage
   used by the lock_dlm threads. */
struct gdlm_lock {
	struct gdlm_ls *ls;
	struct lm_lockname lockname;
	char *lvb;
	struct dlm_lksb lksb;

	s16 cur;
	s16 req;
	s16 prev_req;
	u32 lkf;		/* dlm flags DLM_LKF_ */
	unsigned long flags;	/* lock_dlm flags LFL_ */

	int bast_mode;		/* protected by async_lock */
	struct completion ast_wait;

	struct list_head clist;		/* complete */
	struct list_head blist;		/* blocking */
	struct list_head delay_list;	/* delayed */
	struct list_head all_list;	/* all locks for the fs */
	struct gdlm_lock *hold_null;	/* NL lock for hold_lvb */
};

/* Log a formatted message and BUG() when the assertion is false. */
#define gdlm_assert(assertion, fmt, args...)                                  \
do {                                                                          \
	if (unlikely(!(assertion))) {                                         \
		printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
		       "lock_dlm: " fmt "\n",                                 \
		       #assertion, ##args);                                   \
		BUG();                                                        \
	}                                                                     \
} while (0)

/* Module-prefixed logging; log_debug compiles away unless
   LOCK_DLM_LOG_DEBUG is defined. */
#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
#define log_info(fmt, arg...)  log_print(KERN_INFO , fmt , ## arg)
#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
#ifdef LOCK_DLM_LOG_DEBUG
#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
#else
#define log_debug(fmt, arg...)
#endif
| 146 | |||
| 147 | /* sysfs.c */ | ||
| 148 | |||
| 149 | int gdlm_sysfs_init(void); | ||
| 150 | void gdlm_sysfs_exit(void); | ||
| 151 | int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *); | ||
| 152 | void gdlm_kobject_release(struct gdlm_ls *); | ||
| 153 | |||
| 154 | /* thread.c */ | ||
| 155 | |||
| 156 | int gdlm_init_threads(struct gdlm_ls *); | ||
| 157 | void gdlm_release_threads(struct gdlm_ls *); | ||
| 158 | |||
| 159 | /* lock.c */ | ||
| 160 | |||
| 161 | s16 gdlm_make_lmstate(s16); | ||
| 162 | void gdlm_queue_delayed(struct gdlm_lock *); | ||
| 163 | void gdlm_submit_delayed(struct gdlm_ls *); | ||
| 164 | int gdlm_release_all_locks(struct gdlm_ls *); | ||
| 165 | void gdlm_delete_lp(struct gdlm_lock *); | ||
| 166 | unsigned int gdlm_do_lock(struct gdlm_lock *); | ||
| 167 | |||
| 168 | int gdlm_get_lock(void *, struct lm_lockname *, void **); | ||
| 169 | void gdlm_put_lock(void *); | ||
| 170 | unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int); | ||
| 171 | unsigned int gdlm_unlock(void *, unsigned int); | ||
| 172 | void gdlm_cancel(void *); | ||
| 173 | int gdlm_hold_lvb(void *, char **); | ||
| 174 | void gdlm_unhold_lvb(void *, char *); | ||
| 175 | |||
| 176 | /* plock.c */ | ||
| 177 | |||
| 178 | int gdlm_plock_init(void); | ||
| 179 | void gdlm_plock_exit(void); | ||
| 180 | int gdlm_plock(void *, struct lm_lockname *, struct file *, int, | ||
| 181 | struct file_lock *); | ||
| 182 | int gdlm_plock_get(void *, struct lm_lockname *, struct file *, | ||
| 183 | struct file_lock *); | ||
| 184 | int gdlm_punlock(void *, struct lm_lockname *, struct file *, | ||
| 185 | struct file_lock *); | ||
| 186 | #endif | ||
| 187 | |||
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c new file mode 100644 index 000000000000..2194b1d5b5ec --- /dev/null +++ b/fs/gfs2/locking/dlm/main.c | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/init.h> | ||
| 11 | |||
| 12 | #include "lock_dlm.h" | ||
| 13 | |||
| 14 | extern int gdlm_drop_count; | ||
| 15 | extern int gdlm_drop_period; | ||
| 16 | |||
| 17 | extern struct lm_lockops gdlm_ops; | ||
| 18 | |||
| 19 | static int __init init_lock_dlm(void) | ||
| 20 | { | ||
| 21 | int error; | ||
| 22 | |||
| 23 | error = gfs2_register_lockproto(&gdlm_ops); | ||
| 24 | if (error) { | ||
| 25 | printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n", | ||
| 26 | error); | ||
| 27 | return error; | ||
| 28 | } | ||
| 29 | |||
| 30 | error = gdlm_sysfs_init(); | ||
| 31 | if (error) { | ||
| 32 | gfs2_unregister_lockproto(&gdlm_ops); | ||
| 33 | return error; | ||
| 34 | } | ||
| 35 | |||
| 36 | error = gdlm_plock_init(); | ||
| 37 | if (error) { | ||
| 38 | gdlm_sysfs_exit(); | ||
| 39 | gfs2_unregister_lockproto(&gdlm_ops); | ||
| 40 | return error; | ||
| 41 | } | ||
| 42 | |||
| 43 | gdlm_drop_count = GDLM_DROP_COUNT; | ||
| 44 | gdlm_drop_period = GDLM_DROP_PERIOD; | ||
| 45 | |||
| 46 | printk(KERN_INFO | ||
| 47 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
/* Module exit: tear down in the reverse order of init_lock_dlm(). */
static void __exit exit_lock_dlm(void)
{
	gdlm_plock_exit();
	gdlm_sysfs_exit();
	gfs2_unregister_lockproto(&gdlm_ops);
}
| 57 | |||
| 58 | module_init(init_lock_dlm); | ||
| 59 | module_exit(exit_lock_dlm); | ||
| 60 | |||
| 61 | MODULE_DESCRIPTION("GFS DLM Locking Module"); | ||
| 62 | MODULE_AUTHOR("Red Hat, Inc."); | ||
| 63 | MODULE_LICENSE("GPL"); | ||
| 64 | |||
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c new file mode 100644 index 000000000000..1f94dd35a943 --- /dev/null +++ b/fs/gfs2/locking/dlm/mount.c | |||
| @@ -0,0 +1,255 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include "lock_dlm.h" | ||
| 11 | |||
| 12 | int gdlm_drop_count; | ||
| 13 | int gdlm_drop_period; | ||
| 14 | const struct lm_lockops gdlm_ops; | ||
| 15 | |||
| 16 | |||
| 17 | static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, | ||
| 18 | int flags, char *table_name) | ||
| 19 | { | ||
| 20 | struct gdlm_ls *ls; | ||
| 21 | char buf[256], *p; | ||
| 22 | |||
| 23 | ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL); | ||
| 24 | if (!ls) | ||
| 25 | return NULL; | ||
| 26 | |||
| 27 | ls->drop_locks_count = gdlm_drop_count; | ||
| 28 | ls->drop_locks_period = gdlm_drop_period; | ||
| 29 | ls->fscb = cb; | ||
| 30 | ls->sdp = sdp; | ||
| 31 | ls->fsflags = flags; | ||
| 32 | spin_lock_init(&ls->async_lock); | ||
| 33 | INIT_LIST_HEAD(&ls->complete); | ||
| 34 | INIT_LIST_HEAD(&ls->blocking); | ||
| 35 | INIT_LIST_HEAD(&ls->delayed); | ||
| 36 | INIT_LIST_HEAD(&ls->submit); | ||
| 37 | INIT_LIST_HEAD(&ls->all_locks); | ||
| 38 | init_waitqueue_head(&ls->thread_wait); | ||
| 39 | init_waitqueue_head(&ls->wait_control); | ||
| 40 | ls->thread1 = NULL; | ||
| 41 | ls->thread2 = NULL; | ||
| 42 | ls->drop_time = jiffies; | ||
| 43 | ls->jid = -1; | ||
| 44 | |||
| 45 | strncpy(buf, table_name, 256); | ||
| 46 | buf[255] = '\0'; | ||
| 47 | |||
| 48 | p = strstr(buf, ":"); | ||
| 49 | if (!p) { | ||
| 50 | log_info("invalid table_name \"%s\"", table_name); | ||
| 51 | kfree(ls); | ||
| 52 | return NULL; | ||
| 53 | } | ||
| 54 | *p = '\0'; | ||
| 55 | p++; | ||
| 56 | |||
| 57 | strncpy(ls->clustername, buf, GDLM_NAME_LEN); | ||
| 58 | strncpy(ls->fsname, p, GDLM_NAME_LEN); | ||
| 59 | |||
| 60 | return ls; | ||
| 61 | } | ||
| 62 | |||
| 63 | static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir) | ||
| 64 | { | ||
| 65 | char data[256]; | ||
| 66 | char *options, *x, *y; | ||
| 67 | int error = 0; | ||
| 68 | |||
| 69 | memset(data, 0, 256); | ||
| 70 | strncpy(data, data_arg, 255); | ||
| 71 | |||
| 72 | for (options = data; (x = strsep(&options, ":")); ) { | ||
| 73 | if (!*x) | ||
| 74 | continue; | ||
| 75 | |||
| 76 | y = strchr(x, '='); | ||
| 77 | if (y) | ||
| 78 | *y++ = 0; | ||
| 79 | |||
| 80 | if (!strcmp(x, "jid")) { | ||
| 81 | if (!y) { | ||
| 82 | log_error("need argument to jid"); | ||
| 83 | error = -EINVAL; | ||
| 84 | break; | ||
| 85 | } | ||
| 86 | sscanf(y, "%u", &ls->jid); | ||
| 87 | |||
| 88 | } else if (!strcmp(x, "first")) { | ||
| 89 | if (!y) { | ||
| 90 | log_error("need argument to first"); | ||
| 91 | error = -EINVAL; | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | sscanf(y, "%u", &ls->first); | ||
| 95 | |||
| 96 | } else if (!strcmp(x, "id")) { | ||
| 97 | if (!y) { | ||
| 98 | log_error("need argument to id"); | ||
| 99 | error = -EINVAL; | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | sscanf(y, "%u", &ls->id); | ||
| 103 | |||
| 104 | } else if (!strcmp(x, "nodir")) { | ||
| 105 | if (!y) { | ||
| 106 | log_error("need argument to nodir"); | ||
| 107 | error = -EINVAL; | ||
| 108 | break; | ||
| 109 | } | ||
| 110 | sscanf(y, "%u", nodir); | ||
| 111 | |||
| 112 | } else { | ||
| 113 | log_error("unkonwn option: %s", x); | ||
| 114 | error = -EINVAL; | ||
| 115 | break; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | return error; | ||
| 120 | } | ||
| 121 | |||
| 122 | static int gdlm_mount(char *table_name, char *host_data, | ||
| 123 | lm_callback_t cb, void *cb_data, | ||
| 124 | unsigned int min_lvb_size, int flags, | ||
| 125 | struct lm_lockstruct *lockstruct, | ||
| 126 | struct kobject *fskobj) | ||
| 127 | { | ||
| 128 | struct gdlm_ls *ls; | ||
| 129 | int error = -ENOMEM, nodir = 0; | ||
| 130 | |||
| 131 | if (min_lvb_size > GDLM_LVB_SIZE) | ||
| 132 | goto out; | ||
| 133 | |||
| 134 | ls = init_gdlm(cb, cb_data, flags, table_name); | ||
| 135 | if (!ls) | ||
| 136 | goto out; | ||
| 137 | |||
| 138 | error = make_args(ls, host_data, &nodir); | ||
| 139 | if (error) | ||
| 140 | goto out; | ||
| 141 | |||
| 142 | error = gdlm_init_threads(ls); | ||
| 143 | if (error) | ||
| 144 | goto out_free; | ||
| 145 | |||
| 146 | error = gdlm_kobject_setup(ls, fskobj); | ||
| 147 | if (error) | ||
| 148 | goto out_thread; | ||
| 149 | |||
| 150 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), | ||
| 151 | &ls->dlm_lockspace, | ||
| 152 | nodir ? DLM_LSFL_NODIR : 0, | ||
| 153 | GDLM_LVB_SIZE); | ||
| 154 | if (error) { | ||
| 155 | log_error("dlm_new_lockspace error %d", error); | ||
| 156 | goto out_kobj; | ||
| 157 | } | ||
| 158 | |||
| 159 | lockstruct->ls_jid = ls->jid; | ||
| 160 | lockstruct->ls_first = ls->first; | ||
| 161 | lockstruct->ls_lockspace = ls; | ||
| 162 | lockstruct->ls_ops = &gdlm_ops; | ||
| 163 | lockstruct->ls_flags = 0; | ||
| 164 | lockstruct->ls_lvb_size = GDLM_LVB_SIZE; | ||
| 165 | return 0; | ||
| 166 | |||
| 167 | out_kobj: | ||
| 168 | gdlm_kobject_release(ls); | ||
| 169 | out_thread: | ||
| 170 | gdlm_release_threads(ls); | ||
| 171 | out_free: | ||
| 172 | kfree(ls); | ||
| 173 | out: | ||
| 174 | return error; | ||
| 175 | } | ||
| 176 | |||
/* lm_unmount operation.  After a withdraw, gdlm_withdraw() already
   released the kobject, lockspace, threads and locks, so only the ls
   itself is freed here. */
static void gdlm_unmount(void *lockspace)
{
	struct gdlm_ls *ls = lockspace;
	int rv;

	log_debug("unmount flags %lx", ls->flags);

	/* FIXME: serialize unmount and withdraw in case they
	   happen at once.  Also, if unmount follows withdraw,
	   wait for withdraw to finish. */

	if (test_bit(DFL_WITHDRAW, &ls->flags))
		goto out;

	gdlm_kobject_release(ls);
	dlm_release_lockspace(ls->dlm_lockspace, 2);
	gdlm_release_threads(ls);
	rv = gdlm_release_all_locks(ls);
	if (rv)
		log_info("gdlm_unmount: %d stray locks freed", rv);
out:
	kfree(ls);
}
| 200 | |||
/* lm_recovery_done operation: gfs2 reports the result of recovering
   journal jid; record it (read back via sysfs recover_done/_status)
   and notify userspace with a change uevent. */
static void gdlm_recovery_done(void *lockspace, unsigned int jid,
			       unsigned int message)
{
	struct gdlm_ls *ls = lockspace;
	ls->recover_jid_done = jid;
	ls->recover_jid_status = message;
	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
}
| 209 | |||
/* lm_others_may_mount operation: mark the first mounter as done
   (exported via sysfs first_done) and notify userspace via uevent. */
static void gdlm_others_may_mount(void *lockspace)
{
	struct gdlm_ls *ls = lockspace;
	ls->first_done = 1;
	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
}
| 216 | |||
/* Userspace gets the offline uevent, blocks new gfs locks on
   other mounters, and lets us know (sets WITHDRAW flag via the sysfs
   withdraw attribute).  Then, userspace leaves the mount group while
   we leave the lockspace. */

static void gdlm_withdraw(void *lockspace)
{
	struct gdlm_ls *ls = lockspace;

	kobject_uevent(&ls->kobj, KOBJ_OFFLINE);

	/* NOTE(review): the return value of the interruptible wait is
	   ignored — a signal would let us proceed to release the
	   lockspace before userspace acknowledged; confirm intended. */
	wait_event_interruptible(ls->wait_control,
				 test_bit(DFL_WITHDRAW, &ls->flags));

	dlm_release_lockspace(ls->dlm_lockspace, 2);
	gdlm_release_threads(ls);
	gdlm_release_all_locks(ls);
	gdlm_kobject_release(ls);
}
| 235 | |||
/* Lock-module operations table handed to gfs2 through
   gfs2_register_lockproto() (see main.c). */
const struct lm_lockops gdlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_others_may_mount = gdlm_others_may_mount,
	.lm_unmount = gdlm_unmount,
	.lm_withdraw = gdlm_withdraw,
	.lm_get_lock = gdlm_get_lock,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_unlock = gdlm_unlock,
	.lm_plock = gdlm_plock,
	.lm_punlock = gdlm_punlock,
	.lm_plock_get = gdlm_plock_get,
	.lm_cancel = gdlm_cancel,
	.lm_hold_lvb = gdlm_hold_lvb,
	.lm_unhold_lvb = gdlm_unhold_lvb,
	.lm_recovery_done = gdlm_recovery_done,
	.lm_owner = THIS_MODULE,
};
| 255 | |||
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c new file mode 100644 index 000000000000..7365aec9511b --- /dev/null +++ b/fs/gfs2/locking/dlm/plock.c | |||
| @@ -0,0 +1,301 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This copyrighted material is made available to anyone wishing to use, | ||
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 6 | * of the GNU General Public License version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/miscdevice.h> | ||
| 10 | #include <linux/lock_dlm_plock.h> | ||
| 11 | |||
| 12 | #include "lock_dlm.h" | ||
| 13 | |||
| 14 | |||
/* Requests queued for the userspace plock daemon sit on send_list and
   move to recv_list once read (dev_read); ops_lock protects both lists. */
static spinlock_t ops_lock;
static struct list_head send_list;
static struct list_head recv_list;
static wait_queue_head_t send_wq;	/* poll/read side (daemon) */
static wait_queue_head_t recv_wq;	/* kernel waiters for results */

/* One in-flight posix-lock request/response pair. */
struct plock_op {
	struct list_head list;
	int done;			/* set by dev_write() with info.rv valid */
	struct gdlm_plock_info info;
};
| 26 | |||
/* Stamp the kernel's plock interface version into an outgoing request. */
static inline void set_version(struct gdlm_plock_info *info)
{
	info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
	info->version[1] = GDLM_PLOCK_VERSION_MINOR;
	info->version[2] = GDLM_PLOCK_VERSION_PATCH;
}
| 33 | |||
| 34 | static int check_version(struct gdlm_plock_info *info) | ||
| 35 | { | ||
| 36 | if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) || | ||
| 37 | (GDLM_PLOCK_VERSION_MINOR < info->version[1])) { | ||
| 38 | log_error("plock device version mismatch: " | ||
| 39 | "kernel (%u.%u.%u), user (%u.%u.%u)", | ||
| 40 | GDLM_PLOCK_VERSION_MAJOR, | ||
| 41 | GDLM_PLOCK_VERSION_MINOR, | ||
| 42 | GDLM_PLOCK_VERSION_PATCH, | ||
| 43 | info->version[0], | ||
| 44 | info->version[1], | ||
| 45 | info->version[2]); | ||
| 46 | return -EINVAL; | ||
| 47 | } | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
/* Queue a request for the userspace daemon and wake any poller/reader. */
static void send_op(struct plock_op *op)
{
	set_version(&op->info);
	INIT_LIST_HEAD(&op->list);
	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);
	wake_up(&send_wq);
}
| 60 | |||
/* Acquire a posix lock: hand the request to the userspace daemon,
   sleep until dev_write() delivers the result, then mirror a granted
   lock into the local VFS lock table. */
int gdlm_plock(void *lockspace, struct lm_lockname *name,
	       struct file *file, int cmd, struct file_lock *fl)
{
	struct gdlm_ls *ls = lockspace;
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	op->info.optype = GDLM_PLOCK_OP_LOCK;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	op->info.owner = (__u64)(long) fl->fl_owner;

	send_op(op);
	/* dev_write() sets op->done once op->info.rv is valid */
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() normally removed op from recv_list already */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "plock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (!rv) {
		/* granted: record it locally so the VFS sees the lock */
		if (posix_lock_file_wait(file, fl) < 0)
			log_error("gdlm_plock: vfs lock error %x,%llx",
				  name->ln_type,
				  (unsigned long long)name->ln_number);
	}

	kfree(op);
	return rv;
}
| 104 | |||
/* Release a posix lock: drop it from the local VFS table first, then
   tell the userspace daemon and wait for its result. */
int gdlm_punlock(void *lockspace, struct lm_lockname *name,
		 struct file *file, struct file_lock *fl)
{
	struct gdlm_ls *ls = lockspace;
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	if (posix_lock_file_wait(file, fl) < 0)
		log_error("gdlm_punlock: vfs unlock error %x,%llx",
			  name->ln_type, (unsigned long long)name->ln_number);

	op->info.optype = GDLM_PLOCK_OP_UNLOCK;
	op->info.pid = fl->fl_pid;
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	op->info.owner = (__u64)(long) fl->fl_owner;

	send_op(op);
	/* dev_write() sets op->done once op->info.rv is valid */
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() normally removed op from recv_list already */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "punlock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	kfree(op);
	return rv;
}
| 143 | |||
/* F_GETLK: ask the daemon for a conflicting lock.  rv == 0 means no
   conflict (F_UNLCK); rv > 0 means a conflict, described in *fl. */
int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
		   struct file *file, struct file_lock *fl)
{
	struct gdlm_ls *ls = lockspace;
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	op->info.optype = GDLM_PLOCK_OP_GET;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;

	send_op(op);
	/* dev_write() sets op->done once op->info.rv is valid */
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() normally removed op from recv_list already */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "plock_get op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (rv == 0)
		fl->fl_type = F_UNLCK;
	else if (rv > 0) {
		/* describe the conflicting lock reported by the daemon */
		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_pid = op->info.pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
	}

	kfree(op);
	return rv;
}
| 187 | |||
/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op = NULL;

	if (count < sizeof(info))
		return -EINVAL;

	/* Move the oldest request to recv_list, where dev_write() will
	   match the daemon's reply against it. */
	spin_lock(&ops_lock);
	if (!list_empty(&send_list)) {
		op = list_entry(send_list.next, struct plock_op, list);
		list_move(&op->list, &recv_list);
		memcpy(&info, &op->info, sizeof(info));
	}
	spin_unlock(&ops_lock);

	if (!op)
		return -EAGAIN;

	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}
| 213 | |||
/* a write copies in one plock result that should match a plock_op
   on the recv list */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op;
	int found = 0;

	if (count != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;

	if (check_version(&info))
		return -EINVAL;

	/* Match the reply to its request by (fsid, number, owner) and
	   wake the waiter in gdlm_plock/punlock/plock_get. */
	spin_lock(&ops_lock);
	list_for_each_entry(op, &recv_list, list) {
		if (op->info.fsid == info.fsid && op->info.number == info.number &&
		    op->info.owner == info.owner) {
			list_del_init(&op->list);
			found = 1;
			op->done = 1;
			memcpy(&op->info, &info, sizeof(info));
			break;
		}
	}
	spin_unlock(&ops_lock);

	if (found)
		wake_up(&recv_wq);
	else
		printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
		       (unsigned long long)info.number);
	return count;
}
| 252 | |||
| 253 | static unsigned int dev_poll(struct file *file, poll_table *wait) | ||
| 254 | { | ||
| 255 | poll_wait(file, &send_wq, wait); | ||
| 256 | |||
| 257 | spin_lock(&ops_lock); | ||
| 258 | if (!list_empty(&send_list)) { | ||
| 259 | spin_unlock(&ops_lock); | ||
| 260 | return POLLIN | POLLRDNORM; | ||
| 261 | } | ||
| 262 | spin_unlock(&ops_lock); | ||
| 263 | return 0; | ||
| 264 | } | ||
| 265 | |||
/* Misc character device through which the userspace plock daemon reads
   requests (dev_read) and writes results (dev_write). */
static struct file_operations dev_fops = {
	.read    = dev_read,
	.write   = dev_write,
	.poll    = dev_poll,
	.owner   = THIS_MODULE
};

static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = GDLM_PLOCK_MISC_NAME,
	.fops = &dev_fops
};
| 278 | |||
| 279 | int gdlm_plock_init(void) | ||
| 280 | { | ||
| 281 | int rv; | ||
| 282 | |||
| 283 | spin_lock_init(&ops_lock); | ||
| 284 | INIT_LIST_HEAD(&send_list); | ||
| 285 | INIT_LIST_HEAD(&recv_list); | ||
| 286 | init_waitqueue_head(&send_wq); | ||
| 287 | init_waitqueue_head(&recv_wq); | ||
| 288 | |||
| 289 | rv = misc_register(&plock_dev_misc); | ||
| 290 | if (rv) | ||
| 291 | printk(KERN_INFO "gdlm_plock_init: misc_register failed %d", | ||
| 292 | rv); | ||
| 293 | return rv; | ||
| 294 | } | ||
| 295 | |||
| 296 | void gdlm_plock_exit(void) | ||
| 297 | { | ||
| 298 | if (misc_deregister(&plock_dev_misc) < 0) | ||
| 299 | printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed"); | ||
| 300 | } | ||
| 301 | |||
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c new file mode 100644 index 000000000000..29ae06f94944 --- /dev/null +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
| @@ -0,0 +1,226 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/ctype.h> | ||
| 11 | #include <linux/stat.h> | ||
| 12 | |||
| 13 | #include "lock_dlm.h" | ||
| 14 | |||
| 15 | extern struct lm_lockops gdlm_ops; | ||
| 16 | |||
/* sysfs: report the protocol name ("lock_dlm") from the ops table. */
static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
}
| 21 | |||
| 22 | static ssize_t block_show(struct gdlm_ls *ls, char *buf) | ||
| 23 | { | ||
| 24 | ssize_t ret; | ||
| 25 | int val = 0; | ||
| 26 | |||
| 27 | if (test_bit(DFL_BLOCK_LOCKS, &ls->flags)) | ||
| 28 | val = 1; | ||
| 29 | ret = sprintf(buf, "%d\n", val); | ||
| 30 | return ret; | ||
| 31 | } | ||
| 32 | |||
/* sysfs: writing 1 sets DFL_BLOCK_LOCKS; writing 0 clears it and
   resubmits the delayed lock requests; anything else is -EINVAL. */
static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
	ssize_t ret = len;
	int val;

	val = simple_strtol(buf, NULL, 0);

	if (val == 1)
		set_bit(DFL_BLOCK_LOCKS, &ls->flags);
	else if (val == 0) {
		clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
		gdlm_submit_delayed(ls);
	} else {
		ret = -EINVAL;
	}
	return ret;
}
| 50 | |||
| 51 | static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf) | ||
| 52 | { | ||
| 53 | ssize_t ret; | ||
| 54 | int val = 0; | ||
| 55 | |||
| 56 | if (test_bit(DFL_WITHDRAW, &ls->flags)) | ||
| 57 | val = 1; | ||
| 58 | ret = sprintf(buf, "%d\n", val); | ||
| 59 | return ret; | ||
| 60 | } | ||
| 61 | |||
/* sysfs: userspace writes 1 to acknowledge a withdraw, releasing the
   waiter in gdlm_withdraw(); any other value is -EINVAL. */
static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
	ssize_t ret = len;
	int val;

	val = simple_strtol(buf, NULL, 0);

	if (val == 1)
		set_bit(DFL_WITHDRAW, &ls->flags);
	else
		ret = -EINVAL;
	/* NOTE(review): woken even on invalid input — harmless, as the
	   waiter re-checks DFL_WITHDRAW, but looks unintentional */
	wake_up(&ls->wait_control);
	return ret;
}
| 76 | |||
| 77 | static ssize_t id_show(struct gdlm_ls *ls, char *buf) | ||
| 78 | { | ||
| 79 | return sprintf(buf, "%u\n", ls->id); | ||
| 80 | } | ||
| 81 | |||
| 82 | static ssize_t jid_show(struct gdlm_ls *ls, char *buf) | ||
| 83 | { | ||
| 84 | return sprintf(buf, "%d\n", ls->jid); | ||
| 85 | } | ||
| 86 | |||
| 87 | static ssize_t first_show(struct gdlm_ls *ls, char *buf) | ||
| 88 | { | ||
| 89 | return sprintf(buf, "%d\n", ls->first); | ||
| 90 | } | ||
| 91 | |||
| 92 | static ssize_t first_done_show(struct gdlm_ls *ls, char *buf) | ||
| 93 | { | ||
| 94 | return sprintf(buf, "%d\n", ls->first_done); | ||
| 95 | } | ||
| 96 | |||
| 97 | static ssize_t recover_show(struct gdlm_ls *ls, char *buf) | ||
| 98 | { | ||
| 99 | return sprintf(buf, "%d\n", ls->recover_jid); | ||
| 100 | } | ||
| 101 | |||
| 102 | static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len) | ||
| 103 | { | ||
| 104 | ls->recover_jid = simple_strtol(buf, NULL, 0); | ||
| 105 | ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid); | ||
| 106 | return len; | ||
| 107 | } | ||
| 108 | |||
| 109 | static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf) | ||
| 110 | { | ||
| 111 | return sprintf(buf, "%d\n", ls->recover_jid_done); | ||
| 112 | } | ||
| 113 | |||
| 114 | static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) | ||
| 115 | { | ||
| 116 | return sprintf(buf, "%d\n", ls->recover_jid_status); | ||
| 117 | } | ||
| 118 | |||
/*
 * A lock_dlm sysfs attribute: a kobject attribute plus the show/store
 * handlers that operate on the owning gdlm_ls (recovered via
 * container_of in gdlm_attr_show/gdlm_attr_store below).
 */
struct gdlm_attr {
	struct attribute attr;
	ssize_t (*show)(struct gdlm_ls *, char *);
	ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
};

/* Declare a gdlm_attr named gdlm_attr_<name> using the standard __ATTR
   initializer. */
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)

/* Read-only attributes pass NULL for store; 0644 entries are writable. */
GDLM_ATTR(proto_name,     0444, proto_name_show,     NULL);
GDLM_ATTR(block,          0644, block_show,          block_store);
GDLM_ATTR(withdraw,       0644, withdraw_show,       withdraw_store);
GDLM_ATTR(id,             0444, id_show,             NULL);
GDLM_ATTR(jid,            0444, jid_show,            NULL);
GDLM_ATTR(first,          0444, first_show,          NULL);
GDLM_ATTR(first_done,     0444, first_done_show,     NULL);
GDLM_ATTR(recover,        0644, recover_show,        recover_store);
GDLM_ATTR(recover_done,   0444, recover_done_show,   NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);

/* NULL-terminated list installed as gdlm_ktype.default_attrs. */
static struct attribute *gdlm_attrs[] = {
	&gdlm_attr_proto_name.attr,
	&gdlm_attr_block.attr,
	&gdlm_attr_withdraw.attr,
	&gdlm_attr_id.attr,
	&gdlm_attr_jid.attr,
	&gdlm_attr_first.attr,
	&gdlm_attr_first_done.attr,
	&gdlm_attr_recover.attr,
	&gdlm_attr_recover_done.attr,
	&gdlm_attr_recover_status.attr,
	NULL,
};
| 152 | |||
| 153 | static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr, | ||
| 154 | char *buf) | ||
| 155 | { | ||
| 156 | struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj); | ||
| 157 | struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr); | ||
| 158 | return a->show ? a->show(ls, buf) : 0; | ||
| 159 | } | ||
| 160 | |||
| 161 | static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr, | ||
| 162 | const char *buf, size_t len) | ||
| 163 | { | ||
| 164 | struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj); | ||
| 165 | struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr); | ||
| 166 | return a->store ? a->store(ls, buf, len) : len; | ||
| 167 | } | ||
| 168 | |||
/* sysfs show/store ops shared by all lock_dlm attributes. */
static struct sysfs_ops gdlm_attr_ops = {
	.show  = gdlm_attr_show,
	.store = gdlm_attr_store,
};

/* kobject type for a per-lockspace "lock_module" directory. */
static struct kobj_type gdlm_ktype = {
	.default_attrs = gdlm_attrs,
	.sysfs_ops     = &gdlm_attr_ops,
};

/* Module-wide kset, registered under the kernel subsystem as
   /sys/kernel/lock_dlm (see gdlm_sysfs_init). */
static struct kset gdlm_kset = {
	.subsys = &kernel_subsys,
	.kobj   = {.name = "lock_dlm",},
	.ktype  = &gdlm_ktype,
};
| 184 | |||
| 185 | int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj) | ||
| 186 | { | ||
| 187 | int error; | ||
| 188 | |||
| 189 | error = kobject_set_name(&ls->kobj, "%s", "lock_module"); | ||
| 190 | if (error) { | ||
| 191 | log_error("can't set kobj name %d", error); | ||
| 192 | return error; | ||
| 193 | } | ||
| 194 | |||
| 195 | ls->kobj.kset = &gdlm_kset; | ||
| 196 | ls->kobj.ktype = &gdlm_ktype; | ||
| 197 | ls->kobj.parent = fskobj; | ||
| 198 | |||
| 199 | error = kobject_register(&ls->kobj); | ||
| 200 | if (error) | ||
| 201 | log_error("can't register kobj %d", error); | ||
| 202 | |||
| 203 | return error; | ||
| 204 | } | ||
| 205 | |||
| 206 | void gdlm_kobject_release(struct gdlm_ls *ls) | ||
| 207 | { | ||
| 208 | kobject_unregister(&ls->kobj); | ||
| 209 | } | ||
| 210 | |||
| 211 | int gdlm_sysfs_init(void) | ||
| 212 | { | ||
| 213 | int error; | ||
| 214 | |||
| 215 | error = kset_register(&gdlm_kset); | ||
| 216 | if (error) | ||
| 217 | printk("lock_dlm: cannot register kset %d\n", error); | ||
| 218 | |||
| 219 | return error; | ||
| 220 | } | ||
| 221 | |||
| 222 | void gdlm_sysfs_exit(void) | ||
| 223 | { | ||
| 224 | kset_unregister(&gdlm_kset); | ||
| 225 | } | ||
| 226 | |||
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c new file mode 100644 index 000000000000..9cf1f168eaf8 --- /dev/null +++ b/fs/gfs2/locking/dlm/thread.c | |||
| @@ -0,0 +1,359 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include "lock_dlm.h" | ||
| 11 | |||
| 12 | /* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm | ||
| 13 | thread gets to it. */ | ||
| 14 | |||
| 15 | static void queue_submit(struct gdlm_lock *lp) | ||
| 16 | { | ||
| 17 | struct gdlm_ls *ls = lp->ls; | ||
| 18 | |||
| 19 | spin_lock(&ls->async_lock); | ||
| 20 | list_add_tail(&lp->delay_list, &ls->submit); | ||
| 21 | spin_unlock(&ls->async_lock); | ||
| 22 | wake_up(&ls->thread_wait); | ||
| 23 | } | ||
| 24 | |||
| 25 | static void process_blocking(struct gdlm_lock *lp, int bast_mode) | ||
| 26 | { | ||
| 27 | struct gdlm_ls *ls = lp->ls; | ||
| 28 | unsigned int cb = 0; | ||
| 29 | |||
| 30 | switch (gdlm_make_lmstate(bast_mode)) { | ||
| 31 | case LM_ST_EXCLUSIVE: | ||
| 32 | cb = LM_CB_NEED_E; | ||
| 33 | break; | ||
| 34 | case LM_ST_DEFERRED: | ||
| 35 | cb = LM_CB_NEED_D; | ||
| 36 | break; | ||
| 37 | case LM_ST_SHARED: | ||
| 38 | cb = LM_CB_NEED_S; | ||
| 39 | break; | ||
| 40 | default: | ||
| 41 | gdlm_assert(0, "unknown bast mode %u", lp->bast_mode); | ||
| 42 | } | ||
| 43 | |||
| 44 | ls->fscb(ls->sdp, cb, &lp->lockname); | ||
| 45 | } | ||
| 46 | |||
/*
 * process_complete - handle a dlm completion (ast) for a lock request
 * @lp: the lock whose dlm operation just completed
 *
 * Runs in gdlm_thread context.  Inspects lp->lksb (the dlm status
 * block) and lp->flags to classify the completion — cancel, unlock,
 * error, LVB sync, re-request after BLOCK_LOCKS, recovery-time grant —
 * and either finishes the request (reporting the result to GFS via
 * ls->fscb) or re-queues/delays the lock.
 */
static void process_complete(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	struct lm_async_cb acb;
	s16 prev_mode = lp->cur;	/* remembered for the cacheable test at the end */

	memset(&acb, 0, sizeof(acb));

	/* dlm canceled an in-flight request: revert to the current mode. */
	if (lp->lksb.sb_status == -DLM_ECANCEL) {
		log_info("complete dlm cancel %x,%llx flags %lx",
			 lp->lockname.ln_type,
			 (unsigned long long)lp->lockname.ln_number,
			 lp->flags);

		lp->req = lp->cur;
		acb.lc_ret |= LM_OUT_CANCELED;
		if (lp->cur == DLM_LOCK_IV)
			lp->lksb.sb_lkid = 0;
		goto out;
	}

	/* Completion of an unlock we issued (LFL_DLM_UNLOCK set). */
	if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
		if (lp->lksb.sb_status != -DLM_EUNLOCK) {
			log_info("unlock sb_status %d %x,%llx flags %lx",
				 lp->lksb.sb_status, lp->lockname.ln_type,
				 (unsigned long long)lp->lockname.ln_number,
				 lp->flags);
			return;
		}

		lp->cur = DLM_LOCK_IV;
		lp->req = DLM_LOCK_IV;
		lp->lksb.sb_lkid = 0;

		/* Unlock was the final step of deleting the lock. */
		if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
			gdlm_delete_lp(lp);
			return;
		}
		goto out;
	}

	/* dlm says the lock value block is no longer valid: clear it. */
	if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
		memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);

	/* dlm granted the alternate mode (PR<->CW); record what we got. */
	if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
		if (lp->req == DLM_LOCK_PR)
			lp->req = DLM_LOCK_CW;
		else if (lp->req == DLM_LOCK_CW)
			lp->req = DLM_LOCK_PR;
	}

	/*
	 * A canceled lock request.  The lock was just taken off the delayed
	 * list and was never even submitted to dlm.
	 */

	if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
		log_info("complete internal cancel %x,%llx",
			 lp->lockname.ln_type,
			 (unsigned long long)lp->lockname.ln_number);
		lp->req = lp->cur;
		acb.lc_ret |= LM_OUT_CANCELED;
		goto out;
	}

	/*
	 * An error occurred.
	 */

	if (lp->lksb.sb_status) {
		/* a "normal" error: -EAGAIN on a try-lock (NOQUEUE) */
		if ((lp->lksb.sb_status == -EAGAIN) &&
		    (lp->lkf & DLM_LKF_NOQUEUE)) {
			lp->req = lp->cur;
			if (lp->cur == DLM_LOCK_IV)
				lp->lksb.sb_lkid = 0;
			goto out;
		}

		/* this could only happen with cancels I think */
		log_info("ast sb_status %d %x,%llx flags %lx",
			 lp->lksb.sb_status, lp->lockname.ln_type,
			 (unsigned long long)lp->lockname.ln_number,
			 lp->flags);
		return;
	}

	/*
	 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
	 */

	if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
		complete(&lp->ast_wait);
		return;
	}

	/*
	 * A lock has been demoted to NL because it initially completed during
	 * BLOCK_LOCKS.  Now it must be requested in the originally requested
	 * mode.
	 */

	if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
		gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
			    lp->lockname.ln_type,
			    (unsigned long long)lp->lockname.ln_number);
		gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
			    lp->lockname.ln_type,
			    (unsigned long long)lp->lockname.ln_number);

		lp->cur = DLM_LOCK_NL;
		lp->req = lp->prev_req;
		lp->prev_req = DLM_LOCK_IV;
		lp->lkf &= ~DLM_LKF_CONVDEADLK;

		set_bit(LFL_NOCACHE, &lp->flags);

		if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
		    !test_bit(LFL_NOBLOCK, &lp->flags))
			gdlm_queue_delayed(lp);
		else
			queue_submit(lp);
		return;
	}

	/*
	 * A request is granted during dlm recovery.  It may be granted
	 * because the locks of a failed node were cleared.  In that case,
	 * there may be inconsistent data beneath this lock and we must wait
	 * for recovery to complete to use it.  When gfs recovery is done this
	 * granted lock will be converted to NL and then reacquired in this
	 * granted state.
	 */

	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
	    !test_bit(LFL_NOBLOCK, &lp->flags) &&
	    lp->req != DLM_LOCK_NL) {

		lp->cur = lp->req;
		lp->prev_req = lp->req;
		lp->req = DLM_LOCK_NL;
		lp->lkf |= DLM_LKF_CONVERT;
		lp->lkf &= ~DLM_LKF_CONVDEADLK;

		log_debug("rereq %x,%llx id %x %d,%d",
			  lp->lockname.ln_type,
			  (unsigned long long)lp->lockname.ln_number,
			  lp->lksb.sb_lkid, lp->cur, lp->req);

		set_bit(LFL_REREQUEST, &lp->flags);
		queue_submit(lp);
		return;
	}

	/*
	 * DLM demoted the lock to NL before it was granted so GFS must be
	 * told it cannot cache data for this lock.
	 */

	if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
		set_bit(LFL_NOCACHE, &lp->flags);

out:
	/*
	 * This is an internal lock_dlm lock: complete the waiter directly
	 * instead of calling back into GFS.
	 */

	if (test_bit(LFL_INLOCK, &lp->flags)) {
		clear_bit(LFL_NOBLOCK, &lp->flags);
		lp->cur = lp->req;
		complete(&lp->ast_wait);
		return;
	}

	/*
	 * Normal completion of a lock request.  Tell GFS it now has the lock.
	 */

	clear_bit(LFL_NOBLOCK, &lp->flags);
	lp->cur = lp->req;

	acb.lc_name = lp->lockname;
	acb.lc_ret |= gdlm_make_lmstate(lp->cur);

	/* Cacheable only if neither old nor new mode is NL/IV and nothing
	   above flagged the lock NOCACHE. */
	if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
	    (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
		acb.lc_ret |= LM_OUT_CACHEABLE;

	ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
}
| 237 | |||
| 238 | static inline int no_work(struct gdlm_ls *ls, int blocking) | ||
| 239 | { | ||
| 240 | int ret; | ||
| 241 | |||
| 242 | spin_lock(&ls->async_lock); | ||
| 243 | ret = list_empty(&ls->complete) && list_empty(&ls->submit); | ||
| 244 | if (ret && blocking) | ||
| 245 | ret = list_empty(&ls->blocking); | ||
| 246 | spin_unlock(&ls->async_lock); | ||
| 247 | |||
| 248 | return ret; | ||
| 249 | } | ||
| 250 | |||
| 251 | static inline int check_drop(struct gdlm_ls *ls) | ||
| 252 | { | ||
| 253 | if (!ls->drop_locks_count) | ||
| 254 | return 0; | ||
| 255 | |||
| 256 | if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) { | ||
| 257 | ls->drop_time = jiffies; | ||
| 258 | if (ls->all_locks_count >= ls->drop_locks_count) | ||
| 259 | return 1; | ||
| 260 | } | ||
| 261 | return 0; | ||
| 262 | } | ||
| 263 | |||
/*
 * gdlm_thread - main loop of the two lock_dlm worker threads
 * @data: the struct gdlm_ls this thread services
 *
 * Sleeps until work appears, then pulls one item at a time — a blocking
 * callback (thread1 only), a completion, or a submission — off the
 * corresponding list under the async lock and processes it outside the
 * lock.  Also fires the periodic LM_CB_DROPLOCKS callback when
 * check_drop() says it is due.  Returns 0 when kthread_stop() is called.
 */
static int gdlm_thread(void *data)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) data;
	struct gdlm_lock *lp = NULL;
	int blist = 0;
	uint8_t complete, blocking, submit, drop;
	DECLARE_WAITQUEUE(wait, current);

	/* Only thread1 is allowed to do blocking callbacks since gfs
	   may wait for a completion callback within a blocking cb. */

	if (current == ls->thread1)
		blist = 1;

	while (!kthread_should_stop()) {
		/* Standard waitqueue sleep: re-check for work after
		   queueing ourselves so wakeups are not lost. */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&ls->thread_wait, &wait);
		if (no_work(ls, blist))
			schedule();
		remove_wait_queue(&ls->thread_wait, &wait);
		set_current_state(TASK_RUNNING);

		complete = blocking = submit = drop = 0;

		spin_lock(&ls->async_lock);

		/* Priority order: blocking casts, then completions,
		   then new submissions. */
		if (blist && !list_empty(&ls->blocking)) {
			lp = list_entry(ls->blocking.next, struct gdlm_lock,
					blist);
			list_del_init(&lp->blist);
			blocking = lp->bast_mode;
			lp->bast_mode = 0;
		} else if (!list_empty(&ls->complete)) {
			lp = list_entry(ls->complete.next, struct gdlm_lock,
					clist);
			list_del_init(&lp->clist);
			complete = 1;
		} else if (!list_empty(&ls->submit)) {
			lp = list_entry(ls->submit.next, struct gdlm_lock,
					delay_list);
			list_del_init(&lp->delay_list);
			submit = 1;
		}

		drop = check_drop(ls);
		spin_unlock(&ls->async_lock);

		/* Process the claimed item with the async lock dropped. */
		if (complete)
			process_complete(lp);

		else if (blocking)
			process_blocking(lp, blocking);

		else if (submit)
			gdlm_do_lock(lp);

		if (drop)
			ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL);

		schedule();
	}

	return 0;
}
| 328 | |||
| 329 | int gdlm_init_threads(struct gdlm_ls *ls) | ||
| 330 | { | ||
| 331 | struct task_struct *p; | ||
| 332 | int error; | ||
| 333 | |||
| 334 | p = kthread_run(gdlm_thread, ls, "lock_dlm1"); | ||
| 335 | error = IS_ERR(p); | ||
| 336 | if (error) { | ||
| 337 | log_error("can't start lock_dlm1 thread %d", error); | ||
| 338 | return error; | ||
| 339 | } | ||
| 340 | ls->thread1 = p; | ||
| 341 | |||
| 342 | p = kthread_run(gdlm_thread, ls, "lock_dlm2"); | ||
| 343 | error = IS_ERR(p); | ||
| 344 | if (error) { | ||
| 345 | log_error("can't start lock_dlm2 thread %d", error); | ||
| 346 | kthread_stop(ls->thread1); | ||
| 347 | return error; | ||
| 348 | } | ||
| 349 | ls->thread2 = p; | ||
| 350 | |||
| 351 | return 0; | ||
| 352 | } | ||
| 353 | |||
| 354 | void gdlm_release_threads(struct gdlm_ls *ls) | ||
| 355 | { | ||
| 356 | kthread_stop(ls->thread1); | ||
| 357 | kthread_stop(ls->thread2); | ||
| 358 | } | ||
| 359 | |||
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile new file mode 100644 index 000000000000..35e9730bc3a8 --- /dev/null +++ b/fs/gfs2/locking/nolock/Makefile | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o | ||
| 2 | lock_nolock-y := main.o | ||
| 3 | |||
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c new file mode 100644 index 000000000000..acfbc941f319 --- /dev/null +++ b/fs/gfs2/locking/nolock/main.c | |||
| @@ -0,0 +1,246 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | #include <linux/init.h> | ||
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/fs.h> | ||
| 16 | #include <linux/smp_lock.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
/* Per-mount state for the nolock module: only the LVB size requested
   by the filesystem at mount time is needed. */
struct nolock_lockspace {
	unsigned int nl_lvb_size;
};

/* Defined at the bottom of this file. */
static const struct lm_lockops nolock_ops;
| 24 | |||
| 25 | static int nolock_mount(char *table_name, char *host_data, | ||
| 26 | lm_callback_t cb, void *cb_data, | ||
| 27 | unsigned int min_lvb_size, int flags, | ||
| 28 | struct lm_lockstruct *lockstruct, | ||
| 29 | struct kobject *fskobj) | ||
| 30 | { | ||
| 31 | char *c; | ||
| 32 | unsigned int jid; | ||
| 33 | struct nolock_lockspace *nl; | ||
| 34 | |||
| 35 | c = strstr(host_data, "jid="); | ||
| 36 | if (!c) | ||
| 37 | jid = 0; | ||
| 38 | else { | ||
| 39 | c += 4; | ||
| 40 | sscanf(c, "%u", &jid); | ||
| 41 | } | ||
| 42 | |||
| 43 | nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL); | ||
| 44 | if (!nl) | ||
| 45 | return -ENOMEM; | ||
| 46 | |||
| 47 | nl->nl_lvb_size = min_lvb_size; | ||
| 48 | |||
| 49 | lockstruct->ls_jid = jid; | ||
| 50 | lockstruct->ls_first = 1; | ||
| 51 | lockstruct->ls_lvb_size = min_lvb_size; | ||
| 52 | lockstruct->ls_lockspace = nl; | ||
| 53 | lockstruct->ls_ops = &nolock_ops; | ||
| 54 | lockstruct->ls_flags = LM_LSFLAG_LOCAL; | ||
| 55 | |||
| 56 | return 0; | ||
| 57 | } | ||
| 58 | |||
/* No-op: with no other nodes there is nobody to allow to mount. */
static void nolock_others_may_mount(void *lockspace)
{
}
| 62 | |||
/* Free the per-mount state allocated by nolock_mount. */
static void nolock_unmount(void *lockspace)
{
	kfree(lockspace);
}
| 68 | |||
/* No-op: withdraw has no cluster-wide effect on a single node. */
static void nolock_withdraw(void *lockspace)
{
}
| 72 | |||
/**
 * nolock_get_lock - get a lock handle given a description of the lock
 * @lockspace: the lockspace the lock lives in
 * @name: the name of the lock (unused here)
 * @lockp: the lock handle is returned here
 *
 * nolock allocates no per-lock state; the lockspace pointer itself is
 * handed back as the opaque lock handle (nolock_hold_lvb relies on this).
 *
 * Returns: 0 (cannot fail)
 */

static int nolock_get_lock(void *lockspace, struct lm_lockname *name,
			   void **lockp)
{
	*lockp = lockspace;
	return 0;
}
| 88 | |||
/**
 * nolock_put_lock - get rid of a lock structure
 * @lock: the lock to throw away
 *
 * No-op: nolock_get_lock allocated nothing, so nothing is freed.
 */

static void nolock_put_lock(void *lock)
{
}
| 98 | |||
/**
 * nolock_lock - acquire a lock
 * @lock: the lock to manipulate
 * @cur_state: the current state (unused)
 * @req_state: the requested state
 * @flags: modifier flags (unused)
 *
 * With a single node every request is granted immediately in the
 * requested state, and caching is always allowed.
 *
 * Returns: A bitmap of LM_OUT_*
 */

static unsigned int nolock_lock(void *lock, unsigned int cur_state,
				unsigned int req_state, unsigned int flags)
{
	return req_state | LM_OUT_CACHEABLE;
}
| 114 | |||
/**
 * nolock_unlock - unlock a lock
 * @lock: the lock to manipulate
 * @cur_state: the current state (unused)
 *
 * No state is tracked, so unlocking is a no-op.
 *
 * Returns: 0
 */

static unsigned int nolock_unlock(void *lock, unsigned int cur_state)
{
	return 0;
}
| 127 | |||
/* No-op: requests are granted synchronously, so nothing to cancel. */
static void nolock_cancel(void *lock)
{
}
| 131 | |||
| 132 | /** | ||
| 133 | * nolock_hold_lvb - hold on to a lock value block | ||
| 134 | * @lock: the lock the LVB is associated with | ||
| 135 | * @lvbp: return the lm_lvb_t here | ||
| 136 | * | ||
| 137 | * Returns: 0 on success, -EXXX on failure | ||
| 138 | */ | ||
| 139 | |||
| 140 | static int nolock_hold_lvb(void *lock, char **lvbp) | ||
| 141 | { | ||
| 142 | struct nolock_lockspace *nl = lock; | ||
| 143 | int error = 0; | ||
| 144 | |||
| 145 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL); | ||
| 146 | if (!*lvbp) | ||
| 147 | error = -ENOMEM; | ||
| 148 | |||
| 149 | return error; | ||
| 150 | } | ||
| 151 | |||
/**
 * nolock_unhold_lvb - release a LVB
 * @lock: the lock the LVB is associated with (unused)
 * @lvb: the lock value block allocated by nolock_hold_lvb
 */

static void nolock_unhold_lvb(void *lock, char *lvb)
{
	kfree(lvb);
}
| 163 | |||
/* Test a posix lock (F_GETLK): report F_UNLCK unless posix_test_lock
   finds a conflicting lock, in which case the conflict is copied back
   into *fl.  Always returns 0. */
static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
			    struct file *file, struct file_lock *fl)
{
	struct file_lock tmp;
	int ret;

	ret = posix_test_lock(file, fl, &tmp);
	fl->fl_type = F_UNLCK;
	if (ret)
		memcpy(fl, &tmp, sizeof(struct file_lock));

	return 0;
}
| 177 | |||
/* Set a posix lock: with one node, the VFS-level posix lock machinery
   is sufficient.  @cmd is unused. */
static int nolock_plock(void *lockspace, struct lm_lockname *name,
			struct file *file, int cmd, struct file_lock *fl)
{
	return posix_lock_file_wait(file, fl);
}
| 185 | |||
/* Remove a posix lock; fl arrives with F_UNLCK set, so the same
   posix_lock_file_wait call performs the unlock. */
static int nolock_punlock(void *lockspace, struct lm_lockname *name,
			  struct file *file, struct file_lock *fl)
{
	return posix_lock_file_wait(file, fl);
}
| 193 | |||
/* No-op: there are no other nodes whose journals need recovering. */
static void nolock_recovery_done(void *lockspace, unsigned int jid,
				 unsigned int message)
{
}
| 198 | |||
/* The lock-module operations table registered with GFS2; every entry
   maps to one of the (mostly trivial) single-node handlers above. */
static const struct lm_lockops nolock_ops = {
	.lm_proto_name = "lock_nolock",
	.lm_mount = nolock_mount,
	.lm_others_may_mount = nolock_others_may_mount,
	.lm_unmount = nolock_unmount,
	.lm_withdraw = nolock_withdraw,
	.lm_get_lock = nolock_get_lock,
	.lm_put_lock = nolock_put_lock,
	.lm_lock = nolock_lock,
	.lm_unlock = nolock_unlock,
	.lm_cancel = nolock_cancel,
	.lm_hold_lvb = nolock_hold_lvb,
	.lm_unhold_lvb = nolock_unhold_lvb,
	.lm_plock_get = nolock_plock_get,
	.lm_plock = nolock_plock,
	.lm_punlock = nolock_punlock,
	.lm_recovery_done = nolock_recovery_done,
	.lm_owner = THIS_MODULE,
};
| 218 | |||
/* Module init: register the nolock protocol with the GFS2 core.
   Returns 0 on success or the registration error. */
static int __init init_nolock(void)
{
	int error;

	error = gfs2_register_lockproto(&nolock_ops);
	if (error) {
		printk(KERN_WARNING
		       "lock_nolock: can't register protocol: %d\n", error);
		return error;
	}

	/* NOTE(review): __DATE__/__TIME__ make builds non-reproducible;
	   consider dropping the timestamp from this banner. */
	printk(KERN_INFO
	       "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
	return 0;
}
| 234 | |||
/* Module exit: unregister the protocol registered by init_nolock. */
static void __exit exit_nolock(void)
{
	gfs2_unregister_lockproto(&nolock_ops);
}
| 239 | |||
module_init(init_nolock);
module_exit(exit_nolock);

MODULE_DESCRIPTION("GFS Nolock Locking Module");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
| 246 | |||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c new file mode 100644 index 000000000000..554fe5bd1b72 --- /dev/null +++ b/fs/gfs2/log.c | |||
| @@ -0,0 +1,687 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/crc32.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "bmap.h" | ||
| 22 | #include "glock.h" | ||
| 23 | #include "log.h" | ||
| 24 | #include "lops.h" | ||
| 25 | #include "meta_io.h" | ||
| 26 | #include "util.h" | ||
| 27 | #include "dir.h" | ||
| 28 | |||
#define PULL 1 /* flag value passed to log functions to pull the log tail */
| 30 | |||
| 31 | /** | ||
| 32 | * gfs2_struct2blk - compute stuff | ||
| 33 | * @sdp: the filesystem | ||
| 34 | * @nstruct: the number of structures | ||
| 35 | * @ssize: the size of the structures | ||
| 36 | * | ||
| 37 | * Compute the number of log descriptor blocks needed to hold a certain number | ||
| 38 | * of structures of a certain size. | ||
| 39 | * | ||
| 40 | * Returns: the number of blocks needed (minimum is always 1) | ||
| 41 | */ | ||
| 42 | |||
| 43 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | ||
| 44 | unsigned int ssize) | ||
| 45 | { | ||
| 46 | unsigned int blks; | ||
| 47 | unsigned int first, second; | ||
| 48 | |||
| 49 | blks = 1; | ||
| 50 | first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize; | ||
| 51 | |||
| 52 | if (nstruct > first) { | ||
| 53 | second = (sdp->sd_sb.sb_bsize - | ||
| 54 | sizeof(struct gfs2_meta_header)) / ssize; | ||
| 55 | blks += DIV_ROUND_UP(nstruct - first, second); | ||
| 56 | } | ||
| 57 | |||
| 58 | return blks; | ||
| 59 | } | ||
| 60 | |||
/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @ai: the AIL entry to push
 *
 * Called with sd_log_lock held.  May temporarily drop and re-take the
 * log lock around I/O error reporting and buffer writeback; after each
 * such drop the list walk restarts from scratch (the retry loop),
 * because the list may have changed while unlocked.
 */

static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int retry;

	BUG_ON(!spin_is_locked(&sdp->sd_log_lock));

	do {
		retry = 0;

		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
						 bd_ail_st_list) {
			bh = bd->bd_bh;

			gfs2_assert(sdp, bd->bd_ail == ai);

			/* I/O already finished: promote to the ail2 list
			   (reporting any I/O error first). */
			if (!buffer_busy(bh)) {
				if (!buffer_uptodate(bh)) {
					gfs2_log_unlock(sdp);
					gfs2_io_error_bh(sdp, bh);
					gfs2_log_lock(sdp);
				}
				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
				continue;
			}

			if (!buffer_dirty(bh))
				continue;

			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);

			/* Write the buffer with the log lock dropped,
			   then restart the walk. */
			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);

			retry = 1;
			break;
		}
	} while (retry);
}
| 110 | |||
/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @ai: the AIL entry
 * @flags: DIO_ALL to keep scanning past busy buffers instead of stopping
 *
 * Moves every synced buffer from the entry's ail1 list to its ail2 list.
 *
 * Returns: nonzero if the entry's ail1 list is now empty
 */

static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;

	list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;

		gfs2_assert(sdp, bd->bd_ail == ai);

		/* Busy means I/O still in flight: skip it under DIO_ALL,
		   otherwise give up on the rest of the list. */
		if (buffer_busy(bh)) {
			if (flags & DIO_ALL)
				continue;
			else
				break;
		}

		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);

		list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
	}

	return list_empty(&ai->ai_ail1_list);
}
| 144 | |||
/*
 * gfs2_ail1_start - start writeback of the AIL1 lists
 * @sdp: the filesystem
 * @flags: DIO_ALL to push every AIL entry rather than stopping once the
 *         oldest entry has been synced
 *
 * Tags the entries it pushes with a fresh sync generation so each entry
 * is started at most once per pass, even though gfs2_ail1_start_one may
 * drop the log lock and let the list change underneath us.
 */
void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
{
	struct list_head *head = &sdp->sd_ail1_list;
	u64 sync_gen;
	struct list_head *first;
	struct gfs2_ail *first_ai, *ai, *tmp;
	int done = 0;

	gfs2_log_lock(sdp);
	if (list_empty(head)) {
		gfs2_log_unlock(sdp);
		return;
	}
	sync_gen = sdp->sd_ail_sync_gen++;

	/* Always push the oldest entry first. */
	first = head->prev;
	first_ai = list_entry(first, struct gfs2_ail, ai_list);
	first_ai->ai_sync_gen = sync_gen;
	gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */

	/* Under DIO_ALL, ignore the "oldest entry is done" stop test. */
	if (flags & DIO_ALL)
		first = NULL;

	while(!done) {
		/* Stop once the oldest entry has moved on or is synced. */
		if (first && (head->prev != first ||
			      gfs2_ail1_empty_one(sdp, first_ai, 0)))
			break;

		done = 1;
		list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) {
			if (ai->ai_sync_gen >= sync_gen)
				continue;	/* already pushed this pass */
			ai->ai_sync_gen = sync_gen;
			gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
			done = 0;
			break;	/* restart the walk after the lock drop */
		}
	}

	gfs2_log_unlock(sdp);
}
| 186 | |||
| 187 | int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) | ||
| 188 | { | ||
| 189 | struct gfs2_ail *ai, *s; | ||
| 190 | int ret; | ||
| 191 | |||
| 192 | gfs2_log_lock(sdp); | ||
| 193 | |||
| 194 | list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { | ||
| 195 | if (gfs2_ail1_empty_one(sdp, ai, flags)) | ||
| 196 | list_move(&ai->ai_list, &sdp->sd_ail2_list); | ||
| 197 | else if (!(flags & DIO_ALL)) | ||
| 198 | break; | ||
| 199 | } | ||
| 200 | |||
| 201 | ret = list_empty(&sdp->sd_ail1_list); | ||
| 202 | |||
| 203 | gfs2_log_unlock(sdp); | ||
| 204 | |||
| 205 | return ret; | ||
| 206 | } | ||
| 207 | |||
| 208 | |||
/**
 * gfs2_ail2_empty_one - Remove all remaining buffers from an AIL entry
 * @sdp: the filesystem
 * @ai: the AIL entry
 *
 */

static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &ai->ai_ail2_list;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->prev, struct gfs2_bufdata,
				bd_ail_st_list);
		gfs2_assert(sdp, bd->bd_ail == ai);
		/* Unhook the buffer from the AIL and its glock, then drop
		   our buffer_head reference. */
		bd->bd_ail = NULL;
		list_del(&bd->bd_ail_st_list);
		list_del(&bd->bd_ail_gl_list);
		atomic_dec(&bd->bd_gl->gl_ail_count);
		brelse(bd->bd_bh);
	}
}
| 232 | |||
| 233 | static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) | ||
| 234 | { | ||
| 235 | struct gfs2_ail *ai, *safe; | ||
| 236 | unsigned int old_tail = sdp->sd_log_tail; | ||
| 237 | int wrap = (new_tail < old_tail); | ||
| 238 | int a, b, rm; | ||
| 239 | |||
| 240 | gfs2_log_lock(sdp); | ||
| 241 | |||
| 242 | list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { | ||
| 243 | a = (old_tail <= ai->ai_first); | ||
| 244 | b = (ai->ai_first < new_tail); | ||
| 245 | rm = (wrap) ? (a || b) : (a && b); | ||
| 246 | if (!rm) | ||
| 247 | continue; | ||
| 248 | |||
| 249 | gfs2_ail2_empty_one(sdp, ai); | ||
| 250 | list_del(&ai->ai_list); | ||
| 251 | gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list)); | ||
| 252 | gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list)); | ||
| 253 | kfree(ai); | ||
| 254 | } | ||
| 255 | |||
| 256 | gfs2_log_unlock(sdp); | ||
| 257 | } | ||
| 258 | |||
/**
 * gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
 * Returns: errno
 */

int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
	unsigned int try = 0;

	/* A zero-sized or larger-than-the-journal reservation is a bug. */
	if (gfs2_assert_warn(sdp, blks) ||
	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
		return -EINVAL;

	/* Serialise reservers: only one task at a time pushes the log. */
	mutex_lock(&sdp->sd_log_reserve_mutex);
	gfs2_log_lock(sdp);
	while(sdp->sd_log_blks_free <= blks) {
		/* Not enough free blocks: drop the spinlock and try to
		   make room by emptying the AIL and flushing the log. */
		gfs2_log_unlock(sdp);
		gfs2_ail1_empty(sdp, 0);
		gfs2_log_flush(sdp, NULL);

		/* Still short after the first attempt: kick off AIL
		   write-back as well. */
		if (try++)
			gfs2_ail1_start(sdp, 0);
		gfs2_log_lock(sdp);
	}
	sdp->sd_log_blks_free -= blks;
	gfs2_log_unlock(sdp);
	mutex_unlock(&sdp->sd_log_reserve_mutex);

	/* Hold off log flushes until the reservation is released
	   (gfs2_log_flush() takes this lock for writing). */
	down_read(&sdp->sd_log_flush_lock);

	return 0;
}
| 294 | |||
/**
 * gfs2_log_release - Release a given number of log blocks
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks
 *
 * Returns @blks to the free pool and drops the read side of the log
 * flush lock taken by gfs2_log_reserve().
 */

void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{

	gfs2_log_lock(sdp);
	sdp->sd_log_blks_free += blks;
	/* The free count can never exceed the size of the journal. */
	gfs2_assert_withdraw(sdp,
		sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
	gfs2_log_unlock(sdp);
	up_read(&sdp->sd_log_flush_lock);
}
| 312 | |||
| 313 | static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) | ||
| 314 | { | ||
| 315 | int error; | ||
| 316 | struct buffer_head bh_map; | ||
| 317 | |||
| 318 | error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, 0, &bh_map, 1); | ||
| 319 | if (error || !bh_map.b_blocknr) | ||
| 320 | printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn); | ||
| 321 | gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); | ||
| 322 | |||
| 323 | return bh_map.b_blocknr; | ||
| 324 | } | ||
| 325 | |||
| 326 | /** | ||
| 327 | * log_distance - Compute distance between two journal blocks | ||
| 328 | * @sdp: The GFS2 superblock | ||
| 329 | * @newer: The most recent journal block of the pair | ||
| 330 | * @older: The older journal block of the pair | ||
| 331 | * | ||
| 332 | * Compute the distance (in the journal direction) between two | ||
| 333 | * blocks in the journal | ||
| 334 | * | ||
| 335 | * Returns: the distance in blocks | ||
| 336 | */ | ||
| 337 | |||
| 338 | static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer, | ||
| 339 | unsigned int older) | ||
| 340 | { | ||
| 341 | int dist; | ||
| 342 | |||
| 343 | dist = newer - older; | ||
| 344 | if (dist < 0) | ||
| 345 | dist += sdp->sd_jdesc->jd_blocks; | ||
| 346 | |||
| 347 | return dist; | ||
| 348 | } | ||
| 349 | |||
| 350 | static unsigned int current_tail(struct gfs2_sbd *sdp) | ||
| 351 | { | ||
| 352 | struct gfs2_ail *ai; | ||
| 353 | unsigned int tail; | ||
| 354 | |||
| 355 | gfs2_log_lock(sdp); | ||
| 356 | |||
| 357 | if (list_empty(&sdp->sd_ail1_list)) { | ||
| 358 | tail = sdp->sd_log_head; | ||
| 359 | } else { | ||
| 360 | ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list); | ||
| 361 | tail = ai->ai_first; | ||
| 362 | } | ||
| 363 | |||
| 364 | gfs2_log_unlock(sdp); | ||
| 365 | |||
| 366 | return tail; | ||
| 367 | } | ||
| 368 | |||
| 369 | static inline void log_incr_head(struct gfs2_sbd *sdp) | ||
| 370 | { | ||
| 371 | if (sdp->sd_log_flush_head == sdp->sd_log_tail) | ||
| 372 | gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head); | ||
| 373 | |||
| 374 | if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { | ||
| 375 | sdp->sd_log_flush_head = 0; | ||
| 376 | sdp->sd_log_flush_wrapped = 1; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | /** | ||
| 381 | * gfs2_log_get_buf - Get and initialize a buffer to use for log control data | ||
| 382 | * @sdp: The GFS2 superblock | ||
| 383 | * | ||
| 384 | * Returns: the buffer_head | ||
| 385 | */ | ||
| 386 | |||
| 387 | struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) | ||
| 388 | { | ||
| 389 | u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); | ||
| 390 | struct gfs2_log_buf *lb; | ||
| 391 | struct buffer_head *bh; | ||
| 392 | |||
| 393 | lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); | ||
| 394 | list_add(&lb->lb_list, &sdp->sd_log_flush_list); | ||
| 395 | |||
| 396 | bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno); | ||
| 397 | lock_buffer(bh); | ||
| 398 | memset(bh->b_data, 0, bh->b_size); | ||
| 399 | set_buffer_uptodate(bh); | ||
| 400 | clear_buffer_dirty(bh); | ||
| 401 | unlock_buffer(bh); | ||
| 402 | |||
| 403 | log_incr_head(sdp); | ||
| 404 | |||
| 405 | return bh; | ||
| 406 | } | ||
| 407 | |||
| 408 | /** | ||
| 409 | * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log | ||
| 410 | * @sdp: the filesystem | ||
| 411 | * @data: the data the buffer_head should point to | ||
| 412 | * | ||
| 413 | * Returns: the log buffer descriptor | ||
| 414 | */ | ||
| 415 | |||
| 416 | struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, | ||
| 417 | struct buffer_head *real) | ||
| 418 | { | ||
| 419 | u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); | ||
| 420 | struct gfs2_log_buf *lb; | ||
| 421 | struct buffer_head *bh; | ||
| 422 | |||
| 423 | lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); | ||
| 424 | list_add(&lb->lb_list, &sdp->sd_log_flush_list); | ||
| 425 | lb->lb_real = real; | ||
| 426 | |||
| 427 | bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); | ||
| 428 | atomic_set(&bh->b_count, 1); | ||
| 429 | bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); | ||
| 430 | set_bh_page(bh, real->b_page, bh_offset(real)); | ||
| 431 | bh->b_blocknr = blkno; | ||
| 432 | bh->b_size = sdp->sd_sb.sb_bsize; | ||
| 433 | bh->b_bdev = sdp->sd_vfs->s_bdev; | ||
| 434 | |||
| 435 | log_incr_head(sdp); | ||
| 436 | |||
| 437 | return bh; | ||
| 438 | } | ||
| 439 | |||
| 440 | static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull) | ||
| 441 | { | ||
| 442 | unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); | ||
| 443 | |||
| 444 | ail2_empty(sdp, new_tail); | ||
| 445 | |||
| 446 | gfs2_log_lock(sdp); | ||
| 447 | sdp->sd_log_blks_free += dist - (pull ? 1 : 0); | ||
| 448 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); | ||
| 449 | gfs2_log_unlock(sdp); | ||
| 450 | |||
| 451 | sdp->sd_log_tail = new_tail; | ||
| 452 | } | ||
| 453 | |||
/**
 * log_write_header - Get and initialize a journal header buffer
 * @sdp: The GFS2 superblock
 * @flags: log header flags (e.g. GFS2_LOG_HEAD_UNMOUNT)
 * @pull: non-zero if a reserved block pays for pulling the tail
 *
 * Writes a log header synchronously at the current flush head, then
 * advances the on-disk tail if it has moved.
 */

static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
{
	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
	struct buffer_head *bh;
	struct gfs2_log_header *lh;
	unsigned int tail;
	u32 hash;

	bh = sb_getblk(sdp->sd_vfs, blkno);
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
	unlock_buffer(bh);

	/* Move finished buffers to AIL2 so current_tail() is up to date. */
	gfs2_ail1_empty(sdp, 0);
	tail = current_tail(sdp);

	lh = (struct gfs2_log_header *)bh->b_data;
	memset(lh, 0, sizeof(struct gfs2_log_header));
	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
	lh->lh_flags = cpu_to_be32(flags);
	lh->lh_tail = cpu_to_be32(tail);
	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
	/* The hash covers the header with lh_hash still zero. */
	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
	lh->lh_hash = cpu_to_be32(hash);

	/* The header must be on disk before the tail may be advanced. */
	set_buffer_dirty(bh);
	if (sync_dirty_buffer(bh))
		gfs2_io_error_bh(sdp, bh);
	brelse(bh);

	if (sdp->sd_log_tail != tail)
		log_pull_tail(sdp, tail, pull);
	else
		gfs2_assert_withdraw(sdp, !pull);

	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
	log_incr_head(sdp);
}
| 504 | |||
/* Wait for every log buffer queued on sd_log_flush_list to finish its
   I/O, release it, then write the log header that commits them. */
static void log_flush_commit(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_log_flush_list;
	struct gfs2_log_buf *lb;
	struct buffer_head *bh;

	while (!list_empty(head)) {
		lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
		list_del(&lb->lb_list);
		bh = lb->lb_bh;

		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);
		if (lb->lb_real) {
			/* Fake heads (gfs2_log_fake_buf) alias a real
			   buffer's page; spin until our reference is the
			   last one before freeing the head. */
			while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
				schedule();
			free_buffer_head(bh);
		} else
			brelse(bh);
		kfree(lb);
	}

	log_write_header(sdp, 0, 0);
}
| 530 | |||
/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: the filesystem
 * @gl: The glock structure to flush.  If NULL, flush the whole incore log
 *
 */

void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
{
	struct gfs2_ail *ai;

	down_write(&sdp->sd_log_flush_lock);

	/* If a specific glock was requested and it has nothing in the
	   incore log, there is nothing to flush. */
	if (gl) {
		gfs2_log_lock(sdp);
		if (list_empty(&gl->gl_le.le_list)) {
			gfs2_log_unlock(sdp);
			up_write(&sdp->sd_log_flush_lock);
			return;
		}
		gfs2_log_unlock(sdp);
	}

	/* New AIL entry that will collect the buffers written by this
	   flush (filled in by lops_after_commit()). */
	ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ai->ai_ail1_list);
	INIT_LIST_HEAD(&ai->ai_ail2_list);

	gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
	gfs2_assert_withdraw(sdp,
			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);

	sdp->sd_log_flush_head = sdp->sd_log_head;
	sdp->sd_log_flush_wrapped = 0;
	ai->ai_first = sdp->sd_log_flush_head;

	lops_before_commit(sdp);
	if (!list_empty(&sdp->sd_log_flush_list))
		log_flush_commit(sdp);
	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
		/* Nothing queued, but the tail moved: write a header
		   just to pull the tail forward. */
		log_write_header(sdp, 0, PULL);
	lops_after_commit(sdp, ai);
	sdp->sd_log_head = sdp->sd_log_flush_head;

	sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;

	sdp->sd_log_blks_reserved = 0;
	sdp->sd_log_commited_buf = 0;
	sdp->sd_log_num_hdrs = 0;
	sdp->sd_log_commited_revoke = 0;

	gfs2_log_lock(sdp);
	/* Only keep the AIL entry if buffers were actually added to it. */
	if (!list_empty(&ai->ai_ail1_list)) {
		list_add(&ai->ai_list, &sdp->sd_ail1_list);
		ai = NULL;
	}
	gfs2_log_unlock(sdp);

	sdp->sd_vfs->s_dirt = 0;
	up_write(&sdp->sd_log_flush_lock);

	/* ai is NULL (a no-op free) when it was queued on the AIL above. */
	kfree(ai);
}
| 593 | |||
/* Adjust the log block accounting when transaction @tr commits: fold
   @tr's buffer/revoke counts into the running committed totals and
   return the unused part of its reservation to the free pool. */
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int reserved = 0;
	unsigned int old;

	gfs2_log_lock(sdp);

	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);

	/* Blocks the committed-but-unflushed data now needs ... */
	if (sdp->sd_log_commited_buf)
		reserved += sdp->sd_log_commited_buf;
	if (sdp->sd_log_commited_revoke)
		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
					    sizeof(u64));
	/* ... plus one extra block when anything is pending (presumably
	   for the log header -- confirm against log_write_header()). */
	if (reserved)
		reserved++;

	old = sdp->sd_log_blks_free;
	/* Give back whatever @tr reserved beyond the growth in need. */
	sdp->sd_log_blks_free += tr->tr_reserved -
				 (reserved - sdp->sd_log_blks_reserved);

	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
	gfs2_assert_withdraw(sdp,
		sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
		sdp->sd_log_num_hdrs);

	sdp->sd_log_blks_reserved = reserved;

	gfs2_log_unlock(sdp);
}
| 627 | |||
| 628 | /** | ||
| 629 | * gfs2_log_commit - Commit a transaction to the log | ||
| 630 | * @sdp: the filesystem | ||
| 631 | * @tr: the transaction | ||
| 632 | * | ||
| 633 | * Returns: errno | ||
| 634 | */ | ||
| 635 | |||
| 636 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | ||
| 637 | { | ||
| 638 | log_refund(sdp, tr); | ||
| 639 | lops_incore_commit(sdp, tr); | ||
| 640 | |||
| 641 | sdp->sd_vfs->s_dirt = 1; | ||
| 642 | up_read(&sdp->sd_log_flush_lock); | ||
| 643 | |||
| 644 | gfs2_log_lock(sdp); | ||
| 645 | if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) { | ||
| 646 | gfs2_log_unlock(sdp); | ||
| 647 | gfs2_log_flush(sdp, NULL); | ||
| 648 | } else { | ||
| 649 | gfs2_log_unlock(sdp); | ||
| 650 | } | ||
| 651 | } | ||
| 652 | |||
/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 *
 * Verifies that nothing remains in the incore log or AIL, then writes
 * an unmount header so the journal is marked clean.
 */

void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
	down_write(&sdp->sd_log_flush_lock);

	/* At shutdown nothing may remain reserved, logged, or in the AIL. */
	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

	sdp->sd_log_flush_head = sdp->sd_log_head;
	sdp->sd_log_flush_wrapped = 0;

	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);

	/* A clean journal has all blocks free and head == tail. */
	gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));

	sdp->sd_log_head = sdp->sd_log_flush_head;
	sdp->sd_log_tail = sdp->sd_log_head;

	up_write(&sdp->sd_log_flush_lock);
}
| 687 | |||
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h new file mode 100644 index 000000000000..7f5737d55612 --- /dev/null +++ b/fs/gfs2/log.h | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __LOG_DOT_H__ | ||
| 11 | #define __LOG_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/list.h> | ||
| 14 | #include <linux/spinlock.h> | ||
| 15 | #include "incore.h" | ||
| 16 | |||
/**
 * gfs2_log_lock - acquire the right to mess with the log manager
 * @sdp: the filesystem
 *
 * Takes the sd_log_lock spinlock protecting the incore log state.
 */

static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
{
	spin_lock(&sdp->sd_log_lock);
}
| 27 | |||
/**
 * gfs2_log_unlock - release the right to mess with the log manager
 * @sdp: the filesystem
 *
 * Releases the sd_log_lock spinlock taken by gfs2_log_lock().
 */

static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
{
	spin_unlock(&sdp->sd_log_lock);
}
| 38 | |||
| 39 | static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, | ||
| 40 | unsigned int value) | ||
| 41 | { | ||
| 42 | if (++value == sdp->sd_jdesc->jd_blocks) { | ||
| 43 | value = 0; | ||
| 44 | } | ||
| 45 | sdp->sd_log_head = sdp->sd_log_tail = value; | ||
| 46 | } | ||
| 47 | |||
unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
			     unsigned int ssize);

/* Active items list (AIL) write-back and cleanup */
void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);

/* Log space reservation */
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);

/* Log buffer allocation, flushing and commit */
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
				      struct buffer_head *real);
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);

void gfs2_log_shutdown(struct gfs2_sbd *sdp);

#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c new file mode 100644 index 000000000000..881e337b6a70 --- /dev/null +++ b/fs/gfs2/lops.c | |||
| @@ -0,0 +1,809 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/lm_interface.h> | ||
| 17 | |||
| 18 | #include "gfs2.h" | ||
| 19 | #include "incore.h" | ||
| 20 | #include "glock.h" | ||
| 21 | #include "log.h" | ||
| 22 | #include "lops.h" | ||
| 23 | #include "meta_io.h" | ||
| 24 | #include "recovery.h" | ||
| 25 | #include "rgrp.h" | ||
| 26 | #include "trans.h" | ||
| 27 | #include "util.h" | ||
| 28 | |||
| 29 | static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
| 30 | { | ||
| 31 | struct gfs2_glock *gl; | ||
| 32 | struct gfs2_trans *tr = current->journal_info; | ||
| 33 | |||
| 34 | tr->tr_touched = 1; | ||
| 35 | |||
| 36 | if (!list_empty(&le->le_list)) | ||
| 37 | return; | ||
| 38 | |||
| 39 | gl = container_of(le, struct gfs2_glock, gl_le); | ||
| 40 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) | ||
| 41 | return; | ||
| 42 | gfs2_glock_hold(gl); | ||
| 43 | set_bit(GLF_DIRTY, &gl->gl_flags); | ||
| 44 | |||
| 45 | gfs2_log_lock(sdp); | ||
| 46 | sdp->sd_log_num_gl++; | ||
| 47 | list_add(&le->le_list, &sdp->sd_log_le_gl); | ||
| 48 | gfs2_log_unlock(sdp); | ||
| 49 | } | ||
| 50 | |||
| 51 | static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
| 52 | { | ||
| 53 | struct list_head *head = &sdp->sd_log_le_gl; | ||
| 54 | struct gfs2_glock *gl; | ||
| 55 | |||
| 56 | while (!list_empty(head)) { | ||
| 57 | gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); | ||
| 58 | list_del_init(&gl->gl_le.le_list); | ||
| 59 | sdp->sd_log_num_gl--; | ||
| 60 | |||
| 61 | gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); | ||
| 62 | gfs2_glock_put(gl); | ||
| 63 | } | ||
| 64 | gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); | ||
| 65 | } | ||
| 66 | |||
/* Add a metadata buffer to the current transaction and, if it is not
   there already, to the incore log. */
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr;

	/* Already part of the current transaction: nothing to do. */
	if (!list_empty(&bd->bd_list_tr))
		return;

	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);

	/* Already on the incore log list. */
	if (!list_empty(&le->le_list))
		return;

	gfs2_trans_add_gl(bd->bd_gl);

	/* Pin the buffer before it becomes visible on the log list. */
	gfs2_meta_check(sdp, bd->bd_bh);
	gfs2_pin(sdp, bd->bd_bh);

	gfs2_log_lock(sdp);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	gfs2_log_unlock(sdp);

	tr->tr_num_buf_new++;
}
| 95 | |||
| 96 | static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | ||
| 97 | { | ||
| 98 | struct list_head *head = &tr->tr_list_buf; | ||
| 99 | struct gfs2_bufdata *bd; | ||
| 100 | |||
| 101 | while (!list_empty(head)) { | ||
| 102 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | ||
| 103 | list_del_init(&bd->bd_list_tr); | ||
| 104 | tr->tr_num_buf--; | ||
| 105 | } | ||
| 106 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | ||
| 107 | } | ||
| 108 | |||
/* Write all pinned metadata buffers to the log, each batch preceded by
   a log descriptor block listing the buffers' real block numbers. */
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	unsigned int total = sdp->sd_log_num_buf;
	unsigned int offset = sizeof(struct gfs2_log_descriptor);
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	/* Round the descriptor size up to a __be64 boundary; the block
	   numbers follow the descriptor within the same block. */
	offset += sizeof(__be64) - 1;
	offset &= ~(sizeof(__be64) - 1);
	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
	/* for 4k blocks, limit = 503 */

	/* bd1 walks the list writing descriptors, bd2 writing the data. */
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
		bh = gfs2_log_get_buf(sdp);
		sdp->sd_log_num_hdrs++;
		ld = (struct gfs2_log_descriptor *)bh->b_data;
		ptr = (__be64 *)(bh->b_data + offset);
		ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
		ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
		ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
		ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);
		ld->ld_data2 = cpu_to_be32(0);
		memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));

		/* Record each buffer's real block number in the descriptor. */
		n = 0;
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

		set_buffer_dirty(bh);
		ll_rw_block(WRITE, 1, &bh);

		/* Then write each buffer's data via a fake buffer head
		   aimed at the next log block. */
		n = 0;
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			if (++n >= num)
				break;
		}

		total -= num;
	}
}
| 168 | |||
| 169 | static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
| 170 | { | ||
| 171 | struct list_head *head = &sdp->sd_log_le_buf; | ||
| 172 | struct gfs2_bufdata *bd; | ||
| 173 | |||
| 174 | while (!list_empty(head)) { | ||
| 175 | bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); | ||
| 176 | list_del_init(&bd->bd_le.le_list); | ||
| 177 | sdp->sd_log_num_buf--; | ||
| 178 | |||
| 179 | gfs2_unpin(sdp, bd->bd_bh, ai); | ||
| 180 | } | ||
| 181 | gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); | ||
| 182 | } | ||
| 183 | |||
| 184 | static void buf_lo_before_scan(struct gfs2_jdesc *jd, | ||
| 185 | struct gfs2_log_header *head, int pass) | ||
| 186 | { | ||
| 187 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 188 | |||
| 189 | if (pass != 0) | ||
| 190 | return; | ||
| 191 | |||
| 192 | sdp->sd_found_blocks = 0; | ||
| 193 | sdp->sd_replayed_blocks = 0; | ||
| 194 | } | ||
| 195 | |||
/* Replay pass 1: copy each journaled metadata block back to its real
   location, skipping blocks covered by a newer revoke. */
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	/* Step past the descriptor block itself. */
	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		/* A matching revoke means this logged copy is stale. */
		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		/* Copy the logged data over the in-place block. */
		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}
| 244 | |||
| 245 | static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | ||
| 246 | { | ||
| 247 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
| 248 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 249 | |||
| 250 | if (error) { | ||
| 251 | gfs2_meta_sync(ip->i_gl); | ||
| 252 | return; | ||
| 253 | } | ||
| 254 | if (pass != 1) | ||
| 255 | return; | ||
| 256 | |||
| 257 | gfs2_meta_sync(ip->i_gl); | ||
| 258 | |||
| 259 | fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", | ||
| 260 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | ||
| 261 | } | ||
| 262 | |||
| 263 | static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
| 264 | { | ||
| 265 | struct gfs2_trans *tr; | ||
| 266 | |||
| 267 | tr = current->journal_info; | ||
| 268 | tr->tr_touched = 1; | ||
| 269 | tr->tr_num_revoke++; | ||
| 270 | |||
| 271 | gfs2_log_lock(sdp); | ||
| 272 | sdp->sd_log_num_revoke++; | ||
| 273 | list_add(&le->le_list, &sdp->sd_log_le_revoke); | ||
| 274 | gfs2_log_unlock(sdp); | ||
| 275 | } | ||
| 276 | |||
/* Write all pending revokes to the log in one or more blocks; the
   first block carries a full log descriptor, continuation blocks a
   plain metadata header. */
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_revoke *rv;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_log_get_buf(sdp);
	ld = (struct gfs2_log_descriptor *)bh->b_data;
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
						    sizeof(u64)));
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	ld->ld_data2 = cpu_to_be32(0);
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	offset = sizeof(struct gfs2_log_descriptor);

	while (!list_empty(head)) {
		rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
		list_del_init(&rv->rv_le.le_list);
		sdp->sd_log_num_revoke--;

		/* Current block is full: submit it and start a
		   continuation block. */
		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
		kfree(rv);

		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	/* Submit the last (possibly only) block. */
	set_buffer_dirty(bh);
	ll_rw_block(WRITE, 1, &bh);
}
| 329 | |||
| 330 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, | ||
| 331 | struct gfs2_log_header *head, int pass) | ||
| 332 | { | ||
| 333 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 334 | |||
| 335 | if (pass != 0) | ||
| 336 | return; | ||
| 337 | |||
| 338 | sdp->sd_found_revokes = 0; | ||
| 339 | sdp->sd_replay_tail = head->lh_tail; | ||
| 340 | } | ||
| 341 | |||
| 342 | static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | ||
| 343 | struct gfs2_log_descriptor *ld, __be64 *ptr, | ||
| 344 | int pass) | ||
| 345 | { | ||
| 346 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 347 | unsigned int blks = be32_to_cpu(ld->ld_length); | ||
| 348 | unsigned int revokes = be32_to_cpu(ld->ld_data1); | ||
| 349 | struct buffer_head *bh; | ||
| 350 | unsigned int offset; | ||
| 351 | u64 blkno; | ||
| 352 | int first = 1; | ||
| 353 | int error; | ||
| 354 | |||
| 355 | if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE) | ||
| 356 | return 0; | ||
| 357 | |||
| 358 | offset = sizeof(struct gfs2_log_descriptor); | ||
| 359 | |||
| 360 | for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { | ||
| 361 | error = gfs2_replay_read_block(jd, start, &bh); | ||
| 362 | if (error) | ||
| 363 | return error; | ||
| 364 | |||
| 365 | if (!first) | ||
| 366 | gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB); | ||
| 367 | |||
| 368 | while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { | ||
| 369 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); | ||
| 370 | |||
| 371 | error = gfs2_revoke_add(sdp, blkno, start); | ||
| 372 | if (error < 0) | ||
| 373 | return error; | ||
| 374 | else if (error) | ||
| 375 | sdp->sd_found_revokes++; | ||
| 376 | |||
| 377 | if (!--revokes) | ||
| 378 | break; | ||
| 379 | offset += sizeof(u64); | ||
| 380 | } | ||
| 381 | |||
| 382 | brelse(bh); | ||
| 383 | offset = sizeof(struct gfs2_meta_header); | ||
| 384 | first = 0; | ||
| 385 | } | ||
| 386 | |||
| 387 | return 0; | ||
| 388 | } | ||
| 389 | |||
| 390 | static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | ||
| 391 | { | ||
| 392 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 393 | |||
| 394 | if (error) { | ||
| 395 | gfs2_revoke_clean(sdp); | ||
| 396 | return; | ||
| 397 | } | ||
| 398 | if (pass != 1) | ||
| 399 | return; | ||
| 400 | |||
| 401 | fs_info(sdp, "jid=%u: Found %u revoke tags\n", | ||
| 402 | jd->jd_jid, sdp->sd_found_revokes); | ||
| 403 | |||
| 404 | gfs2_revoke_clean(sdp); | ||
| 405 | } | ||
| 406 | |||
| 407 | static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
| 408 | { | ||
| 409 | struct gfs2_rgrpd *rgd; | ||
| 410 | struct gfs2_trans *tr = current->journal_info; | ||
| 411 | |||
| 412 | tr->tr_touched = 1; | ||
| 413 | |||
| 414 | if (!list_empty(&le->le_list)) | ||
| 415 | return; | ||
| 416 | |||
| 417 | rgd = container_of(le, struct gfs2_rgrpd, rd_le); | ||
| 418 | gfs2_rgrp_bh_hold(rgd); | ||
| 419 | |||
| 420 | gfs2_log_lock(sdp); | ||
| 421 | sdp->sd_log_num_rg++; | ||
| 422 | list_add(&le->le_list, &sdp->sd_log_le_rg); | ||
| 423 | gfs2_log_unlock(sdp); | ||
| 424 | } | ||
| 425 | |||
| 426 | static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
| 427 | { | ||
| 428 | struct list_head *head = &sdp->sd_log_le_rg; | ||
| 429 | struct gfs2_rgrpd *rgd; | ||
| 430 | |||
| 431 | while (!list_empty(head)) { | ||
| 432 | rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list); | ||
| 433 | list_del_init(&rgd->rd_le.le_list); | ||
| 434 | sdp->sd_log_num_rg--; | ||
| 435 | |||
| 436 | gfs2_rgrp_repolish_clones(rgd); | ||
| 437 | gfs2_rgrp_bh_put(rgd); | ||
| 438 | } | ||
| 439 | gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); | ||
| 440 | } | ||
| 441 | |||
/**
 * databuf_lo_add - Add a databuf to the transaction.
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that its
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's being the block number (as per meta data)
 *    and a flag which says whether the data block needs escaping or
 *    not. This means we need a new log entry for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr = current->journal_info;
	struct address_space *mapping = bd->bd_bh->b_page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);

	tr->tr_touched = 1;
	/* Only jdata buffers are counted against the transaction and
	   pinned; ordered-write buffers are merely tracked below. */
	if (list_empty(&bd->bd_list_tr) &&
	    (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
		tr->tr_num_buf++;
		list_add(&bd->bd_list_tr, &tr->tr_list_buf);
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_buf_new++;
	}
	gfs2_trans_add_gl(bd->bd_gl);
	/* Queue on the databuf log list (jdata additionally counted in
	   sd_log_num_jdata) under the log lock */
	gfs2_log_lock(sdp);
	if (list_empty(&le->le_list)) {
		if (ip->i_di.di_flags & GFS2_DIF_JDATA)
			sdp->sd_log_num_jdata++;
		sdp->sd_log_num_databuf++;
		list_add(&le->le_list, &sdp->sd_log_le_databuf);
	}
	gfs2_log_unlock(sdp);
}
| 483 | |||
| 484 | static int gfs2_check_magic(struct buffer_head *bh) | ||
| 485 | { | ||
| 486 | struct page *page = bh->b_page; | ||
| 487 | void *kaddr; | ||
| 488 | __be32 *ptr; | ||
| 489 | int rv = 0; | ||
| 490 | |||
| 491 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 492 | ptr = kaddr + bh_offset(bh); | ||
| 493 | if (*ptr == cpu_to_be32(GFS2_MAGIC)) | ||
| 494 | rv = 1; | ||
| 495 | kunmap_atomic(page, KM_USER0); | ||
| 496 | |||
| 497 | return rv; | ||
| 498 | } | ||
| 499 | |||
/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 *
 * Here we scan through the lists of buffers and make the assumption
 * that any buffer thats been pinned is being journaled, and that
 * any unpinned buffer is an ordered write data buffer and therefore
 * will be written back rather than journaled.
 *
 * Three phases per iteration of the outer loop:
 *   1. Sort buffers: ordered-write buffers are moved to "started" and
 *      written back; journaled buffers get a (blkno, escape) tag pair
 *      written into a JDATA log descriptor; dead entries are freed.
 *   2. Write the descriptor block, then the journaled data blocks
 *      themselves (copied and escaped if they start with GFS2_MAGIC).
 *   3. Finally wait for all the ordered-write buffers started in 1.
 */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	LIST_HEAD(started);
	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
	struct buffer_head *bh = NULL;
	unsigned int offset = sizeof(struct gfs2_log_descriptor);
	struct gfs2_log_descriptor *ld;
	unsigned int limit;
	unsigned int total_dbuf = sdp->sd_log_num_databuf;
	unsigned int total_jdata = sdp->sd_log_num_jdata;
	unsigned int num, n;
	__be64 *ptr = NULL;

	/* Round the descriptor tail up to a 2*__be64 boundary: each jdata
	   tag is a (blkno, escape-flag) pair of __be64s.  "limit" is then
	   how many __be64 slots fit in the remainder of the block. */
	offset += 2*sizeof(__be64) - 1;
	offset &= ~(2*sizeof(__be64) - 1);
	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);

	/*
	 * Start writing ordered buffers, write journaled buffers
	 * into the log along with a header
	 */
	gfs2_log_lock(sdp);
	bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
				       bd_le.le_list);
	while(total_dbuf) {
		num = total_jdata;
		if (num > limit)
			num = limit;
		n = 0;
		list_for_each_entry_safe_continue(bd1, bdt,
						  &sdp->sd_log_le_databuf,
						  bd_le.le_list) {
			/* An ordered write buffer */
			if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
				list_move(&bd1->bd_le.le_list, &started);
				/* Keep bd2 (phase-2 cursor) valid if we
				   just moved the entry it pointed at */
				if (bd1 == bd2) {
					bd2 = NULL;
					bd2 = list_prepare_entry(bd2,
							&sdp->sd_log_le_databuf,
							bd_le.le_list);
				}
				total_dbuf--;
				if (bd1->bd_bh) {
					get_bh(bd1->bd_bh);
					if (buffer_dirty(bd1->bd_bh)) {
						/* Drop the log lock around
						   blocking I/O */
						gfs2_log_unlock(sdp);
						wait_on_buffer(bd1->bd_bh);
						ll_rw_block(WRITE, 1,
							    &bd1->bd_bh);
						gfs2_log_lock(sdp);
					}
					brelse(bd1->bd_bh);
					continue;
				}
				continue;
			} else if (bd1->bd_bh) { /* A journaled buffer */
				int magic;
				gfs2_log_unlock(sdp);
				/* Lazily open a JDATA descriptor block on
				   the first journaled buffer of this batch */
				if (!bh) {
					bh = gfs2_log_get_buf(sdp);
					sdp->sd_log_num_hdrs++;
					ld = (struct gfs2_log_descriptor *)
					     bh->b_data;
					ptr = (__be64 *)(bh->b_data + offset);
					ld->ld_header.mh_magic =
						cpu_to_be32(GFS2_MAGIC);
					ld->ld_header.mh_type =
						cpu_to_be32(GFS2_METATYPE_LD);
					ld->ld_header.mh_format =
						cpu_to_be32(GFS2_FORMAT_LD);
					ld->ld_type =
						cpu_to_be32(GFS2_LOG_DESC_JDATA);
					ld->ld_length = cpu_to_be32(num + 1);
					ld->ld_data1 = cpu_to_be32(num);
					ld->ld_data2 = cpu_to_be32(0);
					memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
				}
				/* Tag pair: block number + escape flag */
				magic = gfs2_check_magic(bd1->bd_bh);
				*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
				*ptr++ = cpu_to_be64((__u64)magic);
				clear_buffer_escaped(bd1->bd_bh);
				if (unlikely(magic != 0))
					set_buffer_escaped(bd1->bd_bh);
				gfs2_log_lock(sdp);
				if (n++ > num)
					break;
			} else if (!bd1->bd_bh) {
				/* Dead entry (buffer already gone): drop it */
				total_dbuf--;
				sdp->sd_log_num_databuf--;
				list_del_init(&bd1->bd_le.le_list);
				if (bd1 == bd2) {
					bd2 = NULL;
					bd2 = list_prepare_entry(bd2,
						&sdp->sd_log_le_databuf,
						bd_le.le_list);
				}
				kmem_cache_free(gfs2_bufdata_cachep, bd1);
			}
		}
		gfs2_log_unlock(sdp);
		/* Write out the descriptor block for this batch */
		if (bh) {
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			bh = NULL;
		}
		n = 0;
		gfs2_log_lock(sdp);
		/* Phase 2: write the journaled data blocks themselves */
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
					     bd_le.le_list) {
			if (!bd2->bd_bh)
				continue;
			/* copy buffer if it needs escaping */
			gfs2_log_unlock(sdp);
			if (unlikely(buffer_escaped(bd2->bd_bh))) {
				void *kaddr;
				struct page *page = bd2->bd_bh->b_page;
				bh = gfs2_log_get_buf(sdp);
				kaddr = kmap_atomic(page, KM_USER0);
				memcpy(bh->b_data,
				       kaddr + bh_offset(bd2->bd_bh),
				       sdp->sd_sb.sb_bsize);
				/* NOTE(review): kunmap_atomic() expects the
				   mapped address (kaddr), not the page —
				   confirm on highmem configs */
				kunmap_atomic(page, KM_USER0);
				*(__be32 *)bh->b_data = 0;
			} else {
				bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			}
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}
		bh = NULL;
		total_dbuf -= num;
		total_jdata -= num;
	}
	gfs2_log_unlock(sdp);

	/* Wait on all ordered buffers */
	while (!list_empty(&started)) {
		gfs2_log_lock(sdp);
		bd1 = list_entry(started.next, struct gfs2_bufdata,
				 bd_le.le_list);
		list_del_init(&bd1->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		bh = bd1->bd_bh;
		if (bh) {
			bh->b_private = NULL;
			get_bh(bh);
			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			brelse(bh);
		} else
			gfs2_log_unlock(sdp);

		kmem_cache_free(gfs2_bufdata_cachep, bd1);
	}

	/* We've removed all the ordered write bufs here, so only jdata left */
	gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
}
| 669 | |||
/*
 * databuf_lo_scan_elements - replay journaled data blocks
 *
 * For a GFS2_LOG_DESC_JDATA descriptor during pass 1 of journal replay,
 * reads each (blkno, escape) tag pair from @ptr, copies the logged block
 * back into place, and un-escapes it (restores GFS2_MAGIC in the first
 * word) if the escape flag is set.  Blocks covered by a revoke are
 * skipped.
 *
 * Returns: 0 on success, a negative error code on failure.
 */
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	/* Skip the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);
		/* error is always 0 here (unlike buf_lo_scan_elements,
		   nothing sets it after the read above), so this break
		   never triggers; kept for symmetry with the buf variant */
		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}
| 720 | |||
| 721 | /* FIXME: sort out accounting for log blocks etc. */ | ||
| 722 | |||
| 723 | static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | ||
| 724 | { | ||
| 725 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
| 726 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 727 | |||
| 728 | if (error) { | ||
| 729 | gfs2_meta_sync(ip->i_gl); | ||
| 730 | return; | ||
| 731 | } | ||
| 732 | if (pass != 1) | ||
| 733 | return; | ||
| 734 | |||
| 735 | /* data sync? */ | ||
| 736 | gfs2_meta_sync(ip->i_gl); | ||
| 737 | |||
| 738 | fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", | ||
| 739 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | ||
| 740 | } | ||
| 741 | |||
| 742 | static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
| 743 | { | ||
| 744 | struct list_head *head = &sdp->sd_log_le_databuf; | ||
| 745 | struct gfs2_bufdata *bd; | ||
| 746 | |||
| 747 | while (!list_empty(head)) { | ||
| 748 | bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); | ||
| 749 | list_del_init(&bd->bd_le.le_list); | ||
| 750 | sdp->sd_log_num_databuf--; | ||
| 751 | sdp->sd_log_num_jdata--; | ||
| 752 | gfs2_unpin(sdp, bd->bd_bh, ai); | ||
| 753 | } | ||
| 754 | gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); | ||
| 755 | gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); | ||
| 756 | } | ||
| 757 | |||
| 758 | |||
/* Glocks only participate at commit time */
const struct gfs2_log_operations gfs2_glock_lops = {
	.lo_add = glock_lo_add,
	.lo_after_commit = glock_lo_after_commit,
	.lo_name = "glock",
};

/* Journaled metadata buffers: full commit + replay support */
const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

/* Revoke tags: written at commit, consumed in replay pass 0 */
const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

/* Resource groups: held across the transaction, released after commit */
const struct gfs2_log_operations gfs2_rg_lops = {
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
	.lo_name = "rg",
};

/* Data buffers (ordered-write and jdata); shares incore_commit with buf */
const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

/* NULL-terminated table iterated by the lops_* dispatch helpers */
const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_glock_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	&gfs2_rg_lops,
	&gfs2_databuf_lops,
	NULL,
};
| 809 | |||
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h new file mode 100644 index 000000000000..5839c05ae6be --- /dev/null +++ b/fs/gfs2/lops.h | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __LOPS_DOT_H__ | ||
| 11 | #define __LOPS_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/list.h> | ||
| 14 | #include "incore.h" | ||
| 15 | |||
| 16 | extern const struct gfs2_log_operations gfs2_glock_lops; | ||
| 17 | extern const struct gfs2_log_operations gfs2_buf_lops; | ||
| 18 | extern const struct gfs2_log_operations gfs2_revoke_lops; | ||
| 19 | extern const struct gfs2_log_operations gfs2_rg_lops; | ||
| 20 | extern const struct gfs2_log_operations gfs2_databuf_lops; | ||
| 21 | |||
| 22 | extern const struct gfs2_log_operations *gfs2_log_ops[]; | ||
| 23 | |||
| 24 | static inline void lops_init_le(struct gfs2_log_element *le, | ||
| 25 | const struct gfs2_log_operations *lops) | ||
| 26 | { | ||
| 27 | INIT_LIST_HEAD(&le->le_list); | ||
| 28 | le->le_ops = lops; | ||
| 29 | } | ||
| 30 | |||
| 31 | static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
| 32 | { | ||
| 33 | if (le->le_ops->lo_add) | ||
| 34 | le->le_ops->lo_add(sdp, le); | ||
| 35 | } | ||
| 36 | |||
| 37 | static inline void lops_incore_commit(struct gfs2_sbd *sdp, | ||
| 38 | struct gfs2_trans *tr) | ||
| 39 | { | ||
| 40 | int x; | ||
| 41 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 42 | if (gfs2_log_ops[x]->lo_incore_commit) | ||
| 43 | gfs2_log_ops[x]->lo_incore_commit(sdp, tr); | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void lops_before_commit(struct gfs2_sbd *sdp) | ||
| 47 | { | ||
| 48 | int x; | ||
| 49 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 50 | if (gfs2_log_ops[x]->lo_before_commit) | ||
| 51 | gfs2_log_ops[x]->lo_before_commit(sdp); | ||
| 52 | } | ||
| 53 | |||
| 54 | static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
| 55 | { | ||
| 56 | int x; | ||
| 57 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 58 | if (gfs2_log_ops[x]->lo_after_commit) | ||
| 59 | gfs2_log_ops[x]->lo_after_commit(sdp, ai); | ||
| 60 | } | ||
| 61 | |||
| 62 | static inline void lops_before_scan(struct gfs2_jdesc *jd, | ||
| 63 | struct gfs2_log_header *head, | ||
| 64 | unsigned int pass) | ||
| 65 | { | ||
| 66 | int x; | ||
| 67 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 68 | if (gfs2_log_ops[x]->lo_before_scan) | ||
| 69 | gfs2_log_ops[x]->lo_before_scan(jd, head, pass); | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | ||
| 73 | struct gfs2_log_descriptor *ld, | ||
| 74 | __be64 *ptr, | ||
| 75 | unsigned int pass) | ||
| 76 | { | ||
| 77 | int x, error; | ||
| 78 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 79 | if (gfs2_log_ops[x]->lo_scan_elements) { | ||
| 80 | error = gfs2_log_ops[x]->lo_scan_elements(jd, start, | ||
| 81 | ld, ptr, pass); | ||
| 82 | if (error) | ||
| 83 | return error; | ||
| 84 | } | ||
| 85 | |||
| 86 | return 0; | ||
| 87 | } | ||
| 88 | |||
| 89 | static inline void lops_after_scan(struct gfs2_jdesc *jd, int error, | ||
| 90 | unsigned int pass) | ||
| 91 | { | ||
| 92 | int x; | ||
| 93 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 94 | if (gfs2_log_ops[x]->lo_before_scan) | ||
| 95 | gfs2_log_ops[x]->lo_after_scan(jd, error, pass); | ||
| 96 | } | ||
| 97 | |||
| 98 | #endif /* __LOPS_DOT_H__ */ | ||
| 99 | |||
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c new file mode 100644 index 000000000000..21508a13bb78 --- /dev/null +++ b/fs/gfs2/main.c | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | #include <linux/init.h> | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | #include <asm/atomic.h> | ||
| 20 | |||
| 21 | #include "gfs2.h" | ||
| 22 | #include "incore.h" | ||
| 23 | #include "ops_fstype.h" | ||
| 24 | #include "sys.h" | ||
| 25 | #include "util.h" | ||
| 26 | #include "glock.h" | ||
| 27 | |||
| 28 | static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | ||
| 29 | { | ||
| 30 | struct gfs2_inode *ip = foo; | ||
| 31 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
| 32 | SLAB_CTOR_CONSTRUCTOR) { | ||
| 33 | inode_init_once(&ip->i_inode); | ||
| 34 | spin_lock_init(&ip->i_spin); | ||
| 35 | init_rwsem(&ip->i_rw_mutex); | ||
| 36 | memset(ip->i_cache, 0, sizeof(ip->i_cache)); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | ||
| 41 | { | ||
| 42 | struct gfs2_glock *gl = foo; | ||
| 43 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
| 44 | SLAB_CTOR_CONSTRUCTOR) { | ||
| 45 | INIT_HLIST_NODE(&gl->gl_list); | ||
| 46 | spin_lock_init(&gl->gl_spin); | ||
| 47 | INIT_LIST_HEAD(&gl->gl_holders); | ||
| 48 | INIT_LIST_HEAD(&gl->gl_waiters1); | ||
| 49 | INIT_LIST_HEAD(&gl->gl_waiters2); | ||
| 50 | INIT_LIST_HEAD(&gl->gl_waiters3); | ||
| 51 | gl->gl_lvb = NULL; | ||
| 52 | atomic_set(&gl->gl_lvb_count, 0); | ||
| 53 | INIT_LIST_HEAD(&gl->gl_reclaim); | ||
| 54 | INIT_LIST_HEAD(&gl->gl_ail_list); | ||
| 55 | atomic_set(&gl->gl_ail_count, 0); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
/**
 * init_gfs2_fs - Register GFS2 as a filesystem
 *
 * Sets up sysfs entries and glock infrastructure, creates the slab
 * caches for glocks, inodes and bufdata, then registers the gfs2 and
 * gfs2meta filesystem types.  On failure, caches created so far are
 * destroyed (the pointers are NULL until successfully created).
 *
 * Returns: 0 on success, error code on failure
 */

static int __init init_gfs2_fs(void)
{
	int error;

	error = gfs2_sys_init();
	if (error)
		return error;

	error = gfs2_glock_init();
	if (error)
		goto fail;
	/* NOTE(review): nothing on the fail path undoes gfs2_glock_init() —
	   confirm it requires no teardown */

	error = -ENOMEM;
	gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
					      sizeof(struct gfs2_glock),
					      0, 0,
					      gfs2_init_glock_once, NULL);
	if (!gfs2_glock_cachep)
		goto fail;

	gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
					      sizeof(struct gfs2_inode),
					      0, (SLAB_RECLAIM_ACCOUNT|
						  SLAB_PANIC|SLAB_MEM_SPREAD),
					      gfs2_init_inode_once, NULL);
	if (!gfs2_inode_cachep)
		goto fail;

	gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
						sizeof(struct gfs2_bufdata),
						0, 0, NULL, NULL);
	if (!gfs2_bufdata_cachep)
		goto fail;

	error = register_filesystem(&gfs2_fs_type);
	if (error)
		goto fail;

	error = register_filesystem(&gfs2meta_fs_type);
	if (error)
		goto fail_unregister;

	printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);

	return 0;

fail_unregister:
	unregister_filesystem(&gfs2_fs_type);
fail:
	/* Destroy whichever caches were created before the failure */
	if (gfs2_bufdata_cachep)
		kmem_cache_destroy(gfs2_bufdata_cachep);

	if (gfs2_inode_cachep)
		kmem_cache_destroy(gfs2_inode_cachep);

	if (gfs2_glock_cachep)
		kmem_cache_destroy(gfs2_glock_cachep);

	gfs2_sys_uninit();
	return error;
}
| 126 | |||
/**
 * exit_gfs2_fs - Unregister the file system
 *
 * Reverses init_gfs2_fs(): drops both filesystem types, frees the slab
 * caches and removes the sysfs entries.
 */

static void __exit exit_gfs2_fs(void)
{
	unregister_filesystem(&gfs2_fs_type);
	unregister_filesystem(&gfs2meta_fs_type);

	kmem_cache_destroy(gfs2_bufdata_cachep);
	kmem_cache_destroy(gfs2_inode_cachep);
	kmem_cache_destroy(gfs2_glock_cachep);

	gfs2_sys_uninit();
}
| 143 | |||
| 144 | MODULE_DESCRIPTION("Global File System"); | ||
| 145 | MODULE_AUTHOR("Red Hat, Inc."); | ||
| 146 | MODULE_LICENSE("GPL"); | ||
| 147 | |||
| 148 | module_init(init_gfs2_fs); | ||
| 149 | module_exit(exit_gfs2_fs); | ||
| 150 | |||
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c new file mode 100644 index 000000000000..3912d6a4b1e6 --- /dev/null +++ b/fs/gfs2/meta_io.c | |||
| @@ -0,0 +1,590 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/mm.h> | ||
| 16 | #include <linux/pagemap.h> | ||
| 17 | #include <linux/writeback.h> | ||
| 18 | #include <linux/swap.h> | ||
| 19 | #include <linux/delay.h> | ||
| 20 | #include <linux/bio.h> | ||
| 21 | #include <linux/gfs2_ondisk.h> | ||
| 22 | #include <linux/lm_interface.h> | ||
| 23 | |||
| 24 | #include "gfs2.h" | ||
| 25 | #include "incore.h" | ||
| 26 | #include "glock.h" | ||
| 27 | #include "glops.h" | ||
| 28 | #include "inode.h" | ||
| 29 | #include "log.h" | ||
| 30 | #include "lops.h" | ||
| 31 | #include "meta_io.h" | ||
| 32 | #include "rgrp.h" | ||
| 33 | #include "trans.h" | ||
| 34 | #include "util.h" | ||
| 35 | #include "ops_address.h" | ||
| 36 | |||
/*
 * Stub get_block for metadata address spaces: reaching this path is
 * unexpected (hence the assert_warn); always fails with -EOPNOTSUPP.
 */
static int aspace_get_block(struct inode *inode, sector_t lblock,
			    struct buffer_head *bh_result, int create)
{
	gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
	return -EOPNOTSUPP;
}
| 43 | |||
/* Writepage for the metadata aspace; delegates to the generic buffer
   writeback path with the (never-creating) stub get_block above. */
static int gfs2_aspace_writepage(struct page *page,
				 struct writeback_control *wbc)
{
	return block_write_full_page(page, aspace_get_block, wbc);
}
| 49 | |||
/* Address-space ops for the per-glock metadata aspace inodes */
static const struct address_space_operations aspace_aops = {
	.writepage = gfs2_aspace_writepage,
	.releasepage = gfs2_releasepage,
};
| 54 | |||
| 55 | /** | ||
| 56 | * gfs2_aspace_get - Create and initialize a struct inode structure | ||
| 57 | * @sdp: the filesystem the aspace is in | ||
| 58 | * | ||
| 59 | * Right now a struct inode is just a struct inode. Maybe Linux | ||
| 60 | * will supply a more lightweight address space construct (that works) | ||
| 61 | * in the future. | ||
| 62 | * | ||
| 63 | * Make sure pages/buffers in this aspace aren't in high memory. | ||
| 64 | * | ||
| 65 | * Returns: the aspace | ||
| 66 | */ | ||
| 67 | |||
| 68 | struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) | ||
| 69 | { | ||
| 70 | struct inode *aspace; | ||
| 71 | |||
| 72 | aspace = new_inode(sdp->sd_vfs); | ||
| 73 | if (aspace) { | ||
| 74 | mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); | ||
| 75 | aspace->i_mapping->a_ops = &aspace_aops; | ||
| 76 | aspace->i_size = ~0ULL; | ||
| 77 | aspace->i_private = NULL; | ||
| 78 | insert_inode_hash(aspace); | ||
| 79 | } | ||
| 80 | return aspace; | ||
| 81 | } | ||
| 82 | |||
/* Tear down an aspace inode obtained from gfs2_aspace_get() */
void gfs2_aspace_put(struct inode *aspace)
{
	remove_inode_hash(aspace);
	iput(aspace);
}
| 88 | |||
/**
 * gfs2_meta_inval - Invalidate all buffers associated with a glock
 * @gl: the glock
 *
 * Throws away every page cached in the glock's metadata address space.
 * The glock's AIL must already be empty, and afterwards the mapping
 * must hold no pages; both conditions are asserted.
 */

void gfs2_meta_inval(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct inode *aspace = gl->gl_aspace;
	struct address_space *mapping = gl->gl_aspace->i_mapping;

	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));

	/* NOTE(review): i_writecount is bumped around the truncate,
	   presumably to mark the aspace busy to some other code path —
	   confirm which reader depends on this. */
	atomic_inc(&aspace->i_writecount);
	truncate_inode_pages(mapping, 0);
	atomic_dec(&aspace->i_writecount);

	gfs2_assert_withdraw(sdp, !mapping->nrpages);
}
| 109 | |||
| 110 | /** | ||
| 111 | * gfs2_meta_sync - Sync all buffers associated with a glock | ||
| 112 | * @gl: The glock | ||
| 113 | * | ||
| 114 | */ | ||
| 115 | |||
| 116 | void gfs2_meta_sync(struct gfs2_glock *gl) | ||
| 117 | { | ||
| 118 | struct address_space *mapping = gl->gl_aspace->i_mapping; | ||
| 119 | int error; | ||
| 120 | |||
| 121 | filemap_fdatawrite(mapping); | ||
| 122 | error = filemap_fdatawait(mapping); | ||
| 123 | |||
| 124 | if (error) | ||
| 125 | gfs2_io_error(gl->gl_sbd); | ||
| 126 | } | ||
| 127 | |||
/**
 * getbuf - Get a buffer with a given address space
 * @sdp: the filesystem
 * @aspace: the address space
 * @blkno: the block number (filesystem scope)
 * @create: 1 if the buffer should be created
 *
 * Looks up (or, when @create, allocates) the page covering @blkno in
 * the aspace's page cache, attaches buffers if necessary, and returns
 * the buffer head for @blkno with an extra reference held.
 *
 * Returns: the buffer, or NULL when @create is 0 and no page is cached
 */

static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
				  u64 blkno, int create)
{
	struct page *page;
	struct buffer_head *bh;
	unsigned int shift;
	unsigned long index;
	unsigned int bufnum;

	/* A page holds 1 << shift filesystem blocks */
	shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	index = blkno >> shift;             /* convert block to page */
	bufnum = blkno - (index << shift);  /* block buf index within page */

	if (create) {
		/* grab_cache_page() can fail under memory pressure;
		   keep retrying, yielding the CPU between attempts. */
		for (;;) {
			page = grab_cache_page(aspace->i_mapping, index);
			if (page)
				break;
			yield();
		}
	} else {
		page = find_lock_page(aspace->i_mapping, index);
		if (!page)
			return NULL;
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);

	/* Locate header for our buffer within our page */
	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
		/* Do nothing */;
	get_bh(bh);	/* reference handed back to the caller */

	if (!buffer_mapped(bh))
		map_bh(bh, sdp->sd_vfs, blkno);

	unlock_page(page);
	mark_page_accessed(page);
	page_cache_release(page);

	return bh;
}
| 181 | |||
/* Prepare a freshly allocated metadata buffer: mark it clean and
   uptodate (its contents will be written from scratch, not read from
   disk) and stamp the GFS2 magic number into the on-disk header. */
static void meta_prep_new(struct buffer_head *bh)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}
| 193 | |||
| 194 | /** | ||
| 195 | * gfs2_meta_new - Get a block | ||
| 196 | * @gl: The glock associated with this block | ||
| 197 | * @blkno: The block number | ||
| 198 | * | ||
| 199 | * Returns: The buffer | ||
| 200 | */ | ||
| 201 | |||
| 202 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) | ||
| 203 | { | ||
| 204 | struct buffer_head *bh; | ||
| 205 | bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); | ||
| 206 | meta_prep_new(bh); | ||
| 207 | return bh; | ||
| 208 | } | ||
| 209 | |||
/**
 * gfs2_meta_read - Read a block from disk
 * @gl: The glock covering the block
 * @blkno: The block number
 * @flags: DIO_WAIT to wait for the read to complete
 * @bhp: the place where the buffer is returned
 *
 * Gets (creating if needed) the buffer for @blkno and submits a read
 * if it is not already uptodate.  With DIO_WAIT the read is waited on
 * and the buffer released on error; without it the caller gets the
 * buffer back with I/O possibly still in flight.
 *
 * Returns: errno
 */

int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
		   struct buffer_head **bhp)
{
	*bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
	if (!buffer_uptodate(*bhp))
		ll_rw_block(READ_META, 1, bhp);
	if (flags & DIO_WAIT) {
		int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
		if (error) {
			brelse(*bhp);
			return error;
		}
	}

	return 0;
}
| 236 | |||
/**
 * gfs2_meta_wait - Wait for a previously submitted read to complete
 * @sdp: the filesystem
 * @bh: The block to wait for
 *
 * Bails out with -EIO immediately if the filesystem has been shut
 * down (checked both before and after the wait).  A buffer that is
 * still not uptodate after the wait is an I/O error; it is reported
 * via gfs2_io_error_bh() only when inside a touched transaction.
 *
 * Returns: errno
 */

int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	wait_on_buffer(bh);

	if (!buffer_uptodate(bh)) {
		struct gfs2_trans *tr = current->journal_info;
		if (tr && tr->tr_touched)
			gfs2_io_error_bh(sdp, bh);
		return -EIO;
	}
	/* The fs may have been withdrawn while we slept */
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	return 0;
}
| 263 | |||
| 264 | /** | ||
| 265 | * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer | ||
| 266 | * @gl: the glock the buffer belongs to | ||
| 267 | * @bh: The buffer to be attached to | ||
| 268 | * @meta: Flag to indicate whether its metadata or not | ||
| 269 | */ | ||
| 270 | |||
| 271 | void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, | ||
| 272 | int meta) | ||
| 273 | { | ||
| 274 | struct gfs2_bufdata *bd; | ||
| 275 | |||
| 276 | if (meta) | ||
| 277 | lock_page(bh->b_page); | ||
| 278 | |||
| 279 | if (bh->b_private) { | ||
| 280 | if (meta) | ||
| 281 | unlock_page(bh->b_page); | ||
| 282 | return; | ||
| 283 | } | ||
| 284 | |||
| 285 | bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), | ||
| 286 | memset(bd, 0, sizeof(struct gfs2_bufdata)); | ||
| 287 | bd->bd_bh = bh; | ||
| 288 | bd->bd_gl = gl; | ||
| 289 | |||
| 290 | INIT_LIST_HEAD(&bd->bd_list_tr); | ||
| 291 | if (meta) | ||
| 292 | lops_init_le(&bd->bd_le, &gfs2_buf_lops); | ||
| 293 | else | ||
| 294 | lops_init_le(&bd->bd_le, &gfs2_databuf_lops); | ||
| 295 | bh->b_private = bd; | ||
| 296 | |||
| 297 | if (meta) | ||
| 298 | unlock_page(bh->b_page); | ||
| 299 | } | ||
| 300 | |||
/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to be pinned
 *
 * Marks the buffer pinned (asserting it was not already), waits out
 * any in-flight I/O, pulls it off its AIL-1 list if it has already
 * reached its in-place location, clears the dirty bit so the VM will
 * not write it while pinned, and takes an extra reference.
 */

void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd = bh->b_private;

	/* Pinning only makes sense while the journal is live */
	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));

	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);

	wait_on_buffer(bh);

	/* If this buffer is in the AIL and it has already been written
	   to in-place disk block, remove it from the AIL. */

	gfs2_log_lock(sdp);
	if (bd->bd_ail && !buffer_in_io(bh))
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
	gfs2_log_unlock(sdp);

	clear_buffer_dirty(bh);
	wait_on_buffer(bh);

	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);

	get_bh(bh);	/* held for the duration of the pin */
}
| 335 | |||
/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: the AIL entry this buffer should be placed on
 *
 * Re-dirties the buffer so it can be written back, clears the pinned
 * bit, and moves the bufdata onto @ai's AIL-1 list.  If the buffer
 * was already on an AIL a reference is dropped; otherwise it is newly
 * accounted against its glock's AIL count.
 */

void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd = bh->b_private;

	gfs2_assert_withdraw(sdp, buffer_uptodate(bh));

	if (!buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);

	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	gfs2_log_lock(sdp);
	if (bd->bd_ail) {
		/* Already on an AIL: just move lists, drop a reference */
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_ail = ai;
	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
	gfs2_log_unlock(sdp);
}
| 370 | |||
/**
 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
 * @ip: the inode who owns the buffers
 * @bstart: the first buffer in the run
 * @blen: the number of buffers in the run
 *
 * For each block in [bstart, bstart + blen): if its buffer is cached,
 * unpin it (removing it from the log's buffer list), detach it from
 * any AIL (issuing a revoke for the block), and clear its dirty and
 * uptodate state so stale contents cannot be written back.
 */

void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *aspace = ip->i_gl->gl_aspace;
	struct buffer_head *bh;

	while (blen) {
		bh = getbuf(sdp, aspace, bstart, NO_CREATE);
		if (bh) {
			struct gfs2_bufdata *bd = bh->b_private;

			/* NOTE(review): tr is dereferenced without a NULL
			   check — assumes a transaction is always active
			   when a pinned buffer is found; confirm. */
			if (test_clear_buffer_pinned(bh)) {
				struct gfs2_trans *tr = current->journal_info;
				gfs2_log_lock(sdp);
				list_del_init(&bd->bd_le.le_list);
				gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
				sdp->sd_log_num_buf--;
				gfs2_log_unlock(sdp);
				tr->tr_num_buf_rm++;
				brelse(bh);
			}
			/* If on an AIL, detach and revoke the block so
			   journal replay won't resurrect it. */
			if (bd) {
				gfs2_log_lock(sdp);
				if (bd->bd_ail) {
					u64 blkno = bh->b_blocknr;
					bd->bd_ail = NULL;
					list_del(&bd->bd_ail_st_list);
					list_del(&bd->bd_ail_gl_list);
					atomic_dec(&bd->bd_gl->gl_ail_count);
					brelse(bh);
					gfs2_log_unlock(sdp);
					gfs2_trans_add_revoke(sdp, blkno);
				} else
					gfs2_log_unlock(sdp);
			}

			/* Make sure stale contents are never written back */
			lock_buffer(bh);
			clear_buffer_dirty(bh);
			clear_buffer_uptodate(bh);
			unlock_buffer(bh);

			brelse(bh);
		}

		bstart++;
		blen--;
	}
}
| 427 | |||
| 428 | /** | ||
| 429 | * gfs2_meta_cache_flush - get rid of any references on buffers for this inode | ||
| 430 | * @ip: The GFS2 inode | ||
| 431 | * | ||
| 432 | * This releases buffers that are in the most-recently-used array of | ||
| 433 | * blocks used for indirect block addressing for this inode. | ||
| 434 | */ | ||
| 435 | |||
| 436 | void gfs2_meta_cache_flush(struct gfs2_inode *ip) | ||
| 437 | { | ||
| 438 | struct buffer_head **bh_slot; | ||
| 439 | unsigned int x; | ||
| 440 | |||
| 441 | spin_lock(&ip->i_spin); | ||
| 442 | |||
| 443 | for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { | ||
| 444 | bh_slot = &ip->i_cache[x]; | ||
| 445 | if (!*bh_slot) | ||
| 446 | break; | ||
| 447 | brelse(*bh_slot); | ||
| 448 | *bh_slot = NULL; | ||
| 449 | } | ||
| 450 | |||
| 451 | spin_unlock(&ip->i_spin); | ||
| 452 | } | ||
| 453 | |||
/**
 * gfs2_meta_indirect_buffer - Get a metadata buffer
 * @ip: The GFS2 inode
 * @height: The level of this buf in the metadata (indir addr) tree (if any)
 * @num: The block number (device relative) of the buffer
 * @new: Non-zero if we may create a new buffer
 * @bhp: the buffer is returned here
 *
 * Try to use the gfs2_inode's MRU metadata tree cache.
 *
 * Returns: errno
 */

int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
			      int new, struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_glock *gl = ip->i_gl;
	struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
	int in_cache = 0;

	/* Fast path: the per-inode cache slot for this height already
	   holds the right block */
	spin_lock(&ip->i_spin);
	if (*bh_slot && (*bh_slot)->b_blocknr == num) {
		bh = *bh_slot;
		get_bh(bh);
		in_cache = 1;
	}
	spin_unlock(&ip->i_spin);

	if (!bh)
		bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);

	if (!bh)
		return -ENOBUFS;

	if (new) {
		/* A brand-new block here must be an indirect block
		   (height > 0); set up its header in the transaction */
		if (gfs2_assert_warn(sdp, height))
			goto err;
		meta_prep_new(bh);
		gfs2_trans_add_bh(ip->i_gl, bh, 1);
		gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
		gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
	} else {
		u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
		if (!buffer_uptodate(bh)) {
			ll_rw_block(READ_META, 1, &bh);
			if (gfs2_meta_wait(sdp, bh))
				goto err;
		}
		if (gfs2_metatype_check(sdp, bh, mtype))
			goto err;
	}

	/* Remember this buffer in the cache slot, with its own ref */
	if (!in_cache) {
		spin_lock(&ip->i_spin);
		if (*bh_slot)
			brelse(*bh_slot);
		*bh_slot = bh;
		get_bh(bh);
		spin_unlock(&ip->i_spin);
	}

	*bhp = bh;
	return 0;
err:
	brelse(bh);
	return -EIO;
}
| 522 | |||
/**
 * gfs2_meta_ra - start readahead on an extent of a file
 * @gl: the glock the blocks belong to
 * @dblock: the starting disk block
 * @extlen: the number of blocks in the extent
 *
 * Issues READ_META for the first block and non-blocking READA for the
 * following blocks (clamped by the gt_max_readahead tunable), then
 * waits only for the first block.
 *
 * returns: the first buffer in the extent
 */

struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct inode *aspace = gl->gl_aspace;
	struct buffer_head *first_bh, *bh;
	u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
		  sdp->sd_sb.sb_bsize_shift;

	BUG_ON(!extlen);

	/* Clamp the extent to the readahead limit (in blocks), but
	   always read at least the first block */
	if (max_ra < 1)
		max_ra = 1;
	if (extlen > max_ra)
		extlen = max_ra;

	first_bh = getbuf(sdp, aspace, dblock, CREATE);

	if (buffer_uptodate(first_bh))
		goto out;
	if (!buffer_locked(first_bh))
		ll_rw_block(READ_META, 1, &first_bh);

	dblock++;
	extlen--;

	while (extlen) {
		bh = getbuf(sdp, aspace, dblock, CREATE);

		if (!buffer_uptodate(bh) && !buffer_locked(bh))
			ll_rw_block(READA, 1, &bh);
		brelse(bh);
		dblock++;
		extlen--;
		/* The caller only needs the first block: once it has
		   completed, stop issuing readahead */
		if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
			goto out;
	}

	wait_on_buffer(first_bh);
out:
	return first_bh;
}
| 573 | |||
| 574 | /** | ||
| 575 | * gfs2_meta_syncfs - sync all the buffers in a filesystem | ||
| 576 | * @sdp: the filesystem | ||
| 577 | * | ||
| 578 | */ | ||
| 579 | |||
| 580 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp) | ||
| 581 | { | ||
| 582 | gfs2_log_flush(sdp, NULL); | ||
| 583 | for (;;) { | ||
| 584 | gfs2_ail1_start(sdp, DIO_ALL); | ||
| 585 | if (gfs2_ail1_empty(sdp, DIO_ALL)) | ||
| 586 | break; | ||
| 587 | msleep(10); | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h new file mode 100644 index 000000000000..3ec939e20dff --- /dev/null +++ b/fs/gfs2/meta_io.h | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __DIO_DOT_H__ | ||
| 11 | #define __DIO_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/buffer_head.h> | ||
| 14 | #include <linux/string.h> | ||
| 15 | #include "incore.h" | ||
| 16 | |||
/* Zero the entire data area of @bh. */
static inline void gfs2_buffer_clear(struct buffer_head *bh)
{
	memset(bh->b_data, 0, bh->b_size);
}
| 21 | |||
/* Zero everything in @bh after the first @head bytes (the header). */
static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
{
	BUG_ON(head > bh->b_size);
	memset(bh->b_data + head, 0, bh->b_size - head);
}
| 27 | |||
/* Copy the tail of @from_bh (everything after @from_head bytes) into
 * @to_bh at offset @to_head, then zero the remainder of @to_bh.
 * Requires from_head >= to_head.  NOTE(review): the memset start,
 * to_bh->b_size + to_head - from_head, coincides with the end of the
 * copied region only when both buffers have the same b_size — this
 * appears to be an implicit assumption; confirm at call sites. */
static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
					 int to_head,
					 struct buffer_head *from_bh,
					 int from_head)
{
	BUG_ON(from_head < to_head);
	memcpy(to_bh->b_data + to_head, from_bh->b_data + from_head,
	       from_bh->b_size - from_head);
	memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
	       0, from_head - to_head);
}
| 39 | |||
| 40 | struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); | ||
| 41 | void gfs2_aspace_put(struct inode *aspace); | ||
| 42 | |||
| 43 | void gfs2_meta_inval(struct gfs2_glock *gl); | ||
| 44 | void gfs2_meta_sync(struct gfs2_glock *gl); | ||
| 45 | |||
| 46 | struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); | ||
| 47 | int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, | ||
| 48 | int flags, struct buffer_head **bhp); | ||
| 49 | int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); | ||
| 50 | |||
| 51 | void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, | ||
| 52 | int meta); | ||
| 53 | void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); | ||
| 54 | void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | ||
| 55 | struct gfs2_ail *ai); | ||
| 56 | |||
| 57 | void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); | ||
| 58 | |||
| 59 | void gfs2_meta_cache_flush(struct gfs2_inode *ip); | ||
| 60 | int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, | ||
| 61 | int new, struct buffer_head **bhp); | ||
| 62 | |||
/* Read the inode's own dinode block (height 0, never created here). */
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
					 struct buffer_head **bhp)
{
	return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
}
| 68 | |||
| 69 | struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); | ||
| 70 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp); | ||
| 71 | |||
/* A buffer is "busy" if it is dirty, locked, or pinned in the journal;
   "in io" means dirty or locked only. */
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
#define buffer_in_io(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
| 76 | |||
| 77 | #endif /* __DIO_DOT_H__ */ | ||
| 78 | |||
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c new file mode 100644 index 000000000000..ef3092e29607 --- /dev/null +++ b/fs/gfs2/mount.c | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/lm_interface.h> | ||
| 17 | |||
| 18 | #include "gfs2.h" | ||
| 19 | #include "incore.h" | ||
| 20 | #include "mount.h" | ||
| 21 | #include "sys.h" | ||
| 22 | #include "util.h" | ||
| 23 | |||
| 24 | /** | ||
| 25 | * gfs2_mount_args - Parse mount options | ||
| 26 | * @sdp: | ||
| 27 | * @data: | ||
| 28 | * | ||
| 29 | * Return: errno | ||
| 30 | */ | ||
| 31 | |||
| 32 | int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | ||
| 33 | { | ||
| 34 | struct gfs2_args *args = &sdp->sd_args; | ||
| 35 | char *data = data_arg; | ||
| 36 | char *options, *o, *v; | ||
| 37 | int error = 0; | ||
| 38 | |||
| 39 | if (!remount) { | ||
| 40 | /* If someone preloaded options, use those instead */ | ||
| 41 | spin_lock(&gfs2_sys_margs_lock); | ||
| 42 | if (gfs2_sys_margs) { | ||
| 43 | data = gfs2_sys_margs; | ||
| 44 | gfs2_sys_margs = NULL; | ||
| 45 | } | ||
| 46 | spin_unlock(&gfs2_sys_margs_lock); | ||
| 47 | |||
| 48 | /* Set some defaults */ | ||
| 49 | args->ar_num_glockd = GFS2_GLOCKD_DEFAULT; | ||
| 50 | args->ar_quota = GFS2_QUOTA_DEFAULT; | ||
| 51 | args->ar_data = GFS2_DATA_DEFAULT; | ||
| 52 | } | ||
| 53 | |||
| 54 | /* Split the options into tokens with the "," character and | ||
| 55 | process them */ | ||
| 56 | |||
| 57 | for (options = data; (o = strsep(&options, ",")); ) { | ||
| 58 | if (!*o) | ||
| 59 | continue; | ||
| 60 | |||
| 61 | v = strchr(o, '='); | ||
| 62 | if (v) | ||
| 63 | *v++ = 0; | ||
| 64 | |||
| 65 | if (!strcmp(o, "lockproto")) { | ||
| 66 | if (!v) | ||
| 67 | goto need_value; | ||
| 68 | if (remount && strcmp(v, args->ar_lockproto)) | ||
| 69 | goto cant_remount; | ||
| 70 | strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN); | ||
| 71 | args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0; | ||
| 72 | } | ||
| 73 | |||
| 74 | else if (!strcmp(o, "locktable")) { | ||
| 75 | if (!v) | ||
| 76 | goto need_value; | ||
| 77 | if (remount && strcmp(v, args->ar_locktable)) | ||
| 78 | goto cant_remount; | ||
| 79 | strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN); | ||
| 80 | args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | else if (!strcmp(o, "hostdata")) { | ||
| 84 | if (!v) | ||
| 85 | goto need_value; | ||
| 86 | if (remount && strcmp(v, args->ar_hostdata)) | ||
| 87 | goto cant_remount; | ||
| 88 | strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN); | ||
| 89 | args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0; | ||
| 90 | } | ||
| 91 | |||
| 92 | else if (!strcmp(o, "spectator")) { | ||
| 93 | if (remount && !args->ar_spectator) | ||
| 94 | goto cant_remount; | ||
| 95 | args->ar_spectator = 1; | ||
| 96 | sdp->sd_vfs->s_flags |= MS_RDONLY; | ||
| 97 | } | ||
| 98 | |||
| 99 | else if (!strcmp(o, "ignore_local_fs")) { | ||
| 100 | if (remount && !args->ar_ignore_local_fs) | ||
| 101 | goto cant_remount; | ||
| 102 | args->ar_ignore_local_fs = 1; | ||
| 103 | } | ||
| 104 | |||
| 105 | else if (!strcmp(o, "localflocks")) { | ||
| 106 | if (remount && !args->ar_localflocks) | ||
| 107 | goto cant_remount; | ||
| 108 | args->ar_localflocks = 1; | ||
| 109 | } | ||
| 110 | |||
| 111 | else if (!strcmp(o, "localcaching")) { | ||
| 112 | if (remount && !args->ar_localcaching) | ||
| 113 | goto cant_remount; | ||
| 114 | args->ar_localcaching = 1; | ||
| 115 | } | ||
| 116 | |||
| 117 | else if (!strcmp(o, "debug")) | ||
| 118 | args->ar_debug = 1; | ||
| 119 | |||
| 120 | else if (!strcmp(o, "nodebug")) | ||
| 121 | args->ar_debug = 0; | ||
| 122 | |||
| 123 | else if (!strcmp(o, "upgrade")) { | ||
| 124 | if (remount && !args->ar_upgrade) | ||
| 125 | goto cant_remount; | ||
| 126 | args->ar_upgrade = 1; | ||
| 127 | } | ||
| 128 | |||
| 129 | else if (!strcmp(o, "num_glockd")) { | ||
| 130 | unsigned int x; | ||
| 131 | if (!v) | ||
| 132 | goto need_value; | ||
| 133 | sscanf(v, "%u", &x); | ||
| 134 | if (remount && x != args->ar_num_glockd) | ||
| 135 | goto cant_remount; | ||
| 136 | if (!x || x > GFS2_GLOCKD_MAX) { | ||
| 137 | fs_info(sdp, "0 < num_glockd <= %u (not %u)\n", | ||
| 138 | GFS2_GLOCKD_MAX, x); | ||
| 139 | error = -EINVAL; | ||
| 140 | break; | ||
| 141 | } | ||
| 142 | args->ar_num_glockd = x; | ||
| 143 | } | ||
| 144 | |||
| 145 | else if (!strcmp(o, "acl")) { | ||
| 146 | args->ar_posix_acl = 1; | ||
| 147 | sdp->sd_vfs->s_flags |= MS_POSIXACL; | ||
| 148 | } | ||
| 149 | |||
| 150 | else if (!strcmp(o, "noacl")) { | ||
| 151 | args->ar_posix_acl = 0; | ||
| 152 | sdp->sd_vfs->s_flags &= ~MS_POSIXACL; | ||
| 153 | } | ||
| 154 | |||
| 155 | else if (!strcmp(o, "quota")) { | ||
| 156 | if (!v) | ||
| 157 | goto need_value; | ||
| 158 | if (!strcmp(v, "off")) | ||
| 159 | args->ar_quota = GFS2_QUOTA_OFF; | ||
| 160 | else if (!strcmp(v, "account")) | ||
| 161 | args->ar_quota = GFS2_QUOTA_ACCOUNT; | ||
| 162 | else if (!strcmp(v, "on")) | ||
| 163 | args->ar_quota = GFS2_QUOTA_ON; | ||
| 164 | else { | ||
| 165 | fs_info(sdp, "invalid value for quota\n"); | ||
| 166 | error = -EINVAL; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | else if (!strcmp(o, "suiddir")) | ||
| 172 | args->ar_suiddir = 1; | ||
| 173 | |||
| 174 | else if (!strcmp(o, "nosuiddir")) | ||
| 175 | args->ar_suiddir = 0; | ||
| 176 | |||
| 177 | else if (!strcmp(o, "data")) { | ||
| 178 | if (!v) | ||
| 179 | goto need_value; | ||
| 180 | if (!strcmp(v, "writeback")) | ||
| 181 | args->ar_data = GFS2_DATA_WRITEBACK; | ||
| 182 | else if (!strcmp(v, "ordered")) | ||
| 183 | args->ar_data = GFS2_DATA_ORDERED; | ||
| 184 | else { | ||
| 185 | fs_info(sdp, "invalid value for data\n"); | ||
| 186 | error = -EINVAL; | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | else { | ||
| 192 | fs_info(sdp, "unknown option: %s\n", o); | ||
| 193 | error = -EINVAL; | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | if (error) | ||
| 199 | fs_info(sdp, "invalid mount option(s)\n"); | ||
| 200 | |||
| 201 | if (data != data_arg) | ||
| 202 | kfree(data); | ||
| 203 | |||
| 204 | return error; | ||
| 205 | |||
| 206 | need_value: | ||
| 207 | fs_info(sdp, "need value for option %s\n", o); | ||
| 208 | return -EINVAL; | ||
| 209 | |||
| 210 | cant_remount: | ||
| 211 | fs_info(sdp, "can't remount with option %s\n", o); | ||
| 212 | return -EINVAL; | ||
| 213 | } | ||
| 214 | |||
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h new file mode 100644 index 000000000000..401288acfdf3 --- /dev/null +++ b/fs/gfs2/mount.h | |||
| @@ -0,0 +1,17 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __MOUNT_DOT_H__ | ||
| 11 | #define __MOUNT_DOT_H__ | ||
| 12 | |||
| 13 | struct gfs2_sbd; | ||
| 14 | |||
| 15 | int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount); | ||
| 16 | |||
| 17 | #endif /* __MOUNT_DOT_H__ */ | ||
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c new file mode 100644 index 000000000000..1025960b0e6e --- /dev/null +++ b/fs/gfs2/ondisk.c | |||
| @@ -0,0 +1,308 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | |||
| 16 | #include "gfs2.h" | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | |||
/* Print one struct member to the kernel log as "  member = value".
 * No trailing semicolon in the expansion (callers supply their own),
 * and the pointer parameter is not named after the 'struct' keyword. */
#define pv(ptr, member, fmt) \
	printk(KERN_INFO "  "#member" = "fmt"\n", (ptr)->member)
| 21 | |||
| 22 | /* | ||
| 23 | * gfs2_xxx_in - read in an xxx struct | ||
| 24 | * first arg: the cpu-order structure | ||
| 25 | * buf: the disk-order buffer | ||
| 26 | * | ||
| 27 | * gfs2_xxx_out - write out an xxx struct | ||
| 28 | * first arg: the cpu-order structure | ||
| 29 | * buf: the disk-order buffer | ||
| 30 | * | ||
| 31 | * gfs2_xxx_print - print out an xxx struct | ||
| 32 | * first arg: the cpu-order structure | ||
| 33 | */ | ||
| 34 | |||
| 35 | void gfs2_inum_in(struct gfs2_inum *no, const void *buf) | ||
| 36 | { | ||
| 37 | const struct gfs2_inum *str = buf; | ||
| 38 | |||
| 39 | no->no_formal_ino = be64_to_cpu(str->no_formal_ino); | ||
| 40 | no->no_addr = be64_to_cpu(str->no_addr); | ||
| 41 | } | ||
| 42 | |||
| 43 | void gfs2_inum_out(const struct gfs2_inum *no, void *buf) | ||
| 44 | { | ||
| 45 | struct gfs2_inum *str = buf; | ||
| 46 | |||
| 47 | str->no_formal_ino = cpu_to_be64(no->no_formal_ino); | ||
| 48 | str->no_addr = cpu_to_be64(no->no_addr); | ||
| 49 | } | ||
| 50 | |||
/* Dump a cpu-order gfs2_inum to the kernel log. */
static void gfs2_inum_print(const struct gfs2_inum *no)
{
	printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
}
| 55 | } | ||
| 56 | |||
| 57 | static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf) | ||
| 58 | { | ||
| 59 | const struct gfs2_meta_header *str = buf; | ||
| 60 | |||
| 61 | mh->mh_magic = be32_to_cpu(str->mh_magic); | ||
| 62 | mh->mh_type = be32_to_cpu(str->mh_type); | ||
| 63 | mh->mh_format = be32_to_cpu(str->mh_format); | ||
| 64 | } | ||
| 65 | |||
| 66 | static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf) | ||
| 67 | { | ||
| 68 | struct gfs2_meta_header *str = buf; | ||
| 69 | |||
| 70 | str->mh_magic = cpu_to_be32(mh->mh_magic); | ||
| 71 | str->mh_type = cpu_to_be32(mh->mh_type); | ||
| 72 | str->mh_format = cpu_to_be32(mh->mh_format); | ||
| 73 | } | ||
| 74 | |||
/* Dump a cpu-order meta header to the kernel log. */
static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
{
	pv(mh, mh_magic, "0x%.8X");
	pv(mh, mh_type, "%u");
	pv(mh, mh_format, "%u");
}
| 80 | } | ||
| 81 | |||
| 82 | void gfs2_sb_in(struct gfs2_sb *sb, const void *buf) | ||
| 83 | { | ||
| 84 | const struct gfs2_sb *str = buf; | ||
| 85 | |||
| 86 | gfs2_meta_header_in(&sb->sb_header, buf); | ||
| 87 | |||
| 88 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
| 89 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
| 90 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
| 91 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
| 92 | |||
| 93 | gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir); | ||
| 94 | gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir); | ||
| 95 | |||
| 96 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
| 97 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
| 98 | } | ||
| 99 | |||
| 100 | void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf) | ||
| 101 | { | ||
| 102 | const struct gfs2_rindex *str = buf; | ||
| 103 | |||
| 104 | ri->ri_addr = be64_to_cpu(str->ri_addr); | ||
| 105 | ri->ri_length = be32_to_cpu(str->ri_length); | ||
| 106 | ri->ri_data0 = be64_to_cpu(str->ri_data0); | ||
| 107 | ri->ri_data = be32_to_cpu(str->ri_data); | ||
| 108 | ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes); | ||
| 109 | |||
| 110 | } | ||
| 111 | |||
/* Dump the fields of an in-core rindex entry to the kernel log (debug aid). */
void gfs2_rindex_print(const struct gfs2_rindex *ri)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
	pv(ri, ri_length, "%u");

	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
	pv(ri, ri_data, "%u");

	pv(ri, ri_bitbytes, "%u");
}
| 122 | |||
| 123 | void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf) | ||
| 124 | { | ||
| 125 | const struct gfs2_rgrp *str = buf; | ||
| 126 | |||
| 127 | gfs2_meta_header_in(&rg->rg_header, buf); | ||
| 128 | rg->rg_flags = be32_to_cpu(str->rg_flags); | ||
| 129 | rg->rg_free = be32_to_cpu(str->rg_free); | ||
| 130 | rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); | ||
| 131 | rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); | ||
| 132 | } | ||
| 133 | |||
| 134 | void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf) | ||
| 135 | { | ||
| 136 | struct gfs2_rgrp *str = buf; | ||
| 137 | |||
| 138 | gfs2_meta_header_out(&rg->rg_header, buf); | ||
| 139 | str->rg_flags = cpu_to_be32(rg->rg_flags); | ||
| 140 | str->rg_free = cpu_to_be32(rg->rg_free); | ||
| 141 | str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); | ||
| 142 | str->__pad = cpu_to_be32(0); | ||
| 143 | str->rg_igeneration = cpu_to_be64(rg->rg_igeneration); | ||
| 144 | memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); | ||
| 145 | } | ||
| 146 | |||
| 147 | void gfs2_quota_in(struct gfs2_quota *qu, const void *buf) | ||
| 148 | { | ||
| 149 | const struct gfs2_quota *str = buf; | ||
| 150 | |||
| 151 | qu->qu_limit = be64_to_cpu(str->qu_limit); | ||
| 152 | qu->qu_warn = be64_to_cpu(str->qu_warn); | ||
| 153 | qu->qu_value = be64_to_cpu(str->qu_value); | ||
| 154 | } | ||
| 155 | |||
| 156 | void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf) | ||
| 157 | { | ||
| 158 | const struct gfs2_dinode *str = buf; | ||
| 159 | |||
| 160 | gfs2_meta_header_in(&di->di_header, buf); | ||
| 161 | gfs2_inum_in(&di->di_num, &str->di_num); | ||
| 162 | |||
| 163 | di->di_mode = be32_to_cpu(str->di_mode); | ||
| 164 | di->di_uid = be32_to_cpu(str->di_uid); | ||
| 165 | di->di_gid = be32_to_cpu(str->di_gid); | ||
| 166 | di->di_nlink = be32_to_cpu(str->di_nlink); | ||
| 167 | di->di_size = be64_to_cpu(str->di_size); | ||
| 168 | di->di_blocks = be64_to_cpu(str->di_blocks); | ||
| 169 | di->di_atime = be64_to_cpu(str->di_atime); | ||
| 170 | di->di_mtime = be64_to_cpu(str->di_mtime); | ||
| 171 | di->di_ctime = be64_to_cpu(str->di_ctime); | ||
| 172 | di->di_major = be32_to_cpu(str->di_major); | ||
| 173 | di->di_minor = be32_to_cpu(str->di_minor); | ||
| 174 | |||
| 175 | di->di_goal_meta = be64_to_cpu(str->di_goal_meta); | ||
| 176 | di->di_goal_data = be64_to_cpu(str->di_goal_data); | ||
| 177 | di->di_generation = be64_to_cpu(str->di_generation); | ||
| 178 | |||
| 179 | di->di_flags = be32_to_cpu(str->di_flags); | ||
| 180 | di->di_payload_format = be32_to_cpu(str->di_payload_format); | ||
| 181 | di->di_height = be16_to_cpu(str->di_height); | ||
| 182 | |||
| 183 | di->di_depth = be16_to_cpu(str->di_depth); | ||
| 184 | di->di_entries = be32_to_cpu(str->di_entries); | ||
| 185 | |||
| 186 | di->di_eattr = be64_to_cpu(str->di_eattr); | ||
| 187 | |||
| 188 | } | ||
| 189 | |||
| 190 | void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf) | ||
| 191 | { | ||
| 192 | struct gfs2_dinode *str = buf; | ||
| 193 | |||
| 194 | gfs2_meta_header_out(&di->di_header, buf); | ||
| 195 | gfs2_inum_out(&di->di_num, (char *)&str->di_num); | ||
| 196 | |||
| 197 | str->di_mode = cpu_to_be32(di->di_mode); | ||
| 198 | str->di_uid = cpu_to_be32(di->di_uid); | ||
| 199 | str->di_gid = cpu_to_be32(di->di_gid); | ||
| 200 | str->di_nlink = cpu_to_be32(di->di_nlink); | ||
| 201 | str->di_size = cpu_to_be64(di->di_size); | ||
| 202 | str->di_blocks = cpu_to_be64(di->di_blocks); | ||
| 203 | str->di_atime = cpu_to_be64(di->di_atime); | ||
| 204 | str->di_mtime = cpu_to_be64(di->di_mtime); | ||
| 205 | str->di_ctime = cpu_to_be64(di->di_ctime); | ||
| 206 | str->di_major = cpu_to_be32(di->di_major); | ||
| 207 | str->di_minor = cpu_to_be32(di->di_minor); | ||
| 208 | |||
| 209 | str->di_goal_meta = cpu_to_be64(di->di_goal_meta); | ||
| 210 | str->di_goal_data = cpu_to_be64(di->di_goal_data); | ||
| 211 | str->di_generation = cpu_to_be64(di->di_generation); | ||
| 212 | |||
| 213 | str->di_flags = cpu_to_be32(di->di_flags); | ||
| 214 | str->di_payload_format = cpu_to_be32(di->di_payload_format); | ||
| 215 | str->di_height = cpu_to_be16(di->di_height); | ||
| 216 | |||
| 217 | str->di_depth = cpu_to_be16(di->di_depth); | ||
| 218 | str->di_entries = cpu_to_be32(di->di_entries); | ||
| 219 | |||
| 220 | str->di_eattr = cpu_to_be64(di->di_eattr); | ||
| 221 | |||
| 222 | } | ||
| 223 | |||
/* Dump all fields of an in-core dinode to the kernel log (debug aid). */
void gfs2_dinode_print(const struct gfs2_dinode *di)
{
	gfs2_meta_header_print(&di->di_header);
	gfs2_inum_print(&di->di_num);

	pv(di, di_mode, "0%o");
	pv(di, di_uid, "%u");
	pv(di, di_gid, "%u");
	pv(di, di_nlink, "%u");
	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
	printk(KERN_INFO "  di_mtime = %lld\n", (long long)di->di_mtime);
	printk(KERN_INFO "  di_ctime = %lld\n", (long long)di->di_ctime);
	pv(di, di_major, "%u");
	pv(di, di_minor, "%u");

	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);

	pv(di, di_flags, "0x%.8X");
	pv(di, di_payload_format, "%u");
	pv(di, di_height, "%u");

	pv(di, di_depth, "%u");
	pv(di, di_entries, "%u");

	printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
}
| 253 | |||
| 254 | void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf) | ||
| 255 | { | ||
| 256 | const struct gfs2_log_header *str = buf; | ||
| 257 | |||
| 258 | gfs2_meta_header_in(&lh->lh_header, buf); | ||
| 259 | lh->lh_sequence = be64_to_cpu(str->lh_sequence); | ||
| 260 | lh->lh_flags = be32_to_cpu(str->lh_flags); | ||
| 261 | lh->lh_tail = be32_to_cpu(str->lh_tail); | ||
| 262 | lh->lh_blkno = be32_to_cpu(str->lh_blkno); | ||
| 263 | lh->lh_hash = be32_to_cpu(str->lh_hash); | ||
| 264 | } | ||
| 265 | |||
| 266 | void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf) | ||
| 267 | { | ||
| 268 | const struct gfs2_inum_range *str = buf; | ||
| 269 | |||
| 270 | ir->ir_start = be64_to_cpu(str->ir_start); | ||
| 271 | ir->ir_length = be64_to_cpu(str->ir_length); | ||
| 272 | } | ||
| 273 | |||
| 274 | void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf) | ||
| 275 | { | ||
| 276 | struct gfs2_inum_range *str = buf; | ||
| 277 | |||
| 278 | str->ir_start = cpu_to_be64(ir->ir_start); | ||
| 279 | str->ir_length = cpu_to_be64(ir->ir_length); | ||
| 280 | } | ||
| 281 | |||
| 282 | void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf) | ||
| 283 | { | ||
| 284 | const struct gfs2_statfs_change *str = buf; | ||
| 285 | |||
| 286 | sc->sc_total = be64_to_cpu(str->sc_total); | ||
| 287 | sc->sc_free = be64_to_cpu(str->sc_free); | ||
| 288 | sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); | ||
| 289 | } | ||
| 290 | |||
| 291 | void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf) | ||
| 292 | { | ||
| 293 | struct gfs2_statfs_change *str = buf; | ||
| 294 | |||
| 295 | str->sc_total = cpu_to_be64(sc->sc_total); | ||
| 296 | str->sc_free = cpu_to_be64(sc->sc_free); | ||
| 297 | str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); | ||
| 298 | } | ||
| 299 | |||
| 300 | void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf) | ||
| 301 | { | ||
| 302 | const struct gfs2_quota_change *str = buf; | ||
| 303 | |||
| 304 | qc->qc_change = be64_to_cpu(str->qc_change); | ||
| 305 | qc->qc_flags = be32_to_cpu(str->qc_flags); | ||
| 306 | qc->qc_id = be32_to_cpu(str->qc_id); | ||
| 307 | } | ||
| 308 | |||
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c new file mode 100644 index 000000000000..4fb743f4e4a4 --- /dev/null +++ b/fs/gfs2/ops_address.c | |||
| @@ -0,0 +1,790 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/pagemap.h> | ||
| 16 | #include <linux/pagevec.h> | ||
| 17 | #include <linux/mpage.h> | ||
| 18 | #include <linux/fs.h> | ||
| 19 | #include <linux/gfs2_ondisk.h> | ||
| 20 | #include <linux/lm_interface.h> | ||
| 21 | |||
| 22 | #include "gfs2.h" | ||
| 23 | #include "incore.h" | ||
| 24 | #include "bmap.h" | ||
| 25 | #include "glock.h" | ||
| 26 | #include "inode.h" | ||
| 27 | #include "log.h" | ||
| 28 | #include "meta_io.h" | ||
| 29 | #include "ops_address.h" | ||
| 30 | #include "quota.h" | ||
| 31 | #include "trans.h" | ||
| 32 | #include "rgrp.h" | ||
| 33 | #include "ops_file.h" | ||
| 34 | #include "util.h" | ||
| 35 | #include "glops.h" | ||
| 36 | |||
| 37 | |||
| 38 | static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | ||
| 39 | unsigned int from, unsigned int to) | ||
| 40 | { | ||
| 41 | struct buffer_head *head = page_buffers(page); | ||
| 42 | unsigned int bsize = head->b_size; | ||
| 43 | struct buffer_head *bh; | ||
| 44 | unsigned int start, end; | ||
| 45 | |||
| 46 | for (bh = head, start = 0; bh != head || !start; | ||
| 47 | bh = bh->b_this_page, start = end) { | ||
| 48 | end = start + bsize; | ||
| 49 | if (end <= from || start >= to) | ||
| 50 | continue; | ||
| 51 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
/**
 * gfs2_get_block - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * The trailing 32 caps the extent mapped per call at 32 blocks —
 * presumably a read/write-ahead extent limit; confirm against
 * gfs2_block_map().
 *
 * Returns: errno
 */

int gfs2_get_block(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, create, bh_result, 32);
}
| 70 | |||
| 71 | /** | ||
| 72 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block | ||
| 73 | * @inode: The inode | ||
| 74 | * @lblock: The block number to look up | ||
| 75 | * @bh_result: The buffer head to return the result in | ||
| 76 | * @create: Non-zero if we may add block to the file | ||
| 77 | * | ||
| 78 | * Returns: errno | ||
| 79 | */ | ||
| 80 | |||
| 81 | static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | ||
| 82 | struct buffer_head *bh_result, int create) | ||
| 83 | { | ||
| 84 | int error; | ||
| 85 | |||
| 86 | error = gfs2_block_map(inode, lblock, 0, bh_result, 1); | ||
| 87 | if (error) | ||
| 88 | return error; | ||
| 89 | if (bh_result->b_blocknr == 0) | ||
| 90 | return -EIO; | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
/* Like gfs2_get_block() but never allocates (create forced to 0);
 * used by the O_DIRECT path. */
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
				 struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, 0, bh_result, 32);
}
| 99 | |||
/**
 * gfs2_writepage - Write complete page
 * @page: Page to write
 * @wbc: writeback control
 *
 * Returns: errno
 *
 * Some of this is copied from block_write_full_page() although we still
 * call it to do most of the work.
 */

static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;
	int error;
	int done_trans = 0;

	/* Writeback requires the inode glock already held exclusively */
	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
		unlock_page(page);
		return -EIO;
	}
	/* Recursion guard: this task is already inside a transaction */
	if (current->journal_info)
		goto out_ignore;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index > end_index || (page->index == end_index && !offset)) {
		page->mapping->a_ops->invalidatepage(page, 0);
		unlock_page(page);
		return 0; /* don't care */
	}

	/* Ordered/journaled data must be added to the transaction first */
	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
		error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
		if (error)
			goto out_ignore;
		if (!page_has_buffers(page)) {
			create_empty_buffers(page, inode->i_sb->s_blocksize,
					     (1 << BH_Dirty)|(1 << BH_Uptodate));
		}
		/* NOTE(review): 'to' is s_blocksize-1, not the page size —
		 * this looks like it covers only the first buffer of the
		 * page; confirm intended range. */
		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
		done_trans = 1;
	}
	error = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
	if (done_trans)
		gfs2_trans_end(sdp);
	gfs2_meta_cache_flush(ip);
	return error;

out_ignore:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}
| 158 | |||
| 159 | static int zero_readpage(struct page *page) | ||
| 160 | { | ||
| 161 | void *kaddr; | ||
| 162 | |||
| 163 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 164 | memset(kaddr, 0, PAGE_CACHE_SIZE); | ||
| 165 | kunmap_atomic(page, KM_USER0); | ||
| 166 | |||
| 167 | SetPageUptodate(page); | ||
| 168 | |||
| 169 | return 0; | ||
| 170 | } | ||
| 171 | |||
| 172 | /** | ||
| 173 | * stuffed_readpage - Fill in a Linux page with stuffed file data | ||
| 174 | * @ip: the inode | ||
| 175 | * @page: the page | ||
| 176 | * | ||
| 177 | * Returns: errno | ||
| 178 | */ | ||
| 179 | |||
| 180 | static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | ||
| 181 | { | ||
| 182 | struct buffer_head *dibh; | ||
| 183 | void *kaddr; | ||
| 184 | int error; | ||
| 185 | |||
| 186 | /* Only the first page of a stuffed file might contain data */ | ||
| 187 | if (unlikely(page->index)) | ||
| 188 | return zero_readpage(page); | ||
| 189 | |||
| 190 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 191 | if (error) | ||
| 192 | return error; | ||
| 193 | |||
| 194 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 195 | memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), | ||
| 196 | ip->i_di.di_size); | ||
| 197 | memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); | ||
| 198 | kunmap_atomic(page, KM_USER0); | ||
| 199 | |||
| 200 | brelse(dibh); | ||
| 201 | |||
| 202 | SetPageUptodate(page); | ||
| 203 | |||
| 204 | return 0; | ||
| 205 | } | ||
| 206 | |||
| 207 | |||
/**
 * gfs2_readpage - readpage with locking
 * @file: The file to read a page for. N.B. This may be NULL if we are
 * reading an internal file.
 * @page: The page to read
 *
 * Takes a shared glock (with atime update) around the read unless the
 * caller already holds it: internal files (sentinel) and the
 * GFF_EXLOCK mmap-write path skip the lock.
 *
 * Returns: errno
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	struct gfs2_file *gf = NULL;
	struct gfs2_holder gh;
	int error;
	int do_unlock = 0;

	if (likely(file != &gfs2_internal_file_sentinel)) {
		if (file) {
			gf = file->private_data;
			if (test_bit(GFF_EXLOCK, &gf->f_flags))
				/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
				goto skip_lock;
		}
		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
		do_unlock = 1;
		error = gfs2_glock_nq_m_atime(1, &gh);
		if (unlikely(error))
			goto out_unlock;
	}

skip_lock:
	if (gfs2_is_stuffed(ip)) {
		error = stuffed_readpage(ip, page);
		unlock_page(page);
	} else
		error = mpage_readpage(page, gfs2_get_block);

	/* A withdrawn filesystem must not return possibly-stale data */
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		error = -EIO;

	if (do_unlock) {
		gfs2_glock_dq_m(1, &gh);
		gfs2_holder_uninit(&gh);
	}
out:
	return error;
out_unlock:
	unlock_page(page);
	if (do_unlock)
		gfs2_holder_uninit(&gh);
	goto out;
}
| 262 | |||
/**
 * gfs2_readpages - Read a bunch of pages at once
 * @file: the file being read (may be NULL or the internal-file sentinel)
 * @mapping: the page cache mapping
 * @pages: list of pages to read
 * @nr_pages: number of pages on the list
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 *    which are slightly inconvenient (such as locking conflicts between
 *    the page lock and the glock) and return having done no I/O. Its
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We have to handle stuffed files here too.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
 *    well as read-ahead.
 */
static int gfs2_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder gh;
	unsigned page_idx;
	int ret;
	int do_unlock = 0;

	if (likely(file != &gfs2_internal_file_sentinel)) {
		if (file) {
			struct gfs2_file *gf = file->private_data;
			/* GFF_EXLOCK: caller already holds the glock */
			if (test_bit(GFF_EXLOCK, &gf->f_flags))
				goto skip_lock;
		}
		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
				 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
		do_unlock = 1;
		ret = gfs2_glock_nq_m_atime(1, &gh);
		if (ret == GLR_TRYFAILED)
			goto out_noerror;
		if (unlikely(ret))
			goto out_unlock;
	}
skip_lock:
	if (gfs2_is_stuffed(ip)) {
		/* Stuffed: fill each page by hand and batch onto the LRU */
		struct pagevec lru_pvec;
		pagevec_init(&lru_pvec, 0);
		for (page_idx = 0; page_idx < nr_pages; page_idx++) {
			struct page *page = list_entry(pages->prev, struct page, lru);
			prefetchw(&page->flags);
			list_del(&page->lru);
			if (!add_to_page_cache(page, mapping,
					       page->index, GFP_KERNEL)) {
				ret = stuffed_readpage(ip, page);
				unlock_page(page);
				if (!pagevec_add(&lru_pvec, page))
					__pagevec_lru_add(&lru_pvec);
			} else {
				page_cache_release(page);
			}
		}
		pagevec_lru_add(&lru_pvec);
		ret = 0;
	} else {
		/* What we really want to do .... */
		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
	}

	if (do_unlock) {
		gfs2_glock_dq_m(1, &gh);
		gfs2_holder_uninit(&gh);
	}
out:
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		ret = -EIO;
	return ret;
out_noerror:
	ret = 0;
out_unlock:
	/* unlock all pages, we can't do any I/O right now */
	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);
		unlock_page(page);
		page_cache_release(page);
	}
	if (do_unlock)
		gfs2_holder_uninit(&gh);
	goto out;
}
| 351 | |||
| 352 | /** | ||
| 353 | * gfs2_prepare_write - Prepare to write a page to a file | ||
| 354 | * @file: The file to write to | ||
| 355 | * @page: The page which is to be prepared for writing | ||
| 356 | * @from: From (byte range within page) | ||
| 357 | * @to: To (byte range within page) | ||
| 358 | * | ||
| 359 | * Returns: errno | ||
| 360 | */ | ||
| 361 | |||
| 362 | static int gfs2_prepare_write(struct file *file, struct page *page, | ||
| 363 | unsigned from, unsigned to) | ||
| 364 | { | ||
| 365 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
| 366 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | ||
| 367 | unsigned int data_blocks, ind_blocks, rblocks; | ||
| 368 | int alloc_required; | ||
| 369 | int error = 0; | ||
| 370 | loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from; | ||
| 371 | loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
| 372 | struct gfs2_alloc *al; | ||
| 373 | |||
| 374 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh); | ||
| 375 | error = gfs2_glock_nq_m_atime(1, &ip->i_gh); | ||
| 376 | if (error) | ||
| 377 | goto out_uninit; | ||
| 378 | |||
| 379 | gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks); | ||
| 380 | |||
| 381 | error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required); | ||
| 382 | if (error) | ||
| 383 | goto out_unlock; | ||
| 384 | |||
| 385 | |||
| 386 | if (alloc_required) { | ||
| 387 | al = gfs2_alloc_get(ip); | ||
| 388 | |||
| 389 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
| 390 | if (error) | ||
| 391 | goto out_alloc_put; | ||
| 392 | |||
| 393 | error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); | ||
| 394 | if (error) | ||
| 395 | goto out_qunlock; | ||
| 396 | |||
| 397 | al->al_requested = data_blocks + ind_blocks; | ||
| 398 | error = gfs2_inplace_reserve(ip); | ||
| 399 | if (error) | ||
| 400 | goto out_qunlock; | ||
| 401 | } | ||
| 402 | |||
| 403 | rblocks = RES_DINODE + ind_blocks; | ||
| 404 | if (gfs2_is_jdata(ip)) | ||
| 405 | rblocks += data_blocks ? data_blocks : 1; | ||
| 406 | if (ind_blocks || data_blocks) | ||
| 407 | rblocks += RES_STATFS + RES_QUOTA; | ||
| 408 | |||
| 409 | error = gfs2_trans_begin(sdp, rblocks, 0); | ||
| 410 | if (error) | ||
| 411 | goto out; | ||
| 412 | |||
| 413 | if (gfs2_is_stuffed(ip)) { | ||
| 414 | if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | ||
| 415 | error = gfs2_unstuff_dinode(ip, page); | ||
| 416 | if (error == 0) | ||
| 417 | goto prepare_write; | ||
| 418 | } else if (!PageUptodate(page)) | ||
| 419 | error = stuffed_readpage(ip, page); | ||
| 420 | goto out; | ||
| 421 | } | ||
| 422 | |||
| 423 | prepare_write: | ||
| 424 | error = block_prepare_write(page, from, to, gfs2_get_block); | ||
| 425 | |||
| 426 | out: | ||
| 427 | if (error) { | ||
| 428 | gfs2_trans_end(sdp); | ||
| 429 | if (alloc_required) { | ||
| 430 | gfs2_inplace_release(ip); | ||
| 431 | out_qunlock: | ||
| 432 | gfs2_quota_unlock(ip); | ||
| 433 | out_alloc_put: | ||
| 434 | gfs2_alloc_put(ip); | ||
| 435 | } | ||
| 436 | out_unlock: | ||
| 437 | gfs2_glock_dq_m(1, &ip->i_gh); | ||
| 438 | out_uninit: | ||
| 439 | gfs2_holder_uninit(&ip->i_gh); | ||
| 440 | } | ||
| 441 | |||
| 442 | return error; | ||
| 443 | } | ||
| 444 | |||
| 445 | /** | ||
| 446 | * gfs2_commit_write - Commit write to a file | ||
| 447 | * @file: The file to write to | ||
| 448 | * @page: The page containing the data | ||
| 449 | * @from: From (byte range within page) | ||
| 450 | * @to: To (byte range within page) | ||
| 451 | * | ||
| 452 | * Returns: errno | ||
| 453 | */ | ||
| 454 | |||
| 455 | static int gfs2_commit_write(struct file *file, struct page *page, | ||
| 456 | unsigned from, unsigned to) | ||
| 457 | { | ||
| 458 | struct inode *inode = page->mapping->host; | ||
| 459 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 460 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 461 | int error = -EOPNOTSUPP; | ||
| 462 | struct buffer_head *dibh; | ||
| 463 | struct gfs2_alloc *al = &ip->i_alloc; | ||
| 464 | struct gfs2_dinode *di; | ||
| 465 | |||
| 466 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) | ||
| 467 | goto fail_nounlock; | ||
| 468 | |||
| 469 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 470 | if (error) | ||
| 471 | goto fail_endtrans; | ||
| 472 | |||
| 473 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 474 | di = (struct gfs2_dinode *)dibh->b_data; | ||
| 475 | |||
| 476 | if (gfs2_is_stuffed(ip)) { | ||
| 477 | u64 file_size; | ||
| 478 | void *kaddr; | ||
| 479 | |||
| 480 | file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to; | ||
| 481 | |||
| 482 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 483 | memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, | ||
| 484 | kaddr + from, to - from); | ||
| 485 | kunmap_atomic(page, KM_USER0); | ||
| 486 | |||
| 487 | SetPageUptodate(page); | ||
| 488 | |||
| 489 | if (inode->i_size < file_size) | ||
| 490 | i_size_write(inode, file_size); | ||
| 491 | } else { | ||
| 492 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || | ||
| 493 | gfs2_is_jdata(ip)) | ||
| 494 | gfs2_page_add_databufs(ip, page, from, to); | ||
| 495 | error = generic_commit_write(file, page, from, to); | ||
| 496 | if (error) | ||
| 497 | goto fail; | ||
| 498 | } | ||
| 499 | |||
| 500 | if (ip->i_di.di_size < inode->i_size) { | ||
| 501 | ip->i_di.di_size = inode->i_size; | ||
| 502 | di->di_size = cpu_to_be64(inode->i_size); | ||
| 503 | } | ||
| 504 | |||
| 505 | di->di_mode = cpu_to_be32(inode->i_mode); | ||
| 506 | di->di_atime = cpu_to_be64(inode->i_atime.tv_sec); | ||
| 507 | di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); | ||
| 508 | di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); | ||
| 509 | |||
| 510 | brelse(dibh); | ||
| 511 | gfs2_trans_end(sdp); | ||
| 512 | if (al->al_requested) { | ||
| 513 | gfs2_inplace_release(ip); | ||
| 514 | gfs2_quota_unlock(ip); | ||
| 515 | gfs2_alloc_put(ip); | ||
| 516 | } | ||
| 517 | gfs2_glock_dq_m(1, &ip->i_gh); | ||
| 518 | gfs2_holder_uninit(&ip->i_gh); | ||
| 519 | return 0; | ||
| 520 | |||
| 521 | fail: | ||
| 522 | brelse(dibh); | ||
| 523 | fail_endtrans: | ||
| 524 | gfs2_trans_end(sdp); | ||
| 525 | if (al->al_requested) { | ||
| 526 | gfs2_inplace_release(ip); | ||
| 527 | gfs2_quota_unlock(ip); | ||
| 528 | gfs2_alloc_put(ip); | ||
| 529 | } | ||
| 530 | gfs2_glock_dq_m(1, &ip->i_gh); | ||
| 531 | gfs2_holder_uninit(&ip->i_gh); | ||
| 532 | fail_nounlock: | ||
| 533 | ClearPageUptodate(page); | ||
| 534 | return error; | ||
| 535 | } | ||
| 536 | |||
| 537 | /** | ||
| 538 | * gfs2_bmap - Block map function | ||
| 539 | * @mapping: Address space info | ||
| 540 | * @lblock: The block to map | ||
| 541 | * | ||
| 542 | * Returns: The disk address for the block or 0 on hole or error | ||
| 543 | */ | ||
| 544 | |||
| 545 | static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) | ||
| 546 | { | ||
| 547 | struct gfs2_inode *ip = GFS2_I(mapping->host); | ||
| 548 | struct gfs2_holder i_gh; | ||
| 549 | sector_t dblock = 0; | ||
| 550 | int error; | ||
| 551 | |||
| 552 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | ||
| 553 | if (error) | ||
| 554 | return 0; | ||
| 555 | |||
| 556 | if (!gfs2_is_stuffed(ip)) | ||
| 557 | dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); | ||
| 558 | |||
| 559 | gfs2_glock_dq_uninit(&i_gh); | ||
| 560 | |||
| 561 | return dblock; | ||
| 562 | } | ||
| 563 | |||
/*
 * discard_buffer - detach a buffer from GFS2 log state and clean it
 * @sdp: the filesystem
 * @bh: the buffer being discarded
 *
 * Severs the bufdata <-> buffer_head link under the log lock (the log
 * code may still reference the bufdata), then clears the buffer's state
 * bits under the buffer lock so it can no longer be written back.
 * Lock order (log lock first, then buffer lock) is deliberate.
 */
static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	gfs2_log_lock(sdp);
	bd = bh->b_private;
	if (bd) {
		bd->bd_bh = NULL;
		bh->b_private = NULL;
	}
	gfs2_log_unlock(sdp);

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	unlock_buffer(bh);
}
| 585 | |||
| 586 | static void gfs2_invalidatepage(struct page *page, unsigned long offset) | ||
| 587 | { | ||
| 588 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | ||
| 589 | struct buffer_head *head, *bh, *next; | ||
| 590 | unsigned int curr_off = 0; | ||
| 591 | |||
| 592 | BUG_ON(!PageLocked(page)); | ||
| 593 | if (!page_has_buffers(page)) | ||
| 594 | return; | ||
| 595 | |||
| 596 | bh = head = page_buffers(page); | ||
| 597 | do { | ||
| 598 | unsigned int next_off = curr_off + bh->b_size; | ||
| 599 | next = bh->b_this_page; | ||
| 600 | |||
| 601 | if (offset <= curr_off) | ||
| 602 | discard_buffer(sdp, bh); | ||
| 603 | |||
| 604 | curr_off = next_off; | ||
| 605 | bh = next; | ||
| 606 | } while (bh != head); | ||
| 607 | |||
| 608 | if (!offset) | ||
| 609 | try_to_release_page(page, 0); | ||
| 610 | |||
| 611 | return; | ||
| 612 | } | ||
| 613 | |||
/*
 * gfs2_direct_IO - direct I/O entry point
 *
 * Takes i_mutex for reads (writes arrive with it already held) and a
 * shared glock.  Journaled-data and stuffed files silently fall back to
 * buffered I/O by returning 0 bytes transferred, as does an I/O that
 * starts beyond EOF.
 */
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int rv;

	if (rw == READ)
		mutex_lock(&inode->i_mutex);
	/*
	 * Shared lock, even if its a write, since we do no allocation
	 * on this path. All we need change is atime.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
	rv = gfs2_glock_nq_m_atime(1, &gh);
	/* NOTE(review): on nq failure this still falls through "out" and
	 * dequeues a glock that was never acquired — verify gfs2_glock_dq_m
	 * is safe here, or that this path should skip the dq. */
	if (rv)
		goto out;

	if (offset > i_size_read(inode))
		goto out;

	/*
	 * Should we return an error here? I can't see that O_DIRECT for
	 * a journaled file makes any sense. For now we'll silently fall
	 * back to buffered I/O, likewise we do the same for stuffed
	 * files since they are (a) small and (b) unaligned.
	 */
	if (gfs2_is_jdata(ip))
		goto out;

	if (gfs2_is_stuffed(ip))
		goto out;

	rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
					    inode->i_sb->s_bdev,
					    iov, offset, nr_segs,
					    gfs2_get_block_direct, NULL);
out:
	gfs2_glock_dq_m(1, &gh);
	gfs2_holder_uninit(&gh);
	if (rw == READ)
		mutex_unlock(&inode->i_mutex);

	return rv;
}
| 662 | |||
| 663 | /** | ||
| 664 | * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out. | ||
| 665 | * @bh: the buffer we're stuck on | ||
| 666 | * | ||
| 667 | */ | ||
| 668 | |||
| 669 | static void stuck_releasepage(struct buffer_head *bh) | ||
| 670 | { | ||
| 671 | struct inode *inode = bh->b_page->mapping->host; | ||
| 672 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | ||
| 673 | struct gfs2_bufdata *bd = bh->b_private; | ||
| 674 | struct gfs2_glock *gl; | ||
| 675 | static unsigned limit = 0; | ||
| 676 | |||
| 677 | if (limit > 3) | ||
| 678 | return; | ||
| 679 | limit++; | ||
| 680 | |||
| 681 | fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode); | ||
| 682 | fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n", | ||
| 683 | (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count)); | ||
| 684 | fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh)); | ||
| 685 | fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL"); | ||
| 686 | |||
| 687 | if (!bd) | ||
| 688 | return; | ||
| 689 | |||
| 690 | gl = bd->bd_gl; | ||
| 691 | |||
| 692 | fs_warn(sdp, "gl = (%u, %llu)\n", | ||
| 693 | gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number); | ||
| 694 | |||
| 695 | fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n", | ||
| 696 | (list_empty(&bd->bd_list_tr)) ? "no" : "yes", | ||
| 697 | (list_empty(&bd->bd_le.le_list)) ? "no" : "yes"); | ||
| 698 | |||
| 699 | if (gl->gl_ops == &gfs2_inode_glops) { | ||
| 700 | struct gfs2_inode *ip = gl->gl_object; | ||
| 701 | unsigned int x; | ||
| 702 | |||
| 703 | if (!ip) | ||
| 704 | return; | ||
| 705 | |||
| 706 | fs_warn(sdp, "ip = %llu %llu\n", | ||
| 707 | (unsigned long long)ip->i_num.no_formal_ino, | ||
| 708 | (unsigned long long)ip->i_num.no_addr); | ||
| 709 | |||
| 710 | for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) | ||
| 711 | fs_warn(sdp, "ip->i_cache[%u] = %s\n", | ||
| 712 | x, (ip->i_cache[x]) ? "!NULL" : "NULL"); | ||
| 713 | } | ||
| 714 | } | ||
| 715 | |||
| 716 | /** | ||
| 717 | * gfs2_releasepage - free the metadata associated with a page | ||
| 718 | * @page: the page that's being released | ||
| 719 | * @gfp_mask: passed from Linux VFS, ignored by us | ||
| 720 | * | ||
| 721 | * Call try_to_free_buffers() if the buffers in this page can be | ||
| 722 | * released. | ||
| 723 | * | ||
| 724 | * Returns: 0 | ||
| 725 | */ | ||
| 726 | |||
| 727 | int gfs2_releasepage(struct page *page, gfp_t gfp_mask) | ||
| 728 | { | ||
| 729 | struct inode *aspace = page->mapping->host; | ||
| 730 | struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; | ||
| 731 | struct buffer_head *bh, *head; | ||
| 732 | struct gfs2_bufdata *bd; | ||
| 733 | unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ; | ||
| 734 | |||
| 735 | if (!page_has_buffers(page)) | ||
| 736 | goto out; | ||
| 737 | |||
| 738 | head = bh = page_buffers(page); | ||
| 739 | do { | ||
| 740 | while (atomic_read(&bh->b_count)) { | ||
| 741 | if (!atomic_read(&aspace->i_writecount)) | ||
| 742 | return 0; | ||
| 743 | |||
| 744 | if (time_after_eq(jiffies, t)) { | ||
| 745 | stuck_releasepage(bh); | ||
| 746 | /* should we withdraw here? */ | ||
| 747 | return 0; | ||
| 748 | } | ||
| 749 | |||
| 750 | yield(); | ||
| 751 | } | ||
| 752 | |||
| 753 | gfs2_assert_warn(sdp, !buffer_pinned(bh)); | ||
| 754 | gfs2_assert_warn(sdp, !buffer_dirty(bh)); | ||
| 755 | |||
| 756 | gfs2_log_lock(sdp); | ||
| 757 | bd = bh->b_private; | ||
| 758 | if (bd) { | ||
| 759 | gfs2_assert_warn(sdp, bd->bd_bh == bh); | ||
| 760 | gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); | ||
| 761 | gfs2_assert_warn(sdp, !bd->bd_ail); | ||
| 762 | bd->bd_bh = NULL; | ||
| 763 | if (!list_empty(&bd->bd_le.le_list)) | ||
| 764 | bd = NULL; | ||
| 765 | bh->b_private = NULL; | ||
| 766 | } | ||
| 767 | gfs2_log_unlock(sdp); | ||
| 768 | if (bd) | ||
| 769 | kmem_cache_free(gfs2_bufdata_cachep, bd); | ||
| 770 | |||
| 771 | bh = bh->b_this_page; | ||
| 772 | } while (bh != head); | ||
| 773 | |||
| 774 | out: | ||
| 775 | return try_to_free_buffers(page); | ||
| 776 | } | ||
| 777 | |||
| 778 | const struct address_space_operations gfs2_file_aops = { | ||
| 779 | .writepage = gfs2_writepage, | ||
| 780 | .readpage = gfs2_readpage, | ||
| 781 | .readpages = gfs2_readpages, | ||
| 782 | .sync_page = block_sync_page, | ||
| 783 | .prepare_write = gfs2_prepare_write, | ||
| 784 | .commit_write = gfs2_commit_write, | ||
| 785 | .bmap = gfs2_bmap, | ||
| 786 | .invalidatepage = gfs2_invalidatepage, | ||
| 787 | .releasepage = gfs2_releasepage, | ||
| 788 | .direct_IO = gfs2_direct_IO, | ||
| 789 | }; | ||
| 790 | |||
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h new file mode 100644 index 000000000000..35aaee4aa7e1 --- /dev/null +++ b/fs/gfs2/ops_address.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __OPS_ADDRESS_DOT_H__ | ||
| 11 | #define __OPS_ADDRESS_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/fs.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/mm.h> | ||
| 16 | |||
| 17 | extern const struct address_space_operations gfs2_file_aops; | ||
| 18 | extern int gfs2_get_block(struct inode *inode, sector_t lblock, | ||
| 19 | struct buffer_head *bh_result, int create); | ||
| 20 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | ||
| 21 | |||
| 22 | #endif /* __OPS_ADDRESS_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c new file mode 100644 index 000000000000..00041b1b8025 --- /dev/null +++ b/fs/gfs2/ops_dentry.c | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/smp_lock.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/crc32.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "dir.h" | ||
| 23 | #include "glock.h" | ||
| 24 | #include "ops_dentry.h" | ||
| 25 | #include "util.h" | ||
| 26 | |||
| 27 | /** | ||
| 28 | * gfs2_drevalidate - Check directory lookup consistency | ||
| 29 | * @dentry: the mapping to check | ||
| 30 | * @nd: | ||
| 31 | * | ||
| 32 | * Check to make sure the lookup necessary to arrive at this inode from its | ||
| 33 | * parent is still good. | ||
| 34 | * | ||
| 35 | * Returns: 1 if the dentry is ok, 0 if it isn't | ||
| 36 | */ | ||
| 37 | |||
| 38 | static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | ||
| 39 | { | ||
| 40 | struct dentry *parent = dget_parent(dentry); | ||
| 41 | struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); | ||
| 42 | struct gfs2_inode *dip = GFS2_I(parent->d_inode); | ||
| 43 | struct inode *inode = dentry->d_inode; | ||
| 44 | struct gfs2_holder d_gh; | ||
| 45 | struct gfs2_inode *ip; | ||
| 46 | struct gfs2_inum inum; | ||
| 47 | unsigned int type; | ||
| 48 | int error; | ||
| 49 | |||
| 50 | if (inode && is_bad_inode(inode)) | ||
| 51 | goto invalid; | ||
| 52 | |||
| 53 | if (sdp->sd_args.ar_localcaching) | ||
| 54 | goto valid; | ||
| 55 | |||
| 56 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | ||
| 57 | if (error) | ||
| 58 | goto fail; | ||
| 59 | |||
| 60 | error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); | ||
| 61 | switch (error) { | ||
| 62 | case 0: | ||
| 63 | if (!inode) | ||
| 64 | goto invalid_gunlock; | ||
| 65 | break; | ||
| 66 | case -ENOENT: | ||
| 67 | if (!inode) | ||
| 68 | goto valid_gunlock; | ||
| 69 | goto invalid_gunlock; | ||
| 70 | default: | ||
| 71 | goto fail_gunlock; | ||
| 72 | } | ||
| 73 | |||
| 74 | ip = GFS2_I(inode); | ||
| 75 | |||
| 76 | if (!gfs2_inum_equal(&ip->i_num, &inum)) | ||
| 77 | goto invalid_gunlock; | ||
| 78 | |||
| 79 | if (IF2DT(ip->i_di.di_mode) != type) { | ||
| 80 | gfs2_consist_inode(dip); | ||
| 81 | goto fail_gunlock; | ||
| 82 | } | ||
| 83 | |||
| 84 | valid_gunlock: | ||
| 85 | gfs2_glock_dq_uninit(&d_gh); | ||
| 86 | valid: | ||
| 87 | dput(parent); | ||
| 88 | return 1; | ||
| 89 | |||
| 90 | invalid_gunlock: | ||
| 91 | gfs2_glock_dq_uninit(&d_gh); | ||
| 92 | invalid: | ||
| 93 | if (inode && S_ISDIR(inode->i_mode)) { | ||
| 94 | if (have_submounts(dentry)) | ||
| 95 | goto valid; | ||
| 96 | shrink_dcache_parent(dentry); | ||
| 97 | } | ||
| 98 | d_drop(dentry); | ||
| 99 | dput(parent); | ||
| 100 | return 0; | ||
| 101 | |||
| 102 | fail_gunlock: | ||
| 103 | gfs2_glock_dq_uninit(&d_gh); | ||
| 104 | fail: | ||
| 105 | dput(parent); | ||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | static int gfs2_dhash(struct dentry *dentry, struct qstr *str) | ||
| 110 | { | ||
| 111 | str->hash = gfs2_disk_hash(str->name, str->len); | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | struct dentry_operations gfs2_dops = { | ||
| 116 | .d_revalidate = gfs2_drevalidate, | ||
| 117 | .d_hash = gfs2_dhash, | ||
| 118 | }; | ||
| 119 | |||
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h new file mode 100644 index 000000000000..5caa3db4d3f5 --- /dev/null +++ b/fs/gfs2/ops_dentry.h | |||
| @@ -0,0 +1,17 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __OPS_DENTRY_DOT_H__ | ||
| 11 | #define __OPS_DENTRY_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/dcache.h> | ||
| 14 | |||
| 15 | extern struct dentry_operations gfs2_dops; | ||
| 16 | |||
| 17 | #endif /* __OPS_DENTRY_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c new file mode 100644 index 000000000000..86127d93bd35 --- /dev/null +++ b/fs/gfs2/ops_export.c | |||
| @@ -0,0 +1,298 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/crc32.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "dir.h" | ||
| 22 | #include "glock.h" | ||
| 23 | #include "glops.h" | ||
| 24 | #include "inode.h" | ||
| 25 | #include "ops_export.h" | ||
| 26 | #include "rgrp.h" | ||
| 27 | #include "util.h" | ||
| 28 | |||
| 29 | static struct dentry *gfs2_decode_fh(struct super_block *sb, | ||
| 30 | __u32 *fh, | ||
| 31 | int fh_len, | ||
| 32 | int fh_type, | ||
| 33 | int (*acceptable)(void *context, | ||
| 34 | struct dentry *dentry), | ||
| 35 | void *context) | ||
| 36 | { | ||
| 37 | struct gfs2_fh_obj fh_obj; | ||
| 38 | struct gfs2_inum *this, parent; | ||
| 39 | |||
| 40 | if (fh_type != fh_len) | ||
| 41 | return NULL; | ||
| 42 | |||
| 43 | this = &fh_obj.this; | ||
| 44 | fh_obj.imode = DT_UNKNOWN; | ||
| 45 | memset(&parent, 0, sizeof(struct gfs2_inum)); | ||
| 46 | |||
| 47 | switch (fh_type) { | ||
| 48 | case GFS2_LARGE_FH_SIZE: | ||
| 49 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; | ||
| 50 | parent.no_formal_ino |= be32_to_cpu(fh[5]); | ||
| 51 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; | ||
| 52 | parent.no_addr |= be32_to_cpu(fh[7]); | ||
| 53 | fh_obj.imode = be32_to_cpu(fh[8]); | ||
| 54 | case GFS2_SMALL_FH_SIZE: | ||
| 55 | this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; | ||
| 56 | this->no_formal_ino |= be32_to_cpu(fh[1]); | ||
| 57 | this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32; | ||
| 58 | this->no_addr |= be32_to_cpu(fh[3]); | ||
| 59 | break; | ||
| 60 | default: | ||
| 61 | return NULL; | ||
| 62 | } | ||
| 63 | |||
| 64 | return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent, | ||
| 65 | acceptable, context); | ||
| 66 | } | ||
| 67 | |||
| 68 | static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | ||
| 69 | int connectable) | ||
| 70 | { | ||
| 71 | struct inode *inode = dentry->d_inode; | ||
| 72 | struct super_block *sb = inode->i_sb; | ||
| 73 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 74 | |||
| 75 | if (*len < GFS2_SMALL_FH_SIZE || | ||
| 76 | (connectable && *len < GFS2_LARGE_FH_SIZE)) | ||
| 77 | return 255; | ||
| 78 | |||
| 79 | fh[0] = ip->i_num.no_formal_ino >> 32; | ||
| 80 | fh[0] = cpu_to_be32(fh[0]); | ||
| 81 | fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF; | ||
| 82 | fh[1] = cpu_to_be32(fh[1]); | ||
| 83 | fh[2] = ip->i_num.no_addr >> 32; | ||
| 84 | fh[2] = cpu_to_be32(fh[2]); | ||
| 85 | fh[3] = ip->i_num.no_addr & 0xFFFFFFFF; | ||
| 86 | fh[3] = cpu_to_be32(fh[3]); | ||
| 87 | *len = GFS2_SMALL_FH_SIZE; | ||
| 88 | |||
| 89 | if (!connectable || inode == sb->s_root->d_inode) | ||
| 90 | return *len; | ||
| 91 | |||
| 92 | spin_lock(&dentry->d_lock); | ||
| 93 | inode = dentry->d_parent->d_inode; | ||
| 94 | ip = GFS2_I(inode); | ||
| 95 | igrab(inode); | ||
| 96 | spin_unlock(&dentry->d_lock); | ||
| 97 | |||
| 98 | fh[4] = ip->i_num.no_formal_ino >> 32; | ||
| 99 | fh[4] = cpu_to_be32(fh[4]); | ||
| 100 | fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF; | ||
| 101 | fh[5] = cpu_to_be32(fh[5]); | ||
| 102 | fh[6] = ip->i_num.no_addr >> 32; | ||
| 103 | fh[6] = cpu_to_be32(fh[6]); | ||
| 104 | fh[7] = ip->i_num.no_addr & 0xFFFFFFFF; | ||
| 105 | fh[7] = cpu_to_be32(fh[7]); | ||
| 106 | |||
| 107 | fh[8] = cpu_to_be32(inode->i_mode); | ||
| 108 | fh[9] = 0; /* pad to double word */ | ||
| 109 | *len = GFS2_LARGE_FH_SIZE; | ||
| 110 | |||
| 111 | iput(inode); | ||
| 112 | |||
| 113 | return *len; | ||
| 114 | } | ||
| 115 | |||
| 116 | struct get_name_filldir { | ||
| 117 | struct gfs2_inum inum; | ||
| 118 | char *name; | ||
| 119 | }; | ||
| 120 | |||
| 121 | static int get_name_filldir(void *opaque, const char *name, unsigned int length, | ||
| 122 | u64 offset, struct gfs2_inum *inum, | ||
| 123 | unsigned int type) | ||
| 124 | { | ||
| 125 | struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; | ||
| 126 | |||
| 127 | if (!gfs2_inum_equal(inum, &gnfd->inum)) | ||
| 128 | return 0; | ||
| 129 | |||
| 130 | memcpy(gnfd->name, name, length); | ||
| 131 | gnfd->name[length] = 0; | ||
| 132 | |||
| 133 | return 1; | ||
| 134 | } | ||
| 135 | |||
| 136 | static int gfs2_get_name(struct dentry *parent, char *name, | ||
| 137 | struct dentry *child) | ||
| 138 | { | ||
| 139 | struct inode *dir = parent->d_inode; | ||
| 140 | struct inode *inode = child->d_inode; | ||
| 141 | struct gfs2_inode *dip, *ip; | ||
| 142 | struct get_name_filldir gnfd; | ||
| 143 | struct gfs2_holder gh; | ||
| 144 | u64 offset = 0; | ||
| 145 | int error; | ||
| 146 | |||
| 147 | if (!dir) | ||
| 148 | return -EINVAL; | ||
| 149 | |||
| 150 | if (!S_ISDIR(dir->i_mode) || !inode) | ||
| 151 | return -EINVAL; | ||
| 152 | |||
| 153 | dip = GFS2_I(dir); | ||
| 154 | ip = GFS2_I(inode); | ||
| 155 | |||
| 156 | *name = 0; | ||
| 157 | gnfd.inum = ip->i_num; | ||
| 158 | gnfd.name = name; | ||
| 159 | |||
| 160 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); | ||
| 161 | if (error) | ||
| 162 | return error; | ||
| 163 | |||
| 164 | error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir); | ||
| 165 | |||
| 166 | gfs2_glock_dq_uninit(&gh); | ||
| 167 | |||
| 168 | if (!error && !*name) | ||
| 169 | error = -ENOENT; | ||
| 170 | |||
| 171 | return error; | ||
| 172 | } | ||
| 173 | |||
| 174 | static struct dentry *gfs2_get_parent(struct dentry *child) | ||
| 175 | { | ||
| 176 | struct qstr dotdot; | ||
| 177 | struct inode *inode; | ||
| 178 | struct dentry *dentry; | ||
| 179 | |||
| 180 | gfs2_str2qstr(&dotdot, ".."); | ||
| 181 | inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL); | ||
| 182 | |||
| 183 | if (!inode) | ||
| 184 | return ERR_PTR(-ENOENT); | ||
| 185 | /* | ||
| 186 | * In case of an error, @inode carries the error value, and we | ||
| 187 | * have to return that as a(n invalid) pointer to dentry. | ||
| 188 | */ | ||
| 189 | if (IS_ERR(inode)) | ||
| 190 | return ERR_PTR(PTR_ERR(inode)); | ||
| 191 | |||
| 192 | dentry = d_alloc_anon(inode); | ||
| 193 | if (!dentry) { | ||
| 194 | iput(inode); | ||
| 195 | return ERR_PTR(-ENOMEM); | ||
| 196 | } | ||
| 197 | |||
| 198 | return dentry; | ||
| 199 | } | ||
| 200 | |||
| 201 | static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) | ||
| 202 | { | ||
| 203 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 204 | struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj; | ||
| 205 | struct gfs2_inum *inum = &fh_obj->this; | ||
| 206 | struct gfs2_holder i_gh, ri_gh, rgd_gh; | ||
| 207 | struct gfs2_rgrpd *rgd; | ||
| 208 | struct inode *inode; | ||
| 209 | struct dentry *dentry; | ||
| 210 | int error; | ||
| 211 | |||
| 212 | /* System files? */ | ||
| 213 | |||
| 214 | inode = gfs2_ilookup(sb, inum); | ||
| 215 | if (inode) { | ||
| 216 | if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) { | ||
| 217 | iput(inode); | ||
| 218 | return ERR_PTR(-ESTALE); | ||
| 219 | } | ||
| 220 | goto out_inode; | ||
| 221 | } | ||
| 222 | |||
| 223 | error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, | ||
| 224 | LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, | ||
| 225 | &i_gh); | ||
| 226 | if (error) | ||
| 227 | return ERR_PTR(error); | ||
| 228 | |||
| 229 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
| 230 | if (error) | ||
| 231 | goto fail; | ||
| 232 | |||
| 233 | error = -EINVAL; | ||
| 234 | rgd = gfs2_blk2rgrpd(sdp, inum->no_addr); | ||
| 235 | if (!rgd) | ||
| 236 | goto fail_rindex; | ||
| 237 | |||
| 238 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); | ||
| 239 | if (error) | ||
| 240 | goto fail_rindex; | ||
| 241 | |||
| 242 | error = -ESTALE; | ||
| 243 | if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE) | ||
| 244 | goto fail_rgd; | ||
| 245 | |||
| 246 | gfs2_glock_dq_uninit(&rgd_gh); | ||
| 247 | gfs2_glock_dq_uninit(&ri_gh); | ||
| 248 | |||
| 249 | inode = gfs2_inode_lookup(sb, inum, fh_obj->imode); | ||
| 250 | if (!inode) | ||
| 251 | goto fail; | ||
| 252 | if (IS_ERR(inode)) { | ||
| 253 | error = PTR_ERR(inode); | ||
| 254 | goto fail; | ||
| 255 | } | ||
| 256 | |||
| 257 | error = gfs2_inode_refresh(GFS2_I(inode)); | ||
| 258 | if (error) { | ||
| 259 | iput(inode); | ||
| 260 | goto fail; | ||
| 261 | } | ||
| 262 | |||
| 263 | error = -EIO; | ||
| 264 | if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) { | ||
| 265 | iput(inode); | ||
| 266 | goto fail; | ||
| 267 | } | ||
| 268 | |||
| 269 | gfs2_glock_dq_uninit(&i_gh); | ||
| 270 | |||
| 271 | out_inode: | ||
| 272 | dentry = d_alloc_anon(inode); | ||
| 273 | if (!dentry) { | ||
| 274 | iput(inode); | ||
| 275 | return ERR_PTR(-ENOMEM); | ||
| 276 | } | ||
| 277 | |||
| 278 | return dentry; | ||
| 279 | |||
| 280 | fail_rgd: | ||
| 281 | gfs2_glock_dq_uninit(&rgd_gh); | ||
| 282 | |||
| 283 | fail_rindex: | ||
| 284 | gfs2_glock_dq_uninit(&ri_gh); | ||
| 285 | |||
| 286 | fail: | ||
| 287 | gfs2_glock_dq_uninit(&i_gh); | ||
| 288 | return ERR_PTR(error); | ||
| 289 | } | ||
| 290 | |||
| 291 | struct export_operations gfs2_export_ops = { | ||
| 292 | .decode_fh = gfs2_decode_fh, | ||
| 293 | .encode_fh = gfs2_encode_fh, | ||
| 294 | .get_name = gfs2_get_name, | ||
| 295 | .get_parent = gfs2_get_parent, | ||
| 296 | .get_dentry = gfs2_get_dentry, | ||
| 297 | }; | ||
| 298 | |||
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h new file mode 100644 index 000000000000..09aca5046fb1 --- /dev/null +++ b/fs/gfs2/ops_export.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __OPS_EXPORT_DOT_H__ | ||
| 11 | #define __OPS_EXPORT_DOT_H__ | ||
| 12 | |||
| 13 | #define GFS2_SMALL_FH_SIZE 4 | ||
| 14 | #define GFS2_LARGE_FH_SIZE 10 | ||
| 15 | |||
| 16 | extern struct export_operations gfs2_export_ops; | ||
| 17 | struct gfs2_fh_obj { | ||
| 18 | struct gfs2_inum this; | ||
| 19 | __u32 imode; | ||
| 20 | }; | ||
| 21 | |||
| 22 | #endif /* __OPS_EXPORT_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c new file mode 100644 index 000000000000..3064f133bf3c --- /dev/null +++ b/fs/gfs2/ops_file.c | |||
| @@ -0,0 +1,661 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/pagemap.h> | ||
| 16 | #include <linux/uio.h> | ||
| 17 | #include <linux/blkdev.h> | ||
| 18 | #include <linux/mm.h> | ||
| 19 | #include <linux/smp_lock.h> | ||
| 20 | #include <linux/fs.h> | ||
| 21 | #include <linux/gfs2_ondisk.h> | ||
| 22 | #include <linux/ext2_fs.h> | ||
| 23 | #include <linux/crc32.h> | ||
| 24 | #include <linux/lm_interface.h> | ||
| 25 | #include <asm/uaccess.h> | ||
| 26 | |||
| 27 | #include "gfs2.h" | ||
| 28 | #include "incore.h" | ||
| 29 | #include "bmap.h" | ||
| 30 | #include "dir.h" | ||
| 31 | #include "glock.h" | ||
| 32 | #include "glops.h" | ||
| 33 | #include "inode.h" | ||
| 34 | #include "lm.h" | ||
| 35 | #include "log.h" | ||
| 36 | #include "meta_io.h" | ||
| 37 | #include "ops_file.h" | ||
| 38 | #include "ops_vm.h" | ||
| 39 | #include "quota.h" | ||
| 40 | #include "rgrp.h" | ||
| 41 | #include "trans.h" | ||
| 42 | #include "util.h" | ||
| 43 | #include "eaops.h" | ||
| 44 | |||
| 45 | /* For regular, non-NFS */ | ||
| 46 | struct filldir_reg { | ||
| 47 | struct gfs2_sbd *fdr_sbd; | ||
| 48 | int fdr_prefetch; | ||
| 49 | |||
| 50 | filldir_t fdr_filldir; | ||
| 51 | void *fdr_opaque; | ||
| 52 | }; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Most fields left uninitialised to catch anybody who tries to | ||
| 56 | * use them. f_flags set to prevent file_accessed() from touching | ||
| 57 | * any other part of this. Its use is purely as a flag so that we | ||
| 58 | * know (in readpage()) whether or not do to locking. | ||
| 59 | */ | ||
| 60 | struct file gfs2_internal_file_sentinel = { | ||
| 61 | .f_flags = O_NOATIME|O_RDONLY, | ||
| 62 | }; | ||
| 63 | |||
| 64 | static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, | ||
| 65 | unsigned long offset, unsigned long size) | ||
| 66 | { | ||
| 67 | char *kaddr; | ||
| 68 | unsigned long count = desc->count; | ||
| 69 | |||
| 70 | if (size > count) | ||
| 71 | size = count; | ||
| 72 | |||
| 73 | kaddr = kmap(page); | ||
| 74 | memcpy(desc->arg.buf, kaddr + offset, size); | ||
| 75 | kunmap(page); | ||
| 76 | |||
| 77 | desc->count = count - size; | ||
| 78 | desc->written += size; | ||
| 79 | desc->arg.buf += size; | ||
| 80 | return size; | ||
| 81 | } | ||
| 82 | |||
| 83 | int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, | ||
| 84 | char *buf, loff_t *pos, unsigned size) | ||
| 85 | { | ||
| 86 | struct inode *inode = &ip->i_inode; | ||
| 87 | read_descriptor_t desc; | ||
| 88 | desc.written = 0; | ||
| 89 | desc.arg.buf = buf; | ||
| 90 | desc.count = size; | ||
| 91 | desc.error = 0; | ||
| 92 | do_generic_mapping_read(inode->i_mapping, ra_state, | ||
| 93 | &gfs2_internal_file_sentinel, pos, &desc, | ||
| 94 | gfs2_read_actor); | ||
| 95 | return desc.written ? desc.written : desc.error; | ||
| 96 | } | ||
| 97 | |||
| 98 | /** | ||
| 99 | * gfs2_llseek - seek to a location in a file | ||
| 100 | * @file: the file | ||
| 101 | * @offset: the offset | ||
| 102 | * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) | ||
| 103 | * | ||
| 104 | * SEEK_END requires the glock for the file because it references the | ||
| 105 | * file's size. | ||
| 106 | * | ||
| 107 | * Returns: The new offset, or errno | ||
| 108 | */ | ||
| 109 | |||
| 110 | static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) | ||
| 111 | { | ||
| 112 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
| 113 | struct gfs2_holder i_gh; | ||
| 114 | loff_t error; | ||
| 115 | |||
| 116 | if (origin == 2) { | ||
| 117 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | ||
| 118 | &i_gh); | ||
| 119 | if (!error) { | ||
| 120 | error = remote_llseek(file, offset, origin); | ||
| 121 | gfs2_glock_dq_uninit(&i_gh); | ||
| 122 | } | ||
| 123 | } else | ||
| 124 | error = remote_llseek(file, offset, origin); | ||
| 125 | |||
| 126 | return error; | ||
| 127 | } | ||
| 128 | |||
| 129 | /** | ||
| 130 | * filldir_func - Report a directory entry to the caller of gfs2_dir_read() | ||
| 131 | * @opaque: opaque data used by the function | ||
| 132 | * @name: the name of the directory entry | ||
| 133 | * @length: the length of the name | ||
| 134 | * @offset: the entry's offset in the directory | ||
| 135 | * @inum: the inode number the entry points to | ||
| 136 | * @type: the type of inode the entry points to | ||
| 137 | * | ||
| 138 | * Returns: 0 on success, 1 if buffer full | ||
| 139 | */ | ||
| 140 | |||
| 141 | static int filldir_func(void *opaque, const char *name, unsigned int length, | ||
| 142 | u64 offset, struct gfs2_inum *inum, | ||
| 143 | unsigned int type) | ||
| 144 | { | ||
| 145 | struct filldir_reg *fdr = (struct filldir_reg *)opaque; | ||
| 146 | struct gfs2_sbd *sdp = fdr->fdr_sbd; | ||
| 147 | int error; | ||
| 148 | |||
| 149 | error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, | ||
| 150 | inum->no_addr, type); | ||
| 151 | if (error) | ||
| 152 | return 1; | ||
| 153 | |||
| 154 | if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { | ||
| 155 | gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops, | ||
| 156 | LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); | ||
| 157 | gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops, | ||
| 158 | LM_ST_SHARED, LM_FLAG_TRY); | ||
| 159 | } | ||
| 160 | |||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | |||
| 164 | /** | ||
| 165 | * gfs2_readdir - Read directory entries from a directory | ||
| 166 | * @file: The directory to read from | ||
| 167 | * @dirent: Buffer for dirents | ||
| 168 | * @filldir: Function used to do the copying | ||
| 169 | * | ||
| 170 | * Returns: errno | ||
| 171 | */ | ||
| 172 | |||
| 173 | static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | ||
| 174 | { | ||
| 175 | struct inode *dir = file->f_mapping->host; | ||
| 176 | struct gfs2_inode *dip = GFS2_I(dir); | ||
| 177 | struct filldir_reg fdr; | ||
| 178 | struct gfs2_holder d_gh; | ||
| 179 | u64 offset = file->f_pos; | ||
| 180 | int error; | ||
| 181 | |||
| 182 | fdr.fdr_sbd = GFS2_SB(dir); | ||
| 183 | fdr.fdr_prefetch = 1; | ||
| 184 | fdr.fdr_filldir = filldir; | ||
| 185 | fdr.fdr_opaque = dirent; | ||
| 186 | |||
| 187 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | ||
| 188 | error = gfs2_glock_nq_atime(&d_gh); | ||
| 189 | if (error) { | ||
| 190 | gfs2_holder_uninit(&d_gh); | ||
| 191 | return error; | ||
| 192 | } | ||
| 193 | |||
| 194 | error = gfs2_dir_read(dir, &offset, &fdr, filldir_func); | ||
| 195 | |||
| 196 | gfs2_glock_dq_uninit(&d_gh); | ||
| 197 | |||
| 198 | file->f_pos = offset; | ||
| 199 | |||
| 200 | return error; | ||
| 201 | } | ||
| 202 | |||
| 203 | /** | ||
| 204 | * fsflags_cvt | ||
| 205 | * @table: A table of 32 u32 flags | ||
| 206 | * @val: a 32 bit value to convert | ||
| 207 | * | ||
| 208 | * This function can be used to convert between fsflags values and | ||
| 209 | * GFS2's own flags values. | ||
| 210 | * | ||
| 211 | * Returns: the converted flags | ||
| 212 | */ | ||
| 213 | static u32 fsflags_cvt(const u32 *table, u32 val) | ||
| 214 | { | ||
| 215 | u32 res = 0; | ||
| 216 | while(val) { | ||
| 217 | if (val & 1) | ||
| 218 | res |= *table; | ||
| 219 | table++; | ||
| 220 | val >>= 1; | ||
| 221 | } | ||
| 222 | return res; | ||
| 223 | } | ||
| 224 | |||
| 225 | static const u32 fsflags_to_gfs2[32] = { | ||
| 226 | [3] = GFS2_DIF_SYNC, | ||
| 227 | [4] = GFS2_DIF_IMMUTABLE, | ||
| 228 | [5] = GFS2_DIF_APPENDONLY, | ||
| 229 | [7] = GFS2_DIF_NOATIME, | ||
| 230 | [12] = GFS2_DIF_EXHASH, | ||
| 231 | [14] = GFS2_DIF_JDATA, | ||
| 232 | [20] = GFS2_DIF_DIRECTIO, | ||
| 233 | }; | ||
| 234 | |||
| 235 | static const u32 gfs2_to_fsflags[32] = { | ||
| 236 | [gfs2fl_Sync] = FS_SYNC_FL, | ||
| 237 | [gfs2fl_Immutable] = FS_IMMUTABLE_FL, | ||
| 238 | [gfs2fl_AppendOnly] = FS_APPEND_FL, | ||
| 239 | [gfs2fl_NoAtime] = FS_NOATIME_FL, | ||
| 240 | [gfs2fl_ExHash] = FS_INDEX_FL, | ||
| 241 | [gfs2fl_Jdata] = FS_JOURNAL_DATA_FL, | ||
| 242 | [gfs2fl_Directio] = FS_DIRECTIO_FL, | ||
| 243 | [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL, | ||
| 244 | [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, | ||
| 245 | }; | ||
| 246 | |||
| 247 | static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | ||
| 248 | { | ||
| 249 | struct inode *inode = filp->f_dentry->d_inode; | ||
| 250 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 251 | struct gfs2_holder gh; | ||
| 252 | int error; | ||
| 253 | u32 fsflags; | ||
| 254 | |||
| 255 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | ||
| 256 | error = gfs2_glock_nq_m_atime(1, &gh); | ||
| 257 | if (error) | ||
| 258 | return error; | ||
| 259 | |||
| 260 | fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); | ||
| 261 | if (put_user(fsflags, ptr)) | ||
| 262 | error = -EFAULT; | ||
| 263 | |||
| 264 | gfs2_glock_dq_m(1, &gh); | ||
| 265 | gfs2_holder_uninit(&gh); | ||
| 266 | return error; | ||
| 267 | } | ||
| 268 | |||
/* Flags that can be set by user space via the SETFLAGS ioctl.  Any
   attempt to change a flag outside this mask is rejected with -EINVAL
   by do_gfs2_set_flags() below. */
#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
			     GFS2_DIF_DIRECTIO| \
			     GFS2_DIF_IMMUTABLE| \
			     GFS2_DIF_APPENDONLY| \
			     GFS2_DIF_NOATIME| \
			     GFS2_DIF_SYNC| \
			     GFS2_DIF_SYSTEM| \
			     GFS2_DIF_INHERIT_DIRECTIO| \
			     GFS2_DIF_INHERIT_JDATA)
| 279 | |||
| 280 | /** | ||
| 281 | * gfs2_set_flags - set flags on an inode | ||
| 282 | * @inode: The inode | ||
| 283 | * @flags: The flags to set | ||
| 284 | * @mask: Indicates which flags are valid | ||
| 285 | * | ||
| 286 | */ | ||
| 287 | static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | ||
| 288 | { | ||
| 289 | struct inode *inode = filp->f_dentry->d_inode; | ||
| 290 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 291 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 292 | struct buffer_head *bh; | ||
| 293 | struct gfs2_holder gh; | ||
| 294 | int error; | ||
| 295 | u32 new_flags, flags; | ||
| 296 | |||
| 297 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
| 298 | if (error) | ||
| 299 | return error; | ||
| 300 | |||
| 301 | flags = ip->i_di.di_flags; | ||
| 302 | new_flags = (flags & ~mask) | (reqflags & mask); | ||
| 303 | if ((new_flags ^ flags) == 0) | ||
| 304 | goto out; | ||
| 305 | |||
| 306 | if (S_ISDIR(inode->i_mode)) { | ||
| 307 | if ((new_flags ^ flags) & GFS2_DIF_JDATA) | ||
| 308 | new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA); | ||
| 309 | if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO) | ||
| 310 | new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO); | ||
| 311 | } | ||
| 312 | |||
| 313 | error = -EINVAL; | ||
| 314 | if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET) | ||
| 315 | goto out; | ||
| 316 | |||
| 317 | error = -EPERM; | ||
| 318 | if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) | ||
| 319 | goto out; | ||
| 320 | if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) | ||
| 321 | goto out; | ||
| 322 | if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && | ||
| 323 | !capable(CAP_LINUX_IMMUTABLE)) | ||
| 324 | goto out; | ||
| 325 | if (!IS_IMMUTABLE(inode)) { | ||
| 326 | error = permission(inode, MAY_WRITE, NULL); | ||
| 327 | if (error) | ||
| 328 | goto out; | ||
| 329 | } | ||
| 330 | |||
| 331 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
| 332 | if (error) | ||
| 333 | goto out; | ||
| 334 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
| 335 | if (error) | ||
| 336 | goto out_trans_end; | ||
| 337 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
| 338 | ip->i_di.di_flags = new_flags; | ||
| 339 | gfs2_dinode_out(&ip->i_di, bh->b_data); | ||
| 340 | brelse(bh); | ||
| 341 | out_trans_end: | ||
| 342 | gfs2_trans_end(sdp); | ||
| 343 | out: | ||
| 344 | gfs2_glock_dq_uninit(&gh); | ||
| 345 | return error; | ||
| 346 | } | ||
| 347 | |||
| 348 | static int gfs2_set_flags(struct file *filp, u32 __user *ptr) | ||
| 349 | { | ||
| 350 | u32 fsflags, gfsflags; | ||
| 351 | if (get_user(fsflags, ptr)) | ||
| 352 | return -EFAULT; | ||
| 353 | gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); | ||
| 354 | return do_gfs2_set_flags(filp, gfsflags, ~0); | ||
| 355 | } | ||
| 356 | |||
| 357 | static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
| 358 | { | ||
| 359 | switch(cmd) { | ||
| 360 | case FS_IOC_GETFLAGS: | ||
| 361 | return gfs2_get_flags(filp, (u32 __user *)arg); | ||
| 362 | case FS_IOC_SETFLAGS: | ||
| 363 | return gfs2_set_flags(filp, (u32 __user *)arg); | ||
| 364 | } | ||
| 365 | return -ENOTTY; | ||
| 366 | } | ||
| 367 | |||
| 368 | |||
| 369 | /** | ||
| 370 | * gfs2_mmap - | ||
| 371 | * @file: The file to map | ||
| 372 | * @vma: The VMA which described the mapping | ||
| 373 | * | ||
| 374 | * Returns: 0 or error code | ||
| 375 | */ | ||
| 376 | |||
| 377 | static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 378 | { | ||
| 379 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
| 380 | struct gfs2_holder i_gh; | ||
| 381 | int error; | ||
| 382 | |||
| 383 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | ||
| 384 | error = gfs2_glock_nq_atime(&i_gh); | ||
| 385 | if (error) { | ||
| 386 | gfs2_holder_uninit(&i_gh); | ||
| 387 | return error; | ||
| 388 | } | ||
| 389 | |||
| 390 | /* This is VM_MAYWRITE instead of VM_WRITE because a call | ||
| 391 | to mprotect() can turn on VM_WRITE later. */ | ||
| 392 | |||
| 393 | if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == | ||
| 394 | (VM_MAYSHARE | VM_MAYWRITE)) | ||
| 395 | vma->vm_ops = &gfs2_vm_ops_sharewrite; | ||
| 396 | else | ||
| 397 | vma->vm_ops = &gfs2_vm_ops_private; | ||
| 398 | |||
| 399 | gfs2_glock_dq_uninit(&i_gh); | ||
| 400 | |||
| 401 | return error; | ||
| 402 | } | ||
| 403 | |||
| 404 | /** | ||
| 405 | * gfs2_open - open a file | ||
| 406 | * @inode: the inode to open | ||
| 407 | * @file: the struct file for this opening | ||
| 408 | * | ||
| 409 | * Returns: errno | ||
| 410 | */ | ||
| 411 | |||
| 412 | static int gfs2_open(struct inode *inode, struct file *file) | ||
| 413 | { | ||
| 414 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 415 | struct gfs2_holder i_gh; | ||
| 416 | struct gfs2_file *fp; | ||
| 417 | int error; | ||
| 418 | |||
| 419 | fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); | ||
| 420 | if (!fp) | ||
| 421 | return -ENOMEM; | ||
| 422 | |||
| 423 | mutex_init(&fp->f_fl_mutex); | ||
| 424 | |||
| 425 | gfs2_assert_warn(GFS2_SB(inode), !file->private_data); | ||
| 426 | file->private_data = fp; | ||
| 427 | |||
| 428 | if (S_ISREG(ip->i_di.di_mode)) { | ||
| 429 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | ||
| 430 | &i_gh); | ||
| 431 | if (error) | ||
| 432 | goto fail; | ||
| 433 | |||
| 434 | if (!(file->f_flags & O_LARGEFILE) && | ||
| 435 | ip->i_di.di_size > MAX_NON_LFS) { | ||
| 436 | error = -EFBIG; | ||
| 437 | goto fail_gunlock; | ||
| 438 | } | ||
| 439 | |||
| 440 | /* Listen to the Direct I/O flag */ | ||
| 441 | |||
| 442 | if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) | ||
| 443 | file->f_flags |= O_DIRECT; | ||
| 444 | |||
| 445 | gfs2_glock_dq_uninit(&i_gh); | ||
| 446 | } | ||
| 447 | |||
| 448 | return 0; | ||
| 449 | |||
| 450 | fail_gunlock: | ||
| 451 | gfs2_glock_dq_uninit(&i_gh); | ||
| 452 | fail: | ||
| 453 | file->private_data = NULL; | ||
| 454 | kfree(fp); | ||
| 455 | return error; | ||
| 456 | } | ||
| 457 | |||
| 458 | /** | ||
| 459 | * gfs2_close - called to close a struct file | ||
| 460 | * @inode: the inode the struct file belongs to | ||
| 461 | * @file: the struct file being closed | ||
| 462 | * | ||
| 463 | * Returns: errno | ||
| 464 | */ | ||
| 465 | |||
| 466 | static int gfs2_close(struct inode *inode, struct file *file) | ||
| 467 | { | ||
| 468 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | ||
| 469 | struct gfs2_file *fp; | ||
| 470 | |||
| 471 | fp = file->private_data; | ||
| 472 | file->private_data = NULL; | ||
| 473 | |||
| 474 | if (gfs2_assert_warn(sdp, fp)) | ||
| 475 | return -EIO; | ||
| 476 | |||
| 477 | kfree(fp); | ||
| 478 | |||
| 479 | return 0; | ||
| 480 | } | ||
| 481 | |||
| 482 | /** | ||
| 483 | * gfs2_fsync - sync the dirty data for a file (across the cluster) | ||
| 484 | * @file: the file that points to the dentry (we ignore this) | ||
| 485 | * @dentry: the dentry that points to the inode to sync | ||
| 486 | * | ||
| 487 | * Returns: errno | ||
| 488 | */ | ||
| 489 | |||
| 490 | static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
| 491 | { | ||
| 492 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | ||
| 493 | |||
| 494 | gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); | ||
| 495 | |||
| 496 | return 0; | ||
| 497 | } | ||
| 498 | |||
| 499 | /** | ||
| 500 | * gfs2_lock - acquire/release a posix lock on a file | ||
| 501 | * @file: the file pointer | ||
| 502 | * @cmd: either modify or retrieve lock state, possibly wait | ||
| 503 | * @fl: type and range of lock | ||
| 504 | * | ||
| 505 | * Returns: errno | ||
| 506 | */ | ||
| 507 | |||
| 508 | static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) | ||
| 509 | { | ||
| 510 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
| 511 | struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); | ||
| 512 | struct lm_lockname name = | ||
| 513 | { .ln_number = ip->i_num.no_addr, | ||
| 514 | .ln_type = LM_TYPE_PLOCK }; | ||
| 515 | |||
| 516 | if (!(fl->fl_flags & FL_POSIX)) | ||
| 517 | return -ENOLCK; | ||
| 518 | if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) | ||
| 519 | return -ENOLCK; | ||
| 520 | |||
| 521 | if (sdp->sd_args.ar_localflocks) { | ||
| 522 | if (IS_GETLK(cmd)) { | ||
| 523 | struct file_lock tmp; | ||
| 524 | int ret; | ||
| 525 | ret = posix_test_lock(file, fl, &tmp); | ||
| 526 | fl->fl_type = F_UNLCK; | ||
| 527 | if (ret) | ||
| 528 | memcpy(fl, &tmp, sizeof(struct file_lock)); | ||
| 529 | return 0; | ||
| 530 | } else { | ||
| 531 | return posix_lock_file_wait(file, fl); | ||
| 532 | } | ||
| 533 | } | ||
| 534 | |||
| 535 | if (IS_GETLK(cmd)) | ||
| 536 | return gfs2_lm_plock_get(sdp, &name, file, fl); | ||
| 537 | else if (fl->fl_type == F_UNLCK) | ||
| 538 | return gfs2_lm_punlock(sdp, &name, file, fl); | ||
| 539 | else | ||
| 540 | return gfs2_lm_plock(sdp, &name, file, cmd, fl); | ||
| 541 | } | ||
| 542 | |||
/* Acquire (or change the mode of) the cluster flock glock for @file.
   Only called for F_RDLCK/F_WRLCK requests; unlock goes through
   do_unflock().  Returns 0 or -errno. */
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_file *fp = file->private_data;
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
	struct gfs2_inode *ip = GFS2_I(file->f_dentry->d_inode);
	struct gfs2_glock *gl;
	unsigned int state;
	int flags;
	int error = 0;

	/* Write locks need an exclusive glock, read locks a shared one;
	   without SETLKW the request must not block, hence LM_FLAG_TRY */
	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;

	mutex_lock(&fp->f_fl_mutex);

	gl = fl_gh->gh_gl;
	if (gl) {
		/* We already hold a flock glock for this file */
		if (fl_gh->gh_state == state)
			goto out;	/* same mode: nothing to do */
		/* Mode change: take an extra reference so gl survives the
		   dq_uninit, drop the VFS-level lock, and release the old
		   holder before re-acquiring in the new mode below. */
		gfs2_glock_hold(gl);
		flock_lock_file_wait(file,
				&(struct file_lock){.fl_type = F_UNLCK});
		gfs2_glock_dq_uninit(fl_gh);
	} else {
		error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
				ip->i_num.no_addr, &gfs2_flock_glops,
				CREATE, &gl);
		if (error)
			goto out;
	}

	/* NOTE(review): presumably gfs2_holder_init() takes its own glock
	   reference, which is why the reference from hold/get above is
	   dropped immediately afterwards -- confirm against glock.c */
	gfs2_holder_init(gl, state, flags, fl_gh);
	gfs2_glock_put(gl);

	error = gfs2_glock_nq(fl_gh);
	if (error) {
		gfs2_holder_uninit(fl_gh);
		if (error == GLR_TRYFAILED)
			error = -EAGAIN;	/* non-blocking request would block */
	} else {
		error = flock_lock_file_wait(file, fl);
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
	}

out:
	mutex_unlock(&fp->f_fl_mutex);
	return error;
}
| 591 | |||
| 592 | static void do_unflock(struct file *file, struct file_lock *fl) | ||
| 593 | { | ||
| 594 | struct gfs2_file *fp = file->private_data; | ||
| 595 | struct gfs2_holder *fl_gh = &fp->f_fl_gh; | ||
| 596 | |||
| 597 | mutex_lock(&fp->f_fl_mutex); | ||
| 598 | flock_lock_file_wait(file, fl); | ||
| 599 | if (fl_gh->gh_gl) | ||
| 600 | gfs2_glock_dq_uninit(fl_gh); | ||
| 601 | mutex_unlock(&fp->f_fl_mutex); | ||
| 602 | } | ||
| 603 | |||
| 604 | /** | ||
| 605 | * gfs2_flock - acquire/release a flock lock on a file | ||
| 606 | * @file: the file pointer | ||
| 607 | * @cmd: either modify or retrieve lock state, possibly wait | ||
| 608 | * @fl: type and range of lock | ||
| 609 | * | ||
| 610 | * Returns: errno | ||
| 611 | */ | ||
| 612 | |||
| 613 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | ||
| 614 | { | ||
| 615 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
| 616 | struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); | ||
| 617 | |||
| 618 | if (!(fl->fl_flags & FL_FLOCK)) | ||
| 619 | return -ENOLCK; | ||
| 620 | if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) | ||
| 621 | return -ENOLCK; | ||
| 622 | |||
| 623 | if (sdp->sd_args.ar_localflocks) | ||
| 624 | return flock_lock_file_wait(file, fl); | ||
| 625 | |||
| 626 | if (fl->fl_type == F_UNLCK) { | ||
| 627 | do_unflock(file, fl); | ||
| 628 | return 0; | ||
| 629 | } else { | ||
| 630 | return do_flock(file, cmd, fl); | ||
| 631 | } | ||
| 632 | } | ||
| 633 | |||
/* File operations for regular GFS2 files */
const struct file_operations gfs2_file_fops = {
	.llseek = gfs2_llseek,
	.read = do_sync_read,
	.aio_read = generic_file_aio_read,
	.write = do_sync_write,
	.aio_write = generic_file_aio_write,
	.unlocked_ioctl = gfs2_ioctl,
	.mmap = gfs2_mmap,
	.open = gfs2_open,
	.release = gfs2_close,
	.fsync = gfs2_fsync,
	.lock = gfs2_lock,
	.sendfile = generic_file_sendfile,
	.flock = gfs2_flock,
	.splice_read = generic_file_splice_read,
	.splice_write = generic_file_splice_write,
};
| 651 | |||
/* File operations for GFS2 directories; no read/write/mmap here */
const struct file_operations gfs2_dir_fops = {
	.readdir = gfs2_readdir,
	.unlocked_ioctl = gfs2_ioctl,
	.open = gfs2_open,
	.release = gfs2_close,
	.fsync = gfs2_fsync,
	.lock = gfs2_lock,
	.flock = gfs2_flock,
};
| 661 | |||
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h new file mode 100644 index 000000000000..ce319f89ec8e --- /dev/null +++ b/fs/gfs2/ops_file.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __OPS_FILE_DOT_H__ | ||
| 11 | #define __OPS_FILE_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/fs.h> | ||
| 14 | struct gfs2_inode; | ||
| 15 | |||
| 16 | extern struct file gfs2_internal_file_sentinel; | ||
| 17 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
| 18 | struct file_ra_state *ra_state, | ||
| 19 | char *buf, loff_t *pos, unsigned size); | ||
| 20 | |||
| 21 | extern const struct file_operations gfs2_file_fops; | ||
| 22 | extern const struct file_operations gfs2_dir_fops; | ||
| 23 | |||
| 24 | #endif /* __OPS_FILE_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c new file mode 100644 index 000000000000..178b33911843 --- /dev/null +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -0,0 +1,928 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/blkdev.h> | ||
| 16 | #include <linux/kthread.h> | ||
| 17 | #include <linux/namei.h> | ||
| 18 | #include <linux/mount.h> | ||
| 19 | #include <linux/gfs2_ondisk.h> | ||
| 20 | #include <linux/lm_interface.h> | ||
| 21 | |||
| 22 | #include "gfs2.h" | ||
| 23 | #include "incore.h" | ||
| 24 | #include "daemon.h" | ||
| 25 | #include "glock.h" | ||
| 26 | #include "glops.h" | ||
| 27 | #include "inode.h" | ||
| 28 | #include "lm.h" | ||
| 29 | #include "mount.h" | ||
| 30 | #include "ops_export.h" | ||
| 31 | #include "ops_fstype.h" | ||
| 32 | #include "ops_super.h" | ||
| 33 | #include "recovery.h" | ||
| 34 | #include "rgrp.h" | ||
| 35 | #include "super.h" | ||
| 36 | #include "sys.h" | ||
| 37 | #include "util.h" | ||
| 38 | |||
/* Values for the "undo" argument of the init_*() helpers below:
   DO builds the subsystem up, UNDO tears it back down. */
#define DO 0
#define UNDO 1

/* Dentry operations for GFS2; defined elsewhere in the filesystem */
extern struct dentry_operations gfs2_dops;
| 43 | |||
/* Allocate and initialize the in-core superblock structure for @sb.
   Only sets up locks, lists and tunables; no I/O is done here.
   Returns the new gfs2_sbd, or NULL on allocation failure. */
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
	struct gfs2_sbd *sdp;

	sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
	if (!sdp)
		return NULL;

	/* Link the VFS super block and the gfs2_sbd to each other */
	sb->s_fs_info = sdp;
	sdp->sd_vfs = sb;

	gfs2_tune_init(&sdp->sd_tune);

	/* Glock reclaim state */
	INIT_LIST_HEAD(&sdp->sd_reclaim_list);
	spin_lock_init(&sdp->sd_reclaim_lock);
	init_waitqueue_head(&sdp->sd_reclaim_wq);

	/* Inode number allocation and statfs state */
	mutex_init(&sdp->sd_inum_mutex);
	spin_lock_init(&sdp->sd_statfs_spin);
	mutex_init(&sdp->sd_statfs_mutex);

	/* Resource group index state */
	spin_lock_init(&sdp->sd_rindex_spin);
	mutex_init(&sdp->sd_rindex_mutex);
	INIT_LIST_HEAD(&sdp->sd_rindex_list);
	INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
	INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);

	/* Journal index state */
	INIT_LIST_HEAD(&sdp->sd_jindex_list);
	spin_lock_init(&sdp->sd_jindex_spin);
	mutex_init(&sdp->sd_jindex_mutex);

	/* Quota state */
	INIT_LIST_HEAD(&sdp->sd_quota_list);
	spin_lock_init(&sdp->sd_quota_spin);
	mutex_init(&sdp->sd_quota_mutex);

	/* Log (journal) state: per-type log element lists, AIL lists
	   and the log flush lock */
	spin_lock_init(&sdp->sd_log_lock);

	INIT_LIST_HEAD(&sdp->sd_log_le_gl);
	INIT_LIST_HEAD(&sdp->sd_log_le_buf);
	INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
	INIT_LIST_HEAD(&sdp->sd_log_le_rg);
	INIT_LIST_HEAD(&sdp->sd_log_le_databuf);

	mutex_init(&sdp->sd_log_reserve_mutex);
	INIT_LIST_HEAD(&sdp->sd_ail1_list);
	INIT_LIST_HEAD(&sdp->sd_ail2_list);

	init_rwsem(&sdp->sd_log_flush_lock);
	INIT_LIST_HEAD(&sdp->sd_log_flush_list);

	INIT_LIST_HEAD(&sdp->sd_revoke_list);

	mutex_init(&sdp->sd_freeze_lock);

	return sdp;
}
| 100 | |||
| 101 | static void init_vfs(struct super_block *sb, unsigned noatime) | ||
| 102 | { | ||
| 103 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 104 | |||
| 105 | sb->s_magic = GFS2_MAGIC; | ||
| 106 | sb->s_op = &gfs2_super_ops; | ||
| 107 | sb->s_export_op = &gfs2_export_ops; | ||
| 108 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
| 109 | |||
| 110 | if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) | ||
| 111 | set_bit(noatime, &sdp->sd_flags); | ||
| 112 | |||
| 113 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
| 114 | sb->s_flags |= MS_NOATIME | MS_NODIRATIME; | ||
| 115 | } | ||
| 116 | |||
| 117 | static int init_names(struct gfs2_sbd *sdp, int silent) | ||
| 118 | { | ||
| 119 | struct page *page; | ||
| 120 | char *proto, *table; | ||
| 121 | int error = 0; | ||
| 122 | |||
| 123 | proto = sdp->sd_args.ar_lockproto; | ||
| 124 | table = sdp->sd_args.ar_locktable; | ||
| 125 | |||
| 126 | /* Try to autodetect */ | ||
| 127 | |||
| 128 | if (!proto[0] || !table[0]) { | ||
| 129 | struct gfs2_sb *sb; | ||
| 130 | page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
| 131 | if (!page) | ||
| 132 | return -ENOBUFS; | ||
| 133 | sb = kmap(page); | ||
| 134 | gfs2_sb_in(&sdp->sd_sb, sb); | ||
| 135 | kunmap(page); | ||
| 136 | __free_page(page); | ||
| 137 | |||
| 138 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
| 139 | if (error) | ||
| 140 | goto out; | ||
| 141 | |||
| 142 | if (!proto[0]) | ||
| 143 | proto = sdp->sd_sb.sb_lockproto; | ||
| 144 | if (!table[0]) | ||
| 145 | table = sdp->sd_sb.sb_locktable; | ||
| 146 | } | ||
| 147 | |||
| 148 | if (!table[0]) | ||
| 149 | table = sdp->sd_vfs->s_id; | ||
| 150 | |||
| 151 | snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); | ||
| 152 | snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); | ||
| 153 | |||
| 154 | out: | ||
| 155 | return error; | ||
| 156 | } | ||
| 157 | |||
| 158 | static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, | ||
| 159 | int undo) | ||
| 160 | { | ||
| 161 | struct task_struct *p; | ||
| 162 | int error = 0; | ||
| 163 | |||
| 164 | if (undo) | ||
| 165 | goto fail_trans; | ||
| 166 | |||
| 167 | p = kthread_run(gfs2_scand, sdp, "gfs2_scand"); | ||
| 168 | error = IS_ERR(p); | ||
| 169 | if (error) { | ||
| 170 | fs_err(sdp, "can't start scand thread: %d\n", error); | ||
| 171 | return error; | ||
| 172 | } | ||
| 173 | sdp->sd_scand_process = p; | ||
| 174 | |||
| 175 | for (sdp->sd_glockd_num = 0; | ||
| 176 | sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; | ||
| 177 | sdp->sd_glockd_num++) { | ||
| 178 | p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd"); | ||
| 179 | error = IS_ERR(p); | ||
| 180 | if (error) { | ||
| 181 | fs_err(sdp, "can't start glockd thread: %d\n", error); | ||
| 182 | goto fail; | ||
| 183 | } | ||
| 184 | sdp->sd_glockd_process[sdp->sd_glockd_num] = p; | ||
| 185 | } | ||
| 186 | |||
| 187 | error = gfs2_glock_nq_num(sdp, | ||
| 188 | GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, | ||
| 189 | LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE, | ||
| 190 | mount_gh); | ||
| 191 | if (error) { | ||
| 192 | fs_err(sdp, "can't acquire mount glock: %d\n", error); | ||
| 193 | goto fail; | ||
| 194 | } | ||
| 195 | |||
| 196 | error = gfs2_glock_nq_num(sdp, | ||
| 197 | GFS2_LIVE_LOCK, &gfs2_nondisk_glops, | ||
| 198 | LM_ST_SHARED, | ||
| 199 | LM_FLAG_NOEXP | GL_EXACT, | ||
| 200 | &sdp->sd_live_gh); | ||
| 201 | if (error) { | ||
| 202 | fs_err(sdp, "can't acquire live glock: %d\n", error); | ||
| 203 | goto fail_mount; | ||
| 204 | } | ||
| 205 | |||
| 206 | error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops, | ||
| 207 | CREATE, &sdp->sd_rename_gl); | ||
| 208 | if (error) { | ||
| 209 | fs_err(sdp, "can't create rename glock: %d\n", error); | ||
| 210 | goto fail_live; | ||
| 211 | } | ||
| 212 | |||
| 213 | error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops, | ||
| 214 | CREATE, &sdp->sd_trans_gl); | ||
| 215 | if (error) { | ||
| 216 | fs_err(sdp, "can't create transaction glock: %d\n", error); | ||
| 217 | goto fail_rename; | ||
| 218 | } | ||
| 219 | set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags); | ||
| 220 | |||
| 221 | return 0; | ||
| 222 | |||
| 223 | fail_trans: | ||
| 224 | gfs2_glock_put(sdp->sd_trans_gl); | ||
| 225 | fail_rename: | ||
| 226 | gfs2_glock_put(sdp->sd_rename_gl); | ||
| 227 | fail_live: | ||
| 228 | gfs2_glock_dq_uninit(&sdp->sd_live_gh); | ||
| 229 | fail_mount: | ||
| 230 | gfs2_glock_dq_uninit(mount_gh); | ||
| 231 | fail: | ||
| 232 | while (sdp->sd_glockd_num--) | ||
| 233 | kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); | ||
| 234 | |||
| 235 | kthread_stop(sdp->sd_scand_process); | ||
| 236 | return error; | ||
| 237 | } | ||
| 238 | |||
/* Look up a root-directory inode by number; the DT_DIR hint tells the
   inode lookup what file type to expect.  Returns the inode or ERR_PTR. */
static struct inode *gfs2_lookup_root(struct super_block *sb,
				      struct gfs2_inum *inum)
{
	return gfs2_inode_lookup(sb, inum, DT_DIR);
}
| 244 | |||
| 245 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | ||
| 246 | { | ||
| 247 | struct super_block *sb = sdp->sd_vfs; | ||
| 248 | struct gfs2_holder sb_gh; | ||
| 249 | struct gfs2_inum *inum; | ||
| 250 | struct inode *inode; | ||
| 251 | int error = 0; | ||
| 252 | |||
| 253 | if (undo) { | ||
| 254 | if (sb->s_root) { | ||
| 255 | dput(sb->s_root); | ||
| 256 | sb->s_root = NULL; | ||
| 257 | } | ||
| 258 | return 0; | ||
| 259 | } | ||
| 260 | |||
| 261 | error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, | ||
| 262 | LM_ST_SHARED, 0, &sb_gh); | ||
| 263 | if (error) { | ||
| 264 | fs_err(sdp, "can't acquire superblock glock: %d\n", error); | ||
| 265 | return error; | ||
| 266 | } | ||
| 267 | |||
| 268 | error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); | ||
| 269 | if (error) { | ||
| 270 | fs_err(sdp, "can't read superblock: %d\n", error); | ||
| 271 | goto out; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* Set up the buffer cache and SB for real */ | ||
| 275 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | ||
| 276 | error = -EINVAL; | ||
| 277 | fs_err(sdp, "FS block size (%u) is too small for device " | ||
| 278 | "block size (%u)\n", | ||
| 279 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | ||
| 280 | goto out; | ||
| 281 | } | ||
| 282 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | ||
| 283 | error = -EINVAL; | ||
| 284 | fs_err(sdp, "FS block size (%u) is too big for machine " | ||
| 285 | "page size (%u)\n", | ||
| 286 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); | ||
| 287 | goto out; | ||
| 288 | } | ||
| 289 | sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); | ||
| 290 | |||
| 291 | /* Get the root inode */ | ||
| 292 | inum = &sdp->sd_sb.sb_root_dir; | ||
| 293 | if (sb->s_type == &gfs2meta_fs_type) | ||
| 294 | inum = &sdp->sd_sb.sb_master_dir; | ||
| 295 | inode = gfs2_lookup_root(sb, inum); | ||
| 296 | if (IS_ERR(inode)) { | ||
| 297 | error = PTR_ERR(inode); | ||
| 298 | fs_err(sdp, "can't read in root inode: %d\n", error); | ||
| 299 | goto out; | ||
| 300 | } | ||
| 301 | |||
| 302 | sb->s_root = d_alloc_root(inode); | ||
| 303 | if (!sb->s_root) { | ||
| 304 | fs_err(sdp, "can't get root dentry\n"); | ||
| 305 | error = -ENOMEM; | ||
| 306 | iput(inode); | ||
| 307 | } | ||
| 308 | sb->s_root->d_op = &gfs2_dops; | ||
| 309 | out: | ||
| 310 | gfs2_glock_dq_uninit(&sb_gh); | ||
| 311 | return error; | ||
| 312 | } | ||
| 313 | |||
| 314 | static int init_journal(struct gfs2_sbd *sdp, int undo) | ||
| 315 | { | ||
| 316 | struct gfs2_holder ji_gh; | ||
| 317 | struct task_struct *p; | ||
| 318 | struct gfs2_inode *ip; | ||
| 319 | int jindex = 1; | ||
| 320 | int error = 0; | ||
| 321 | |||
| 322 | if (undo) { | ||
| 323 | jindex = 0; | ||
| 324 | goto fail_recoverd; | ||
| 325 | } | ||
| 326 | |||
| 327 | sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); | ||
| 328 | if (IS_ERR(sdp->sd_jindex)) { | ||
| 329 | fs_err(sdp, "can't lookup journal index: %d\n", error); | ||
| 330 | return PTR_ERR(sdp->sd_jindex); | ||
| 331 | } | ||
| 332 | ip = GFS2_I(sdp->sd_jindex); | ||
| 333 | set_bit(GLF_STICKY, &ip->i_gl->gl_flags); | ||
| 334 | |||
| 335 | /* Load in the journal index special file */ | ||
| 336 | |||
| 337 | error = gfs2_jindex_hold(sdp, &ji_gh); | ||
| 338 | if (error) { | ||
| 339 | fs_err(sdp, "can't read journal index: %d\n", error); | ||
| 340 | goto fail; | ||
| 341 | } | ||
| 342 | |||
| 343 | error = -EINVAL; | ||
| 344 | if (!gfs2_jindex_size(sdp)) { | ||
| 345 | fs_err(sdp, "no journals!\n"); | ||
| 346 | goto fail_jindex; | ||
| 347 | } | ||
| 348 | |||
| 349 | if (sdp->sd_args.ar_spectator) { | ||
| 350 | sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); | ||
| 351 | sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; | ||
| 352 | } else { | ||
| 353 | if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { | ||
| 354 | fs_err(sdp, "can't mount journal #%u\n", | ||
| 355 | sdp->sd_lockstruct.ls_jid); | ||
| 356 | fs_err(sdp, "there are only %u journals (0 - %u)\n", | ||
| 357 | gfs2_jindex_size(sdp), | ||
| 358 | gfs2_jindex_size(sdp) - 1); | ||
| 359 | goto fail_jindex; | ||
| 360 | } | ||
| 361 | sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid); | ||
| 362 | |||
| 363 | error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, | ||
| 364 | &gfs2_journal_glops, | ||
| 365 | LM_ST_EXCLUSIVE, LM_FLAG_NOEXP, | ||
| 366 | &sdp->sd_journal_gh); | ||
| 367 | if (error) { | ||
| 368 | fs_err(sdp, "can't acquire journal glock: %d\n", error); | ||
| 369 | goto fail_jindex; | ||
| 370 | } | ||
| 371 | |||
| 372 | ip = GFS2_I(sdp->sd_jdesc->jd_inode); | ||
| 373 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, | ||
| 374 | LM_FLAG_NOEXP | GL_EXACT, | ||
| 375 | &sdp->sd_jinode_gh); | ||
| 376 | if (error) { | ||
| 377 | fs_err(sdp, "can't acquire journal inode glock: %d\n", | ||
| 378 | error); | ||
| 379 | goto fail_journal_gh; | ||
| 380 | } | ||
| 381 | |||
| 382 | error = gfs2_jdesc_check(sdp->sd_jdesc); | ||
| 383 | if (error) { | ||
| 384 | fs_err(sdp, "my journal (%u) is bad: %d\n", | ||
| 385 | sdp->sd_jdesc->jd_jid, error); | ||
| 386 | goto fail_jinode_gh; | ||
| 387 | } | ||
| 388 | sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; | ||
| 389 | } | ||
| 390 | |||
| 391 | if (sdp->sd_lockstruct.ls_first) { | ||
| 392 | unsigned int x; | ||
| 393 | for (x = 0; x < sdp->sd_journals; x++) { | ||
| 394 | error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x)); | ||
| 395 | if (error) { | ||
| 396 | fs_err(sdp, "error recovering journal %u: %d\n", | ||
| 397 | x, error); | ||
| 398 | goto fail_jinode_gh; | ||
| 399 | } | ||
| 400 | } | ||
| 401 | |||
| 402 | gfs2_lm_others_may_mount(sdp); | ||
| 403 | } else if (!sdp->sd_args.ar_spectator) { | ||
| 404 | error = gfs2_recover_journal(sdp->sd_jdesc); | ||
| 405 | if (error) { | ||
| 406 | fs_err(sdp, "error recovering my journal: %d\n", error); | ||
| 407 | goto fail_jinode_gh; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); | ||
| 412 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 413 | jindex = 0; | ||
| 414 | |||
| 415 | p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); | ||
| 416 | error = IS_ERR(p); | ||
| 417 | if (error) { | ||
| 418 | fs_err(sdp, "can't start recoverd thread: %d\n", error); | ||
| 419 | goto fail_jinode_gh; | ||
| 420 | } | ||
| 421 | sdp->sd_recoverd_process = p; | ||
| 422 | |||
| 423 | return 0; | ||
| 424 | |||
| 425 | fail_recoverd: | ||
| 426 | kthread_stop(sdp->sd_recoverd_process); | ||
| 427 | fail_jinode_gh: | ||
| 428 | if (!sdp->sd_args.ar_spectator) | ||
| 429 | gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); | ||
| 430 | fail_journal_gh: | ||
| 431 | if (!sdp->sd_args.ar_spectator) | ||
| 432 | gfs2_glock_dq_uninit(&sdp->sd_journal_gh); | ||
| 433 | fail_jindex: | ||
| 434 | gfs2_jindex_free(sdp); | ||
| 435 | if (jindex) | ||
| 436 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 437 | fail: | ||
| 438 | iput(sdp->sd_jindex); | ||
| 439 | return error; | ||
| 440 | } | ||
| 441 | |||
| 442 | |||
/**
 * init_inodes - Read in the special (system) inodes
 * @sdp: the filesystem
 * @undo: if true, release everything a previous successful call acquired
 *
 * Looks up the master directory, initializes the journals, and reads in
 * the inum, statfs, rindex and quota system inodes.  On failure the goto
 * chain below unwinds exactly what has been acquired so far, in reverse
 * order of acquisition.
 *
 * Returns: errno
 */
static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
	int error = 0;
	struct gfs2_inode *ip;
	struct inode *inode;

	/* Undo mode: enter at the top of the unwind chain so every
	   label runs and all of the inodes acquired below are put. */
	if (undo)
		goto fail_qinode;

	inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		fs_err(sdp, "can't read in master directory: %d\n", error);
		goto fail;
	}
	sdp->sd_master_dir = inode;

	error = init_journal(sdp, undo);
	if (error)
		goto fail_master;

	/* Read in the master inode number inode */
	sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
	if (IS_ERR(sdp->sd_inum_inode)) {
		error = PTR_ERR(sdp->sd_inum_inode);
		fs_err(sdp, "can't read in inum inode: %d\n", error);
		goto fail_journal;
	}


	/* Read in the master statfs inode */
	sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
	if (IS_ERR(sdp->sd_statfs_inode)) {
		error = PTR_ERR(sdp->sd_statfs_inode);
		fs_err(sdp, "can't read in statfs inode: %d\n", error);
		goto fail_inum;
	}

	/* Read in the resource index inode */
	sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
	if (IS_ERR(sdp->sd_rindex)) {
		error = PTR_ERR(sdp->sd_rindex);
		fs_err(sdp, "can't get resource index inode: %d\n", error);
		goto fail_statfs;
	}
	ip = GFS2_I(sdp->sd_rindex);
	set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
	/* One behind the glock's version number, so the rindex is seen
	   as stale and re-read on first use */
	sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1;

	/* Read in the quota inode */
	sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
	if (IS_ERR(sdp->sd_quota_inode)) {
		error = PTR_ERR(sdp->sd_quota_inode);
		fs_err(sdp, "can't get quota file inode: %d\n", error);
		goto fail_rindex;
	}
	return 0;

fail_qinode:
	iput(sdp->sd_quota_inode);
fail_rindex:
	gfs2_clear_rgrpd(sdp);
	iput(sdp->sd_rindex);
fail_statfs:
	iput(sdp->sd_statfs_inode);
fail_inum:
	iput(sdp->sd_inum_inode);
fail_journal:
	init_journal(sdp, UNDO);
fail_master:
	iput(sdp->sd_master_dir);
fail:
	return error;
}
| 517 | |||
/**
 * init_per_node - Look up and lock this node's files in the per_node dir
 * @sdp: the filesystem
 * @undo: if true, release everything a previous successful call acquired
 *
 * Each journal id has its own inum_range, statfs_change and quota_change
 * file under the "per_node" directory; this node's files are found by
 * its journal id and then locked exclusively.  Spectator mounts own no
 * journal and therefore need none of this.
 *
 * Returns: errno
 */
static int init_per_node(struct gfs2_sbd *sdp, int undo)
{
	struct inode *pn = NULL;
	char buf[30];
	int error = 0;
	struct gfs2_inode *ip;

	if (sdp->sd_args.ar_spectator)
		return 0;

	/* Undo mode: enter at the top of the unwind chain */
	if (undo)
		goto fail_qc_gh;

	pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
	if (IS_ERR(pn)) {
		error = PTR_ERR(pn);
		fs_err(sdp, "can't find per_node directory: %d\n", error);
		return error;
	}

	sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_ir_inode)) {
		error = PTR_ERR(sdp->sd_ir_inode);
		fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
		goto fail;
	}

	sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_sc_inode)) {
		error = PTR_ERR(sdp->sd_sc_inode);
		fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
		goto fail_ir_i;
	}

	sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_qc_inode)) {
		error = PTR_ERR(sdp->sd_qc_inode);
		fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
		goto fail_ut_i;
	}

	/* The per_node directory itself is no longer needed */
	iput(pn);
	pn = NULL;

	ip = GFS2_I(sdp->sd_ir_inode);
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, 0,
				   &sdp->sd_ir_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
		goto fail_qc_i;
	}

	ip = GFS2_I(sdp->sd_sc_inode);
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, 0,
				   &sdp->sd_sc_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
		goto fail_ir_gh;
	}

	ip = GFS2_I(sdp->sd_qc_inode);
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, 0,
				   &sdp->sd_qc_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
		goto fail_ut_gh;
	}

	return 0;

fail_qc_gh:
	gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
fail_ut_gh:
	gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
fail_ir_gh:
	gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
fail_qc_i:
	iput(sdp->sd_qc_inode);
fail_ut_i:
	iput(sdp->sd_sc_inode);
fail_ir_i:
	iput(sdp->sd_ir_inode);
fail:
	if (pn)
		iput(pn);
	return error;
}
| 611 | |||
| 612 | static int init_threads(struct gfs2_sbd *sdp, int undo) | ||
| 613 | { | ||
| 614 | struct task_struct *p; | ||
| 615 | int error = 0; | ||
| 616 | |||
| 617 | if (undo) | ||
| 618 | goto fail_quotad; | ||
| 619 | |||
| 620 | sdp->sd_log_flush_time = jiffies; | ||
| 621 | sdp->sd_jindex_refresh_time = jiffies; | ||
| 622 | |||
| 623 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | ||
| 624 | error = IS_ERR(p); | ||
| 625 | if (error) { | ||
| 626 | fs_err(sdp, "can't start logd thread: %d\n", error); | ||
| 627 | return error; | ||
| 628 | } | ||
| 629 | sdp->sd_logd_process = p; | ||
| 630 | |||
| 631 | sdp->sd_statfs_sync_time = jiffies; | ||
| 632 | sdp->sd_quota_sync_time = jiffies; | ||
| 633 | |||
| 634 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | ||
| 635 | error = IS_ERR(p); | ||
| 636 | if (error) { | ||
| 637 | fs_err(sdp, "can't start quotad thread: %d\n", error); | ||
| 638 | goto fail; | ||
| 639 | } | ||
| 640 | sdp->sd_quotad_process = p; | ||
| 641 | |||
| 642 | return 0; | ||
| 643 | |||
| 644 | |||
| 645 | fail_quotad: | ||
| 646 | kthread_stop(sdp->sd_quotad_process); | ||
| 647 | fail: | ||
| 648 | kthread_stop(sdp->sd_logd_process); | ||
| 649 | return error; | ||
| 650 | } | ||
| 651 | |||
/**
 * fill_super - Read in superblock
 * @sb: The VFS superblock
 * @data: Mount options
 * @silent: Don't complain if it's not a GFS2 filesystem
 *
 * Drives the whole mount sequence: allocate the sbd, parse options,
 * read the on-disk superblock, mount the lock module, and bring up
 * locking, the sb, the system inodes, the per-node files and the
 * daemon threads.  Each init_*() helper takes a DO/UNDO flag, so the
 * failure labels below unwind by re-calling the same helpers with UNDO.
 *
 * Returns: errno
 */

static int fill_super(struct super_block *sb, void *data, int silent)
{
	struct gfs2_sbd *sdp;
	struct gfs2_holder mount_gh;
	int error;

	sdp = init_sbd(sb);
	if (!sdp) {
		printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
		return -ENOMEM;
	}

	error = gfs2_mount_args(sdp, (char *)data, 0);
	if (error) {
		printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
		goto fail;
	}

	init_vfs(sb, SDF_NOATIME);

	/* Set up the buffer cache and fill in some fake block size values
	   to allow us to read-in the on-disk superblock. */
	sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;

	error = init_names(sdp, silent);
	if (error)
		goto fail;

	error = gfs2_sys_fs_add(sdp);
	if (error)
		goto fail;

	error = gfs2_lm_mount(sdp, silent);
	if (error)
		goto fail_sys;

	error = init_locking(sdp, &mount_gh, DO);
	if (error)
		goto fail_lm;

	error = init_sb(sdp, silent, DO);
	if (error)
		goto fail_locking;

	error = init_inodes(sdp, DO);
	if (error)
		goto fail_sb;

	error = init_per_node(sdp, DO);
	if (error)
		goto fail_inodes;

	error = gfs2_statfs_init(sdp);
	if (error) {
		fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
		goto fail_per_node;
	}

	error = init_threads(sdp, DO);
	if (error)
		goto fail_per_node;

	if (!(sb->s_flags & MS_RDONLY)) {
		error = gfs2_make_fs_rw(sdp);
		if (error) {
			fs_err(sdp, "can't make FS RW: %d\n", error);
			goto fail_threads;
		}
	}

	/* Mount complete; drop the glock held since init_locking() */
	gfs2_glock_dq_uninit(&mount_gh);

	return 0;

fail_threads:
	init_threads(sdp, UNDO);
fail_per_node:
	init_per_node(sdp, UNDO);
fail_inodes:
	init_inodes(sdp, UNDO);
fail_sb:
	init_sb(sdp, 0, UNDO);
fail_locking:
	init_locking(sdp, &mount_gh, UNDO);
fail_lm:
	gfs2_gl_hash_clear(sdp, WAIT);
	gfs2_lm_unmount(sdp);
	/* Keep retrying until every inode on this sb has been dropped */
	while (invalidate_inodes(sb))
		yield();
fail_sys:
	gfs2_sys_fs_del(sdp);
fail:
	kfree(sdp);
	sb->s_fs_info = NULL;
	return error;
}
| 761 | |||
| 762 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | ||
| 763 | const char *dev_name, void *data, struct vfsmount *mnt) | ||
| 764 | { | ||
| 765 | struct super_block *sb; | ||
| 766 | struct gfs2_sbd *sdp; | ||
| 767 | int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | ||
| 768 | if (error) | ||
| 769 | goto out; | ||
| 770 | sb = mnt->mnt_sb; | ||
| 771 | sdp = sb->s_fs_info; | ||
| 772 | sdp->sd_gfs2mnt = mnt; | ||
| 773 | out: | ||
| 774 | return error; | ||
| 775 | } | ||
| 776 | |||
| 777 | static int fill_super_meta(struct super_block *sb, struct super_block *new, | ||
| 778 | void *data, int silent) | ||
| 779 | { | ||
| 780 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 781 | struct inode *inode; | ||
| 782 | int error = 0; | ||
| 783 | |||
| 784 | new->s_fs_info = sdp; | ||
| 785 | sdp->sd_vfs_meta = sb; | ||
| 786 | |||
| 787 | init_vfs(new, SDF_NOATIME); | ||
| 788 | |||
| 789 | /* Get the master inode */ | ||
| 790 | inode = igrab(sdp->sd_master_dir); | ||
| 791 | |||
| 792 | new->s_root = d_alloc_root(inode); | ||
| 793 | if (!new->s_root) { | ||
| 794 | fs_err(sdp, "can't get root dentry\n"); | ||
| 795 | error = -ENOMEM; | ||
| 796 | iput(inode); | ||
| 797 | } | ||
| 798 | new->s_root->d_op = &gfs2_dops; | ||
| 799 | |||
| 800 | return error; | ||
| 801 | } | ||
| 802 | |||
| 803 | static int set_bdev_super(struct super_block *s, void *data) | ||
| 804 | { | ||
| 805 | s->s_bdev = data; | ||
| 806 | s->s_dev = s->s_bdev->bd_dev; | ||
| 807 | return 0; | ||
| 808 | } | ||
| 809 | |||
| 810 | static int test_bdev_super(struct super_block *s, void *data) | ||
| 811 | { | ||
| 812 | return s->s_bdev == data; | ||
| 813 | } | ||
| 814 | |||
| 815 | static struct super_block* get_gfs2_sb(const char *dev_name) | ||
| 816 | { | ||
| 817 | struct kstat stat; | ||
| 818 | struct nameidata nd; | ||
| 819 | struct file_system_type *fstype; | ||
| 820 | struct super_block *sb = NULL, *s; | ||
| 821 | struct list_head *l; | ||
| 822 | int error; | ||
| 823 | |||
| 824 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | ||
| 825 | if (error) { | ||
| 826 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", | ||
| 827 | dev_name); | ||
| 828 | goto out; | ||
| 829 | } | ||
| 830 | error = vfs_getattr(nd.mnt, nd.dentry, &stat); | ||
| 831 | |||
| 832 | fstype = get_fs_type("gfs2"); | ||
| 833 | list_for_each(l, &fstype->fs_supers) { | ||
| 834 | s = list_entry(l, struct super_block, s_instances); | ||
| 835 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | ||
| 836 | (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) { | ||
| 837 | sb = s; | ||
| 838 | goto free_nd; | ||
| 839 | } | ||
| 840 | } | ||
| 841 | |||
| 842 | printk(KERN_WARNING "GFS2: Unrecognized block device or " | ||
| 843 | "mount point %s", dev_name); | ||
| 844 | |||
| 845 | free_nd: | ||
| 846 | path_release(&nd); | ||
| 847 | out: | ||
| 848 | return sb; | ||
| 849 | } | ||
| 850 | |||
| 851 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | ||
| 852 | const char *dev_name, void *data, struct vfsmount *mnt) | ||
| 853 | { | ||
| 854 | int error = 0; | ||
| 855 | struct super_block *sb = NULL, *new; | ||
| 856 | struct gfs2_sbd *sdp; | ||
| 857 | char *gfs2mnt = NULL; | ||
| 858 | |||
| 859 | sb = get_gfs2_sb(dev_name); | ||
| 860 | if (!sb) { | ||
| 861 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | ||
| 862 | error = -ENOENT; | ||
| 863 | goto error; | ||
| 864 | } | ||
| 865 | sdp = (struct gfs2_sbd*) sb->s_fs_info; | ||
| 866 | if (sdp->sd_vfs_meta) { | ||
| 867 | printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); | ||
| 868 | error = -EBUSY; | ||
| 869 | goto error; | ||
| 870 | } | ||
| 871 | mutex_lock(&sb->s_bdev->bd_mount_mutex); | ||
| 872 | new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); | ||
| 873 | mutex_unlock(&sb->s_bdev->bd_mount_mutex); | ||
| 874 | if (IS_ERR(new)) { | ||
| 875 | error = PTR_ERR(new); | ||
| 876 | goto error; | ||
| 877 | } | ||
| 878 | module_put(fs_type->owner); | ||
| 879 | new->s_flags = flags; | ||
| 880 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | ||
| 881 | sb_set_blocksize(new, sb->s_blocksize); | ||
| 882 | error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); | ||
| 883 | if (error) { | ||
| 884 | up_write(&new->s_umount); | ||
| 885 | deactivate_super(new); | ||
| 886 | goto error; | ||
| 887 | } | ||
| 888 | |||
| 889 | new->s_flags |= MS_ACTIVE; | ||
| 890 | |||
| 891 | /* Grab a reference to the gfs2 mount point */ | ||
| 892 | atomic_inc(&sdp->sd_gfs2mnt->mnt_count); | ||
| 893 | return simple_set_mnt(mnt, new); | ||
| 894 | error: | ||
| 895 | if (gfs2mnt) | ||
| 896 | kfree(gfs2mnt); | ||
| 897 | return error; | ||
| 898 | } | ||
| 899 | |||
/* Unmount callback for the "gfs2" type: plain block-device teardown */
static void gfs2_kill_sb(struct super_block *sb)
{
	kill_block_super(sb);
}
| 904 | |||
/* Unmount callback for the "gfs2meta" type: shut the shadow sb down,
 * mark the sbd as having no meta mount, and drop the reference taken
 * on the gfs2 vfsmount at meta-mount time */
static void gfs2_kill_sb_meta(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	generic_shutdown_super(sb);
	sdp->sd_vfs_meta = NULL;
	atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
}
| 912 | |||
/* The normal "gfs2" data filesystem */
struct file_system_type gfs2_fs_type = {
	.name = "gfs2",
	.fs_flags = FS_REQUIRES_DEV,
	.get_sb = gfs2_get_sb,
	.kill_sb = gfs2_kill_sb,
	.owner = THIS_MODULE,
};
| 920 | |||
/* The "gfs2meta" filesystem: a metadata-rooted view of a mounted gfs2 fs */
struct file_system_type gfs2meta_fs_type = {
	.name = "gfs2meta",
	.fs_flags = FS_REQUIRES_DEV,
	.get_sb = gfs2_get_sb_meta,
	.kill_sb = gfs2_kill_sb_meta,
	.owner = THIS_MODULE,
};
| 928 | |||
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h new file mode 100644 index 000000000000..7cc2c296271b --- /dev/null +++ b/fs/gfs2/ops_fstype.h | |||
| @@ -0,0 +1,18 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __OPS_FSTYPE_DOT_H__
#define __OPS_FSTYPE_DOT_H__

#include <linux/fs.h>

/* Filesystem type objects defined in ops_fstype.c */
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;

#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c new file mode 100644 index 000000000000..ef6e5ed70e94 --- /dev/null +++ b/fs/gfs2/ops_inode.c | |||
| @@ -0,0 +1,1151 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/namei.h> | ||
| 16 | #include <linux/utsname.h> | ||
| 17 | #include <linux/mm.h> | ||
| 18 | #include <linux/xattr.h> | ||
| 19 | #include <linux/posix_acl.h> | ||
| 20 | #include <linux/gfs2_ondisk.h> | ||
| 21 | #include <linux/crc32.h> | ||
| 22 | #include <linux/lm_interface.h> | ||
| 23 | #include <asm/uaccess.h> | ||
| 24 | |||
| 25 | #include "gfs2.h" | ||
| 26 | #include "incore.h" | ||
| 27 | #include "acl.h" | ||
| 28 | #include "bmap.h" | ||
| 29 | #include "dir.h" | ||
| 30 | #include "eaops.h" | ||
| 31 | #include "eattr.h" | ||
| 32 | #include "glock.h" | ||
| 33 | #include "inode.h" | ||
| 34 | #include "meta_io.h" | ||
| 35 | #include "ops_dentry.h" | ||
| 36 | #include "ops_inode.h" | ||
| 37 | #include "quota.h" | ||
| 38 | #include "rgrp.h" | ||
| 39 | #include "trans.h" | ||
| 40 | #include "util.h" | ||
| 41 | |||
/**
 * gfs2_create - Create a file
 * @dir: The directory in which to create the file
 * @dentry: The dentry of the new file
 * @mode: The mode of the new file
 * @nd: lookup intent data from the VFS (used for O_EXCL)
 *
 * Races between create and lookup are resolved by retrying: if
 * gfs2_createi() reports EEXIST (and O_EXCL is not set) we look the
 * name up instead; if it has vanished again by then, loop and retry
 * the create.
 *
 * Returns: errno
 */

static int gfs2_create(struct inode *dir, struct dentry *dentry,
		       int mode, struct nameidata *nd)
{
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct gfs2_holder ghs[2];
	struct inode *inode;

	gfs2_holder_init(dip->i_gl, 0, 0, ghs);

	for (;;) {
		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
		if (!IS_ERR(inode)) {
			/* Success: release the transaction, reservation,
			   quota lock and both holders that gfs2_createi()
			   left held (the second holder is ghs[1]) */
			gfs2_trans_end(sdp);
			if (dip->i_alloc.al_rgd)
				gfs2_inplace_release(dip);
			gfs2_quota_unlock(dip);
			gfs2_alloc_put(dip);
			gfs2_glock_dq_uninit_m(2, ghs);
			mark_inode_dirty(inode);
			break;
		} else if (PTR_ERR(inode) != -EEXIST ||
			   (nd->intent.open.flags & O_EXCL)) {
			gfs2_holder_uninit(ghs);
			return PTR_ERR(inode);
		}

		/* Name already existed: try to just open it instead */
		inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
		if (inode) {
			if (!IS_ERR(inode)) {
				gfs2_holder_uninit(ghs);
				break;
			} else {
				gfs2_holder_uninit(ghs);
				return PTR_ERR(inode);
			}
		}
		/* Lookup found nothing (racing remove?) -- retry create */
	}

	d_instantiate(dentry, inode);

	return 0;
}
| 94 | |||
| 95 | /** | ||
| 96 | * gfs2_lookup - Look up a filename in a directory and return its inode | ||
| 97 | * @dir: The directory inode | ||
| 98 | * @dentry: The dentry of the new inode | ||
| 99 | * @nd: passed from Linux VFS, ignored by us | ||
| 100 | * | ||
| 101 | * Called by the VFS layer. Lock dir and call gfs2_lookupi() | ||
| 102 | * | ||
| 103 | * Returns: errno | ||
| 104 | */ | ||
| 105 | |||
| 106 | static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | ||
| 107 | struct nameidata *nd) | ||
| 108 | { | ||
| 109 | struct inode *inode = NULL; | ||
| 110 | |||
| 111 | dentry->d_op = &gfs2_dops; | ||
| 112 | |||
| 113 | inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); | ||
| 114 | if (inode && IS_ERR(inode)) | ||
| 115 | return ERR_PTR(PTR_ERR(inode)); | ||
| 116 | |||
| 117 | if (inode) | ||
| 118 | return d_splice_alias(inode, dentry); | ||
| 119 | d_add(dentry, inode); | ||
| 120 | |||
| 121 | return NULL; | ||
| 122 | } | ||
| 123 | |||
/**
 * gfs2_link - Link to a file
 * @old_dentry: The inode to link
 * @dir: Add link to this directory
 * @dentry: The name of the link
 *
 * Link the inode in "old_dentry" into the directory "dir" with the
 * name in "dentry".
 *
 * Returns: errno
 */

static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
		     struct dentry *dentry)
{
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct inode *inode = old_dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder ghs[2];
	int alloc_required;
	int error;

	/* Hard links to directories are not allowed */
	if (S_ISDIR(ip->i_di.di_mode))
		return -EPERM;

	/* Take both glocks atomically to avoid ABBA deadlocks */
	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out;

	error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
	if (error)
		goto out_gunlock;

	/* The new name must not already exist */
	error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
	switch (error) {
	case -ENOENT:
		break;
	case 0:
		error = -EEXIST;
		/* fall through */
	default:
		goto out_gunlock;
	}

	/* Sanity/limit checks on both inodes now that they are locked */
	error = -EINVAL;
	if (!dip->i_di.di_nlink)
		goto out_gunlock;
	error = -EFBIG;
	if (dip->i_di.di_entries == (u32)-1)
		goto out_gunlock;
	error = -EPERM;
	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		goto out_gunlock;
	error = -EINVAL;
	if (!ip->i_di.di_nlink)
		goto out_gunlock;
	error = -EMLINK;
	if (ip->i_di.di_nlink == (u32)-1)
		goto out_gunlock;

	alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
	if (error < 0)
		goto out_gunlock;
	error = 0;

	if (alloc_required) {
		/* Adding the dirent needs new blocks: reserve space and
		   quota before opening the transaction */
		struct gfs2_alloc *al = gfs2_alloc_get(dip);

		error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
		if (error)
			goto out_alloc;

		error = gfs2_quota_check(dip, dip->i_di.di_uid,
					 dip->i_di.di_gid);
		if (error)
			goto out_gunlock_q;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve(dip);
		if (error)
			goto out_gunlock_q;

		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
					 al->al_rgd->rd_ri.ri_length +
					 2 * RES_DINODE + RES_STATFS +
					 RES_QUOTA, 0);
		if (error)
			goto out_ipres;
	} else {
		error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
		if (error)
			goto out_ipres;
	}

	error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
			     IF2DT(ip->i_di.di_mode));
	if (error)
		goto out_end_trans;

	error = gfs2_change_nlink(ip, +1);

out_end_trans:
	gfs2_trans_end(sdp);
out_ipres:
	if (alloc_required)
		gfs2_inplace_release(dip);
out_gunlock_q:
	if (alloc_required)
		gfs2_quota_unlock(dip);
out_alloc:
	if (alloc_required)
		gfs2_alloc_put(dip);
out_gunlock:
	gfs2_glock_dq_m(2, ghs);
out:
	gfs2_holder_uninit(ghs);
	gfs2_holder_uninit(ghs + 1);
	if (!error) {
		/* d_instantiate() consumes an inode reference; take one */
		atomic_inc(&inode->i_count);
		d_instantiate(dentry, inode);
		mark_inode_dirty(inode);
	}
	return error;
}
| 252 | |||
/**
 * gfs2_unlink - Unlink a file
 * @dir: The inode of the directory containing the file to unlink
 * @dentry: The file itself
 *
 * Unlink a file: lock both the directory and the victim inode, check
 * that the unlink is permitted, then remove the dirent and drop the
 * victim's link count inside one transaction.
 *
 * Returns: errno
 */

static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
{
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_holder ghs[2];
	int error;

	/* Take both glocks atomically to avoid ABBA deadlocks */
	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out;

	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
	if (error)
		goto out_gunlock;

	error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
	if (error)
		goto out_gunlock;

	error = gfs2_dir_del(dip, &dentry->d_name);
	if (error)
		goto out_end_trans;

	error = gfs2_change_nlink(ip, -1);

out_end_trans:
	gfs2_trans_end(sdp);
out_gunlock:
	gfs2_glock_dq_m(2, ghs);
out:
	gfs2_holder_uninit(ghs);
	gfs2_holder_uninit(ghs + 1);
	return error;
}
| 301 | |||
/**
 * gfs2_symlink - Create a symlink
 * @dir: The directory to create the symlink in
 * @dentry: The dentry to put the symlink in
 * @symname: The thing which the link points to
 *
 * The target string is stored "stuffed" in the dinode block itself,
 * directly after the dinode header, so its length is limited to one
 * block minus the header (and a NUL).
 *
 * Returns: errno
 */

static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
			const char *symname)
{
	struct gfs2_inode *dip = GFS2_I(dir), *ip;
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct gfs2_holder ghs[2];
	struct inode *inode;
	struct buffer_head *dibh;
	int size;
	int error;

	/* Must be stuffed with a null terminator for gfs2_follow_link() */
	size = strlen(symname);
	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
		return -ENAMETOOLONG;

	gfs2_holder_init(dip->i_gl, 0, 0, ghs);

	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
	if (IS_ERR(inode)) {
		gfs2_holder_uninit(ghs);
		return PTR_ERR(inode);
	}

	/* gfs2_createi() initialized ghs[1] for the new inode */
	ip = ghs[1].gh_gl->gl_object;

	ip->i_di.di_size = size;

	error = gfs2_meta_inode_buffer(ip, &dibh);

	if (!gfs2_assert_withdraw(sdp, !error)) {
		/* Write the dinode and the target string into the block */
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
		       size);
		brelse(dibh);
	}

	/* Release everything gfs2_createi() left held */
	gfs2_trans_end(sdp);
	if (dip->i_alloc.al_rgd)
		gfs2_inplace_release(dip);
	gfs2_quota_unlock(dip);
	gfs2_alloc_put(dip);

	gfs2_glock_dq_uninit_m(2, ghs);

	d_instantiate(dentry, inode);
	mark_inode_dirty(inode);

	return 0;
}
| 361 | |||
/**
 * gfs2_mkdir - Make a directory
 * @dir: The parent directory of the new one
 * @dentry: The dentry of the new directory
 * @mode: The mode of the new directory
 *
 * Creates the directory inode and hand-builds its initial "." and ".."
 * entries directly in the (stuffed, jdata) dinode block.
 *
 * Returns: errno
 */

static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct gfs2_inode *dip = GFS2_I(dir), *ip;
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct gfs2_holder ghs[2];
	struct inode *inode;
	struct buffer_head *dibh;
	int error;

	gfs2_holder_init(dip->i_gl, 0, 0, ghs);

	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
	if (IS_ERR(inode)) {
		gfs2_holder_uninit(ghs);
		return PTR_ERR(inode);
	}

	/* gfs2_createi() initialized ghs[1] for the new inode */
	ip = ghs[1].gh_gl->gl_object;

	/* New directory: link count 2 ("." and the parent's entry),
	   stuffed directory data filling the rest of the dinode block */
	ip->i_di.di_nlink = 2;
	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
	ip->i_di.di_flags |= GFS2_DIF_JDATA;
	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
	ip->i_di.di_entries = 2;

	error = gfs2_meta_inode_buffer(ip, &dibh);

	if (!gfs2_assert_withdraw(sdp, !error)) {
		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
		struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
		struct qstr str;

		/* Build "." pointing at the new directory itself */
		gfs2_str2qstr(&str, ".");
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent);
		dent->de_inum = di->di_num; /* already GFS2 endian */
		dent->de_type = cpu_to_be16(DT_DIR);
		di->di_entries = cpu_to_be32(1);

		/* Build ".." pointing at the parent; it takes the rest
		   of the block's dirent space */
		gfs2_str2qstr(&str, "..");
		dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
		gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);

		gfs2_inum_out(&dip->i_num, &dent->de_inum);
		dent->de_type = cpu_to_be16(DT_DIR);

		gfs2_dinode_out(&ip->i_di, di);

		brelse(dibh);
	}

	/* The parent gains a link from the new "..".  The parent dinode
	   is already part of this transaction, so this cannot fail. */
	error = gfs2_change_nlink(dip, +1);
	gfs2_assert_withdraw(sdp, !error); /* dip already pinned */

	/* Release everything gfs2_createi() left held */
	gfs2_trans_end(sdp);
	if (dip->i_alloc.al_rgd)
		gfs2_inplace_release(dip);
	gfs2_quota_unlock(dip);
	gfs2_alloc_put(dip);

	gfs2_glock_dq_uninit_m(2, ghs);

	d_instantiate(dentry, inode);
	mark_inode_dirty(inode);

	return 0;
}
| 438 | |||
/**
 * gfs2_rmdir - Remove a directory
 * @dir: The parent directory of the directory to be removed
 * @dentry: The dentry of the directory to remove
 *
 * Remove a directory. Call gfs2_rmdiri()
 *
 * Returns: errno
 */

static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_sbd *sdp = GFS2_SB(dir);
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_holder ghs[2];
	int error;

	/* Lock both the parent and the victim exclusively;
	   gfs2_glock_nq_m() acquires the set in a safe order. */
	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out;

	/* Permission and sanity checks for removing this entry */
	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
	if (error)
		goto out_gunlock;

	/* A directory must hold at least "." and ".."; fewer means
	   on-disk inconsistency */
	if (ip->i_di.di_entries < 2) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		error = -EIO;
		goto out_gunlock;
	}
	/* More than "." and ".." means the directory is not empty */
	if (ip->i_di.di_entries > 2) {
		error = -ENOTEMPTY;
		goto out_gunlock;
	}

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
	if (error)
		goto out_gunlock;

	error = gfs2_rmdiri(dip, &dentry->d_name, ip);

	gfs2_trans_end(sdp);

out_gunlock:
	gfs2_glock_dq_m(2, ghs);
out:
	/* Holders were initialized unconditionally, so always uninit both */
	gfs2_holder_uninit(ghs);
	gfs2_holder_uninit(ghs + 1);
	return error;
}
| 494 | |||
| 495 | /** | ||
| 496 | * gfs2_mknod - Make a special file | ||
| 497 | * @dir: The directory in which the special file will reside | ||
| 498 | * @dentry: The dentry of the special file | ||
| 499 | * @mode: The mode of the special file | ||
| 500 | * @rdev: The device specification of the special file | ||
| 501 | * | ||
| 502 | */ | ||
| 503 | |||
| 504 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | ||
| 505 | dev_t dev) | ||
| 506 | { | ||
| 507 | struct gfs2_inode *dip = GFS2_I(dir), *ip; | ||
| 508 | struct gfs2_sbd *sdp = GFS2_SB(dir); | ||
| 509 | struct gfs2_holder ghs[2]; | ||
| 510 | struct inode *inode; | ||
| 511 | struct buffer_head *dibh; | ||
| 512 | u32 major = 0, minor = 0; | ||
| 513 | int error; | ||
| 514 | |||
| 515 | switch (mode & S_IFMT) { | ||
| 516 | case S_IFBLK: | ||
| 517 | case S_IFCHR: | ||
| 518 | major = MAJOR(dev); | ||
| 519 | minor = MINOR(dev); | ||
| 520 | break; | ||
| 521 | case S_IFIFO: | ||
| 522 | case S_IFSOCK: | ||
| 523 | break; | ||
| 524 | default: | ||
| 525 | return -EOPNOTSUPP; | ||
| 526 | }; | ||
| 527 | |||
| 528 | gfs2_holder_init(dip->i_gl, 0, 0, ghs); | ||
| 529 | |||
| 530 | inode = gfs2_createi(ghs, &dentry->d_name, mode); | ||
| 531 | if (IS_ERR(inode)) { | ||
| 532 | gfs2_holder_uninit(ghs); | ||
| 533 | return PTR_ERR(inode); | ||
| 534 | } | ||
| 535 | |||
| 536 | ip = ghs[1].gh_gl->gl_object; | ||
| 537 | |||
| 538 | ip->i_di.di_major = major; | ||
| 539 | ip->i_di.di_minor = minor; | ||
| 540 | |||
| 541 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 542 | |||
| 543 | if (!gfs2_assert_withdraw(sdp, !error)) { | ||
| 544 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
| 545 | brelse(dibh); | ||
| 546 | } | ||
| 547 | |||
| 548 | gfs2_trans_end(sdp); | ||
| 549 | if (dip->i_alloc.al_rgd) | ||
| 550 | gfs2_inplace_release(dip); | ||
| 551 | gfs2_quota_unlock(dip); | ||
| 552 | gfs2_alloc_put(dip); | ||
| 553 | |||
| 554 | gfs2_glock_dq_uninit_m(2, ghs); | ||
| 555 | |||
| 556 | d_instantiate(dentry, inode); | ||
| 557 | mark_inode_dirty(inode); | ||
| 558 | |||
| 559 | return 0; | ||
| 560 | } | ||
| 561 | |||
/**
 * gfs2_rename - Rename a file
 * @odir: Parent directory of old file name
 * @odentry: The old dentry of the file
 * @ndir: Parent directory of new file name
 * @ndentry: The new dentry of the file
 *
 * Returns: errno
 */

static int gfs2_rename(struct inode *odir, struct dentry *odentry,
		       struct inode *ndir, struct dentry *ndentry)
{
	struct gfs2_inode *odip = GFS2_I(odir);
	struct gfs2_inode *ndip = GFS2_I(ndir);
	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
	struct gfs2_inode *nip = NULL;
	struct gfs2_sbd *sdp = GFS2_SB(odir);
	struct gfs2_holder ghs[4], r_gh;
	unsigned int num_gh;
	int dir_rename = 0;
	int alloc_required;
	unsigned int x;
	int error;

	/* Renaming an inode onto itself is a no-op */
	if (ndentry->d_inode) {
		nip = GFS2_I(ndentry->d_inode);
		if (ip == nip)
			return 0;
	}

	/* Make sure we aren't trying to move a directory into its own
	   subdirectory.  The filesystem-wide rename glock is taken
	   exclusively first so the gfs2_ok_to_move() ancestry walk
	   cannot race with another rename. */

	if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
		dir_rename = 1;

		error = gfs2_glock_nq_init(sdp->sd_rename_gl,
					   LM_ST_EXCLUSIVE, 0,
					   &r_gh);
		if (error)
			goto out;

		error = gfs2_ok_to_move(ip, ndip);
		if (error)
			goto out_gunlock_r;
	}

	/* Build holders for every distinct inode involved (2-4 of them);
	   gfs2_glock_nq_m() acquires the whole set in a safe order */
	num_gh = 1;
	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	if (odip != ndip) {
		gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
		num_gh++;
	}
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
	num_gh++;

	if (nip) {
		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
		num_gh++;
	}

	error = gfs2_glock_nq_m(num_gh, ghs);
	if (error)
		goto out_uninit;

	/* Check out the old directory */

	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
	if (error)
		goto out_gunlock;

	/* Check out the new directory */

	if (nip) {
		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
		if (error)
			goto out_gunlock;

		/* An existing target directory may only be replaced if it
		   is empty, i.e. holds exactly "." and ".." */
		if (S_ISDIR(nip->i_di.di_mode)) {
			if (nip->i_di.di_entries < 2) {
				if (gfs2_consist_inode(nip))
					gfs2_dinode_print(&nip->i_di);
				error = -EIO;
				goto out_gunlock;
			}
			if (nip->i_di.di_entries > 2) {
				error = -ENOTEMPTY;
				goto out_gunlock;
			}
		}
	} else {
		error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
		if (error)
			goto out_gunlock;

		/* The new name must not already exist */
		error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
		switch (error) {
		case -ENOENT:
			error = 0;
			break;
		case 0:
			error = -EEXIST;
			/* fall through */
		default:
			goto out_gunlock;
		};

		/* Cross-directory move: sanity-check the target parent */
		if (odip != ndip) {
			if (!ndip->i_di.di_nlink) {
				error = -EINVAL;
				goto out_gunlock;
			}
			if (ndip->i_di.di_entries == (u32)-1) {
				error = -EFBIG;
				goto out_gunlock;
			}
			if (S_ISDIR(ip->i_di.di_mode) &&
			    ndip->i_di.di_nlink == (u32)-1) {
				error = -EMLINK;
				goto out_gunlock;
			}
		}
	}

	/* Check out the dir to be renamed */

	if (dir_rename) {
		error = permission(odentry->d_inode, MAY_WRITE, NULL);
		if (error)
			goto out_gunlock;
	}

	/* Negative means error; 0/1 says whether adding the new entry
	   needs block allocation in the target directory */
	alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
	if (error < 0)
		goto out_gunlock;
	error = 0;

	if (alloc_required) {
		/* Reserve blocks and check quota before starting the
		   (larger) transaction */
		struct gfs2_alloc *al = gfs2_alloc_get(ndip);

		error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
		if (error)
			goto out_alloc;

		error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
					 ndip->i_di.di_gid);
		if (error)
			goto out_gunlock_q;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve(ndip);
		if (error)
			goto out_gunlock_q;

		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
					 al->al_rgd->rd_ri.ri_length +
					 4 * RES_DINODE + 4 * RES_LEAF +
					 RES_STATFS + RES_QUOTA, 0);
		if (error)
			goto out_ipreserv;
	} else {
		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
					 5 * RES_LEAF, 0);
		if (error)
			goto out_gunlock;
	}

	/* Remove the target file, if it exists */

	if (nip) {
		if (S_ISDIR(nip->i_di.di_mode))
			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
		else {
			error = gfs2_dir_del(ndip, &ndentry->d_name);
			if (error)
				goto out_end_trans;
			error = gfs2_change_nlink(nip, -1);
		}
		if (error)
			goto out_end_trans;
	}

	if (dir_rename) {
		/* The moved directory's ".." now refers to ndip: adjust
		   both parents' link counts and rewrite the ".." entry */
		struct qstr name;
		gfs2_str2qstr(&name, "..");

		error = gfs2_change_nlink(ndip, +1);
		if (error)
			goto out_end_trans;
		error = gfs2_change_nlink(odip, -1);
		if (error)
			goto out_end_trans;

		error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
		if (error)
			goto out_end_trans;
	} else {
		/* Non-directory-rename case: just bump ctime on the dinode */
		struct buffer_head *dibh;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error)
			goto out_end_trans;
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	/* Finally move the entry itself: delete old name, add new one */
	error = gfs2_dir_del(odip, &odentry->d_name);
	if (error)
		goto out_end_trans;

	error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
			     IF2DT(ip->i_di.di_mode));
	if (error)
		goto out_end_trans;

out_end_trans:
	gfs2_trans_end(sdp);
out_ipreserv:
	if (alloc_required)
		gfs2_inplace_release(ndip);
out_gunlock_q:
	if (alloc_required)
		gfs2_quota_unlock(ndip);
out_alloc:
	if (alloc_required)
		gfs2_alloc_put(ndip);
out_gunlock:
	gfs2_glock_dq_m(num_gh, ghs);
out_uninit:
	for (x = 0; x < num_gh; x++)
		gfs2_holder_uninit(ghs + x);
out_gunlock_r:
	if (dir_rename)
		gfs2_glock_dq_uninit(&r_gh);
out:
	return error;
}
| 800 | |||
| 801 | /** | ||
| 802 | * gfs2_readlink - Read the value of a symlink | ||
| 803 | * @dentry: the symlink | ||
| 804 | * @buf: the buffer to read the symlink data into | ||
| 805 | * @size: the size of the buffer | ||
| 806 | * | ||
| 807 | * Returns: errno | ||
| 808 | */ | ||
| 809 | |||
| 810 | static int gfs2_readlink(struct dentry *dentry, char __user *user_buf, | ||
| 811 | int user_size) | ||
| 812 | { | ||
| 813 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | ||
| 814 | char array[GFS2_FAST_NAME_SIZE], *buf = array; | ||
| 815 | unsigned int len = GFS2_FAST_NAME_SIZE; | ||
| 816 | int error; | ||
| 817 | |||
| 818 | error = gfs2_readlinki(ip, &buf, &len); | ||
| 819 | if (error) | ||
| 820 | return error; | ||
| 821 | |||
| 822 | if (user_size > len - 1) | ||
| 823 | user_size = len - 1; | ||
| 824 | |||
| 825 | if (copy_to_user(user_buf, buf, user_size)) | ||
| 826 | error = -EFAULT; | ||
| 827 | else | ||
| 828 | error = user_size; | ||
| 829 | |||
| 830 | if (buf != array) | ||
| 831 | kfree(buf); | ||
| 832 | |||
| 833 | return error; | ||
| 834 | } | ||
| 835 | |||
| 836 | /** | ||
| 837 | * gfs2_follow_link - Follow a symbolic link | ||
| 838 | * @dentry: The dentry of the link | ||
| 839 | * @nd: Data that we pass to vfs_follow_link() | ||
| 840 | * | ||
| 841 | * This can handle symlinks of any size. It is optimised for symlinks | ||
| 842 | * under GFS2_FAST_NAME_SIZE. | ||
| 843 | * | ||
| 844 | * Returns: 0 on success or error code | ||
| 845 | */ | ||
| 846 | |||
| 847 | static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
| 848 | { | ||
| 849 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | ||
| 850 | char array[GFS2_FAST_NAME_SIZE], *buf = array; | ||
| 851 | unsigned int len = GFS2_FAST_NAME_SIZE; | ||
| 852 | int error; | ||
| 853 | |||
| 854 | error = gfs2_readlinki(ip, &buf, &len); | ||
| 855 | if (!error) { | ||
| 856 | error = vfs_follow_link(nd, buf); | ||
| 857 | if (buf != array) | ||
| 858 | kfree(buf); | ||
| 859 | } | ||
| 860 | |||
| 861 | return ERR_PTR(error); | ||
| 862 | } | ||
| 863 | |||
/**
 * gfs2_permission - Check whether an access to an inode is permitted
 * @inode: the inode being accessed
 * @mask: the access mask being requested
 * @nd: passed from Linux VFS, ignored by us
 *
 * Returns: errno
 */

static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	int error;

	/* If the inode's version matches the glock's, the cached copy is
	   current and no lock needs to be taken */
	if (ip->i_vn == ip->i_gl->gl_vn)
		return generic_permission(inode, mask, gfs2_check_acl);

	/* Otherwise take a shared glock to refresh, then use the
	   locked ACL-check variant */
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (!error) {
		error = generic_permission(inode, mask, gfs2_check_acl_locked);
		gfs2_glock_dq_uninit(&i_gh);
	}

	return error;
}
| 890 | |||
| 891 | static int setattr_size(struct inode *inode, struct iattr *attr) | ||
| 892 | { | ||
| 893 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 894 | int error; | ||
| 895 | |||
| 896 | if (attr->ia_size != ip->i_di.di_size) { | ||
| 897 | error = vmtruncate(inode, attr->ia_size); | ||
| 898 | if (error) | ||
| 899 | return error; | ||
| 900 | } | ||
| 901 | |||
| 902 | error = gfs2_truncatei(ip, attr->ia_size); | ||
| 903 | if (error) | ||
| 904 | return error; | ||
| 905 | |||
| 906 | return error; | ||
| 907 | } | ||
| 908 | |||
/**
 * setattr_chown - handle the ATTR_UID/ATTR_GID part of gfs2_setattr()
 * @inode: the inode whose ownership is changing
 * @attr: the attributes; ia_valid, ia_uid and ia_gid are used here
 *
 * Returns: errno
 */
static int setattr_chown(struct inode *inode, struct iattr *attr)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh;
	u32 ouid, ogid, nuid, ngid;
	int error;

	ouid = ip->i_di.di_uid;
	ogid = ip->i_di.di_gid;
	nuid = attr->ia_uid;
	ngid = attr->ia_gid;

	/* An id that isn't actually changing needs no quota transfer */
	if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
		ouid = nuid = NO_QUOTA_CHANGE;
	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
		ogid = ngid = NO_QUOTA_CHANGE;

	gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, nuid, ngid);
	if (error)
		goto out_alloc;

	/* Make sure the new owner/group has room for this inode's blocks */
	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
		error = gfs2_quota_check(ip, nuid, ngid);
		if (error)
			goto out_gunlock_q;
	}

	error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
	if (error)
		goto out_gunlock_q;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out_end_trans;

	/* Update the VFS inode, then mirror the change into the dinode */
	error = inode_setattr(inode, attr);
	gfs2_assert_warn(sdp, !error);
	gfs2_inode_attr_out(ip);

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

	/* Move this inode's block usage from the old ids to the new ones */
	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
		gfs2_quota_change(ip, -ip->i_di.di_blocks, ouid, ogid);
		gfs2_quota_change(ip, ip->i_di.di_blocks, nuid, ngid);
	}

out_end_trans:
	gfs2_trans_end(sdp);
out_gunlock_q:
	gfs2_quota_unlock(ip);
out_alloc:
	gfs2_alloc_put(ip);
	return error;
}
| 968 | |||
/**
 * gfs2_setattr - Change attributes on an inode
 * @dentry: The dentry which is changing
 * @attr: The structure describing the change
 *
 * The VFS layer wants to change one or more of an inodes attributes.  Write
 * that change out to disk.
 *
 * Returns: errno
 */

static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	int error;

	/* All attribute changes happen under an exclusive glock */
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return error;

	error = -EPERM;
	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		goto out;

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	/* Dispatch to the right handler: size, ownership, and
	   ACL-affecting mode changes each need their own transaction
	   handling; everything else is a simple dinode update */
	if (attr->ia_valid & ATTR_SIZE)
		error = setattr_size(inode, attr);
	else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
		error = setattr_chown(inode, attr);
	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
		error = gfs2_acl_chmod(ip, attr);
	else
		error = gfs2_setattr_simple(ip, attr);

out:
	gfs2_glock_dq_uninit(&i_gh);
	if (!error)
		mark_inode_dirty(inode);
	return error;
}
| 1014 | |||
| 1015 | /** | ||
| 1016 | * gfs2_getattr - Read out an inode's attributes | ||
| 1017 | * @mnt: The vfsmount the inode is being accessed from | ||
| 1018 | * @dentry: The dentry to stat | ||
| 1019 | * @stat: The inode's stats | ||
| 1020 | * | ||
| 1021 | * Returns: errno | ||
| 1022 | */ | ||
| 1023 | |||
| 1024 | static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 1025 | struct kstat *stat) | ||
| 1026 | { | ||
| 1027 | struct inode *inode = dentry->d_inode; | ||
| 1028 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1029 | struct gfs2_holder gh; | ||
| 1030 | int error; | ||
| 1031 | |||
| 1032 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | ||
| 1033 | if (!error) { | ||
| 1034 | generic_fillattr(inode, stat); | ||
| 1035 | gfs2_glock_dq_uninit(&gh); | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | return error; | ||
| 1039 | } | ||
| 1040 | |||
| 1041 | static int gfs2_setxattr(struct dentry *dentry, const char *name, | ||
| 1042 | const void *data, size_t size, int flags) | ||
| 1043 | { | ||
| 1044 | struct inode *inode = dentry->d_inode; | ||
| 1045 | struct gfs2_ea_request er; | ||
| 1046 | |||
| 1047 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | ||
| 1048 | er.er_type = gfs2_ea_name2type(name, &er.er_name); | ||
| 1049 | if (er.er_type == GFS2_EATYPE_UNUSED) | ||
| 1050 | return -EOPNOTSUPP; | ||
| 1051 | er.er_data = (char *)data; | ||
| 1052 | er.er_name_len = strlen(er.er_name); | ||
| 1053 | er.er_data_len = size; | ||
| 1054 | er.er_flags = flags; | ||
| 1055 | |||
| 1056 | gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE)); | ||
| 1057 | |||
| 1058 | return gfs2_ea_set(GFS2_I(inode), &er); | ||
| 1059 | } | ||
| 1060 | |||
| 1061 | static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, | ||
| 1062 | void *data, size_t size) | ||
| 1063 | { | ||
| 1064 | struct gfs2_ea_request er; | ||
| 1065 | |||
| 1066 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | ||
| 1067 | er.er_type = gfs2_ea_name2type(name, &er.er_name); | ||
| 1068 | if (er.er_type == GFS2_EATYPE_UNUSED) | ||
| 1069 | return -EOPNOTSUPP; | ||
| 1070 | er.er_data = data; | ||
| 1071 | er.er_name_len = strlen(er.er_name); | ||
| 1072 | er.er_data_len = size; | ||
| 1073 | |||
| 1074 | return gfs2_ea_get(GFS2_I(dentry->d_inode), &er); | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
| 1078 | { | ||
| 1079 | struct gfs2_ea_request er; | ||
| 1080 | |||
| 1081 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | ||
| 1082 | er.er_data = (size) ? buffer : NULL; | ||
| 1083 | er.er_data_len = size; | ||
| 1084 | |||
| 1085 | return gfs2_ea_list(GFS2_I(dentry->d_inode), &er); | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | static int gfs2_removexattr(struct dentry *dentry, const char *name) | ||
| 1089 | { | ||
| 1090 | struct gfs2_ea_request er; | ||
| 1091 | |||
| 1092 | memset(&er, 0, sizeof(struct gfs2_ea_request)); | ||
| 1093 | er.er_type = gfs2_ea_name2type(name, &er.er_name); | ||
| 1094 | if (er.er_type == GFS2_EATYPE_UNUSED) | ||
| 1095 | return -EOPNOTSUPP; | ||
| 1096 | er.er_name_len = strlen(er.er_name); | ||
| 1097 | |||
| 1098 | return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er); | ||
| 1099 | } | ||
| 1100 | |||
/* Inode operations for regular files */
struct inode_operations gfs2_file_iops = {
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
| 1110 | |||
/* Inode operations for device nodes, FIFOs and sockets */
struct inode_operations gfs2_dev_iops = {
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
| 1120 | |||
/* Inode operations for directories: full namespace operations plus
   the common attribute/xattr handlers */
struct inode_operations gfs2_dir_iops = {
	.create = gfs2_create,
	.lookup = gfs2_lookup,
	.link = gfs2_link,
	.unlink = gfs2_unlink,
	.symlink = gfs2_symlink,
	.mkdir = gfs2_mkdir,
	.rmdir = gfs2_rmdir,
	.mknod = gfs2_mknod,
	.rename = gfs2_rename,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
| 1139 | |||
/* Inode operations for symbolic links */
struct inode_operations gfs2_symlink_iops = {
	.readlink = gfs2_readlink,
	.follow_link = gfs2_follow_link,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
| 1151 | |||
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h new file mode 100644 index 000000000000..b15acb4fd34c --- /dev/null +++ b/fs/gfs2/ops_inode.h | |||
| @@ -0,0 +1,20 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __OPS_INODE_DOT_H__
#define __OPS_INODE_DOT_H__

#include <linux/fs.h>

/* Per-inode-type operation tables, defined in ops_inode.c */
extern struct inode_operations gfs2_file_iops;
extern struct inode_operations gfs2_dir_iops;
extern struct inode_operations gfs2_symlink_iops;
extern struct inode_operations gfs2_dev_iops;

#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c new file mode 100644 index 000000000000..06f06f7773d0 --- /dev/null +++ b/fs/gfs2/ops_super.c | |||
| @@ -0,0 +1,468 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/statfs.h> | ||
| 16 | #include <linux/seq_file.h> | ||
| 17 | #include <linux/mount.h> | ||
| 18 | #include <linux/kthread.h> | ||
| 19 | #include <linux/delay.h> | ||
| 20 | #include <linux/gfs2_ondisk.h> | ||
| 21 | #include <linux/crc32.h> | ||
| 22 | #include <linux/lm_interface.h> | ||
| 23 | |||
| 24 | #include "gfs2.h" | ||
| 25 | #include "incore.h" | ||
| 26 | #include "glock.h" | ||
| 27 | #include "inode.h" | ||
| 28 | #include "lm.h" | ||
| 29 | #include "log.h" | ||
| 30 | #include "mount.h" | ||
| 31 | #include "ops_super.h" | ||
| 32 | #include "quota.h" | ||
| 33 | #include "recovery.h" | ||
| 34 | #include "rgrp.h" | ||
| 35 | #include "super.h" | ||
| 36 | #include "sys.h" | ||
| 37 | #include "util.h" | ||
| 38 | #include "trans.h" | ||
| 39 | #include "dir.h" | ||
| 40 | #include "eattr.h" | ||
| 41 | #include "bmap.h" | ||
| 42 | |||
/**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
 * @sync: synchronous write flag
 *
 * Returns: errno (always 0 here)
 */

static int gfs2_write_inode(struct inode *inode, int sync)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	/* Check this is a "normal" inode
	   (NOTE(review): relies on i_private being set on regular GFS2
	   inodes — confirm against the inode creation path) */
	if (inode->i_private) {
		/* Don't touch the log when called from memory reclaim */
		if (current->flags & PF_MEMALLOC)
			return 0;
		/* Flushing the log makes the dinode's journaled state stable */
		if (sync)
			gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
	}

	return 0;
}
| 65 | |||
/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 */

static void gfs2_put_super(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	if (!sdp)
		return;

	/* The "gfs2meta" filesystem type has nothing of its own to
	   tear down here */
	if (!strncmp(sb->s_type->name, "gfs2meta", 8))
		return;	/* Nothing to do */

	/* Unfreeze the filesystem, if we need to */

	mutex_lock(&sdp->sd_freeze_lock);
	if (sdp->sd_freeze_count)
		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
	mutex_unlock(&sdp->sd_freeze_lock);

	/* Stop all daemon threads before dismantling their state */
	kthread_stop(sdp->sd_quotad_process);
	kthread_stop(sdp->sd_logd_process);
	kthread_stop(sdp->sd_recoverd_process);
	while (sdp->sd_glockd_num--)
		kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
	kthread_stop(sdp->sd_scand_process);

	if (!(sb->s_flags & MS_RDONLY)) {
		error = gfs2_make_fs_ro(sdp);
		if (error)
			gfs2_io_error(sdp);
	}
	/* At this point, we're through modifying the disk */

	/* Release stuff */

	iput(sdp->sd_master_dir);
	iput(sdp->sd_jindex);
	iput(sdp->sd_inum_inode);
	iput(sdp->sd_statfs_inode);
	iput(sdp->sd_rindex);
	iput(sdp->sd_quota_inode);

	gfs2_glock_put(sdp->sd_rename_gl);
	gfs2_glock_put(sdp->sd_trans_gl);

	/* Spectator mounts never acquired the journal/per-node state */
	if (!sdp->sd_args.ar_spectator) {
		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
		iput(sdp->sd_ir_inode);
		iput(sdp->sd_sc_inode);
		iput(sdp->sd_qc_inode);
	}

	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
	gfs2_clear_rgrpd(sdp);
	gfs2_jindex_free(sdp);
	/* Take apart glock structures and buffer lists */
	gfs2_gl_hash_clear(sdp, WAIT);
	/* Unmount the locking protocol */
	gfs2_lm_unmount(sdp);

	/* At this point, we're through participating in the lockspace */
	gfs2_sys_fs_del(sdp);
	kfree(sdp);
}
| 139 | |||
| 140 | /** | ||
| 141 | * gfs2_write_super - disk commit all incore transactions | ||
| 142 | * @sb: the filesystem | ||
| 143 | * | ||
| 144 | * This function is called every time sync(2) is called. | ||
| 145 | * After this exits, all dirty buffers are synced. | ||
| 146 | */ | ||
| 147 | |||
| 148 | static void gfs2_write_super(struct super_block *sb) | ||
| 149 | { | ||
| 150 | gfs2_log_flush(sb->s_fs_info, NULL); | ||
| 151 | } | ||
| 152 | |||
| 153 | /** | ||
| 154 | * gfs2_write_super_lockfs - prevent further writes to the filesystem | ||
| 155 | * @sb: the VFS structure for the filesystem | ||
| 156 | * | ||
| 157 | */ | ||
| 158 | |||
| 159 | static void gfs2_write_super_lockfs(struct super_block *sb) | ||
| 160 | { | ||
| 161 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 162 | int error; | ||
| 163 | |||
| 164 | for (;;) { | ||
| 165 | error = gfs2_freeze_fs(sdp); | ||
| 166 | if (!error) | ||
| 167 | break; | ||
| 168 | |||
| 169 | switch (error) { | ||
| 170 | case -EBUSY: | ||
| 171 | fs_err(sdp, "waiting for recovery before freeze\n"); | ||
| 172 | break; | ||
| 173 | |||
| 174 | default: | ||
| 175 | fs_err(sdp, "error freezing FS: %d\n", error); | ||
| 176 | break; | ||
| 177 | } | ||
| 178 | |||
| 179 | fs_err(sdp, "retrying...\n"); | ||
| 180 | msleep(1000); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
/**
 * gfs2_unlockfs - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 * Undoes gfs2_write_super_lockfs() by unfreezing the filesystem.
 */

static void gfs2_unlockfs(struct super_block *sb)
{
	gfs2_unfreeze_fs(sb->s_fs_info);
}
| 194 | |||
| 195 | /** | ||
| 196 | * gfs2_statfs - Gather and return stats about the filesystem | ||
| 197 | * @sb: The superblock | ||
| 198 | * @statfsbuf: The buffer | ||
| 199 | * | ||
| 200 | * Returns: 0 on success or error code | ||
| 201 | */ | ||
| 202 | |||
| 203 | static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
| 204 | { | ||
| 205 | struct super_block *sb = dentry->d_inode->i_sb; | ||
| 206 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 207 | struct gfs2_statfs_change sc; | ||
| 208 | int error; | ||
| 209 | |||
| 210 | if (gfs2_tune_get(sdp, gt_statfs_slow)) | ||
| 211 | error = gfs2_statfs_slow(sdp, &sc); | ||
| 212 | else | ||
| 213 | error = gfs2_statfs_i(sdp, &sc); | ||
| 214 | |||
| 215 | if (error) | ||
| 216 | return error; | ||
| 217 | |||
| 218 | buf->f_type = GFS2_MAGIC; | ||
| 219 | buf->f_bsize = sdp->sd_sb.sb_bsize; | ||
| 220 | buf->f_blocks = sc.sc_total; | ||
| 221 | buf->f_bfree = sc.sc_free; | ||
| 222 | buf->f_bavail = sc.sc_free; | ||
| 223 | buf->f_files = sc.sc_dinodes + sc.sc_free; | ||
| 224 | buf->f_ffree = sc.sc_free; | ||
| 225 | buf->f_namelen = GFS2_FNAMESIZE; | ||
| 226 | |||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | |||
| 230 | /** | ||
| 231 | * gfs2_remount_fs - called when the FS is remounted | ||
| 232 | * @sb: the filesystem | ||
| 233 | * @flags: the remount flags | ||
| 234 | * @data: extra data passed in (not used right now) | ||
| 235 | * | ||
| 236 | * Returns: errno | ||
| 237 | */ | ||
| 238 | |||
| 239 | static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | ||
| 240 | { | ||
| 241 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 242 | int error; | ||
| 243 | |||
| 244 | error = gfs2_mount_args(sdp, data, 1); | ||
| 245 | if (error) | ||
| 246 | return error; | ||
| 247 | |||
| 248 | if (sdp->sd_args.ar_spectator) | ||
| 249 | *flags |= MS_RDONLY; | ||
| 250 | else { | ||
| 251 | if (*flags & MS_RDONLY) { | ||
| 252 | if (!(sb->s_flags & MS_RDONLY)) | ||
| 253 | error = gfs2_make_fs_ro(sdp); | ||
| 254 | } else if (!(*flags & MS_RDONLY) && | ||
| 255 | (sb->s_flags & MS_RDONLY)) { | ||
| 256 | error = gfs2_make_fs_rw(sdp); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | if (*flags & (MS_NOATIME | MS_NODIRATIME)) | ||
| 261 | set_bit(SDF_NOATIME, &sdp->sd_flags); | ||
| 262 | else | ||
| 263 | clear_bit(SDF_NOATIME, &sdp->sd_flags); | ||
| 264 | |||
| 265 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
| 266 | *flags |= MS_NOATIME | MS_NODIRATIME; | ||
| 267 | |||
| 268 | return error; | ||
| 269 | } | ||
| 270 | |||
/**
 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
 * @inode: The VFS inode
 *
 * Detaches the inode from its glocks.  Inodes that exist only to hold
 * an address space have no i_private and no glocks, so nothing to do.
 */

static void gfs2_clear_inode(struct inode *inode)
{
	/* This tells us its a "real" inode and not one which only
	 * serves to contain an address space (see rgrp.c, meta_io.c)
	 * which therefore doesn't have its own glocks.
	 */
	if (inode->i_private) {
		struct gfs2_inode *ip = GFS2_I(inode);
		gfs2_glock_inode_squish(inode);
		/* The inode glock must already be unlocked at this point */
		gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
		/* Break the glock -> inode back-pointer before dropping it */
		ip->i_gl->gl_object = NULL;
		gfs2_glock_schedule_for_reclaim(ip->i_gl);
		gfs2_glock_put(ip->i_gl);
		ip->i_gl = NULL;
		/* Drop the iopen glock holder, if one was ever acquired */
		if (ip->i_iopen_gh.gh_gl)
			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
	}
}
| 295 | |||
| 296 | /** | ||
| 297 | * gfs2_show_options - Show mount options for /proc/mounts | ||
| 298 | * @s: seq_file structure | ||
| 299 | * @mnt: vfsmount | ||
| 300 | * | ||
| 301 | * Returns: 0 on success or error code | ||
| 302 | */ | ||
| 303 | |||
| 304 | static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | ||
| 305 | { | ||
| 306 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | ||
| 307 | struct gfs2_args *args = &sdp->sd_args; | ||
| 308 | |||
| 309 | if (args->ar_lockproto[0]) | ||
| 310 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | ||
| 311 | if (args->ar_locktable[0]) | ||
| 312 | seq_printf(s, ",locktable=%s", args->ar_locktable); | ||
| 313 | if (args->ar_hostdata[0]) | ||
| 314 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | ||
| 315 | if (args->ar_spectator) | ||
| 316 | seq_printf(s, ",spectator"); | ||
| 317 | if (args->ar_ignore_local_fs) | ||
| 318 | seq_printf(s, ",ignore_local_fs"); | ||
| 319 | if (args->ar_localflocks) | ||
| 320 | seq_printf(s, ",localflocks"); | ||
| 321 | if (args->ar_localcaching) | ||
| 322 | seq_printf(s, ",localcaching"); | ||
| 323 | if (args->ar_debug) | ||
| 324 | seq_printf(s, ",debug"); | ||
| 325 | if (args->ar_upgrade) | ||
| 326 | seq_printf(s, ",upgrade"); | ||
| 327 | if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT) | ||
| 328 | seq_printf(s, ",num_glockd=%u", args->ar_num_glockd); | ||
| 329 | if (args->ar_posix_acl) | ||
| 330 | seq_printf(s, ",acl"); | ||
| 331 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | ||
| 332 | char *state; | ||
| 333 | switch (args->ar_quota) { | ||
| 334 | case GFS2_QUOTA_OFF: | ||
| 335 | state = "off"; | ||
| 336 | break; | ||
| 337 | case GFS2_QUOTA_ACCOUNT: | ||
| 338 | state = "account"; | ||
| 339 | break; | ||
| 340 | case GFS2_QUOTA_ON: | ||
| 341 | state = "on"; | ||
| 342 | break; | ||
| 343 | default: | ||
| 344 | state = "unknown"; | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | seq_printf(s, ",quota=%s", state); | ||
| 348 | } | ||
| 349 | if (args->ar_suiddir) | ||
| 350 | seq_printf(s, ",suiddir"); | ||
| 351 | if (args->ar_data != GFS2_DATA_DEFAULT) { | ||
| 352 | char *state; | ||
| 353 | switch (args->ar_data) { | ||
| 354 | case GFS2_DATA_WRITEBACK: | ||
| 355 | state = "writeback"; | ||
| 356 | break; | ||
| 357 | case GFS2_DATA_ORDERED: | ||
| 358 | state = "ordered"; | ||
| 359 | break; | ||
| 360 | default: | ||
| 361 | state = "unknown"; | ||
| 362 | break; | ||
| 363 | } | ||
| 364 | seq_printf(s, ",data=%s", state); | ||
| 365 | } | ||
| 366 | |||
| 367 | return 0; | ||
| 368 | } | ||
| 369 | |||
/*
 * We have to (at the moment) hold the inodes main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
 * taking the exclusive lock. I'd rather do a shared -> exclusive
 * conversion on the iopen lock, but we can change that later. This
 * is safe, just less efficient.
 */
static void gfs2_delete_inode(struct inode *inode)
{
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int error;

	/* Address-space-only inodes have no glocks; just drop the pages */
	if (!inode->i_private)
		goto out;

	/* Take the inode glock exclusively; on failure, skip deallocation
	   but still release the iopen holder */
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
	if (unlikely(error)) {
		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
		goto out;
	}

	/* Re-take the iopen glock exclusively; the gap between dq and nq
	   is covered by the inode glock held above (see comment at top) */
	gfs2_glock_dq(&ip->i_iopen_gh);
	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
	error = gfs2_glock_nq(&ip->i_iopen_gh);
	if (error)
		goto out_uninit;

	/* Deallocate, in order: exhash directory blocks, extended
	   attributes, data blocks, then the dinode itself */
	if (S_ISDIR(ip->i_di.di_mode) &&
	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
		error = gfs2_dir_exhash_dealloc(ip);
		if (error)
			goto out_unlock;
	}

	if (ip->i_di.di_eattr) {
		error = gfs2_ea_dealloc(ip);
		if (error)
			goto out_unlock;
	}

	if (!gfs2_is_stuffed(ip)) {
		error = gfs2_file_dealloc(ip);
		if (error)
			goto out_unlock;
	}

	error = gfs2_dinode_dealloc(ip);

out_unlock:
	gfs2_glock_dq(&ip->i_iopen_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_iopen_gh);
	gfs2_glock_dq_uninit(&gh);
	if (error)
		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
out:
	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
}
| 431 | |||
| 432 | |||
| 433 | |||
| 434 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | ||
| 435 | { | ||
| 436 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 437 | struct gfs2_inode *ip; | ||
| 438 | |||
| 439 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); | ||
| 440 | if (ip) { | ||
| 441 | ip->i_flags = 0; | ||
| 442 | ip->i_gl = NULL; | ||
| 443 | ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); | ||
| 444 | ip->i_last_pfault = jiffies; | ||
| 445 | } | ||
| 446 | return &ip->i_inode; | ||
| 447 | } | ||
| 448 | |||
/* Return an inode obtained from gfs2_alloc_inode() to the slab cache.
 * NOTE(review): frees via the i_inode pointer, which presumably equals
 * the gfs2_inode cache object's address (i_inode first member) — confirm. */
static void gfs2_destroy_inode(struct inode *inode)
{
	kmem_cache_free(gfs2_inode_cachep, inode);
}
| 453 | |||
/* Superblock operations vector handed to the VFS at mount time */
struct super_operations gfs2_super_ops = {
	.alloc_inode = gfs2_alloc_inode,
	.destroy_inode = gfs2_destroy_inode,
	.write_inode = gfs2_write_inode,
	.delete_inode = gfs2_delete_inode,
	.put_super = gfs2_put_super,
	.write_super = gfs2_write_super,
	.write_super_lockfs = gfs2_write_super_lockfs,
	.unlockfs = gfs2_unlockfs,
	.statfs = gfs2_statfs,
	.remount_fs = gfs2_remount_fs,
	.clear_inode = gfs2_clear_inode,
	.show_options = gfs2_show_options,
};
| 468 | |||
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h new file mode 100644 index 000000000000..9de73f042f78 --- /dev/null +++ b/fs/gfs2/ops_super.h | |||
| @@ -0,0 +1,17 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __OPS_SUPER_DOT_H__
#define __OPS_SUPER_DOT_H__

#include <linux/fs.h>

/* VFS super_operations table, defined in ops_super.c */
extern struct super_operations gfs2_super_ops;

#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c new file mode 100644 index 000000000000..5453d2947ab3 --- /dev/null +++ b/fs/gfs2/ops_vm.c | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/mm.h> | ||
| 16 | #include <linux/pagemap.h> | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "bmap.h" | ||
| 23 | #include "glock.h" | ||
| 24 | #include "inode.h" | ||
| 25 | #include "ops_vm.h" | ||
| 26 | #include "quota.h" | ||
| 27 | #include "rgrp.h" | ||
| 28 | #include "trans.h" | ||
| 29 | #include "util.h" | ||
| 30 | |||
/*
 * After a page fault, ask the inode glock to be held "greedily" for
 * i_greedy jiffies so back-to-back faults don't bounce the lock.
 * An inode reference is taken for the greedy machinery.
 * NOTE(review): a nonzero return from gfs2_glock_be_greedy()
 * presumably means the request was not queued, so the reference is
 * dropped here rather than by the greedy callback — confirm.
 */
static void pfault_be_greedy(struct gfs2_inode *ip)
{
	unsigned int time;

	spin_lock(&ip->i_spin);
	time = ip->i_greedy;
	ip->i_last_pfault = jiffies;
	spin_unlock(&ip->i_spin);

	igrab(&ip->i_inode);
	if (gfs2_glock_be_greedy(ip->i_gl, time))
		iput(&ip->i_inode);
}
| 44 | |||
| 45 | static struct page *gfs2_private_nopage(struct vm_area_struct *area, | ||
| 46 | unsigned long address, int *type) | ||
| 47 | { | ||
| 48 | struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); | ||
| 49 | struct page *result; | ||
| 50 | |||
| 51 | set_bit(GIF_PAGED, &ip->i_flags); | ||
| 52 | |||
| 53 | result = filemap_nopage(area, address, type); | ||
| 54 | |||
| 55 | if (result && result != NOPAGE_OOM) | ||
| 56 | pfault_be_greedy(ip); | ||
| 57 | |||
| 58 | return result; | ||
| 59 | } | ||
| 60 | |||
/*
 * Allocate the on-disk blocks backing @page for a shared-writable
 * fault: lock and check quotas, reserve blocks, open a transaction,
 * unstuff the dinode if necessary, then map every filesystem block
 * under the page with "new" set so allocation actually happens.
 * Cleanup is via the usual goto ladder, in reverse acquisition order.
 */
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned long index = page->index;
	/* First filesystem block covered by this page */
	u64 lblock = index << (PAGE_CACHE_SHIFT -
			       sdp->sd_sb.sb_bsize_shift);
	unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
	struct gfs2_alloc *al;
	unsigned int data_blocks, ind_blocks;
	unsigned int x;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
	if (error)
		goto out_gunlock_q;

	/* Worst-case data + indirect block count for one page of data */
	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);

	al->al_requested = data_blocks + ind_blocks;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
				 ind_blocks + RES_DINODE +
				 RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto out_trans;
	}

	/* Walk the page extent-by-extent; extlen blocks mapped per call */
	for (x = 0; x < blocks; ) {
		u64 dblock;
		unsigned int extlen;
		int new = 1;

		error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
		if (error)
			goto out_trans;

		lblock += extlen;
		x += extlen;
	}

	/* We were called because backing was missing, so something
	   should have been allocated */
	gfs2_assert_warn(sdp, al->al_alloced);

out_trans:
	gfs2_trans_end(sdp);
out_ipres:
	gfs2_inplace_release(ip);
out_gunlock_q:
	gfs2_quota_unlock(ip);
out:
	gfs2_alloc_put(ip);
	return error;
}
| 128 | |||
/*
 * nopage handler for shared-writable mappings.  Holds the inode glock
 * exclusively across the fault and, if the faulted range has no
 * on-disk backing yet, allocates it via alloc_page_backing() and
 * dirties the page.
 */
static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
					   unsigned long address, int *type)
{
	struct file *file = area->vm_file;
	struct gfs2_file *gf = file->private_data;
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_holder i_gh;
	struct page *result = NULL;
	/* Page index of the faulting address within the file */
	unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
			      area->vm_pgoff;
	int alloc_required;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return NULL;

	set_bit(GIF_PAGED, &ip->i_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
					  PAGE_CACHE_SIZE, &alloc_required);
	if (error)
		goto out;

	/* GFF_EXLOCK signals that this task already holds the glock,
	   so the read path must not try to take it again */
	set_bit(GFF_EXLOCK, &gf->f_flags);
	result = filemap_nopage(area, address, type);
	clear_bit(GFF_EXLOCK, &gf->f_flags);
	if (!result || result == NOPAGE_OOM)
		goto out;

	if (alloc_required) {
		error = alloc_page_backing(ip, result);
		if (error) {
			/* Drop the faulted page; caller sees an OOM-style NULL */
			page_cache_release(result);
			result = NULL;
			goto out;
		}
		set_page_dirty(result);
	}

	pfault_be_greedy(ip);
out:
	gfs2_glock_dq_uninit(&i_gh);

	return result;
}
| 176 | |||
/* Fault handlers for private (read-faulting) mappings */
struct vm_operations_struct gfs2_vm_ops_private = {
	.nopage = gfs2_private_nopage,
};

/* Fault handlers for shared-writable mappings (may allocate backing) */
struct vm_operations_struct gfs2_vm_ops_sharewrite = {
	.nopage = gfs2_sharewrite_nopage,
};
| 184 | |||
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h new file mode 100644 index 000000000000..4ae8f43ed5e3 --- /dev/null +++ b/fs/gfs2/ops_vm.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __OPS_VM_DOT_H__ | ||
| 11 | #define __OPS_VM_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/mm.h> | ||
| 14 | |||
| 15 | extern struct vm_operations_struct gfs2_vm_ops_private; | ||
| 16 | extern struct vm_operations_struct gfs2_vm_ops_sharewrite; | ||
| 17 | |||
| 18 | #endif /* __OPS_VM_DOT_H__ */ | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c new file mode 100644 index 000000000000..c69b94a55588 --- /dev/null +++ b/fs/gfs2/quota.c | |||
| @@ -0,0 +1,1227 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | /* | ||
| 11 | * Quota change tags are associated with each transaction that allocates or | ||
| 12 | * deallocates space. Those changes are accumulated locally to each node (in a | ||
| 13 | * per-node file) and then are periodically synced to the quota file. This | ||
| 14 | * avoids the bottleneck of constantly touching the quota file, but introduces | ||
| 15 | * fuzziness in the current usage value of IDs that are being used on different | ||
| 16 | * nodes in the cluster simultaneously. So, it is possible for a user on | ||
 * multiple nodes to overrun their quota, but that overrun is controllable.
 * Since quota tags are part of transactions, there is no need for a quota
 * check program to be run after node crashes or anything like that.
| 20 | * | ||
 * There are a couple of knobs that let the administrator manage the quota
| 22 | * fuzziness. "quota_quantum" sets the maximum time a quota change can be | ||
| 23 | * sitting on one node before being synced to the quota file. (The default is | ||
| 24 | * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency | ||
| 25 | * of quota file syncs increases as the user moves closer to their limit. The | ||
| 26 | * more frequent the syncs, the more accurate the quota enforcement, but that | ||
| 27 | * means that there is more contention between the nodes for the quota file. | ||
| 28 | * The default value is one. This sets the maximum theoretical quota overrun | ||
 * (with an infinite number of nodes with infinite bandwidth) to twice the user's limit. (In
| 30 | * practice, the maximum overrun you see should be much less.) A "quota_scale" | ||
| 31 | * number greater than one makes quota syncs more frequent and reduces the | ||
| 32 | * maximum overrun. Numbers less than one (but greater than zero) make quota | ||
| 33 | * syncs less frequent. | ||
| 34 | * | ||
| 35 | * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of | ||
| 36 | * the quota file, so it is not being constantly read. | ||
| 37 | */ | ||
| 38 | |||
| 39 | #include <linux/sched.h> | ||
| 40 | #include <linux/slab.h> | ||
| 41 | #include <linux/spinlock.h> | ||
| 42 | #include <linux/completion.h> | ||
| 43 | #include <linux/buffer_head.h> | ||
| 44 | #include <linux/sort.h> | ||
| 45 | #include <linux/fs.h> | ||
| 46 | #include <linux/bio.h> | ||
| 47 | #include <linux/gfs2_ondisk.h> | ||
| 48 | #include <linux/lm_interface.h> | ||
| 49 | |||
| 50 | #include "gfs2.h" | ||
| 51 | #include "incore.h" | ||
| 52 | #include "bmap.h" | ||
| 53 | #include "glock.h" | ||
| 54 | #include "glops.h" | ||
| 55 | #include "log.h" | ||
| 56 | #include "meta_io.h" | ||
| 57 | #include "quota.h" | ||
| 58 | #include "rgrp.h" | ||
| 59 | #include "super.h" | ||
| 60 | #include "trans.h" | ||
| 61 | #include "inode.h" | ||
| 62 | #include "ops_file.h" | ||
| 63 | #include "ops_address.h" | ||
| 64 | #include "util.h" | ||
| 65 | |||
/* Values for the 'user' argument of qd_get()/qdsb_get(): user vs group quota */
#define QUOTA_USER 1
#define QUOTA_GROUP 0
| 68 | |||
| 69 | static u64 qd2offset(struct gfs2_quota_data *qd) | ||
| 70 | { | ||
| 71 | u64 offset; | ||
| 72 | |||
| 73 | offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags); | ||
| 74 | offset *= sizeof(struct gfs2_quota); | ||
| 75 | |||
| 76 | return offset; | ||
| 77 | } | ||
| 78 | |||
/*
 * Allocate and initialize an in-core quota entry for (user, id),
 * including its quota glock and a held LVB on that glock.  The new
 * entry starts with one reference and no slot (qd_slot == -1).
 *
 * Returns: 0 on success or error code
 */
static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
		    struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd;
	int error;

	qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
	if (!qd)
		return -ENOMEM;

	qd->qd_count = 1;
	qd->qd_id = id;
	if (user)
		set_bit(QDF_USER, &qd->qd_flags);
	qd->qd_slot = -1;

	/* Glock number encodes both the ID and the user/group flavor,
	   matching qd2offset()'s interleaving */
	error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
			       &gfs2_quota_glops, CREATE, &qd->qd_gl);
	if (error)
		goto fail;

	/* NOTE(review): the glock ref is dropped even when lvb_hold
	   succeeds — presumably gfs2_lvb_hold pins its own reference;
	   confirm against glock.c */
	error = gfs2_lvb_hold(qd->qd_gl);
	gfs2_glock_put(qd->qd_gl);
	if (error)
		goto fail;

	*qdp = qd;

	return 0;

fail:
	kfree(qd);
	return error;
}
| 113 | |||
/*
 * Find (and, with @create, make) the in-core quota entry for
 * (user, id).  Lookup and list insertion happen under sd_quota_spin;
 * since qd_alloc() can sleep, allocation is done outside the lock and
 * the search is retried, discarding the spare entry if another task
 * raced us and inserted one first.
 *
 * Returns: 0 (with *qdp set, possibly to NULL when !create) or error
 */
static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
		  struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
	int error, found;

	*qdp = NULL;

	for (;;) {
		found = 0;
		spin_lock(&sdp->sd_quota_spin);
		list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
			/* "!a == !b" compares the flag and 'user' as booleans */
			if (qd->qd_id == id &&
			    !test_bit(QDF_USER, &qd->qd_flags) == !user) {
				qd->qd_count++;
				found = 1;
				break;
			}
		}

		if (!found)
			qd = NULL;

		if (!qd && new_qd) {
			/* Insert the entry allocated on the previous pass */
			qd = new_qd;
			list_add(&qd->qd_list, &sdp->sd_quota_list);
			atomic_inc(&sdp->sd_quota_count);
			new_qd = NULL;
		}

		spin_unlock(&sdp->sd_quota_spin);

		if (qd || !create) {
			if (new_qd) {
				/* Lost the race: discard the spare entry */
				gfs2_lvb_unhold(new_qd->qd_gl);
				kfree(new_qd);
			}
			*qdp = qd;
			return 0;
		}

		error = qd_alloc(sdp, user, id, &new_qd);
		if (error)
			return error;
	}
}
| 160 | |||
| 161 | static void qd_hold(struct gfs2_quota_data *qd) | ||
| 162 | { | ||
| 163 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 164 | |||
| 165 | spin_lock(&sdp->sd_quota_spin); | ||
| 166 | gfs2_assert(sdp, qd->qd_count); | ||
| 167 | qd->qd_count++; | ||
| 168 | spin_unlock(&sdp->sd_quota_spin); | ||
| 169 | } | ||
| 170 | |||
| 171 | static void qd_put(struct gfs2_quota_data *qd) | ||
| 172 | { | ||
| 173 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 174 | spin_lock(&sdp->sd_quota_spin); | ||
| 175 | gfs2_assert(sdp, qd->qd_count); | ||
| 176 | if (!--qd->qd_count) | ||
| 177 | qd->qd_last_touched = jiffies; | ||
| 178 | spin_unlock(&sdp->sd_quota_spin); | ||
| 179 | } | ||
| 180 | |||
/*
 * Assign @qd a slot in the per-node quota-change file by scanning the
 * in-core bitmap for the first clear bit.  Reference counted through
 * qd_slot_count: only the first reference does the scan.
 *
 * Returns: 0 on success, -ENOSPC if no slot is free
 */
static int slot_get(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	unsigned int c, o = 0, b;
	unsigned char byte = 0;

	spin_lock(&sdp->sd_quota_spin);

	if (qd->qd_slot_count++) {
		/* Already own a slot; just bumped the refcount */
		spin_unlock(&sdp->sd_quota_spin);
		return 0;
	}

	/* Find the first bitmap byte with a clear bit */
	for (c = 0; c < sdp->sd_quota_chunks; c++)
		for (o = 0; o < PAGE_SIZE; o++) {
			byte = sdp->sd_quota_bitmap[c][o];
			if (byte != 0xFF)
				goto found;
		}

	goto fail;

found:
	/* Find the first clear bit within that byte */
	for (b = 0; b < 8; b++)
		if (!(byte & (1 << b)))
			break;
	qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;

	/* The last chunk may extend past the real slot count */
	if (qd->qd_slot >= sdp->sd_quota_slots)
		goto fail;

	sdp->sd_quota_bitmap[c][o] |= 1 << b;

	spin_unlock(&sdp->sd_quota_spin);

	return 0;

fail:
	qd->qd_slot_count--;
	spin_unlock(&sdp->sd_quota_spin);
	return -ENOSPC;
}
| 223 | |||
| 224 | static void slot_hold(struct gfs2_quota_data *qd) | ||
| 225 | { | ||
| 226 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 227 | |||
| 228 | spin_lock(&sdp->sd_quota_spin); | ||
| 229 | gfs2_assert(sdp, qd->qd_slot_count); | ||
| 230 | qd->qd_slot_count++; | ||
| 231 | spin_unlock(&sdp->sd_quota_spin); | ||
| 232 | } | ||
| 233 | |||
| 234 | static void slot_put(struct gfs2_quota_data *qd) | ||
| 235 | { | ||
| 236 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 237 | |||
| 238 | spin_lock(&sdp->sd_quota_spin); | ||
| 239 | gfs2_assert(sdp, qd->qd_slot_count); | ||
| 240 | if (!--qd->qd_slot_count) { | ||
| 241 | gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); | ||
| 242 | qd->qd_slot = -1; | ||
| 243 | } | ||
| 244 | spin_unlock(&sdp->sd_quota_spin); | ||
| 245 | } | ||
| 246 | |||
| 247 | static int bh_get(struct gfs2_quota_data *qd) | ||
| 248 | { | ||
| 249 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 250 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | ||
| 251 | unsigned int block, offset; | ||
| 252 | struct buffer_head *bh; | ||
| 253 | int error; | ||
| 254 | struct buffer_head bh_map; | ||
| 255 | |||
| 256 | mutex_lock(&sdp->sd_quota_mutex); | ||
| 257 | |||
| 258 | if (qd->qd_bh_count++) { | ||
| 259 | mutex_unlock(&sdp->sd_quota_mutex); | ||
| 260 | return 0; | ||
| 261 | } | ||
| 262 | |||
| 263 | block = qd->qd_slot / sdp->sd_qc_per_block; | ||
| 264 | offset = qd->qd_slot % sdp->sd_qc_per_block;; | ||
| 265 | |||
| 266 | error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map, 1); | ||
| 267 | if (error) | ||
| 268 | goto fail; | ||
| 269 | error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); | ||
| 270 | if (error) | ||
| 271 | goto fail; | ||
| 272 | error = -EIO; | ||
| 273 | if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) | ||
| 274 | goto fail_brelse; | ||
| 275 | |||
| 276 | qd->qd_bh = bh; | ||
| 277 | qd->qd_bh_qc = (struct gfs2_quota_change *) | ||
| 278 | (bh->b_data + sizeof(struct gfs2_meta_header) + | ||
| 279 | offset * sizeof(struct gfs2_quota_change)); | ||
| 280 | |||
| 281 | mutex_lock(&sdp->sd_quota_mutex); | ||
| 282 | |||
| 283 | return 0; | ||
| 284 | |||
| 285 | fail_brelse: | ||
| 286 | brelse(bh); | ||
| 287 | fail: | ||
| 288 | qd->qd_bh_count--; | ||
| 289 | mutex_unlock(&sdp->sd_quota_mutex); | ||
| 290 | return error; | ||
| 291 | } | ||
| 292 | |||
| 293 | static void bh_put(struct gfs2_quota_data *qd) | ||
| 294 | { | ||
| 295 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 296 | |||
| 297 | mutex_lock(&sdp->sd_quota_mutex); | ||
| 298 | gfs2_assert(sdp, qd->qd_bh_count); | ||
| 299 | if (!--qd->qd_bh_count) { | ||
| 300 | brelse(qd->qd_bh); | ||
| 301 | qd->qd_bh = NULL; | ||
| 302 | qd->qd_bh_qc = NULL; | ||
| 303 | } | ||
| 304 | mutex_unlock(&sdp->sd_quota_mutex); | ||
| 305 | } | ||
| 306 | |||
/*
 * Fish one dirty quota entry out of the list for syncing: the first
 * unlocked, changed entry whose sync generation lags
 * sd_quota_sync_gen.  On success the entry is marked QDF_LOCKED with
 * its refcount, slot count, and change buffer all pinned.  *qdp is
 * left NULL when nothing needs syncing.
 *
 * Returns: 0 on success or error code
 */
static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd = NULL;
	int error;
	int found = 0;

	*qdp = NULL;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return 0;

	spin_lock(&sdp->sd_quota_spin);

	list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
		if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
		    !test_bit(QDF_CHANGE, &qd->qd_flags) ||
		    qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
			continue;

		/* Move to the tail so repeated fishing is round-robin */
		list_move_tail(&qd->qd_list, &sdp->sd_quota_list);

		set_bit(QDF_LOCKED, &qd->qd_flags);
		gfs2_assert_warn(sdp, qd->qd_count);
		qd->qd_count++;
		/* Snapshot the change value to sync while unlocked */
		qd->qd_change_sync = qd->qd_change;
		gfs2_assert_warn(sdp, qd->qd_slot_count);
		qd->qd_slot_count++;
		found = 1;

		break;
	}

	if (!found)
		qd = NULL;

	spin_unlock(&sdp->sd_quota_spin);

	if (qd) {
		gfs2_assert_warn(sdp, qd->qd_change_sync);
		error = bh_get(qd);
		if (error) {
			/* Undo the references taken under the spinlock */
			clear_bit(QDF_LOCKED, &qd->qd_flags);
			slot_put(qd);
			qd_put(qd);
			return error;
		}
	}

	*qdp = qd;

	return 0;
}
| 359 | |||
/*
 * Like qd_fish(), but for one specific entry: lock @qd for syncing if
 * it has pending changes and isn't already being synced.
 *
 * Returns: 1 if the caller now owns the entry (QDF_LOCKED set, refs
 * and change buffer pinned), 0 otherwise
 */
static int qd_trylock(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return 0;

	spin_lock(&sdp->sd_quota_spin);

	if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
	    !test_bit(QDF_CHANGE, &qd->qd_flags)) {
		spin_unlock(&sdp->sd_quota_spin);
		return 0;
	}

	list_move_tail(&qd->qd_list, &sdp->sd_quota_list);

	set_bit(QDF_LOCKED, &qd->qd_flags);
	gfs2_assert_warn(sdp, qd->qd_count);
	qd->qd_count++;
	/* Snapshot the change value to sync while unlocked */
	qd->qd_change_sync = qd->qd_change;
	gfs2_assert_warn(sdp, qd->qd_slot_count);
	qd->qd_slot_count++;

	spin_unlock(&sdp->sd_quota_spin);

	gfs2_assert_warn(sdp, qd->qd_change_sync);
	if (bh_get(qd)) {
		/* Couldn't pin the change buffer; back everything out */
		clear_bit(QDF_LOCKED, &qd->qd_flags);
		slot_put(qd);
		qd_put(qd);
		return 0;
	}

	return 1;
}
| 396 | |||
| 397 | static void qd_unlock(struct gfs2_quota_data *qd) | ||
| 398 | { | ||
| 399 | gfs2_assert_warn(qd->qd_gl->gl_sbd, | ||
| 400 | test_bit(QDF_LOCKED, &qd->qd_flags)); | ||
| 401 | clear_bit(QDF_LOCKED, &qd->qd_flags); | ||
| 402 | bh_put(qd); | ||
| 403 | slot_put(qd); | ||
| 404 | qd_put(qd); | ||
| 405 | } | ||
| 406 | |||
| 407 | static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create, | ||
| 408 | struct gfs2_quota_data **qdp) | ||
| 409 | { | ||
| 410 | int error; | ||
| 411 | |||
| 412 | error = qd_get(sdp, user, id, create, qdp); | ||
| 413 | if (error) | ||
| 414 | return error; | ||
| 415 | |||
| 416 | error = slot_get(*qdp); | ||
| 417 | if (error) | ||
| 418 | goto fail; | ||
| 419 | |||
| 420 | error = bh_get(*qdp); | ||
| 421 | if (error) | ||
| 422 | goto fail_slot; | ||
| 423 | |||
| 424 | return 0; | ||
| 425 | |||
| 426 | fail_slot: | ||
| 427 | slot_put(*qdp); | ||
| 428 | fail: | ||
| 429 | qd_put(*qdp); | ||
| 430 | return error; | ||
| 431 | } | ||
| 432 | |||
/* Undo qdsb_get(): drop the buffer, slot and structure references. */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}
| 439 | |||
/**
 * gfs2_quota_hold - acquire quota data references for an inode operation
 * @ip: the inode
 * @uid: an additional uid quota to cover, or NO_QUOTA_CHANGE
 * @gid: an additional gid quota to cover, or NO_QUOTA_CHANGE
 *
 * Fills ip->i_alloc.al_qd[] with up to four quota data structures via
 * qdsb_get(): the inode's owner uid and gid, plus the target uid/gid
 * when they differ from the owner's.  On any failure everything taken
 * so far is released via gfs2_quota_unhold().
 *
 * Returns: errno
 */
int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_quota_data **qd = al->al_qd;
	int error;

	/* Must not already be holding quotas or the quota glocks. */
	if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
		return -EIO;

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return 0;

	error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
	if (error)
		goto out;
	al->al_qd_num++;
	qd++;

	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
	if (error)
		goto out;
	al->al_qd_num++;
	qd++;

	/* Also hold the target uid when ownership is being changed. */
	if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
		error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
		if (error)
			goto out;
		al->al_qd_num++;
		qd++;
	}

	/* Likewise for the target gid. */
	if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
		error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
		if (error)
			goto out;
		al->al_qd_num++;
		qd++;
	}

out:
	if (error)
		gfs2_quota_unhold(ip);
	return error;
}
| 487 | |||
| 488 | void gfs2_quota_unhold(struct gfs2_inode *ip) | ||
| 489 | { | ||
| 490 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 491 | struct gfs2_alloc *al = &ip->i_alloc; | ||
| 492 | unsigned int x; | ||
| 493 | |||
| 494 | gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); | ||
| 495 | |||
| 496 | for (x = 0; x < al->al_qd_num; x++) { | ||
| 497 | qdsb_put(al->al_qd[x]); | ||
| 498 | al->al_qd[x] = NULL; | ||
| 499 | } | ||
| 500 | al->al_qd_num = 0; | ||
| 501 | } | ||
| 502 | |||
| 503 | static int sort_qd(const void *a, const void *b) | ||
| 504 | { | ||
| 505 | const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a; | ||
| 506 | const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b; | ||
| 507 | |||
| 508 | if (!test_bit(QDF_USER, &qd_a->qd_flags) != | ||
| 509 | !test_bit(QDF_USER, &qd_b->qd_flags)) { | ||
| 510 | if (test_bit(QDF_USER, &qd_a->qd_flags)) | ||
| 511 | return -1; | ||
| 512 | else | ||
| 513 | return 1; | ||
| 514 | } | ||
| 515 | if (qd_a->qd_id < qd_b->qd_id) | ||
| 516 | return -1; | ||
| 517 | if (qd_a->qd_id > qd_b->qd_id) | ||
| 518 | return 1; | ||
| 519 | |||
| 520 | return 0; | ||
| 521 | } | ||
| 522 | |||
/*
 * do_qc - apply a delta to this quota's slot in the quota change file
 * @qd: the quota data
 * @change: signed delta to add to the slot's qc_change
 *
 * Initializes the change-file entry on first use, mirrors the new total
 * into qd->qd_change under the quota spinlock, and takes or drops the
 * slot/structure references as the pending change becomes nonzero/zero.
 */
static void do_qc(struct gfs2_quota_data *qd, s64 change)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	struct gfs2_quota_change *qc = qd->qd_bh_qc;
	s64 x;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);

	/* First change for this slot: fill in the id and flags. */
	if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
		qc->qc_change = 0;
		qc->qc_flags = 0;
		if (test_bit(QDF_USER, &qd->qd_flags))
			qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
		qc->qc_id = cpu_to_be32(qd->qd_id);
	}

	/* Accumulate the on-disk (big-endian) value in CPU order. */
	x = qc->qc_change;
	x = be64_to_cpu(x) + change;
	qc->qc_change = cpu_to_be64(x);

	spin_lock(&sdp->sd_quota_spin);
	qd->qd_change = x;
	spin_unlock(&sdp->sd_quota_spin);

	if (!x) {
		/* Change cancelled out: clear the slot and drop references. */
		gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
		clear_bit(QDF_CHANGE, &qd->qd_flags);
		qc->qc_flags = 0;
		qc->qc_id = 0;
		slot_put(qd);
		qd_put(qd);
	} else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
		/* Slot became active: pin it until the change is synced. */
		qd_hold(qd);
		slot_hold(qd);
	}

	mutex_unlock(&sdp->sd_quota_mutex);
}
| 563 | |||
| 564 | /** | ||
| 565 | * gfs2_adjust_quota | ||
| 566 | * | ||
| 567 | * This function was mostly borrowed from gfs2_block_truncate_page which was | ||
| 568 | * in turn mostly borrowed from ext3 | ||
| 569 | */ | ||
| 570 | static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | ||
| 571 | s64 change, struct gfs2_quota_data *qd) | ||
| 572 | { | ||
| 573 | struct inode *inode = &ip->i_inode; | ||
| 574 | struct address_space *mapping = inode->i_mapping; | ||
| 575 | unsigned long index = loc >> PAGE_CACHE_SHIFT; | ||
| 576 | unsigned offset = loc & (PAGE_CACHE_SHIFT - 1); | ||
| 577 | unsigned blocksize, iblock, pos; | ||
| 578 | struct buffer_head *bh; | ||
| 579 | struct page *page; | ||
| 580 | void *kaddr; | ||
| 581 | __be64 *ptr; | ||
| 582 | s64 value; | ||
| 583 | int err = -EIO; | ||
| 584 | |||
| 585 | page = grab_cache_page(mapping, index); | ||
| 586 | if (!page) | ||
| 587 | return -ENOMEM; | ||
| 588 | |||
| 589 | blocksize = inode->i_sb->s_blocksize; | ||
| 590 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
| 591 | |||
| 592 | if (!page_has_buffers(page)) | ||
| 593 | create_empty_buffers(page, blocksize, 0); | ||
| 594 | |||
| 595 | bh = page_buffers(page); | ||
| 596 | pos = blocksize; | ||
| 597 | while (offset >= pos) { | ||
| 598 | bh = bh->b_this_page; | ||
| 599 | iblock++; | ||
| 600 | pos += blocksize; | ||
| 601 | } | ||
| 602 | |||
| 603 | if (!buffer_mapped(bh)) { | ||
| 604 | gfs2_get_block(inode, iblock, bh, 1); | ||
| 605 | if (!buffer_mapped(bh)) | ||
| 606 | goto unlock; | ||
| 607 | } | ||
| 608 | |||
| 609 | if (PageUptodate(page)) | ||
| 610 | set_buffer_uptodate(bh); | ||
| 611 | |||
| 612 | if (!buffer_uptodate(bh)) { | ||
| 613 | ll_rw_block(READ_META, 1, &bh); | ||
| 614 | wait_on_buffer(bh); | ||
| 615 | if (!buffer_uptodate(bh)) | ||
| 616 | goto unlock; | ||
| 617 | } | ||
| 618 | |||
| 619 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | ||
| 620 | |||
| 621 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 622 | ptr = kaddr + offset; | ||
| 623 | value = (s64)be64_to_cpu(*ptr) + change; | ||
| 624 | *ptr = cpu_to_be64(value); | ||
| 625 | flush_dcache_page(page); | ||
| 626 | kunmap_atomic(kaddr, KM_USER0); | ||
| 627 | err = 0; | ||
| 628 | qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); | ||
| 629 | qd->qd_qb.qb_value = cpu_to_be64(value); | ||
| 630 | unlock: | ||
| 631 | unlock_page(page); | ||
| 632 | page_cache_release(page); | ||
| 633 | return err; | ||
| 634 | } | ||
| 635 | |||
/*
 * do_sync - flush a set of pending quota changes into the quota file
 * @num_qd: number of entries in @qda
 * @qda: quota data structures claimed by qd_fish()/qd_trylock()
 *
 * Takes every quota glock exclusively (in sorted order) plus the quota
 * inode glock, reserves blocks for any region of the quota file that
 * needs allocation, then applies each qd_change_sync to the quota file
 * with gfs2_adjust_quota() and backs the same delta out of the quota
 * change file with do_qc().
 *
 * Returns: errno
 */
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
	struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	unsigned int data_blocks, ind_blocks;
	struct gfs2_holder *ghs, i_gh;
	unsigned int qx, x;
	struct gfs2_quota_data *qd;
	loff_t offset;
	unsigned int nalloc = 0;
	struct gfs2_alloc *al = NULL;
	int error;

	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
			       &data_blocks, &ind_blocks);

	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
	if (!ghs)
		return -ENOMEM;

	/* Acquire the glocks in a canonical order. */
	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
	for (qx = 0; qx < num_qd; qx++) {
		error = gfs2_glock_nq_init(qda[qx]->qd_gl,
					   LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &ghs[qx]);
		if (error)
			goto out;
	}

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		goto out;

	/* Count how many quota entries need new blocks allocated. */
	for (x = 0; x < num_qd; x++) {
		int alloc_required;

		offset = qd2offset(qda[x]);
		error = gfs2_write_alloc_required(ip, offset,
						  sizeof(struct gfs2_quota),
						  &alloc_required);
		if (error)
			goto out_gunlock;
		if (alloc_required)
			nalloc++;
	}

	if (nalloc) {
		al = gfs2_alloc_get(ip);

		al->al_requested = nalloc * (data_blocks + ind_blocks);

		error = gfs2_inplace_reserve(ip);
		if (error)
			goto out_alloc;

		error = gfs2_trans_begin(sdp,
					 al->al_rgd->rd_ri.ri_length +
					 num_qd * data_blocks +
					 nalloc * ind_blocks +
					 RES_DINODE + num_qd +
					 RES_STATFS, 0);
		if (error)
			goto out_ipres;
	} else {
		error = gfs2_trans_begin(sdp,
					 num_qd * data_blocks +
					 RES_DINODE + num_qd, 0);
		if (error)
			goto out_gunlock;
	}

	for (x = 0; x < num_qd; x++) {
		qd = qda[x];
		offset = qd2offset(qd);
		/* NOTE(review): gl_lvb is cast to struct gfs2_quota_data *
		 * here, which relies on the lvb layout lining up with the
		 * qd_qb member gfs2_adjust_quota() writes — looks fragile;
		 * confirm against the lvb definition. */
		error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
					  (struct gfs2_quota_data *)
					  qd->qd_gl->gl_lvb);
		if (error)
			goto out_end_trans;

		/* Remove the synced delta from the quota change file. */
		do_qc(qd, -qd->qd_change_sync);
	}

	error = 0;

out_end_trans:
	gfs2_trans_end(sdp);
out_ipres:
	if (nalloc)
		gfs2_inplace_release(ip);
out_alloc:
	if (nalloc)
		gfs2_alloc_put(ip);
out_gunlock:
	gfs2_glock_dq_uninit(&i_gh);
out:
	while (qx--)
		gfs2_glock_dq_uninit(&ghs[qx]);
	kfree(ghs);
	gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
	return error;
}
| 738 | |||
/*
 * do_glock - acquire a quota glock, refreshing the lvb from disk if needed
 * @qd: the quota data
 * @force_refresh: re-read the quota even if the lvb carries a valid magic
 * @q_gh: holder to fill in
 *
 * Normally takes the glock shared.  If the lock value block has no valid
 * magic (or a refresh is forced), upgrades to an exclusive hold, reads
 * the quota from the quota file and populates the lvb, then — if the
 * glock is wanted elsewhere — drops it and restarts with a shared hold.
 *
 * Returns: errno
 */
static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
		    struct gfs2_holder *q_gh)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	struct gfs2_holder i_gh;
	struct gfs2_quota q;
	char buf[sizeof(struct gfs2_quota)];
	struct file_ra_state ra_state;
	int error;
	struct gfs2_quota_lvb *qlvb;

	file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
restart:
	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
	if (error)
		return error;

	/* Cache the current lock value block contents. */
	qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;

	if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
		loff_t pos;
		/* Upgrade to an exclusive hold to (re)build the lvb. */
		gfs2_glock_dq_uninit(q_gh);
		error = gfs2_glock_nq_init(qd->qd_gl,
					   LM_ST_EXCLUSIVE, GL_NOCACHE,
					   q_gh);
		if (error)
			return error;

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
		if (error)
			goto fail;

		memset(buf, 0, sizeof(struct gfs2_quota));
		pos = qd2offset(qd);
		error = gfs2_internal_read(ip, &ra_state, buf,
					   &pos, sizeof(struct gfs2_quota));
		if (error < 0)
			goto fail_gunlock;

		gfs2_glock_dq_uninit(&i_gh);

		/* Populate the lock value block from the on-disk quota. */
		gfs2_quota_in(&q, buf);
		qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
		qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
		qlvb->__pad = 0;
		qlvb->qb_limit = cpu_to_be64(q.qu_limit);
		qlvb->qb_warn = cpu_to_be64(q.qu_warn);
		qlvb->qb_value = cpu_to_be64(q.qu_value);
		qd->qd_qb = *qlvb;

		/* Glock is wanted elsewhere: drop back to a shared hold. */
		if (gfs2_glock_is_blocking(qd->qd_gl)) {
			gfs2_glock_dq_uninit(q_gh);
			force_refresh = 0;
			goto restart;
		}
	}

	return 0;

fail_gunlock:
	gfs2_glock_dq_uninit(&i_gh);
fail:
	gfs2_glock_dq_uninit(q_gh);
	return error;
}
| 806 | |||
| 807 | int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) | ||
| 808 | { | ||
| 809 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 810 | struct gfs2_alloc *al = &ip->i_alloc; | ||
| 811 | unsigned int x; | ||
| 812 | int error = 0; | ||
| 813 | |||
| 814 | gfs2_quota_hold(ip, uid, gid); | ||
| 815 | |||
| 816 | if (capable(CAP_SYS_RESOURCE) || | ||
| 817 | sdp->sd_args.ar_quota != GFS2_QUOTA_ON) | ||
| 818 | return 0; | ||
| 819 | |||
| 820 | sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *), | ||
| 821 | sort_qd, NULL); | ||
| 822 | |||
| 823 | for (x = 0; x < al->al_qd_num; x++) { | ||
| 824 | error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); | ||
| 825 | if (error) | ||
| 826 | break; | ||
| 827 | } | ||
| 828 | |||
| 829 | if (!error) | ||
| 830 | set_bit(GIF_QD_LOCKED, &ip->i_flags); | ||
| 831 | else { | ||
| 832 | while (x--) | ||
| 833 | gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); | ||
| 834 | gfs2_quota_unhold(ip); | ||
| 835 | } | ||
| 836 | |||
| 837 | return error; | ||
| 838 | } | ||
| 839 | |||
| 840 | static int need_sync(struct gfs2_quota_data *qd) | ||
| 841 | { | ||
| 842 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 843 | struct gfs2_tune *gt = &sdp->sd_tune; | ||
| 844 | s64 value; | ||
| 845 | unsigned int num, den; | ||
| 846 | int do_sync = 1; | ||
| 847 | |||
| 848 | if (!qd->qd_qb.qb_limit) | ||
| 849 | return 0; | ||
| 850 | |||
| 851 | spin_lock(&sdp->sd_quota_spin); | ||
| 852 | value = qd->qd_change; | ||
| 853 | spin_unlock(&sdp->sd_quota_spin); | ||
| 854 | |||
| 855 | spin_lock(>->gt_spin); | ||
| 856 | num = gt->gt_quota_scale_num; | ||
| 857 | den = gt->gt_quota_scale_den; | ||
| 858 | spin_unlock(>->gt_spin); | ||
| 859 | |||
| 860 | if (value < 0) | ||
| 861 | do_sync = 0; | ||
| 862 | else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >= | ||
| 863 | (s64)be64_to_cpu(qd->qd_qb.qb_limit)) | ||
| 864 | do_sync = 0; | ||
| 865 | else { | ||
| 866 | value *= gfs2_jindex_size(sdp) * num; | ||
| 867 | do_div(value, den); | ||
| 868 | value += (s64)be64_to_cpu(qd->qd_qb.qb_value); | ||
| 869 | if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) | ||
| 870 | do_sync = 0; | ||
| 871 | } | ||
| 872 | |||
| 873 | return do_sync; | ||
| 874 | } | ||
| 875 | |||
/**
 * gfs2_quota_unlock - release quota glocks, syncing changes that need it
 * @ip: the inode
 *
 * Drops each quota glock taken by gfs2_quota_lock().  Any quota whose
 * pending change warrants it (per need_sync()) and that can be claimed
 * with qd_trylock() is written out via do_sync() before the holds are
 * finally dropped.
 */
void gfs2_quota_unlock(struct gfs2_inode *ip)
{
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_quota_data *qda[4];
	unsigned int count = 0;
	unsigned int x;

	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
		goto out;

	for (x = 0; x < al->al_qd_num; x++) {
		struct gfs2_quota_data *qd;
		int sync;

		qd = al->al_qd[x];
		/* Decide before dropping the glock whether to sync. */
		sync = need_sync(qd);

		gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);

		if (sync && qd_trylock(qd))
			qda[count++] = qd;
	}

	if (count) {
		do_sync(count, qda);
		for (x = 0; x < count; x++)
			qd_unlock(qda[x]);
	}

out:
	gfs2_quota_unhold(ip);
}
| 908 | |||
| 909 | #define MAX_LINE 256 | ||
| 910 | |||
| 911 | static int print_message(struct gfs2_quota_data *qd, char *type) | ||
| 912 | { | ||
| 913 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | ||
| 914 | |||
| 915 | printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n", | ||
| 916 | sdp->sd_fsname, type, | ||
| 917 | (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", | ||
| 918 | qd->qd_id); | ||
| 919 | |||
| 920 | return 0; | ||
| 921 | } | ||
| 922 | |||
/**
 * gfs2_quota_check - test the held quotas against their limits
 * @ip: the inode
 * @uid: the uid to check
 * @gid: the gid to check
 *
 * Only held quotas matching @uid (user) or @gid (group) are considered.
 * The value tested is the cached lvb value plus the local unsynced
 * change.  Crossing the warn level logs a message at most once per
 * gt_quota_warn_period seconds.
 *
 * Returns: 0, or -EDQUOT if a hard limit is exceeded
 */
int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_quota_data *qd;
	s64 value;
	unsigned int x;
	int error = 0;

	if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
		return 0;

	if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	for (x = 0; x < al->al_qd_num; x++) {
		qd = al->al_qd[x];

		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
			continue;

		value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
		spin_lock(&sdp->sd_quota_spin);
		value += qd->qd_change;
		spin_unlock(&sdp->sd_quota_spin);

		if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
			print_message(qd, "exceeded");
			error = -EDQUOT;
			break;
		} else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
			   (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
			   time_after_eq(jiffies, qd->qd_last_warn +
					 gfs2_tune_get(sdp,
						       gt_quota_warn_period) * HZ)) {
			/* print_message() returns 0, so this clears error. */
			error = print_message(qd, "warning");
			qd->qd_last_warn = jiffies;
		}
	}

	return error;
}
| 966 | |||
| 967 | void gfs2_quota_change(struct gfs2_inode *ip, s64 change, | ||
| 968 | u32 uid, u32 gid) | ||
| 969 | { | ||
| 970 | struct gfs2_alloc *al = &ip->i_alloc; | ||
| 971 | struct gfs2_quota_data *qd; | ||
| 972 | unsigned int x; | ||
| 973 | unsigned int found = 0; | ||
| 974 | |||
| 975 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) | ||
| 976 | return; | ||
| 977 | if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) | ||
| 978 | return; | ||
| 979 | |||
| 980 | for (x = 0; x < al->al_qd_num; x++) { | ||
| 981 | qd = al->al_qd[x]; | ||
| 982 | |||
| 983 | if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || | ||
| 984 | (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { | ||
| 985 | do_qc(qd, change); | ||
| 986 | found++; | ||
| 987 | } | ||
| 988 | } | ||
| 989 | } | ||
| 990 | |||
/**
 * gfs2_quota_sync - write out all pending quota changes
 * @sdp: the filesystem
 *
 * Repeatedly fishes up to gt_quota_simul_sync claimed quota data
 * structures with qd_fish() and flushes them with do_sync(), until a
 * pass comes back short (nothing left) or an error occurs.
 *
 * Returns: errno
 */
int gfs2_quota_sync(struct gfs2_sbd *sdp)
{
	struct gfs2_quota_data **qda;
	unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
	unsigned int num_qd;
	unsigned int x;
	int error = 0;

	sdp->sd_quota_sync_gen++;

	qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
	if (!qda)
		return -ENOMEM;

	do {
		num_qd = 0;

		/* Collect up to max_qd quota data needing a sync. */
		for (;;) {
			error = qd_fish(sdp, qda + num_qd);
			if (error || !qda[num_qd])
				break;
			if (++num_qd == max_qd)
				break;
		}

		if (num_qd) {
			if (!error)
				error = do_sync(num_qd, qda);
			if (!error)
				/* Stamp the generation only after success. */
				for (x = 0; x < num_qd; x++)
					qda[x]->qd_sync_gen =
						sdp->sd_quota_sync_gen;

			for (x = 0; x < num_qd; x++)
				qd_unlock(qda[x]);
		}
	} while (!error && num_qd == max_qd);

	kfree(qda);

	return error;
}
| 1033 | |||
| 1034 | int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) | ||
| 1035 | { | ||
| 1036 | struct gfs2_quota_data *qd; | ||
| 1037 | struct gfs2_holder q_gh; | ||
| 1038 | int error; | ||
| 1039 | |||
| 1040 | error = qd_get(sdp, user, id, CREATE, &qd); | ||
| 1041 | if (error) | ||
| 1042 | return error; | ||
| 1043 | |||
| 1044 | error = do_glock(qd, FORCE, &q_gh); | ||
| 1045 | if (!error) | ||
| 1046 | gfs2_glock_dq_uninit(&q_gh); | ||
| 1047 | |||
| 1048 | qd_put(qd); | ||
| 1049 | |||
| 1050 | return error; | ||
| 1051 | } | ||
| 1052 | |||
/**
 * gfs2_quota_init - scan the quota change file at mount time
 * @sdp: the filesystem
 *
 * Sizes and allocates the quota slot bitmap, then walks every block of
 * the quota change file, creating an in-core gfs2_quota_data for each
 * slot carrying a nonzero pending change.  On failure everything is
 * torn down via gfs2_quota_cleanup().
 *
 * Returns: errno
 */
int gfs2_quota_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
	unsigned int x, slot = 0;
	unsigned int found = 0;
	u64 dblock;
	u32 extlen = 0;
	int error;

	/* Sanity-check the change file: nonzero, <= 64MB, block-aligned. */
	if (!ip->i_di.di_size || ip->i_di.di_size > (64 << 20) ||
	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
		gfs2_consist_inode(ip);
		return -EIO;
	}
	sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
	sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);

	error = -ENOMEM;

	/* One PAGE_SIZE bitmap chunk per 8*PAGE_SIZE slots. */
	sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
				       sizeof(unsigned char *), GFP_KERNEL);
	if (!sdp->sd_quota_bitmap)
		return error;

	for (x = 0; x < sdp->sd_quota_chunks; x++) {
		sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!sdp->sd_quota_bitmap[x])
			goto fail;
	}

	for (x = 0; x < blocks; x++) {
		struct buffer_head *bh;
		unsigned int y;

		/* Map the next extent of the change file when exhausted. */
		if (!extlen) {
			int new = 0;
			error = gfs2_extent_map(&ip->i_inode, x, &new, &dblock, &extlen);
			if (error)
				goto fail;
		}
		error = -EIO;
		bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
		if (!bh)
			goto fail;
		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
			brelse(bh);
			goto fail;
		}

		for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
		     y++, slot++) {
			struct gfs2_quota_change qc;
			struct gfs2_quota_data *qd;

			gfs2_quota_change_in(&qc, bh->b_data +
					  sizeof(struct gfs2_meta_header) +
					  y * sizeof(struct gfs2_quota_change));
			/* Only slots with a pending change matter. */
			if (!qc.qc_change)
				continue;

			error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
					 qc.qc_id, &qd);
			if (error) {
				brelse(bh);
				goto fail;
			}

			set_bit(QDF_CHANGE, &qd->qd_flags);
			qd->qd_change = qc.qc_change;
			qd->qd_slot = slot;
			qd->qd_slot_count = 1;
			qd->qd_last_touched = jiffies;

			spin_lock(&sdp->sd_quota_spin);
			gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
			list_add(&qd->qd_list, &sdp->sd_quota_list);
			atomic_inc(&sdp->sd_quota_count);
			spin_unlock(&sdp->sd_quota_spin);

			found++;
		}

		brelse(bh);
		dblock++;
		extlen--;
	}

	if (found)
		fs_info(sdp, "found %u quota changes\n", found);

	return 0;

fail:
	gfs2_quota_cleanup(sdp);
	return error;
}
| 1150 | |||
/**
 * gfs2_quota_scan - shrink the in-core quota data cache
 * @sdp: the filesystem
 *
 * Under the quota spinlock, moves every unreferenced quota data that has
 * not been touched within gt_quota_cache_secs onto a private list, then
 * frees the collected entries outside the lock.
 */
void gfs2_quota_scan(struct gfs2_sbd *sdp)
{
	struct gfs2_quota_data *qd, *safe;
	LIST_HEAD(dead);

	spin_lock(&sdp->sd_quota_spin);
	list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
		if (!qd->qd_count &&
		    time_after_eq(jiffies, qd->qd_last_touched +
				gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
			list_move(&qd->qd_list, &dead);
			gfs2_assert_warn(sdp,
					 atomic_read(&sdp->sd_quota_count) > 0);
			atomic_dec(&sdp->sd_quota_count);
		}
	}
	spin_unlock(&sdp->sd_quota_spin);

	while (!list_empty(&dead)) {
		qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
		list_del(&qd->qd_list);

		/* A reclaimed entry must carry no pending state. */
		gfs2_assert_warn(sdp, !qd->qd_change);
		gfs2_assert_warn(sdp, !qd->qd_slot_count);
		gfs2_assert_warn(sdp, !qd->qd_bh_count);

		gfs2_lvb_unhold(qd->qd_gl);
		kfree(qd);
	}
}
| 1181 | |||
/**
 * gfs2_quota_cleanup - free all in-core quota data
 * @sdp: the filesystem
 *
 * Used at unmount and on gfs2_quota_init() failure.  Entries still
 * carrying extra references are parked back on the list and waited for
 * (by rescheduling); everything else is unlinked and freed, and the
 * slot bitmap is released at the end.
 */
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_quota_list;
	struct gfs2_quota_data *qd;
	unsigned int x;

	spin_lock(&sdp->sd_quota_spin);
	while (!list_empty(head)) {
		qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);

		/* Still referenced beyond its pending change: wait. */
		if (qd->qd_count > 1 ||
		    (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
			list_move(&qd->qd_list, head);
			spin_unlock(&sdp->sd_quota_spin);
			schedule();
			spin_lock(&sdp->sd_quota_spin);
			continue;
		}

		list_del(&qd->qd_list);
		atomic_dec(&sdp->sd_quota_count);
		spin_unlock(&sdp->sd_quota_spin);

		if (!qd->qd_count) {
			gfs2_assert_warn(sdp, !qd->qd_change);
			gfs2_assert_warn(sdp, !qd->qd_slot_count);
		} else
			gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
		gfs2_assert_warn(sdp, !qd->qd_bh_count);

		gfs2_lvb_unhold(qd->qd_gl);
		kfree(qd);

		spin_lock(&sdp->sd_quota_spin);
	}
	spin_unlock(&sdp->sd_quota_spin);

	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));

	if (sdp->sd_quota_bitmap) {
		for (x = 0; x < sdp->sd_quota_chunks; x++)
			kfree(sdp->sd_quota_bitmap[x]);
		kfree(sdp->sd_quota_bitmap);
	}
}
| 1227 | |||
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h new file mode 100644 index 000000000000..a8be1417051f --- /dev/null +++ b/fs/gfs2/quota.h | |||
| @@ -0,0 +1,35 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __QUOTA_DOT_H__
#define __QUOTA_DOT_H__

struct gfs2_inode;
struct gfs2_sbd;

/* Sentinel uid/gid meaning "no extra quota to cover" for hold/lock. */
#define NO_QUOTA_CHANGE ((u32)-1)

/* Take/drop quota data references for an inode operation. */
int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
void gfs2_quota_unhold(struct gfs2_inode *ip);

/* As above, but also acquire/release the quota glocks. */
int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
void gfs2_quota_unlock(struct gfs2_inode *ip);

/* Enforcement check and allocation accounting. */
int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
		       u32 uid, u32 gid);

/* Flush all pending changes / force a re-read of one quota from disk. */
int gfs2_quota_sync(struct gfs2_sbd *sdp);
int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);

/* Mount-time setup, periodic cache trimming, and teardown. */
int gfs2_quota_init(struct gfs2_sbd *sdp);
void gfs2_quota_scan(struct gfs2_sbd *sdp);
void gfs2_quota_cleanup(struct gfs2_sbd *sdp);

#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c new file mode 100644 index 000000000000..0a8a4b87dcc6 --- /dev/null +++ b/fs/gfs2/recovery.c | |||
| @@ -0,0 +1,570 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/crc32.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "bmap.h" | ||
| 22 | #include "glock.h" | ||
| 23 | #include "glops.h" | ||
| 24 | #include "lm.h" | ||
| 25 | #include "lops.h" | ||
| 26 | #include "meta_io.h" | ||
| 27 | #include "recovery.h" | ||
| 28 | #include "super.h" | ||
| 29 | #include "util.h" | ||
| 30 | #include "dir.h" | ||
| 31 | |||
| 32 | int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | ||
| 33 | struct buffer_head **bh) | ||
| 34 | { | ||
| 35 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
| 36 | struct gfs2_glock *gl = ip->i_gl; | ||
| 37 | int new = 0; | ||
| 38 | u64 dblock; | ||
| 39 | u32 extlen; | ||
| 40 | int error; | ||
| 41 | |||
| 42 | error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen); | ||
| 43 | if (error) | ||
| 44 | return error; | ||
| 45 | if (!dblock) { | ||
| 46 | gfs2_consist_inode(ip); | ||
| 47 | return -EIO; | ||
| 48 | } | ||
| 49 | |||
| 50 | *bh = gfs2_meta_ra(gl, dblock, extlen); | ||
| 51 | |||
| 52 | return error; | ||
| 53 | } | ||
| 54 | |||
| 55 | int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | ||
| 56 | { | ||
| 57 | struct list_head *head = &sdp->sd_revoke_list; | ||
| 58 | struct gfs2_revoke_replay *rr; | ||
| 59 | int found = 0; | ||
| 60 | |||
| 61 | list_for_each_entry(rr, head, rr_list) { | ||
| 62 | if (rr->rr_blkno == blkno) { | ||
| 63 | found = 1; | ||
| 64 | break; | ||
| 65 | } | ||
| 66 | } | ||
| 67 | |||
| 68 | if (found) { | ||
| 69 | rr->rr_where = where; | ||
| 70 | return 0; | ||
| 71 | } | ||
| 72 | |||
| 73 | rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL); | ||
| 74 | if (!rr) | ||
| 75 | return -ENOMEM; | ||
| 76 | |||
| 77 | rr->rr_blkno = blkno; | ||
| 78 | rr->rr_where = where; | ||
| 79 | list_add(&rr->rr_list, head); | ||
| 80 | |||
| 81 | return 1; | ||
| 82 | } | ||
| 83 | |||
| 84 | int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | ||
| 85 | { | ||
| 86 | struct gfs2_revoke_replay *rr; | ||
| 87 | int wrap, a, b, revoke; | ||
| 88 | int found = 0; | ||
| 89 | |||
| 90 | list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { | ||
| 91 | if (rr->rr_blkno == blkno) { | ||
| 92 | found = 1; | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | if (!found) | ||
| 98 | return 0; | ||
| 99 | |||
| 100 | wrap = (rr->rr_where < sdp->sd_replay_tail); | ||
| 101 | a = (sdp->sd_replay_tail < where); | ||
| 102 | b = (where < rr->rr_where); | ||
| 103 | revoke = (wrap) ? (a || b) : (a && b); | ||
| 104 | |||
| 105 | return revoke; | ||
| 106 | } | ||
| 107 | |||
| 108 | void gfs2_revoke_clean(struct gfs2_sbd *sdp) | ||
| 109 | { | ||
| 110 | struct list_head *head = &sdp->sd_revoke_list; | ||
| 111 | struct gfs2_revoke_replay *rr; | ||
| 112 | |||
| 113 | while (!list_empty(head)) { | ||
| 114 | rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list); | ||
| 115 | list_del(&rr->rr_list); | ||
| 116 | kfree(rr); | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
/**
 * get_log_header - read the log header for a given segment
 * @jd: the journal
 * @blk: the block to look at
 * @head: the log header to return
 *
 * Read the log header for a given segment in a given journal.  Do a few
 * sanity checks on it.
 *
 * Returns: 0 on success,
 *          1 if the header was invalid or incomplete,
 *          errno on error
 */

static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
			  struct gfs2_log_header *head)
{
	struct buffer_head *bh;
	struct gfs2_log_header lh;
	u32 hash;
	int error;

	error = gfs2_replay_read_block(jd, blk, &bh);
	if (error)
		return error;

	/* The stored hash is computed over the raw on-disk header with the
	   lh_hash field zeroed, so recompute it the same way before the
	   endian conversion of the header into lh. */
	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
	lh.lh_hash = 0;
	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
	gfs2_log_header_in(&lh, bh->b_data);

	brelse(bh);

	/* Reject anything that isn't a valid header for this exact block */
	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
	    lh.lh_header.mh_type != GFS2_METATYPE_LH ||
	    lh.lh_blkno != blk || lh.lh_hash != hash)
		return 1;

	*head = lh;

	return 0;
}
| 162 | |||
| 163 | /** | ||
| 164 | * find_good_lh - find a good log header | ||
| 165 | * @jd: the journal | ||
| 166 | * @blk: the segment to start searching from | ||
| 167 | * @lh: the log header to fill in | ||
| 168 | * @forward: if true search forward in the log, else search backward | ||
| 169 | * | ||
| 170 | * Call get_log_header() to get a log header for a segment, but if the | ||
| 171 | * segment is bad, either scan forward or backward until we find a good one. | ||
| 172 | * | ||
| 173 | * Returns: errno | ||
| 174 | */ | ||
| 175 | |||
| 176 | static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, | ||
| 177 | struct gfs2_log_header *head) | ||
| 178 | { | ||
| 179 | unsigned int orig_blk = *blk; | ||
| 180 | int error; | ||
| 181 | |||
| 182 | for (;;) { | ||
| 183 | error = get_log_header(jd, *blk, head); | ||
| 184 | if (error <= 0) | ||
| 185 | return error; | ||
| 186 | |||
| 187 | if (++*blk == jd->jd_blocks) | ||
| 188 | *blk = 0; | ||
| 189 | |||
| 190 | if (*blk == orig_blk) { | ||
| 191 | gfs2_consist_inode(GFS2_I(jd->jd_inode)); | ||
| 192 | return -EIO; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
/**
 * jhead_scan - make sure we've found the head of the log
 * @jd: the journal
 * @head: this is filled in with the log descriptor of the head
 *
 * At this point, seg and lh should be either the head of the log or just
 * before.  Scan forward until we find the head.
 *
 * Returns: errno
 */

static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
{
	unsigned int blk = head->lh_blkno;
	struct gfs2_log_header lh;
	int error;

	for (;;) {
		/* Step to the next journal block, wrapping at the end */
		if (++blk == jd->jd_blocks)
			blk = 0;

		/* error == 1 means "not a valid header": keep scanning */
		error = get_log_header(jd, blk, &lh);
		if (error < 0)
			return error;
		if (error == 1)
			continue;

		/* A duplicate sequence number means journal corruption */
		if (lh.lh_sequence == head->lh_sequence) {
			gfs2_consist_inode(GFS2_I(jd->jd_inode));
			return -EIO;
		}
		/* An older header means we have passed the head */
		if (lh.lh_sequence < head->lh_sequence)
			break;

		*head = lh;
	}

	return 0;
}
| 236 | |||
/**
 * gfs2_find_jhead - find the head of a log
 * @jd: the journal
 * @head: the log descriptor for the head of the log is returned here
 *
 * Do a binary search of a journal and find the valid log entry with the
 * highest sequence number.  (i.e. the log head)
 *
 * Returns: errno
 */

int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
{
	struct gfs2_log_header lh_1, lh_m;
	u32 blk_1, blk_2, blk_m;
	int error;

	blk_1 = 0;
	blk_2 = jd->jd_blocks - 1;

	/* Binary search: sequence numbers grow from the tail towards the
	   head, so narrow [blk_1, blk_2] towards the highest sequence.
	   Note find_good_lh() may advance the probe block past bad
	   headers, so blk_1/blk_m can move inside an iteration. */
	for (;;) {
		blk_m = (blk_1 + blk_2) / 2;

		error = find_good_lh(jd, &blk_1, &lh_1);
		if (error)
			return error;

		error = find_good_lh(jd, &blk_m, &lh_m);
		if (error)
			return error;

		if (blk_1 == blk_m || blk_m == blk_2)
			break;

		if (lh_1.lh_sequence <= lh_m.lh_sequence)
			blk_1 = blk_m;
		else
			blk_2 = blk_m;
	}

	/* The search only gets close; scan linearly for the exact head */
	error = jhead_scan(jd, &lh_1);
	if (error)
		return error;

	*head = lh_1;

	return error;
}
| 285 | |||
/**
 * foreach_descriptor - go through the active part of the log
 * @jd: the journal
 * @start: the first log header in the active region
 * @end: the last log header (don't process the contents of this entry))
 * @pass: the replay pass number, handed through to lops_scan_elements()
 *
 * Call a given function once for every log descriptor in the active
 * portion of the log.
 *
 * Returns: errno
 */

static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
			      unsigned int end, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	int error = 0;
	u32 length;
	__be64 *ptr;
	/* Block-number array follows the descriptor, rounded up to the
	   next __be64 boundary */
	unsigned int offset = sizeof(struct gfs2_log_descriptor);
	offset += sizeof(__be64) - 1;
	offset &= ~(sizeof(__be64) - 1);

	while (start != end) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;
		if (gfs2_meta_check(sdp, bh)) {
			brelse(bh);
			return -EIO;
		}
		ld = (struct gfs2_log_descriptor *)bh->b_data;
		length = be32_to_cpu(ld->ld_length);

		if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
			/* A log header inside the active region carries no
			   descriptor payload: validate it and skip it */
			struct gfs2_log_header lh;
			error = get_log_header(jd, start, &lh);
			if (!error) {
				gfs2_replay_incr_blk(sdp, &start);
				brelse(bh);
				continue;
			}
			/* An invalid header inside the active region means
			   the journal is inconsistent */
			if (error == 1) {
				gfs2_consist_inode(GFS2_I(jd->jd_inode));
				error = -EIO;
			}
			brelse(bh);
			return error;
		} else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
			brelse(bh);
			return -EIO;
		}
		ptr = (__be64 *)(bh->b_data + offset);
		error = lops_scan_elements(jd, start, ld, ptr, pass);
		if (error) {
			brelse(bh);
			return error;
		}

		/* Skip over the blocks covered by this descriptor */
		while (length--)
			gfs2_replay_incr_blk(sdp, &start);

		brelse(bh);
	}

	return 0;
}
| 355 | |||
| 356 | /** | ||
| 357 | * clean_journal - mark a dirty journal as being clean | ||
| 358 | * @sdp: the filesystem | ||
| 359 | * @jd: the journal | ||
| 360 | * @gl: the journal's glock | ||
| 361 | * @head: the head journal to start from | ||
| 362 | * | ||
| 363 | * Returns: errno | ||
| 364 | */ | ||
| 365 | |||
| 366 | static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head) | ||
| 367 | { | ||
| 368 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
| 369 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
| 370 | unsigned int lblock; | ||
| 371 | struct gfs2_log_header *lh; | ||
| 372 | u32 hash; | ||
| 373 | struct buffer_head *bh; | ||
| 374 | int error; | ||
| 375 | struct buffer_head bh_map; | ||
| 376 | |||
| 377 | lblock = head->lh_blkno; | ||
| 378 | gfs2_replay_incr_blk(sdp, &lblock); | ||
| 379 | error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map, 1); | ||
| 380 | if (error) | ||
| 381 | return error; | ||
| 382 | if (!bh_map.b_blocknr) { | ||
| 383 | gfs2_consist_inode(ip); | ||
| 384 | return -EIO; | ||
| 385 | } | ||
| 386 | |||
| 387 | bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr); | ||
| 388 | lock_buffer(bh); | ||
| 389 | memset(bh->b_data, 0, bh->b_size); | ||
| 390 | set_buffer_uptodate(bh); | ||
| 391 | clear_buffer_dirty(bh); | ||
| 392 | unlock_buffer(bh); | ||
| 393 | |||
| 394 | lh = (struct gfs2_log_header *)bh->b_data; | ||
| 395 | memset(lh, 0, sizeof(struct gfs2_log_header)); | ||
| 396 | lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | ||
| 397 | lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); | ||
| 398 | lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); | ||
| 399 | lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); | ||
| 400 | lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); | ||
| 401 | lh->lh_blkno = cpu_to_be32(lblock); | ||
| 402 | hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header)); | ||
| 403 | lh->lh_hash = cpu_to_be32(hash); | ||
| 404 | |||
| 405 | set_buffer_dirty(bh); | ||
| 406 | if (sync_dirty_buffer(bh)) | ||
| 407 | gfs2_io_error_bh(sdp, bh); | ||
| 408 | brelse(bh); | ||
| 409 | |||
| 410 | return error; | ||
| 411 | } | ||
| 412 | |||
/**
 * gfs2_recover_journal - recover a given journal
 * @jd: the struct gfs2_jdesc describing the journal
 *
 * Acquire the journal's lock, check to see if the journal is clean, and
 * do recovery if necessary.
 *
 * Returns: errno
 */

int gfs2_recover_journal(struct gfs2_jdesc *jd)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_log_header head;
	struct gfs2_holder j_gh, ji_gh, t_gh;
	unsigned long t;
	int ro = 0;
	unsigned int pass;
	int error;

	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
		/* Another node's journal: we must take its locks first */
		fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
			jd->jd_jid);

		/* Acquire the journal lock so we can do recovery */

		error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
					  LM_ST_EXCLUSIVE,
					  LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
					  &j_gh);
		switch (error) {
		case 0:
			break;

		case GLR_TRYFAILED:
			/* Someone else holds the journal lock, so they will
			   handle the recovery; clear error and give up */
			fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
			error = 0;
			/* fall through */

		default:
			goto fail;
		};

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
					   LM_FLAG_NOEXP, &ji_gh);
		if (error)
			goto fail_gunlock_j;
	} else {
		/* Our own journal: already locked at mount time */
		fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
	}

	fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);

	error = gfs2_jdesc_check(jd);
	if (error)
		goto fail_gunlock_ji;

	error = gfs2_find_jhead(jd, &head);
	if (error)
		goto fail_gunlock_ji;

	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		/* The journal was not cleanly unmounted: replay it */
		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
			jd->jd_jid);

		t = jiffies;

		/* Acquire a shared hold on the transaction lock */

		error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
					   LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
					   GL_NOCANCEL | GL_NOCACHE, &t_gh);
		if (error)
			goto fail_gunlock_ji;

		/* Replay needs a writable filesystem */
		if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
			if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
				ro = 1;
		} else {
			if (sdp->sd_vfs->s_flags & MS_RDONLY)
				ro = 1;
		}

		if (ro) {
			fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
				jd->jd_jid);
			error = -EROFS;
			goto fail_gunlock_tr;
		}

		fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);

		/* Two replay passes over the active region of the log */
		for (pass = 0; pass < 2; pass++) {
			lops_before_scan(jd, &head, pass);
			error = foreach_descriptor(jd, head.lh_tail,
						   head.lh_blkno, pass);
			lops_after_scan(jd, error, pass);
			if (error)
				goto fail_gunlock_tr;
		}

		/* Write an unmount header so the journal reads as clean */
		error = clean_journal(jd, &head);
		if (error)
			goto fail_gunlock_tr;

		gfs2_glock_dq_uninit(&t_gh);
		t = DIV_ROUND_UP(jiffies - t, HZ);
		fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
			jd->jd_jid, t);
	}

	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
		gfs2_glock_dq_uninit(&ji_gh);

	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);

	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
		gfs2_glock_dq_uninit(&j_gh);

	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
	return 0;

fail_gunlock_tr:
	gfs2_glock_dq_uninit(&t_gh);
fail_gunlock_ji:
	/* j_gh/ji_gh are held only when recovering another node's journal */
	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
		gfs2_glock_dq_uninit(&ji_gh);
fail_gunlock_j:
		gfs2_glock_dq_uninit(&j_gh);
	}

	fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");

fail:
	/* Tell the lock module we gave up on this journal */
	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
	return error;
}
| 550 | |||
| 551 | /** | ||
| 552 | * gfs2_check_journals - Recover any dirty journals | ||
| 553 | * @sdp: the filesystem | ||
| 554 | * | ||
| 555 | */ | ||
| 556 | |||
| 557 | void gfs2_check_journals(struct gfs2_sbd *sdp) | ||
| 558 | { | ||
| 559 | struct gfs2_jdesc *jd; | ||
| 560 | |||
| 561 | for (;;) { | ||
| 562 | jd = gfs2_jdesc_find_dirty(sdp); | ||
| 563 | if (!jd) | ||
| 564 | break; | ||
| 565 | |||
| 566 | if (jd != sdp->sd_jdesc) | ||
| 567 | gfs2_recover_journal(jd); | ||
| 568 | } | ||
| 569 | } | ||
| 570 | |||
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h new file mode 100644 index 000000000000..961feedf4d8b --- /dev/null +++ b/fs/gfs2/recovery.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __RECOVERY_DOT_H__ | ||
| 11 | #define __RECOVERY_DOT_H__ | ||
| 12 | |||
| 13 | #include "incore.h" | ||
| 14 | |||
| 15 | static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) | ||
| 16 | { | ||
| 17 | if (++*blk == sdp->sd_jdesc->jd_blocks) | ||
| 18 | *blk = 0; | ||
| 19 | } | ||
| 20 | |||
| 21 | int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | ||
| 22 | struct buffer_head **bh); | ||
| 23 | |||
| 24 | int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); | ||
| 25 | int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); | ||
| 26 | void gfs2_revoke_clean(struct gfs2_sbd *sdp); | ||
| 27 | |||
| 28 | int gfs2_find_jhead(struct gfs2_jdesc *jd, | ||
| 29 | struct gfs2_log_header *head); | ||
| 30 | int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); | ||
| 31 | void gfs2_check_journals(struct gfs2_sbd *sdp); | ||
| 32 | |||
| 33 | #endif /* __RECOVERY_DOT_H__ */ | ||
| 34 | |||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c new file mode 100644 index 000000000000..b261385c0065 --- /dev/null +++ b/fs/gfs2/rgrp.c | |||
| @@ -0,0 +1,1513 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/fs.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "glock.h" | ||
| 22 | #include "glops.h" | ||
| 23 | #include "lops.h" | ||
| 24 | #include "meta_io.h" | ||
| 25 | #include "quota.h" | ||
| 26 | #include "rgrp.h" | ||
| 27 | #include "super.h" | ||
| 28 | #include "trans.h" | ||
| 29 | #include "ops_file.h" | ||
| 30 | #include "util.h" | ||
| 31 | |||
| 32 | #define BFITNOENT ((u32)~0) | ||
| 33 | |||
| 34 | /* | ||
| 35 | * These routines are used by the resource group routines (rgrp.c) | ||
| 36 | * to keep track of block allocation. Each block is represented by two | ||
| 37 | * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. | ||
| 38 | * | ||
| 39 | * 0 = Free | ||
| 40 | * 1 = Used (not metadata) | ||
| 41 | * 2 = Unlinked (still in use) inode | ||
| 42 | * 3 = Used (metadata) | ||
| 43 | */ | ||
| 44 | |||
/*
 * Allowed bitmap state transitions, indexed by new_state * 4 + cur_state
 * (see gfs2_setbit()).  A zero entry marks a transition that is treated
 * as a resource-group consistency error.
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};
| 52 | |||
| 53 | /** | ||
| 54 | * gfs2_setbit - Set a bit in the bitmaps | ||
| 55 | * @buffer: the buffer that holds the bitmaps | ||
| 56 | * @buflen: the length (in bytes) of the buffer | ||
| 57 | * @block: the block to set | ||
| 58 | * @new_state: the new state of the block | ||
| 59 | * | ||
| 60 | */ | ||
| 61 | |||
| 62 | static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | ||
| 63 | unsigned int buflen, u32 block, | ||
| 64 | unsigned char new_state) | ||
| 65 | { | ||
| 66 | unsigned char *byte, *end, cur_state; | ||
| 67 | unsigned int bit; | ||
| 68 | |||
| 69 | byte = buffer + (block / GFS2_NBBY); | ||
| 70 | bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; | ||
| 71 | end = buffer + buflen; | ||
| 72 | |||
| 73 | gfs2_assert(rgd->rd_sbd, byte < end); | ||
| 74 | |||
| 75 | cur_state = (*byte >> bit) & GFS2_BIT_MASK; | ||
| 76 | |||
| 77 | if (valid_change[new_state * 4 + cur_state]) { | ||
| 78 | *byte ^= cur_state << bit; | ||
| 79 | *byte |= new_state << bit; | ||
| 80 | } else | ||
| 81 | gfs2_consist_rgrpd(rgd); | ||
| 82 | } | ||
| 83 | |||
| 84 | /** | ||
| 85 | * gfs2_testbit - test a bit in the bitmaps | ||
| 86 | * @buffer: the buffer that holds the bitmaps | ||
| 87 | * @buflen: the length (in bytes) of the buffer | ||
| 88 | * @block: the block to read | ||
| 89 | * | ||
| 90 | */ | ||
| 91 | |||
| 92 | static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | ||
| 93 | unsigned int buflen, u32 block) | ||
| 94 | { | ||
| 95 | unsigned char *byte, *end, cur_state; | ||
| 96 | unsigned int bit; | ||
| 97 | |||
| 98 | byte = buffer + (block / GFS2_NBBY); | ||
| 99 | bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; | ||
| 100 | end = buffer + buflen; | ||
| 101 | |||
| 102 | gfs2_assert(rgd->rd_sbd, byte < end); | ||
| 103 | |||
| 104 | cur_state = (*byte >> bit) & GFS2_BIT_MASK; | ||
| 105 | |||
| 106 | return cur_state; | ||
| 107 | } | ||
| 108 | |||
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *               a block in a given allocation state.
 * @rgd: the resource group descriptor (unused in the search itself)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
 *        bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures.
 *
 * Return: the block number (bitmap buffer scope) that was found, or
 *         BFITNOENT if no block in @old_state was found
 */

static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		       unsigned int buflen, u32 goal,
		       unsigned char old_state)
{
	unsigned char *byte, *end, alloc;
	u32 blk = goal;
	unsigned int bit;

	byte = buffer + (goal / GFS2_NBBY);
	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
	end = buffer + buflen;
	/* 0x55 = 01010101: the low ("alloc") bit of all four pairs in a
	   byte.  A byte whose alloc bits are all-wrong for the wanted
	   state cannot contain a match and can be skipped whole. */
	alloc = (old_state & 1) ? 0 : 0x55;

	while (byte < end) {
		if ((*byte & 0x55) == alloc) {
			/* Skip the remaining (8 - bit) / 2 pairs of this
			   mismatching byte in one step */
			blk += (8 - bit) >> 1;

			bit = 0;
			byte++;

			continue;
		}

		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
			return blk;

		bit += GFS2_BIT_SIZE;
		if (bit >= 8) {
			bit = 0;
			byte++;
		}

		blk++;
	}

	return BFITNOENT;
}
| 162 | |||
| 163 | /** | ||
| 164 | * gfs2_bitcount - count the number of bits in a certain state | ||
| 165 | * @buffer: the buffer that holds the bitmaps | ||
| 166 | * @buflen: the length (in bytes) of the buffer | ||
| 167 | * @state: the state of the block we're looking for | ||
| 168 | * | ||
| 169 | * Returns: The number of bits | ||
| 170 | */ | ||
| 171 | |||
| 172 | static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer, | ||
| 173 | unsigned int buflen, unsigned char state) | ||
| 174 | { | ||
| 175 | unsigned char *byte = buffer; | ||
| 176 | unsigned char *end = buffer + buflen; | ||
| 177 | unsigned char state1 = state << 2; | ||
| 178 | unsigned char state2 = state << 4; | ||
| 179 | unsigned char state3 = state << 6; | ||
| 180 | u32 count = 0; | ||
| 181 | |||
| 182 | for (; byte < end; byte++) { | ||
| 183 | if (((*byte) & 0x03) == state) | ||
| 184 | count++; | ||
| 185 | if (((*byte) & 0x0C) == state1) | ||
| 186 | count++; | ||
| 187 | if (((*byte) & 0x30) == state2) | ||
| 188 | count++; | ||
| 189 | if (((*byte) & 0xC0) == state3) | ||
| 190 | count++; | ||
| 191 | } | ||
| 192 | |||
| 193 | return count; | ||
| 194 | } | ||
| 195 | |||
/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 * Recounts the four block-state tallies from the bitmaps and compares
 * them against the counts stored in the rgrp header, flagging the rgrp
 * as inconsistent on any mismatch.
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_ri.ri_length;
	u32 count[4], tmp;
	int buf, x;

	memset(count, 0, 4 * sizeof(u32));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_len, x);
	}

	if (count[0] != rgd->rd_rg.rg_free) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free data mismatch: %u != %u\n",
			       count[0], rgd->rd_rg.rg_free);
		return;
	}

	/* Whatever isn't free or a dinode must be used or unlinked data */
	tmp = rgd->rd_ri.ri_data -
		rgd->rd_rg.rg_free -
		rgd->rd_rg.rg_dinodes;
	if (count[1] + count[2] != tmp) {
		/* NOTE(review): the message prints only count[1] although
		   the comparison uses count[1] + count[2] */
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used data mismatch: %u != %u\n",
			       count[1], tmp);
		return;
	}

	if (count[3] != rgd->rd_rg.rg_dinodes) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used metadata mismatch: %u != %u\n",
			       count[3], rgd->rd_rg.rg_dinodes);
		return;
	}

	/* Unlinked blocks are inodes too, so they can't outnumber dinodes */
	if (count[2] > count[3]) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "unlinked inodes > inodes: %u\n",
			       count[2]);
		return;
	}

}
| 255 | |||
| 256 | static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block) | ||
| 257 | { | ||
| 258 | u64 first = ri->ri_data0; | ||
| 259 | u64 last = first + ri->ri_data; | ||
| 260 | return first <= block && block < last; | ||
| 261 | } | ||
| 262 | |||
| 263 | /** | ||
| 264 | * gfs2_blk2rgrpd - Find resource group for a given data/meta block number | ||
| 265 | * @sdp: The GFS2 superblock | ||
| 266 | * @n: The data block number | ||
| 267 | * | ||
| 268 | * Returns: The resource group, or NULL if not found | ||
| 269 | */ | ||
| 270 | |||
| 271 | struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) | ||
| 272 | { | ||
| 273 | struct gfs2_rgrpd *rgd; | ||
| 274 | |||
| 275 | spin_lock(&sdp->sd_rindex_spin); | ||
| 276 | |||
| 277 | list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { | ||
| 278 | if (rgrp_contains_block(&rgd->rd_ri, blk)) { | ||
| 279 | list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); | ||
| 280 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 281 | return rgd; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 286 | |||
| 287 | return NULL; | ||
| 288 | } | ||
| 289 | |||
| 290 | /** | ||
| 291 | * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem | ||
| 292 | * @sdp: The GFS2 superblock | ||
| 293 | * | ||
| 294 | * Returns: The first rgrp in the filesystem | ||
| 295 | */ | ||
| 296 | |||
| 297 | struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) | ||
| 298 | { | ||
| 299 | gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list)); | ||
| 300 | return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list); | ||
| 301 | } | ||
| 302 | |||
| 303 | /** | ||
| 304 | * gfs2_rgrpd_get_next - get the next RG | ||
| 305 | * @rgd: A RG | ||
| 306 | * | ||
| 307 | * Returns: The next rgrp | ||
| 308 | */ | ||
| 309 | |||
| 310 | struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) | ||
| 311 | { | ||
| 312 | if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list) | ||
| 313 | return NULL; | ||
| 314 | return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list); | ||
| 315 | } | ||
| 316 | |||
/* Tear down all in-core rgrp state: empty the recent list under the
   rindex spinlock, then free every rgrp descriptor and drop its glock
   reference. */
static void clear_rgrpdi(struct gfs2_sbd *sdp)
{
	struct list_head *head;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = NULL;
	head = &sdp->sd_rindex_recent_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
		list_del(&rgd->rd_recent);
	}
	spin_unlock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
		gl = rgd->rd_gl;

		list_del(&rgd->rd_list);
		list_del(&rgd->rd_list_mru);

		if (gl) {
			/* Break the glock's back-pointer before putting it */
			gl->gl_object = NULL;
			gfs2_glock_put(gl);
		}

		kfree(rgd->rd_bits);
		kfree(rgd);
	}
}
| 349 | |||
/* Serialized (via sd_rindex_mutex) wrapper around clear_rgrpdi() */
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	mutex_lock(&sdp->sd_rindex_mutex);
	clear_rgrpdi(sdp);
	mutex_unlock(&sdp->sd_rindex_mutex);
}
| 356 | |||
/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_ri.ri_bitbytes;

	/* Each block contributes its block size minus its header; the
	   first block has the larger gfs2_rgrp header, the rest only a
	   gfs2_meta_header, and the last block takes what remains. */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	/* Every bitmap byte must have been placed exactly once */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* The last byte must land exactly on ri_data (GFS2_NBBY blocks
	   per byte) */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(&rgd->rd_ri);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
| 432 | |||
/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: The rindex inode
 *
 * Discards the current rgrp list and rebuilds it, one rgrp per on-disk
 * rindex entry. Called with sd_rindex_mutex held (see gfs2_rindex_hold).
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct gfs2_rgrpd *rgd;
	char buf[sizeof(struct gfs2_rindex)];
	struct file_ra_state ra_state;
	u64 junk = ip->i_di.di_size;
	int error;

	/* The rindex file must be a whole number of entries long. */
	if (do_div(junk, sizeof(struct gfs2_rindex))) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	/* Throw away the old list before rebuilding it. */
	clear_rgrpdi(sdp);

	file_ra_state_init(&ra_state, inode->i_mapping);
	/* Read one rindex entry per iteration until we hit EOF. */
	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
		loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
					   sizeof(struct gfs2_rindex));
		if (!error)	/* EOF: all entries consumed */
			break;
		if (error != sizeof(struct gfs2_rindex)) {
			/* A short read is corruption, not success. */
			if (error > 0)
				error = -EIO;
			goto fail;
		}

		rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
		error = -ENOMEM;
		if (!rgd)
			goto fail;

		mutex_init(&rgd->rd_mutex);
		lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
		rgd->rd_sbd = sdp;

		/* Link in before any possible failure so that the
		   clear_rgrpdi() in the fail path frees this rgd too. */
		list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
		list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);

		gfs2_rindex_in(&rgd->rd_ri, buf);
		error = compute_bitstructs(rgd);
		if (error)
			goto fail;

		error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
				       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
		if (error)
			goto fail;

		rgd->rd_gl->gl_object = rgd;
		/* One behind the glock's version: forces a reread of the
		   rgrp data the first time the glock is acquired. */
		rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
	}

	/* Record which version of the rindex we are now in sync with. */
	sdp->sd_rindex_vn = ip->i_gl->gl_vn;
	return 0;

fail:
	clear_rgrpdi(sdp);
	return error;
}
| 503 | |||
/**
 * gfs2_rindex_hold - Grab a lock on the rindex
 * @sdp: The GFS2 superblock
 * @ri_gh: the glock holder
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * On success the holder @ri_gh is left queued; the caller drops it with
 * gfs2_glock_dq_uninit(). On failure it has already been released.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	int error;

	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
	if (error)
		return error;

	/* Read new copy from disk if we don't have the latest.
	   Double-checked under sd_rindex_mutex so only one task
	   rebuilds the rgrp list. */
	if (sdp->sd_rindex_vn != gl->gl_vn) {
		mutex_lock(&sdp->sd_rindex_mutex);
		if (sdp->sd_rindex_vn != gl->gl_vn) {
			error = gfs2_ri_update(ip);
			if (error)
				/* don't leave the glock held on failure */
				gfs2_glock_dq_uninit(ri_gh);
		}
		mutex_unlock(&sdp->sd_rindex_mutex);
	}

	return error;
}
| 545 | |||
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_bh_put() to drop the
 * reference (rd_bh_count) taken here.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_ri.ri_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	mutex_lock(&rgd->rd_mutex);

	/* Fast path: buffers already in memory; just bump the refcount. */
	spin_lock(&sdp->sd_rindex_spin);
	if (rgd->rd_bh_count) {
		rgd->rd_bh_count++;
		spin_unlock(&sdp->sd_rindex_spin);
		mutex_unlock(&rgd->rd_mutex);
		return 0;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	/* Submit reads for all blocks first... */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* ...then wait for them and verify metadata types. Block 0 is
	   the rgrp header; the rest are pure bitmap blocks. */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* Refresh the in-core rgrp from the header block if stale. */
	if (rgd->rd_rg_vn != gl->gl_vn) {
		gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_rg_vn = gl->gl_vn;
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_rg.rg_free;
	rgd->rd_bh_count++;
	spin_unlock(&sdp->sd_rindex_spin);

	mutex_unlock(&rgd->rd_mutex);

	return 0;

fail:
	/* Release only the buffers obtained so far; x is the first index
	   whose read was not (successfully) issued. */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}
	mutex_unlock(&rgd->rd_mutex);

	return error;
}
| 620 | |||
| 621 | void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd) | ||
| 622 | { | ||
| 623 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
| 624 | |||
| 625 | spin_lock(&sdp->sd_rindex_spin); | ||
| 626 | gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); | ||
| 627 | rgd->rd_bh_count++; | ||
| 628 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 629 | } | ||
| 630 | |||
/**
 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Drops one reference; the buffer heads and clone bitmaps are freed
 * when the last reference goes away.
 */

void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	int x, length = rgd->rd_ri.ri_length;

	spin_lock(&sdp->sd_rindex_spin);
	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
	if (--rgd->rd_bh_count) {
		/* Still referenced elsewhere; nothing to free yet. */
		spin_unlock(&sdp->sd_rindex_spin);
		return;
	}

	/* Last reference: free clone bitmaps and release buffer heads. */
	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		kfree(bi->bi_clone);
		bi->bi_clone = NULL;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
	}

	spin_unlock(&sdp->sd_rindex_spin);
}
| 659 | |||
| 660 | void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) | ||
| 661 | { | ||
| 662 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
| 663 | unsigned int length = rgd->rd_ri.ri_length; | ||
| 664 | unsigned int x; | ||
| 665 | |||
| 666 | for (x = 0; x < length; x++) { | ||
| 667 | struct gfs2_bitmap *bi = rgd->rd_bits + x; | ||
| 668 | if (!bi->bi_clone) | ||
| 669 | continue; | ||
| 670 | memcpy(bi->bi_clone + bi->bi_offset, | ||
| 671 | bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); | ||
| 672 | } | ||
| 673 | |||
| 674 | spin_lock(&sdp->sd_rindex_spin); | ||
| 675 | rgd->rd_free_clone = rgd->rd_rg.rg_free; | ||
| 676 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 677 | } | ||
| 678 | |||
| 679 | /** | ||
| 680 | * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode | ||
| 681 | * @ip: the incore GFS2 inode structure | ||
| 682 | * | ||
| 683 | * Returns: the struct gfs2_alloc | ||
| 684 | */ | ||
| 685 | |||
| 686 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) | ||
| 687 | { | ||
| 688 | struct gfs2_alloc *al = &ip->i_alloc; | ||
| 689 | |||
| 690 | /* FIXME: Should assert that the correct locks are held here... */ | ||
| 691 | memset(al, 0, sizeof(*al)); | ||
| 692 | return al; | ||
| 693 | } | ||
| 694 | |||
| 695 | /** | ||
| 696 | * try_rgrp_fit - See if a given reservation will fit in a given RG | ||
| 697 | * @rgd: the RG data | ||
| 698 | * @al: the struct gfs2_alloc structure describing the reservation | ||
| 699 | * | ||
| 700 | * If there's room for the requested blocks to be allocated from the RG: | ||
| 701 | * Sets the $al_reserved_data field in @al. | ||
| 702 | * Sets the $al_reserved_meta field in @al. | ||
| 703 | * Sets the $al_rgd field in @al. | ||
| 704 | * | ||
| 705 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) | ||
| 706 | */ | ||
| 707 | |||
| 708 | static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | ||
| 709 | { | ||
| 710 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
| 711 | int ret = 0; | ||
| 712 | |||
| 713 | spin_lock(&sdp->sd_rindex_spin); | ||
| 714 | if (rgd->rd_free_clone >= al->al_requested) { | ||
| 715 | al->al_rgd = rgd; | ||
| 716 | ret = 1; | ||
| 717 | } | ||
| 718 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 719 | |||
| 720 | return ret; | ||
| 721 | } | ||
| 722 | |||
| 723 | /** | ||
| 724 | * recent_rgrp_first - get first RG from "recent" list | ||
| 725 | * @sdp: The GFS2 superblock | ||
| 726 | * @rglast: address of the rgrp used last | ||
| 727 | * | ||
| 728 | * Returns: The first rgrp in the recent list | ||
| 729 | */ | ||
| 730 | |||
| 731 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, | ||
| 732 | u64 rglast) | ||
| 733 | { | ||
| 734 | struct gfs2_rgrpd *rgd = NULL; | ||
| 735 | |||
| 736 | spin_lock(&sdp->sd_rindex_spin); | ||
| 737 | |||
| 738 | if (list_empty(&sdp->sd_rindex_recent_list)) | ||
| 739 | goto out; | ||
| 740 | |||
| 741 | if (!rglast) | ||
| 742 | goto first; | ||
| 743 | |||
| 744 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
| 745 | if (rgd->rd_ri.ri_addr == rglast) | ||
| 746 | goto out; | ||
| 747 | } | ||
| 748 | |||
| 749 | first: | ||
| 750 | rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd, | ||
| 751 | rd_recent); | ||
| 752 | out: | ||
| 753 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 754 | return rgd; | ||
| 755 | } | ||
| 756 | |||
/**
 * recent_rgrp_next - get next RG from "recent" list
 * @cur_rgd: current rgrp
 * @remove: if set, also take @cur_rgd off the recent list
 *
 * Returns: The next rgrp in the recent list, or NULL when @cur_rgd
 * was the last entry (or the list is empty)
 */

static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
					   int remove)
{
	struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
	struct list_head *head;
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_recent_list;

	/* Find cur_rgd on the list; if present, return its successor
	   (NULL when it is the tail), removing cur_rgd if requested. */
	list_for_each_entry(rgd, head, rd_recent) {
		if (rgd == cur_rgd) {
			if (cur_rgd->rd_recent.next != head)
				rgd = list_entry(cur_rgd->rd_recent.next,
						 struct gfs2_rgrpd, rd_recent);
			else
				rgd = NULL;

			if (remove)
				list_del(&cur_rgd->rd_recent);

			goto out;
		}
	}

	/* cur_rgd is no longer on the list; fall back to the head. */
	rgd = NULL;
	if (!list_empty(head))
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);

out:
	spin_unlock(&sdp->sd_rindex_spin);
	return rgd;
}
| 799 | |||
/**
 * recent_rgrp_add - add an RG to tail of "recent" list
 * @new_rgd: The rgrp to add
 *
 * The list is capped at sd_rgrps / journals entries (so each journal
 * works its own share of rgrps; see forward_rgrp_get()). If @new_rgd
 * is already listed, or the list is full, nothing is added.
 */

static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
{
	struct gfs2_sbd *sdp = new_rgd->rd_sbd;
	struct gfs2_rgrpd *rgd;
	unsigned int count = 0;
	unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);

	spin_lock(&sdp->sd_rindex_spin);

	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
		if (rgd == new_rgd)
			goto out;	/* already on the list */

		if (++count >= max)
			goto out;	/* list is full */
	}
	list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);

out:
	spin_unlock(&sdp->sd_rindex_spin);
}
| 827 | |||
| 828 | /** | ||
| 829 | * forward_rgrp_get - get an rgrp to try next from full list | ||
| 830 | * @sdp: The GFS2 superblock | ||
| 831 | * | ||
| 832 | * Returns: The rgrp to try next | ||
| 833 | */ | ||
| 834 | |||
| 835 | static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp) | ||
| 836 | { | ||
| 837 | struct gfs2_rgrpd *rgd; | ||
| 838 | unsigned int journals = gfs2_jindex_size(sdp); | ||
| 839 | unsigned int rg = 0, x; | ||
| 840 | |||
| 841 | spin_lock(&sdp->sd_rindex_spin); | ||
| 842 | |||
| 843 | rgd = sdp->sd_rindex_forward; | ||
| 844 | if (!rgd) { | ||
| 845 | if (sdp->sd_rgrps >= journals) | ||
| 846 | rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals; | ||
| 847 | |||
| 848 | for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg; | ||
| 849 | x++, rgd = gfs2_rgrpd_get_next(rgd)) | ||
| 850 | /* Do Nothing */; | ||
| 851 | |||
| 852 | sdp->sd_rindex_forward = rgd; | ||
| 853 | } | ||
| 854 | |||
| 855 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 856 | |||
| 857 | return rgd; | ||
| 858 | } | ||
| 859 | |||
/**
 * forward_rgrp_set - set the forward rgrp pointer
 * @sdp: the filesystem
 * @rgd: The new forward rgrp
 *
 * Records where the next full-list scan should begin (see
 * forward_rgrp_get() and get_local_rgrp()).
 */

static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
{
	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = rgd;
	spin_unlock(&sdp->sd_rindex_spin);
}
| 873 | |||
/**
 * get_local_rgrp - Choose and lock a rgrp for allocation
 * @ip: the inode to reserve space for
 *
 * Try to acquire rgrp in way which avoids contending with others.
 * First probes the "recent" list with non-blocking (TRY) locks, then
 * scans the full rgrp list, only blocking on a second pass. On success
 * al->al_rgd points at the chosen rgrp and its glock is held in
 * al->al_rgd_gh.
 *
 * Returns: errno
 */

static int get_local_rgrp(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd, *begin = NULL;
	struct gfs2_alloc *al = &ip->i_alloc;
	int flags = LM_FLAG_TRY;	/* first full pass is non-blocking */
	int skipped = 0;
	int loops = 0;
	int error;

	/* Try recently successful rgrps */

	rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);

	while (rgd) {
		error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
					   LM_FLAG_TRY, &al->al_rgd_gh);
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			/* Locked but full: drop it from the recent list. */
			gfs2_glock_dq_uninit(&al->al_rgd_gh);
			rgd = recent_rgrp_next(rgd, 1);
			break;

		case GLR_TRYFAILED:
			/* Contended: keep it listed, move on. */
			rgd = recent_rgrp_next(rgd, 0);
			break;

		default:
			return error;
		}
	}

	/* Go through full list of rgrps */

	begin = rgd = forward_rgrp_get(sdp);

	for (;;) {
		error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
					   &al->al_rgd_gh);
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			gfs2_glock_dq_uninit(&al->al_rgd_gh);
			break;

		case GLR_TRYFAILED:
			skipped++;
			break;

		default:
			return error;
		}

		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);

		if (rgd == begin) {
			/* Wrapped around. Only retry if contention (not
			   lack of space) made us skip rgrps, and make the
			   second pass a blocking one. */
			if (++loops >= 2 || !skipped)
				return -ENOSPC;
			flags = 0;
		}
	}

out:
	ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;

	if (begin) {
		/* Found via the full scan: remember it as "recent" and
		   advance the shared forward pointer past it. */
		recent_rgrp_add(rgd);
		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);
		forward_rgrp_set(sdp, rgd);
	}

	return 0;
}
| 964 | |||
| 965 | /** | ||
| 966 | * gfs2_inplace_reserve_i - Reserve space in the filesystem | ||
| 967 | * @ip: the inode to reserve space for | ||
| 968 | * | ||
| 969 | * Returns: errno | ||
| 970 | */ | ||
| 971 | |||
| 972 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | ||
| 973 | { | ||
| 974 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 975 | struct gfs2_alloc *al = &ip->i_alloc; | ||
| 976 | int error; | ||
| 977 | |||
| 978 | if (gfs2_assert_warn(sdp, al->al_requested)) | ||
| 979 | return -EINVAL; | ||
| 980 | |||
| 981 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | ||
| 982 | if (error) | ||
| 983 | return error; | ||
| 984 | |||
| 985 | error = get_local_rgrp(ip); | ||
| 986 | if (error) { | ||
| 987 | gfs2_glock_dq_uninit(&al->al_ri_gh); | ||
| 988 | return error; | ||
| 989 | } | ||
| 990 | |||
| 991 | al->al_file = file; | ||
| 992 | al->al_line = line; | ||
| 993 | |||
| 994 | return 0; | ||
| 995 | } | ||
| 996 | |||
/**
 * gfs2_inplace_release - release an inplace reservation
 * @ip: the inode the reservation was taken out on
 *
 * Release a reservation made by gfs2_inplace_reserve(). Warns (with the
 * reserver's file:line) if more blocks were allocated than reserved,
 * then drops the rgrp and rindex glocks.
 */

void gfs2_inplace_release(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = &ip->i_alloc;

	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
			"al_file = %s, al_line = %u\n",
			al->al_alloced, al->al_requested, al->al_file,
			al->al_line);

	al->al_rgd = NULL;
	gfs2_glock_dq_uninit(&al->al_rgd_gh);
	gfs2_glock_dq_uninit(&al->al_ri_gh);
}
| 1019 | |||
| 1020 | /** | ||
| 1021 | * gfs2_get_block_type - Check a block in a RG is of given type | ||
| 1022 | * @rgd: the resource group holding the block | ||
| 1023 | * @block: the block number | ||
| 1024 | * | ||
| 1025 | * Returns: The block type (GFS2_BLKST_*) | ||
| 1026 | */ | ||
| 1027 | |||
| 1028 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | ||
| 1029 | { | ||
| 1030 | struct gfs2_bitmap *bi = NULL; | ||
| 1031 | u32 length, rgrp_block, buf_block; | ||
| 1032 | unsigned int buf; | ||
| 1033 | unsigned char type; | ||
| 1034 | |||
| 1035 | length = rgd->rd_ri.ri_length; | ||
| 1036 | rgrp_block = block - rgd->rd_ri.ri_data0; | ||
| 1037 | |||
| 1038 | for (buf = 0; buf < length; buf++) { | ||
| 1039 | bi = rgd->rd_bits + buf; | ||
| 1040 | if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) | ||
| 1041 | break; | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | gfs2_assert(rgd->rd_sbd, buf < length); | ||
| 1045 | buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; | ||
| 1046 | |||
| 1047 | type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, | ||
| 1048 | bi->bi_len, buf_block); | ||
| 1049 | |||
| 1050 | return type; | ||
| 1051 | } | ||
| 1052 | |||
/**
 * rgblk_search - find a block in @old_state, change allocation
 * state to @new_state
 * @rgd: the resource group descriptor
 * @goal: the goal block within the RG (start here to search for avail block)
 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
 * Add the found bitmap buffer to the transaction.
 * Set the found bits to @new_state to change block's allocation state.
 *
 * This function never fails, because we wouldn't call it unless we
 * know (from reservation results, etc.) that a block is available.
 *
 * Scope of @goal and returned block is just within rgrp, not the whole
 * filesystem.
 *
 * Returns: the block number allocated
 */

static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
			unsigned char old_state, unsigned char new_state)
{
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_ri.ri_length;
	u32 blk = 0;
	unsigned int buf, x;

	/* Find bitmap block that contains bits for goal block */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
			break;
	}

	gfs2_assert(rgd->rd_sbd, buf < length);

	/* Convert scope of "goal" from rgrp-wide to within found bit block */
	goal -= bi->bi_start * GFS2_NBBY;

	/* Search (up to entire) bitmap in this rgrp for allocatable block.
	   "x <= length", instead of "x < length", because we typically start
	   the search in the middle of a bit block, but if we can't find an
	   allocatable block anywhere else, we want to be able wrap around and
	   search in the first part of our first-searched bit block. */
	for (x = 0; x <= length; x++) {
		/* Prefer the clone bitmap (holds uncommitted frees) when
		   one exists for this slice. */
		if (bi->bi_clone)
			blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
					  bi->bi_len, goal, old_state);
		else
			blk = gfs2_bitfit(rgd,
					  bi->bi_bh->b_data + bi->bi_offset,
					  bi->bi_len, goal, old_state);
		if (blk != BFITNOENT)
			break;

		/* Try next bitmap block (wrap back to rgrp header if at end) */
		buf = (buf + 1) % length;
		bi = rgd->rd_bits + buf;
		goal = 0;
	}

	/* "Never fails": if no free bit was found despite the reservation,
	   withdraw the fs and fall back to bit 0 of the current slice. */
	if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
		blk = 0;

	/* Journal the bitmap buffer, then flip the bits in both the real
	   bitmap and (if present) its clone. */
	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
	gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
		    bi->bi_len, blk, new_state);
	if (bi->bi_clone)
		gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
			    bi->bi_len, blk, new_state);

	return bi->bi_start * GFS2_NBBY + blk;
}
| 1128 | |||
/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Returns: Resource group containing the block(s), or NULL if @bstart
 * falls in no known rgrp (filesystem is marked inconsistent)
 */

static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
				     u32 blen, unsigned char new_state)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_bitmap *bi = NULL;
	u32 length, rgrp_blk, buf_blk;
	unsigned int buf;

	rgd = gfs2_blk2rgrpd(sdp, bstart);
	if (!rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
		return NULL;
	}

	length = rgd->rd_ri.ri_length;

	/* Convert to an rgrp-relative block number. */
	rgrp_blk = bstart - rgd->rd_ri.ri_data0;

	while (blen--) {
		/* Find the bitmap slice covering this block. */
		for (buf = 0; buf < length; buf++) {
			bi = rgd->rd_bits + buf;
			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
				break;
		}

		gfs2_assert(rgd->rd_sbd, buf < length);

		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
		rgrp_blk++;

		/* First change to this slice: snapshot the bitmap into a
		   clone so in-flight allocators don't see the new state
		   until it is committed (__GFP_NOFAIL: cannot fail here). */
		if (!bi->bi_clone) {
			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
					       GFP_NOFS | __GFP_NOFAIL);
			memcpy(bi->bi_clone + bi->bi_offset,
			       bi->bi_bh->b_data + bi->bi_offset,
			       bi->bi_len);
		}
		/* Journal the buffer and update only the real bitmap. */
		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
		gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
			    bi->bi_len, buf_blk, new_state);
	}

	return rgd;
}
| 1184 | |||
/**
 * gfs2_alloc_data - Allocate a data block
 * @ip: the inode to allocate the data block for
 *
 * Allocates one block from the rgrp previously reserved in ip->i_alloc,
 * updating the rgrp header, statfs, quota, and the inode's data goal.
 *
 * Returns: the allocated block (filesystem-wide block number)
 */

u64 gfs2_alloc_data(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	u32 goal, blk;
	u64 block;

	/* Start searching at the inode's data goal if it lies in this
	   rgrp, else at this rgrp's last data allocation. */
	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
		goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
	else
		goal = rgd->rd_last_alloc_data;

	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
	rgd->rd_last_alloc_data = blk;

	/* Convert back to a filesystem-wide block number and advance
	   the inode's goal for next time. */
	block = rgd->rd_ri.ri_data0 + blk;
	ip->i_di.di_goal_data = block;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;

	/* Write the updated rgrp header (block 0) into the transaction. */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	gfs2_statfs_change(sdp, 0, -1, 0);
	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
| 1228 | |||
/**
 * gfs2_alloc_meta - Allocate a metadata block
 * @ip: the inode to allocate the metadata block for
 *
 * Like gfs2_alloc_data() but uses the metadata goal and additionally
 * cancels any pending revoke for the block.
 *
 * Returns: the allocated block (filesystem-wide block number)
 */

u64 gfs2_alloc_meta(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	u32 goal, blk;
	u64 block;

	/* Start at the inode's metadata goal if it lies in this rgrp,
	   else at this rgrp's last metadata allocation. */
	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
		goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
	else
		goal = rgd->rd_last_alloc_meta;

	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
	rgd->rd_last_alloc_meta = blk;

	block = rgd->rd_ri.ri_data0 + blk;
	ip->i_di.di_goal_meta = block;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;

	/* Write the updated rgrp header (block 0) into the transaction. */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	gfs2_statfs_change(sdp, 0, -1, 0);
	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
	/* Reused metadata block: drop any queued revoke for it. */
	gfs2_trans_add_unrevoke(sdp, block);

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
| 1273 | |||
/**
 * gfs2_alloc_di - Allocate a dinode
 * @dip: the directory that the inode is going in
 * @generation: filled with the new dinode's generation number
 *
 * Marks the new block GFS2_BLKST_DINODE, bumps the rgrp's dinode
 * count, and hands out the next inode generation number.
 *
 * Returns: the block allocated
 */

u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_alloc *al = &dip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	u32 blk;
	u64 block;

	/* Dinodes use the rgrp's metadata goal; no per-inode goal here. */
	blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE);

	rgd->rd_last_alloc_meta = blk;

	block = rgd->rd_ri.ri_data0 + blk;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;
	rgd->rd_rg.rg_dinodes++;
	*generation = rgd->rd_rg.rg_igeneration++;
	/* Write the updated rgrp header (block 0) into the transaction. */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	/* -1 free block, +1 dinode in statfs accounting. */
	gfs2_statfs_change(sdp, 0, -1, +1);
	/* Reused block: drop any queued revoke for it. */
	gfs2_trans_add_unrevoke(sdp, block);

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
| 1314 | |||
/**
 * gfs2_free_data - free a contiguous run of data block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 * Marks the run free in the bitmaps (via rgblk_free), then updates the
 * rgrp header, statfs and quota accounting inside the current
 * transaction.  Silently returns if rgblk_free cannot resolve the rgrp.
 */

void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;

	rgd->rd_rg.rg_free += blen;

	/* Journal the rgrp header buffer and write back the updated counts */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	gfs2_trans_add_rg(rgd);

	gfs2_statfs_change(sdp, 0, +blen, 0);
	/* blen is unsigned; cast so the quota delta is a proper negative */
	gfs2_quota_change(ip, -(s64)blen,
			 ip->i_di.di_uid, ip->i_di.di_gid);
}
| 1343 | |||
/**
 * gfs2_free_meta - free a contiguous run of metadata block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 * Same as gfs2_free_data(), plus gfs2_meta_wipe() at the end to
 * invalidate any cached metadata buffers for the freed blocks.
 */

void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;

	rgd->rd_rg.rg_free += blen;

	/* Journal the rgrp header buffer and write back the updated counts */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	gfs2_trans_add_rg(rgd);

	gfs2_statfs_change(sdp, 0, +blen, 0);
	gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid);
	/* Drop any cached buffers that referred to the freed metadata */
	gfs2_meta_wipe(ip, bstart, blen);
}
| 1372 | |||
/**
 * gfs2_unlink_di - mark a dinode's block as unlinked
 * @inode: the inode whose dinode block is transitioning to UNLINKED
 *
 * Changes the block's bitmap state to GFS2_BLKST_UNLINKED (deferred
 * deallocation) and journals the rgrp header.  Counts (rg_free,
 * rg_dinodes) are not touched here; they change when the dinode is
 * finally freed in gfs2_free_uninit_di().
 */
void gfs2_unlink_di(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_rgrpd *rgd;
	u64 blkno = ip->i_num.no_addr;

	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
	if (!rgd)
		return;
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_trans_add_rg(rgd);
}
| 1387 | |||
/**
 * gfs2_free_uninit_di - free a dinode block without touching quota/caches
 * @rgd: the resource group the caller believes owns @blkno
 * @blkno: the dinode block to free
 *
 * Marks the block free, decrements the rgrp dinode count, increments
 * its free count, and journals the rgrp header.  Asserts (withdrawing
 * the fs on failure) that the rgrp resolved from @blkno matches @rgd.
 */
static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	/* Caller's rgd must be the rgrp that actually contains blkno */
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	/* An underflow here means on-disk inconsistency */
	if (!rgd->rd_rg.rg_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_rg.rg_dinodes--;
	rgd->rd_rg.rg_free++;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	/* One more free block, one fewer dinode */
	gfs2_statfs_change(sdp, 0, +1, -1);
	gfs2_trans_add_rg(rgd);
}
| 1409 | |||
| 1410 | |||
| 1411 | void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) | ||
| 1412 | { | ||
| 1413 | gfs2_free_uninit_di(rgd, ip->i_num.no_addr); | ||
| 1414 | gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid); | ||
| 1415 | gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); | ||
| 1416 | } | ||
| 1417 | |||
/**
 * gfs2_rlist_add - add a RG to a list of RGs
 * @sdp: the filesystem
 * @rlist: the list of resource groups
 * @block: the block
 *
 * Figure out what RG a block belongs to and add that RG to the list.
 * Duplicates are skipped.  The backing array grows in increments of 10
 * using a __GFP_NOFAIL allocation, so this never fails (it may sleep).
 * Must be called before gfs2_rlist_alloc() (rl_ghs must still be NULL).
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
		    u64 block)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrpd **tmp;
	unsigned int new_space;
	unsigned int x;

	/* Adding after the holders were allocated would leave them stale */
	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
		return;

	rgd = gfs2_blk2rgrpd(sdp, block);
	if (!rgd) {
		/* Block outside any rgrp: filesystem is inconsistent */
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", (unsigned long long)block);
		return;
	}

	/* Linear dedup scan; rl_rgrps is expected to stay small */
	for (x = 0; x < rlist->rl_rgrps; x++)
		if (rlist->rl_rgd[x] == rgd)
			return;

	if (rlist->rl_rgrps == rlist->rl_space) {
		new_space = rlist->rl_space + 10;

		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
			      GFP_NOFS | __GFP_NOFAIL);

		/* Manual realloc: copy the old entries, free the old array */
		if (rlist->rl_rgd) {
			memcpy(tmp, rlist->rl_rgd,
			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
			kfree(rlist->rl_rgd);
		}

		rlist->rl_space = new_space;
		rlist->rl_rgd = tmp;
	}

	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
}
| 1470 | |||
| 1471 | /** | ||
| 1472 | * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate | ||
| 1473 | * and initialize an array of glock holders for them | ||
| 1474 | * @rlist: the list of resource groups | ||
| 1475 | * @state: the lock state to acquire the RG lock in | ||
| 1476 | * @flags: the modifier flags for the holder structures | ||
| 1477 | * | ||
| 1478 | * FIXME: Don't use NOFAIL | ||
| 1479 | * | ||
| 1480 | */ | ||
| 1481 | |||
| 1482 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | ||
| 1483 | int flags) | ||
| 1484 | { | ||
| 1485 | unsigned int x; | ||
| 1486 | |||
| 1487 | rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), | ||
| 1488 | GFP_NOFS | __GFP_NOFAIL); | ||
| 1489 | for (x = 0; x < rlist->rl_rgrps; x++) | ||
| 1490 | gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, | ||
| 1491 | state, flags, | ||
| 1492 | &rlist->rl_ghs[x]); | ||
| 1493 | } | ||
| 1494 | |||
| 1495 | /** | ||
| 1496 | * gfs2_rlist_free - free a resource group list | ||
| 1497 | * @list: the list of resource groups | ||
| 1498 | * | ||
| 1499 | */ | ||
| 1500 | |||
| 1501 | void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) | ||
| 1502 | { | ||
| 1503 | unsigned int x; | ||
| 1504 | |||
| 1505 | kfree(rlist->rl_rgd); | ||
| 1506 | |||
| 1507 | if (rlist->rl_ghs) { | ||
| 1508 | for (x = 0; x < rlist->rl_rgrps; x++) | ||
| 1509 | gfs2_holder_uninit(&rlist->rl_ghs[x]); | ||
| 1510 | kfree(rlist->rl_ghs); | ||
| 1511 | } | ||
| 1512 | } | ||
| 1513 | |||
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h new file mode 100644 index 000000000000..9eedfd12bfff --- /dev/null +++ b/fs/gfs2/rgrp.h | |||
| @@ -0,0 +1,69 @@ | |||
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#ifndef __RGRP_DOT_H__
#define __RGRP_DOT_H__

struct gfs2_rgrpd;
struct gfs2_sbd;
struct gfs2_holder;

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);

/* Resource group lookup/iteration */
struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);

/* Reference counting on the rgrp's buffer heads */
int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);

void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);

struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
static inline void gfs2_alloc_put(struct gfs2_inode *ip)
{
	return; /* Kept as a no-op so we can see where ip->i_alloc is used */
}

int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
			   char *file, unsigned int line);
/* Records the call site so reservation problems can be traced */
#define gfs2_inplace_reserve(ip) \
	gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)

void gfs2_inplace_release(struct gfs2_inode *ip);

unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);

/* Block allocation/deallocation */
u64 gfs2_alloc_data(struct gfs2_inode *ip);
u64 gfs2_alloc_meta(struct gfs2_inode *ip);
u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);

void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
void gfs2_unlink_di(struct inode *inode);

/* A growable, deduplicated list of rgrps plus their glock holders */
struct gfs2_rgrp_list {
	unsigned int rl_rgrps;          /* number of entries in rl_rgd */
	unsigned int rl_space;          /* allocated capacity of rl_rgd */
	struct gfs2_rgrpd **rl_rgd;     /* rgrp pointers */
	struct gfs2_holder *rl_ghs;     /* one holder per rgrp, or NULL */
};

void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
		    u64 block);
void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
		      int flags);
void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);

#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c new file mode 100644 index 000000000000..6a78b1b32e25 --- /dev/null +++ b/fs/gfs2/super.c | |||
| @@ -0,0 +1,976 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/crc32.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/bio.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "bmap.h" | ||
| 23 | #include "dir.h" | ||
| 24 | #include "glock.h" | ||
| 25 | #include "glops.h" | ||
| 26 | #include "inode.h" | ||
| 27 | #include "log.h" | ||
| 28 | #include "meta_io.h" | ||
| 29 | #include "quota.h" | ||
| 30 | #include "recovery.h" | ||
| 31 | #include "rgrp.h" | ||
| 32 | #include "super.h" | ||
| 33 | #include "trans.h" | ||
| 34 | #include "util.h" | ||
| 35 | |||
| 36 | static const u32 gfs2_old_fs_formats[] = { | ||
| 37 | 0 | ||
| 38 | }; | ||
| 39 | |||
| 40 | static const u32 gfs2_old_multihost_formats[] = { | ||
| 41 | 0 | ||
| 42 | }; | ||
| 43 | |||
/**
 * gfs2_tune_init - Fill a gfs2_tune structure with default values
 * @gt: tune
 *
 * Default values only; individual fields are presumably adjustable at
 * runtime via the sysfs tunables — confirm against sys.c.
 */

void gfs2_tune_init(struct gfs2_tune *gt)
{
	spin_lock_init(&gt->gt_spin);

	gt->gt_ilimit = 100;
	gt->gt_ilimit_tries = 3;
	gt->gt_ilimit_min = 1;
	gt->gt_demote_secs = 300;
	gt->gt_incore_log_blocks = 1024;
	gt->gt_log_flush_secs = 60;
	gt->gt_jindex_refresh_secs = 60;
	/* Daemon wakeup intervals, in seconds */
	gt->gt_scand_secs = 15;
	gt->gt_recoverd_secs = 60;
	gt->gt_logd_secs = 1;
	gt->gt_quotad_secs = 5;
	/* Quota syncing/scaling parameters */
	gt->gt_quota_simul_sync = 64;
	gt->gt_quota_warn_period = 10;
	gt->gt_quota_scale_num = 1;
	gt->gt_quota_scale_den = 1;
	gt->gt_quota_cache_secs = 300;
	gt->gt_quota_quantum = 60;
	gt->gt_atime_quantum = 3600;
	gt->gt_new_files_jdata = 0;
	gt->gt_new_files_directio = 0;
	gt->gt_max_atomic_write = 4 << 20;	/* 4 MiB */
	gt->gt_max_readahead = 1 << 18;		/* 256 KiB */
	gt->gt_lockdump_size = 131072;
	gt->gt_stall_secs = 600;
	gt->gt_complain_secs = 10;
	gt->gt_reclaim_limit = 5000;
	gt->gt_entries_per_readdir = 32;
	gt->gt_prefetch_secs = 10;
	/* "greedy" glock hold times, expressed in jiffies */
	gt->gt_greedy_default = HZ / 10;
	gt->gt_greedy_quantum = HZ / 40;
	gt->gt_greedy_max = HZ / 4;
	gt->gt_statfs_quantum = 30;
	gt->gt_statfs_slow = 0;
}
| 88 | |||
| 89 | /** | ||
| 90 | * gfs2_check_sb - Check superblock | ||
| 91 | * @sdp: the filesystem | ||
| 92 | * @sb: The superblock | ||
| 93 | * @silent: Don't print a message if the check fails | ||
| 94 | * | ||
| 95 | * Checks the version code of the FS is one that we understand how to | ||
| 96 | * read and that the sizes of the various on-disk structures have not | ||
| 97 | * changed. | ||
| 98 | */ | ||
| 99 | |||
| 100 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent) | ||
| 101 | { | ||
| 102 | unsigned int x; | ||
| 103 | |||
| 104 | if (sb->sb_header.mh_magic != GFS2_MAGIC || | ||
| 105 | sb->sb_header.mh_type != GFS2_METATYPE_SB) { | ||
| 106 | if (!silent) | ||
| 107 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | ||
| 108 | return -EINVAL; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* If format numbers match exactly, we're done. */ | ||
| 112 | |||
| 113 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
| 114 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
| 115 | return 0; | ||
| 116 | |||
| 117 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
| 118 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
| 119 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
| 120 | break; | ||
| 121 | |||
| 122 | if (!gfs2_old_fs_formats[x]) { | ||
| 123 | printk(KERN_WARNING | ||
| 124 | "GFS2: code version (%u, %u) is incompatible " | ||
| 125 | "with ondisk format (%u, %u)\n", | ||
| 126 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 127 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 128 | printk(KERN_WARNING | ||
| 129 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 130 | return -EINVAL; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
| 135 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
| 136 | if (gfs2_old_multihost_formats[x] == | ||
| 137 | sb->sb_multihost_format) | ||
| 138 | break; | ||
| 139 | |||
| 140 | if (!gfs2_old_multihost_formats[x]) { | ||
| 141 | printk(KERN_WARNING | ||
| 142 | "GFS2: code version (%u, %u) is incompatible " | ||
| 143 | "with ondisk format (%u, %u)\n", | ||
| 144 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 145 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 146 | printk(KERN_WARNING | ||
| 147 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 148 | return -EINVAL; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | if (!sdp->sd_args.ar_upgrade) { | ||
| 153 | printk(KERN_WARNING | ||
| 154 | "GFS2: code version (%u, %u) is incompatible " | ||
| 155 | "with ondisk format (%u, %u)\n", | ||
| 156 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 157 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 158 | printk(KERN_INFO | ||
| 159 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
| 160 | "the FS\n"); | ||
| 161 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
| 162 | return -EINVAL; | ||
| 163 | } | ||
| 164 | |||
| 165 | return 0; | ||
| 166 | } | ||
| 167 | |||
| 168 | |||
/**
 * end_bio_io_page - bio completion handler for gfs2_read_super()
 * @bio: the completed (or partially completed) bio
 * @bytes_done: bytes transferred so far
 * @error: 0 on success, negative errno on failure
 *
 * bi_private carries the page being read.  On full completion the page
 * is marked uptodate (or the error logged) and unlocked, waking the
 * reader sleeping in wait_on_page_locked().
 */
static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
{
	struct page *page = bio->bi_private;
	/* Nonzero bi_size means the transfer is not finished yet */
	if (bio->bi_size)
		return 1;

	if (!error)
		SetPageUptodate(page);
	else
		printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
	unlock_page(page);
	return 0;
}
| 182 | |||
/**
 * gfs2_read_super - synchronously read one page from the device
 * @sb: the VFS superblock (used for the block device)
 * @sector: the starting sector to read from
 *
 * Allocates a page, submits a single synchronous read bio for it, and
 * waits for end_bio_io_page() to unlock the page.  Used to read the
 * on-disk superblock before the metadata address space is usable.
 *
 * Returns: the uptodate page (caller frees it), or NULL on allocation
 * or I/O failure.
 */
struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
{
	struct page *page;
	struct bio *bio;

	page = alloc_page(GFP_KERNEL);
	if (unlikely(!page))
		return NULL;

	/* Page is locked here; end_bio_io_page() unlocks it on completion */
	ClearPageUptodate(page);
	ClearPageDirty(page);
	lock_page(page);

	bio = bio_alloc(GFP_KERNEL, 1);
	if (unlikely(!bio)) {
		__free_page(page);
		return NULL;
	}

	bio->bi_sector = sector;
	bio->bi_bdev = sb->s_bdev;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	bio->bi_end_io = end_bio_io_page;
	bio->bi_private = page;
	/* Synchronous read, tagged as metadata for blktrace */
	submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
	wait_on_page_locked(page);
	bio_put(bio);
	if (!PageUptodate(page)) {
		__free_page(page);
		return NULL;
	}
	return page;
}
| 217 | |||
/**
 * gfs2_read_sb - Read super block
 * @sdp: The GFS2 superblock
 * @gl: the glock for the superblock (assumed to be held)
 * @silent: Don't print message if mount fails
 *
 * Reads and validates the on-disk superblock, then derives the
 * filesystem geometry from the block size: pointers per dinode and
 * indirect block, directory hash sizes, the worst-case reservation for
 * a directory insert, and the metadata-tree height tables.
 *
 * Returns: 0 on success, -EIO if the superblock can't be read, or the
 * error from gfs2_check_sb().
 */

int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
{
	u32 hash_blocks, ind_blocks, leaf_blocks;
	u32 tmp_blocks;
	unsigned int x;
	int error;
	struct page *page;
	char *sb;

	/* NOTE(review): sd_fsb2bb_shift is read here and recomputed below;
	   presumably it is zero-initialized on first mount — confirm. */
	page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
	if (!page) {
		if (!silent)
			fs_err(sdp, "can't read superblock\n");
		return -EIO;
	}
	sb = kmap(page);
	/* Endian-convert the on-disk superblock into sd_sb */
	gfs2_sb_in(&sdp->sd_sb, sb);
	kunmap(page);
	__free_page(page);

	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
	if (error)
		return error;

	/* fs-block to basic (512-byte) block conversion factors */
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
	/* Block pointers that fit in a dinode / an indirect block */
	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_dinode)) / sizeof(u64);
	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_meta_header)) / sizeof(u64);
	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			       sizeof(struct gfs2_quota_change);

	/* Compute maximum reservation required to add a entry to a directory */

	hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
				   sdp->sd_jbsize);

	ind_blocks = 0;
	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
		tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
		ind_blocks += tmp_blocks;
	}

	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;

	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;

	/* Bytes addressable at each metadata-tree height; stop growing the
	   table once multiplying by sd_inptrs would overflow a u64 (checked
	   by dividing back and comparing) */
	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_dinode);
	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		u64 space, d;
		u32 m;

		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		if (d != sdp->sd_heightsize[x - 1] || m)
			break;
		sdp->sd_heightsize[x] = space;
	}
	sdp->sd_max_height = x;
	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);

	/* Same table for journaled data, where blocks hold sd_jbsize bytes */
	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
				 sizeof(struct gfs2_dinode);
	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		u64 space, d;
		u32 m;

		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		if (d != sdp->sd_jheightsize[x - 1] || m)
			break;
		sdp->sd_jheightsize[x] = space;
	}
	sdp->sd_max_jheight = x;
	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);

	return 0;
}
| 318 | |||
/**
 * gfs2_jindex_hold - Grab a lock on the jindex
 * @sdp: The GFS2 superblock
 * @ji_gh: the holder for the jindex glock
 *
 * This is very similar to the gfs2_rindex_hold() function, except that
 * in general we hold the jindex lock for longer periods of time and
 * we grab it far less frequently (in general) then the rgrp lock.
 *
 * Each pass of the loop re-acquires the jindex glock, probes for the
 * next "journal%u" entry, and if found, looks it up and appends a new
 * gfs2_jdesc to sd_jindex_list.  The loop exits (with the glock still
 * held on success) when the probe returns -ENOENT, i.e. all journals
 * have been enumerated.
 *
 * Returns: errno
 */

int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
{
	struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
	struct qstr name;
	char buf[20];
	struct gfs2_jdesc *jd;
	int error;

	name.name = buf;

	/* Serialize jindex enumeration across local threads */
	mutex_lock(&sdp->sd_jindex_mutex);

	for (;;) {
		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
					   GL_LOCAL_EXCL, ji_gh);
		if (error)
			break;

		/* Journals are named "journal0", "journal1", ... */
		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
		name.hash = gfs2_disk_hash(name.name, name.len);

		error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
		if (error == -ENOENT) {
			/* No more journals: done, glock stays held */
			error = 0;
			break;
		}

		/* Drop the glock before the (potentially slow) lookup */
		gfs2_glock_dq_uninit(ji_gh);

		if (error)
			break;

		error = -ENOMEM;
		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
		if (!jd)
			break;

		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
			if (!jd->jd_inode)
				error = -ENOENT;
			else
				error = PTR_ERR(jd->jd_inode);
			kfree(jd);
			break;
		}

		/* sd_journals doubles as the next journal id to probe */
		spin_lock(&sdp->sd_jindex_spin);
		jd->jd_jid = sdp->sd_journals++;
		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
		spin_unlock(&sdp->sd_jindex_spin);
	}

	mutex_unlock(&sdp->sd_jindex_mutex);

	return error;
}
| 388 | |||
/**
 * gfs2_jindex_free - Clear all the journal index information
 * @sdp: The GFS2 superblock
 *
 * Splices sd_jindex_list onto a private list under the spinlock (so
 * the shared list is emptied atomically), then releases each journal
 * descriptor and its inode without holding the lock.
 */

void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
	struct list_head list;
	struct gfs2_jdesc *jd;

	spin_lock(&sdp->sd_jindex_spin);
	/* list_add + list_del_init moves every entry onto the local list */
	list_add(&list, &sdp->sd_jindex_list);
	list_del_init(&sdp->sd_jindex_list);
	sdp->sd_journals = 0;
	spin_unlock(&sdp->sd_jindex_spin);

	while (!list_empty(&list)) {
		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
		list_del(&jd->jd_list);
		iput(jd->jd_inode);
		kfree(jd);
	}
}
| 413 | |||
| 414 | static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) | ||
| 415 | { | ||
| 416 | struct gfs2_jdesc *jd; | ||
| 417 | int found = 0; | ||
| 418 | |||
| 419 | list_for_each_entry(jd, head, jd_list) { | ||
| 420 | if (jd->jd_jid == jid) { | ||
| 421 | found = 1; | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | } | ||
| 425 | |||
| 426 | if (!found) | ||
| 427 | jd = NULL; | ||
| 428 | |||
| 429 | return jd; | ||
| 430 | } | ||
| 431 | |||
| 432 | struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) | ||
| 433 | { | ||
| 434 | struct gfs2_jdesc *jd; | ||
| 435 | |||
| 436 | spin_lock(&sdp->sd_jindex_spin); | ||
| 437 | jd = jdesc_find_i(&sdp->sd_jindex_list, jid); | ||
| 438 | spin_unlock(&sdp->sd_jindex_spin); | ||
| 439 | |||
| 440 | return jd; | ||
| 441 | } | ||
| 442 | |||
| 443 | void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) | ||
| 444 | { | ||
| 445 | struct gfs2_jdesc *jd; | ||
| 446 | |||
| 447 | spin_lock(&sdp->sd_jindex_spin); | ||
| 448 | jd = jdesc_find_i(&sdp->sd_jindex_list, jid); | ||
| 449 | if (jd) | ||
| 450 | jd->jd_dirty = 1; | ||
| 451 | spin_unlock(&sdp->sd_jindex_spin); | ||
| 452 | } | ||
| 453 | |||
| 454 | struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) | ||
| 455 | { | ||
| 456 | struct gfs2_jdesc *jd; | ||
| 457 | int found = 0; | ||
| 458 | |||
| 459 | spin_lock(&sdp->sd_jindex_spin); | ||
| 460 | |||
| 461 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | ||
| 462 | if (jd->jd_dirty) { | ||
| 463 | jd->jd_dirty = 0; | ||
| 464 | found = 1; | ||
| 465 | break; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | spin_unlock(&sdp->sd_jindex_spin); | ||
| 469 | |||
| 470 | if (!found) | ||
| 471 | jd = NULL; | ||
| 472 | |||
| 473 | return jd; | ||
| 474 | } | ||
| 475 | |||
/**
 * gfs2_jdesc_check - sanity-check a journal inode
 * @jd: the journal descriptor to check
 *
 * Verifies the journal size is between 8 MiB and 1 GiB, is a whole
 * number of filesystem blocks, and is fully allocated on disk (a
 * journal must never need block allocation while logging).  Fills in
 * jd->jd_blocks.
 *
 * Returns: 0 if sane, -EIO (after flagging inconsistency) otherwise
 */
int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	int ar;
	int error;

	/* Size must be in [8 MiB, 1 GiB] and block-aligned */
	if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
	    (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
		gfs2_consist_inode(ip);
		return -EIO;
	}
	jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;

	/* ar != 0 means the journal has unallocated holes: inconsistent */
	error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
	if (!error && ar) {
		gfs2_consist_inode(ip);
		error = -EIO;
	}

	return error;
}
| 498 | |||
/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Takes the transaction glock, invalidates this node's journal glock
 * cache, verifies the journal was cleanly unmounted (its head must
 * carry GFS2_LOG_HEAD_UNMOUNT), initializes the log pointers and
 * quotas, and marks the journal live.
 *
 * Returns: errno
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_holder t_gh;
	struct gfs2_log_header head;
	int error;

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
				   GL_LOCAL_EXCL, &t_gh);
	if (error)
		return error;

	/* Drop any stale cached journal metadata before reading the head */
	gfs2_meta_cache_flush(ip);
	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);

	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
	if (error)
		goto fail;

	/* A head without the UNMOUNT flag means an unclean shutdown that
	   was not recovered — refuse to go rw */
	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		gfs2_consist(sdp);
		error = -EIO;
		goto fail;
	}

	/* Initialize some head of the log stuff */
	sdp->sd_log_sequence = head.lh_sequence + 1;
	gfs2_log_pointers_init(sdp, head.lh_blkno);

	error = gfs2_quota_init(sdp);
	if (error)
		goto fail;

	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	gfs2_glock_dq_uninit(&t_gh);

	return 0;

fail:
	/* Don't cache the trans glock across a failed rw transition */
	t_gh.gh_flags |= GL_NOCACHE;
	gfs2_glock_dq_uninit(&t_gh);

	return error;
}
| 552 | |||
/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Syncs quotas and statfs, takes the transaction glock to quiesce new
 * transactions, flushes metadata, shuts down the log, and clears the
 * journal-live flag.  Proceeds even without the glock if the fs is
 * already shut down.
 *
 * Returns: errno
 */

int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
	struct gfs2_holder t_gh;
	int error;

	gfs2_quota_sync(sdp);
	gfs2_statfs_sync(sdp);

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
				   GL_LOCAL_EXCL | GL_NOCACHE,
				   &t_gh);
	/* On a withdrawn fs we still tear down below, ignoring the error */
	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return error;

	gfs2_meta_syncfs(sdp);
	gfs2_log_shutdown(sdp);

	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	/* gh_gl is only set if the glock acquisition above succeeded */
	if (t_gh.gh_gl)
		gfs2_glock_dq_uninit(&t_gh);

	gfs2_quota_cleanup(sdp);

	return error;
}
| 586 | |||
| 587 | int gfs2_statfs_init(struct gfs2_sbd *sdp) | ||
| 588 | { | ||
| 589 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
| 590 | struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; | ||
| 591 | struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); | ||
| 592 | struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; | ||
| 593 | struct buffer_head *m_bh, *l_bh; | ||
| 594 | struct gfs2_holder gh; | ||
| 595 | int error; | ||
| 596 | |||
| 597 | error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, | ||
| 598 | &gh); | ||
| 599 | if (error) | ||
| 600 | return error; | ||
| 601 | |||
| 602 | error = gfs2_meta_inode_buffer(m_ip, &m_bh); | ||
| 603 | if (error) | ||
| 604 | goto out; | ||
| 605 | |||
| 606 | if (sdp->sd_args.ar_spectator) { | ||
| 607 | spin_lock(&sdp->sd_statfs_spin); | ||
| 608 | gfs2_statfs_change_in(m_sc, m_bh->b_data + | ||
| 609 | sizeof(struct gfs2_dinode)); | ||
| 610 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 611 | } else { | ||
| 612 | error = gfs2_meta_inode_buffer(l_ip, &l_bh); | ||
| 613 | if (error) | ||
| 614 | goto out_m_bh; | ||
| 615 | |||
| 616 | spin_lock(&sdp->sd_statfs_spin); | ||
| 617 | gfs2_statfs_change_in(m_sc, m_bh->b_data + | ||
| 618 | sizeof(struct gfs2_dinode)); | ||
| 619 | gfs2_statfs_change_in(l_sc, l_bh->b_data + | ||
| 620 | sizeof(struct gfs2_dinode)); | ||
| 621 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 622 | |||
| 623 | brelse(l_bh); | ||
| 624 | } | ||
| 625 | |||
| 626 | out_m_bh: | ||
| 627 | brelse(m_bh); | ||
| 628 | out: | ||
| 629 | gfs2_glock_dq_uninit(&gh); | ||
| 630 | return 0; | ||
| 631 | } | ||
| 632 | |||
| 633 | void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, | ||
| 634 | s64 dinodes) | ||
| 635 | { | ||
| 636 | struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); | ||
| 637 | struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; | ||
| 638 | struct buffer_head *l_bh; | ||
| 639 | int error; | ||
| 640 | |||
| 641 | error = gfs2_meta_inode_buffer(l_ip, &l_bh); | ||
| 642 | if (error) | ||
| 643 | return; | ||
| 644 | |||
| 645 | mutex_lock(&sdp->sd_statfs_mutex); | ||
| 646 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | ||
| 647 | mutex_unlock(&sdp->sd_statfs_mutex); | ||
| 648 | |||
| 649 | spin_lock(&sdp->sd_statfs_spin); | ||
| 650 | l_sc->sc_total += total; | ||
| 651 | l_sc->sc_free += free; | ||
| 652 | l_sc->sc_dinodes += dinodes; | ||
| 653 | gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); | ||
| 654 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 655 | |||
| 656 | brelse(l_bh); | ||
| 657 | } | ||
| 658 | |||
| 659 | int gfs2_statfs_sync(struct gfs2_sbd *sdp) | ||
| 660 | { | ||
| 661 | struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); | ||
| 662 | struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); | ||
| 663 | struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; | ||
| 664 | struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; | ||
| 665 | struct gfs2_holder gh; | ||
| 666 | struct buffer_head *m_bh, *l_bh; | ||
| 667 | int error; | ||
| 668 | |||
| 669 | error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, | ||
| 670 | &gh); | ||
| 671 | if (error) | ||
| 672 | return error; | ||
| 673 | |||
| 674 | error = gfs2_meta_inode_buffer(m_ip, &m_bh); | ||
| 675 | if (error) | ||
| 676 | goto out; | ||
| 677 | |||
| 678 | spin_lock(&sdp->sd_statfs_spin); | ||
| 679 | gfs2_statfs_change_in(m_sc, m_bh->b_data + | ||
| 680 | sizeof(struct gfs2_dinode)); | ||
| 681 | if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { | ||
| 682 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 683 | goto out_bh; | ||
| 684 | } | ||
| 685 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 686 | |||
| 687 | error = gfs2_meta_inode_buffer(l_ip, &l_bh); | ||
| 688 | if (error) | ||
| 689 | goto out_bh; | ||
| 690 | |||
| 691 | error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); | ||
| 692 | if (error) | ||
| 693 | goto out_bh2; | ||
| 694 | |||
| 695 | mutex_lock(&sdp->sd_statfs_mutex); | ||
| 696 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | ||
| 697 | mutex_unlock(&sdp->sd_statfs_mutex); | ||
| 698 | |||
| 699 | spin_lock(&sdp->sd_statfs_spin); | ||
| 700 | m_sc->sc_total += l_sc->sc_total; | ||
| 701 | m_sc->sc_free += l_sc->sc_free; | ||
| 702 | m_sc->sc_dinodes += l_sc->sc_dinodes; | ||
| 703 | memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); | ||
| 704 | memset(l_bh->b_data + sizeof(struct gfs2_dinode), | ||
| 705 | 0, sizeof(struct gfs2_statfs_change)); | ||
| 706 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 707 | |||
| 708 | gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); | ||
| 709 | gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); | ||
| 710 | |||
| 711 | gfs2_trans_end(sdp); | ||
| 712 | |||
| 713 | out_bh2: | ||
| 714 | brelse(l_bh); | ||
| 715 | out_bh: | ||
| 716 | brelse(m_bh); | ||
| 717 | out: | ||
| 718 | gfs2_glock_dq_uninit(&gh); | ||
| 719 | return error; | ||
| 720 | } | ||
| 721 | |||
| 722 | /** | ||
| 723 | * gfs2_statfs_i - Do a statfs | ||
| 724 | * @sdp: the filesystem | ||
| 725 | * @sg: the sg structure | ||
| 726 | * | ||
| 727 | * Returns: errno | ||
| 728 | */ | ||
| 729 | |||
| 730 | int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) | ||
| 731 | { | ||
| 732 | struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; | ||
| 733 | struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; | ||
| 734 | |||
| 735 | spin_lock(&sdp->sd_statfs_spin); | ||
| 736 | |||
| 737 | *sc = *m_sc; | ||
| 738 | sc->sc_total += l_sc->sc_total; | ||
| 739 | sc->sc_free += l_sc->sc_free; | ||
| 740 | sc->sc_dinodes += l_sc->sc_dinodes; | ||
| 741 | |||
| 742 | spin_unlock(&sdp->sd_statfs_spin); | ||
| 743 | |||
| 744 | if (sc->sc_free < 0) | ||
| 745 | sc->sc_free = 0; | ||
| 746 | if (sc->sc_free > sc->sc_total) | ||
| 747 | sc->sc_free = sc->sc_total; | ||
| 748 | if (sc->sc_dinodes < 0) | ||
| 749 | sc->sc_dinodes = 0; | ||
| 750 | |||
| 751 | return 0; | ||
| 752 | } | ||
| 753 | |||
/**
 * statfs_slow_fill - accumulate one resource group's counts into @sc
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Adds the rgrp's data-block, free-block and dinode counts to the
 * running totals. Caller must already hold the rgrp glock.
 *
 * Returns: 0 on success, -ESTALE if the LVB is invalid
 */

static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
			    struct gfs2_statfs_change *sc)
{
	gfs2_rgrp_verify(rgd);
	sc->sc_total += rgd->rd_ri.ri_data;
	sc->sc_free += rgd->rd_rg.rg_free;
	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
	return 0;
}
| 771 | |||
/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Walks every resource group, taking its glock asynchronously through a
 * sliding window of 64 holders, and sums the counts via statfs_slow_fill().
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */

int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
{
	struct gfs2_holder ri_gh;
	struct gfs2_rgrpd *rgd_next;
	struct gfs2_holder *gha, *gh;
	unsigned int slots = 64;	/* max glock requests in flight */
	unsigned int x;
	int done;
	int error = 0, err;

	memset(sc, 0, sizeof(struct gfs2_statfs_change));
	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
	if (!gha)
		return -ENOMEM;

	/* Hold the rindex so the rgrp list cannot change under us. */
	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		goto out;

	rgd_next = gfs2_rgrpd_get_first(sdp);

	for (;;) {
		done = 1;

		/* Sweep the window: harvest completed requests, then
		   refill empty slots with the next rgrp's glock. */
		for (x = 0; x < slots; x++) {
			gh = gha + x;

			if (gh->gh_gl && gfs2_glock_poll(gh)) {
				err = gfs2_glock_wait(gh);
				if (err) {
					gfs2_holder_uninit(gh);
					error = err;
				} else {
					/* Only accumulate while error-free;
					   later grants are just dropped. */
					if (!error)
						error = statfs_slow_fill(
							gh->gh_gl->gl_object, sc);
					gfs2_glock_dq_uninit(gh);
				}
			}

			if (gh->gh_gl)
				done = 0;	/* still waiting on this slot */
			else if (rgd_next && !error) {
				error = gfs2_glock_nq_init(rgd_next->rd_gl,
							   LM_ST_SHARED,
							   GL_ASYNC,
							   gh);
				rgd_next = gfs2_rgrpd_get_next(rgd_next);
				done = 0;
			}

			if (signal_pending(current))
				error = -ERESTARTSYS;
		}

		if (done)
			break;

		yield();	/* the busy-wait flagged in the FIXME above */
	}

	gfs2_glock_dq_uninit(&ri_gh);

out:
	kfree(gha);
	return error;
}
| 852 | |||
/* "lock fs check clean": one shared journal-glock holder per journal,
 * kept on a local list while the fs is being checked for freeze. */
struct lfcc {
	struct list_head list;
	struct gfs2_holder gh;
};
| 857 | |||
| 858 | /** | ||
| 859 | * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all | ||
| 860 | * journals are clean | ||
| 861 | * @sdp: the file system | ||
| 862 | * @state: the state to put the transaction lock into | ||
| 863 | * @t_gh: the hold on the transaction lock | ||
| 864 | * | ||
| 865 | * Returns: errno | ||
| 866 | */ | ||
| 867 | |||
| 868 | static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, | ||
| 869 | struct gfs2_holder *t_gh) | ||
| 870 | { | ||
| 871 | struct gfs2_inode *ip; | ||
| 872 | struct gfs2_holder ji_gh; | ||
| 873 | struct gfs2_jdesc *jd; | ||
| 874 | struct lfcc *lfcc; | ||
| 875 | LIST_HEAD(list); | ||
| 876 | struct gfs2_log_header lh; | ||
| 877 | int error; | ||
| 878 | |||
| 879 | error = gfs2_jindex_hold(sdp, &ji_gh); | ||
| 880 | if (error) | ||
| 881 | return error; | ||
| 882 | |||
| 883 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | ||
| 884 | lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); | ||
| 885 | if (!lfcc) { | ||
| 886 | error = -ENOMEM; | ||
| 887 | goto out; | ||
| 888 | } | ||
| 889 | ip = GFS2_I(jd->jd_inode); | ||
| 890 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh); | ||
| 891 | if (error) { | ||
| 892 | kfree(lfcc); | ||
| 893 | goto out; | ||
| 894 | } | ||
| 895 | list_add(&lfcc->list, &list); | ||
| 896 | } | ||
| 897 | |||
| 898 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, | ||
| 899 | LM_FLAG_PRIORITY | GL_NOCACHE, | ||
| 900 | t_gh); | ||
| 901 | |||
| 902 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | ||
| 903 | error = gfs2_jdesc_check(jd); | ||
| 904 | if (error) | ||
| 905 | break; | ||
| 906 | error = gfs2_find_jhead(jd, &lh); | ||
| 907 | if (error) | ||
| 908 | break; | ||
| 909 | if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { | ||
| 910 | error = -EBUSY; | ||
| 911 | break; | ||
| 912 | } | ||
| 913 | } | ||
| 914 | |||
| 915 | if (error) | ||
| 916 | gfs2_glock_dq_uninit(t_gh); | ||
| 917 | |||
| 918 | out: | ||
| 919 | while (!list_empty(&list)) { | ||
| 920 | lfcc = list_entry(list.next, struct lfcc, list); | ||
| 921 | list_del(&lfcc->list); | ||
| 922 | gfs2_glock_dq_uninit(&lfcc->gh); | ||
| 923 | kfree(lfcc); | ||
| 924 | } | ||
| 925 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 926 | return error; | ||
| 927 | } | ||
| 928 | |||
| 929 | /** | ||
| 930 | * gfs2_freeze_fs - freezes the file system | ||
| 931 | * @sdp: the file system | ||
| 932 | * | ||
| 933 | * This function flushes data and meta data for all machines by | ||
| 934 | * aquiring the transaction log exclusively. All journals are | ||
| 935 | * ensured to be in a clean state as well. | ||
| 936 | * | ||
| 937 | * Returns: errno | ||
| 938 | */ | ||
| 939 | |||
| 940 | int gfs2_freeze_fs(struct gfs2_sbd *sdp) | ||
| 941 | { | ||
| 942 | int error = 0; | ||
| 943 | |||
| 944 | mutex_lock(&sdp->sd_freeze_lock); | ||
| 945 | |||
| 946 | if (!sdp->sd_freeze_count++) { | ||
| 947 | error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); | ||
| 948 | if (error) | ||
| 949 | sdp->sd_freeze_count--; | ||
| 950 | } | ||
| 951 | |||
| 952 | mutex_unlock(&sdp->sd_freeze_lock); | ||
| 953 | |||
| 954 | return error; | ||
| 955 | } | ||
| 956 | |||
| 957 | /** | ||
| 958 | * gfs2_unfreeze_fs - unfreezes the file system | ||
| 959 | * @sdp: the file system | ||
| 960 | * | ||
| 961 | * This function allows the file system to proceed by unlocking | ||
| 962 | * the exclusively held transaction lock. Other GFS2 nodes are | ||
| 963 | * now free to acquire the lock shared and go on with their lives. | ||
| 964 | * | ||
| 965 | */ | ||
| 966 | |||
| 967 | void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) | ||
| 968 | { | ||
| 969 | mutex_lock(&sdp->sd_freeze_lock); | ||
| 970 | |||
| 971 | if (sdp->sd_freeze_count && !--sdp->sd_freeze_count) | ||
| 972 | gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); | ||
| 973 | |||
| 974 | mutex_unlock(&sdp->sd_freeze_lock); | ||
| 975 | } | ||
| 976 | |||
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h new file mode 100644 index 000000000000..5bb443ae0f59 --- /dev/null +++ b/fs/gfs2/super.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
#ifndef __SUPER_DOT_H__
#define __SUPER_DOT_H__

#include "incore.h"

/* Fill a gfs2_tune structure with default tunable values. */
void gfs2_tune_init(struct gfs2_tune *gt);

/* Superblock validation and reading. */
int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
struct page *gfs2_read_super(struct super_block *sb, sector_t sector);

/* Snapshot of the number of journals, taken under sd_jindex_spin. */
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
	unsigned int x;
	spin_lock(&sdp->sd_jindex_spin);
	x = sdp->sd_journals;
	spin_unlock(&sdp->sd_jindex_spin);
	return x;
}

/* Journal index management. */
int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
void gfs2_jindex_free(struct gfs2_sbd *sdp);

struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
int gfs2_jdesc_check(struct gfs2_jdesc *jd);

int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
			      struct gfs2_inode **ipp);

/* Read-write / read-only transitions. */
int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
int gfs2_make_fs_ro(struct gfs2_sbd *sdp);

/* statfs bookkeeping (see super.c for the master/local scheme). */
int gfs2_statfs_init(struct gfs2_sbd *sdp);
void gfs2_statfs_change(struct gfs2_sbd *sdp,
			s64 total, s64 free, s64 dinodes);
int gfs2_statfs_sync(struct gfs2_sbd *sdp);
int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);

/* Reference-counted freeze/unfreeze of the whole filesystem. */
int gfs2_freeze_fs(struct gfs2_sbd *sdp);
void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);

#endif /* __SUPER_DOT_H__ */
| 55 | |||
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c new file mode 100644 index 000000000000..0e0ec988f731 --- /dev/null +++ b/fs/gfs2/sys.c | |||
| @@ -0,0 +1,583 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | #include <linux/kobject.h> | ||
| 17 | #include <linux/gfs2_ondisk.h> | ||
| 18 | #include <linux/lm_interface.h> | ||
| 19 | #include <asm/uaccess.h> | ||
| 20 | |||
| 21 | #include "gfs2.h" | ||
| 22 | #include "incore.h" | ||
| 23 | #include "lm.h" | ||
| 24 | #include "sys.h" | ||
| 25 | #include "super.h" | ||
| 26 | #include "glock.h" | ||
| 27 | #include "quota.h" | ||
| 28 | #include "util.h" | ||
| 29 | |||
/* Mount arguments handed over via sysfs; protected by the lock below. */
char *gfs2_sys_margs;
spinlock_t gfs2_sys_margs_lock;

/* /sys/fs/gfs2/<fs>/id: the block device identifier. */
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id);
}

/* /sys/fs/gfs2/<fs>/fsname: the lock-table filesystem name. */
static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}

/* Current freeze reference count (0 = not frozen). */
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
	unsigned int count;

	mutex_lock(&sdp->sd_freeze_lock);
	count = sdp->sd_freeze_count;
	mutex_unlock(&sdp->sd_freeze_lock);

	return snprintf(buf, PAGE_SIZE, "%u\n", count);
}

/* Write 1 to freeze the fs, 0 to unfreeze. Root only. */
static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	ssize_t ret = len;
	int error = 0;
	/* NOTE(review): input parsed before the capability check — harmless,
	   but capable() usually comes first. */
	int n = simple_strtol(buf, NULL, 0);

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	switch (n) {
	case 0:
		gfs2_unfreeze_fs(sdp);
		break;
	case 1:
		error = gfs2_freeze_fs(sdp);
		break;
	default:
		ret = -EINVAL;
	}

	if (error)
		fs_warn(sdp, "freeze %d error %d", n, error);

	return ret;
}

/* 1 if the fs has withdrawn from the cluster (SDF_SHUTDOWN), else 0. */
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
	unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
	return snprintf(buf, PAGE_SIZE, "%u\n", b);
}
| 85 | |||
/* Write 1 to force this node to withdraw from the cluster. Root only. */
static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (simple_strtol(buf, NULL, 0) != 1)
		return -EINVAL;

	gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: withdrawing from cluster at user's request\n",
		sdp->sd_fsname);
	return len;
}

/* Write 1 to sync local statfs deltas into the master block. Root only. */
static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
				 size_t len)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (simple_strtol(buf, NULL, 0) != 1)
		return -EINVAL;

	gfs2_statfs_sync(sdp);
	return len;
}

/* Write 1 to shrink the glock cache (non-blocking clear). Root only. */
static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (simple_strtol(buf, NULL, 0) != 1)
		return -EINVAL;

	gfs2_gl_hash_clear(sdp, NO_WAIT);
	return len;
}

/* Write 1 to sync dirty quota data to disk. Root only. */
static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
				size_t len)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (simple_strtol(buf, NULL, 0) != 1)
		return -EINVAL;

	gfs2_quota_sync(sdp);
	return len;
}

/* Write a uid to refresh that user's quota from disk. Root only. */
static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
					size_t len)
{
	u32 id;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	id = simple_strtoul(buf, NULL, 0);

	gfs2_quota_refresh(sdp, 1, id);
	return len;
}

/* Write a gid to refresh that group's quota from disk. Root only. */
static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
					 size_t len)
{
	u32 id;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	id = simple_strtoul(buf, NULL, 0);

	gfs2_quota_refresh(sdp, 0, id);
	return len;
}
| 165 | |||
/* Per-filesystem sysfs attribute: optional show and store callbacks that
 * take the gfs2_sbd directly instead of a raw kobject. */
struct gfs2_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};

/* Declare a static gfs2_attr named gfs2_attr_<name>. */
#define GFS2_ATTR(name, mode, show, store) \
static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)

GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
GFS2_ATTR(shrink, 0200, NULL, shrink_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);

/* Default attributes attached to every gfs2 kobject. */
static struct attribute *gfs2_attrs[] = {
	&gfs2_attr_id.attr,
	&gfs2_attr_fsname.attr,
	&gfs2_attr_freeze.attr,
	&gfs2_attr_shrink.attr,
	&gfs2_attr_withdraw.attr,
	&gfs2_attr_statfs_sync.attr,
	&gfs2_attr_quota_sync.attr,
	&gfs2_attr_quota_refresh_user.attr,
	&gfs2_attr_quota_refresh_group.attr,
	NULL,
};

/* sysfs show dispatcher: recover the sbd and attr, delegate to ->show. */
static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
	return a->show ? a->show(sdp, buf) : 0;
}

/* sysfs store dispatcher; attributes without ->store accept and ignore. */
static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t len)
{
	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
	return a->store ? a->store(sdp, buf, len) : len;
}

static struct sysfs_ops gfs2_attr_ops = {
	.show = gfs2_attr_show,
	.store = gfs2_attr_store,
};

static struct kobj_type gfs2_ktype = {
	.default_attrs = gfs2_attrs,
	.sysfs_ops = &gfs2_attr_ops,
};

/* /sys/fs/gfs2: the kset all per-fs kobjects live under. */
static struct kset gfs2_kset = {
	.subsys = &fs_subsys,
	.kobj = {.name = "gfs2"},
	.ktype = &gfs2_ktype,
};
| 229 | |||
/*
 * display struct lm_lockstruct fields (read-only)
 */

struct lockstruct_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
};

/* Generate a <name>_show reading sd_lockstruct.ls_<name> with @fmt. */
#define LOCKSTRUCT_ATTR(name, fmt)                                          \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
{                                                                           \
	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
}                                                                           \
static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)

LOCKSTRUCT_ATTR(jid,      "%u\n");
LOCKSTRUCT_ATTR(first,    "%u\n");
LOCKSTRUCT_ATTR(lvb_size, "%u\n");
LOCKSTRUCT_ATTR(flags,    "%d\n");

static struct attribute *lockstruct_attrs[] = {
	&lockstruct_attr_jid.attr,
	&lockstruct_attr_first.attr,
	&lockstruct_attr_lvb_size.attr,
	&lockstruct_attr_flags.attr,
	NULL,
};
| 258 | |||
/*
 * display struct gfs2_args fields (mount arguments, read-only)
 */

struct args_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
};

/* Generate a <name>_show reading sd_args.ar_<name> with @fmt. */
#define ARGS_ATTR(name, fmt)                                                \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
{                                                                           \
	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name);       \
}                                                                           \
static struct args_attr args_attr_##name = __ATTR_RO(name)

ARGS_ATTR(lockproto,       "%s\n");
ARGS_ATTR(locktable,       "%s\n");
ARGS_ATTR(hostdata,        "%s\n");
ARGS_ATTR(spectator,       "%d\n");
ARGS_ATTR(ignore_local_fs, "%d\n");
ARGS_ATTR(localcaching,    "%d\n");
ARGS_ATTR(localflocks,     "%d\n");
ARGS_ATTR(debug,           "%d\n");
ARGS_ATTR(upgrade,         "%d\n");
ARGS_ATTR(num_glockd,      "%u\n");
ARGS_ATTR(posix_acl,       "%d\n");
ARGS_ATTR(quota,           "%u\n");
ARGS_ATTR(suiddir,         "%d\n");
ARGS_ATTR(data,            "%d\n");

/* one oddball doesn't fit the macro mold: noatime lives in sd_flags,
   not in sd_args */
static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%d\n",
			!!test_bit(SDF_NOATIME, &sdp->sd_flags));
}
static struct args_attr args_attr_noatime = __ATTR_RO(noatime);

static struct attribute *args_attrs[] = {
	&args_attr_lockproto.attr,
	&args_attr_locktable.attr,
	&args_attr_hostdata.attr,
	&args_attr_spectator.attr,
	&args_attr_ignore_local_fs.attr,
	&args_attr_localcaching.attr,
	&args_attr_localflocks.attr,
	&args_attr_debug.attr,
	&args_attr_upgrade.attr,
	&args_attr_num_glockd.attr,
	&args_attr_posix_acl.attr,
	&args_attr_quota.attr,
	&args_attr_suiddir.attr,
	&args_attr_data.attr,
	&args_attr_noatime.attr,
	NULL,
};
| 316 | |||
/*
 * display counters from superblock (read-only atomic statistics)
 */

struct counters_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
};

/* Generate a <name>_show reading atomic counter sd_<name> with @fmt. */
#define COUNTERS_ATTR(name, fmt)                                            \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
{                                                                           \
	return snprintf(buf, PAGE_SIZE, fmt,                                \
			(unsigned int)atomic_read(&sdp->sd_##name));        \
}                                                                           \
static struct counters_attr counters_attr_##name = __ATTR_RO(name)

COUNTERS_ATTR(glock_count,      "%u\n");
COUNTERS_ATTR(glock_held_count, "%u\n");
COUNTERS_ATTR(inode_count,      "%u\n");
COUNTERS_ATTR(reclaimed,        "%u\n");

static struct attribute *counters_attrs[] = {
	&counters_attr_glock_count.attr,
	&counters_attr_glock_held_count.attr,
	&counters_attr_inode_count.attr,
	&counters_attr_reclaimed.attr,
	NULL,
};
| 346 | |||
/*
 * get and set struct gfs2_tune fields
 */

/* quota_scale is a num/den pair shown and stored as "num den". */
static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u %u\n",
			sdp->sd_tune.gt_quota_scale_num,
			sdp->sd_tune.gt_quota_scale_den);
}

static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
				 size_t len)
{
	struct gfs2_tune *gt = &sdp->sd_tune;
	unsigned int x, y;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	/* A zero denominator would divide by zero later on. */
	if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
		return -EINVAL;

	spin_lock(&gt->gt_spin);
	gt->gt_quota_scale_num = x;
	gt->gt_quota_scale_den = y;
	spin_unlock(&gt->gt_spin);
	return len;
}
| 376 | |||
| 377 | static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, | ||
| 378 | int check_zero, const char *buf, size_t len) | ||
| 379 | { | ||
| 380 | struct gfs2_tune *gt = &sdp->sd_tune; | ||
| 381 | unsigned int x; | ||
| 382 | |||
| 383 | if (!capable(CAP_SYS_ADMIN)) | ||
| 384 | return -EACCES; | ||
| 385 | |||
| 386 | x = simple_strtoul(buf, NULL, 0); | ||
| 387 | |||
| 388 | if (check_zero && !x) | ||
| 389 | return -EINVAL; | ||
| 390 | |||
| 391 | spin_lock(>->gt_spin); | ||
| 392 | *field = x; | ||
| 393 | spin_unlock(>->gt_spin); | ||
| 394 | return len; | ||
| 395 | } | ||
| 396 | |||
/* Tunable attribute: show plus store, both routed through gfs2_tune. */
struct tune_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};

/* Level 3: declare the attribute given explicit show/store callbacks. */
#define TUNE_ATTR_3(name, show, store) \
static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)

/* Level 2: generate the standard show for gt_<name>, take store as-is. */
#define TUNE_ATTR_2(name, store)                                            \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
{                                                                           \
	return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name);    \
}                                                                           \
TUNE_ATTR_3(name, name##_show, store)

/* Level 1: generate both show and a tune_set-based store. */
#define TUNE_ATTR(name, check_zero)                                         \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{                                                                           \
	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
}                                                                           \
TUNE_ATTR_2(name, name##_store)

/* As TUNE_ATTR (check_zero forced to 1), but also wakes the daemon whose
   polling interval the field controls so the new value takes effect. */
#define TUNE_ATTR_DAEMON(name, process)                                     \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{                                                                           \
	ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len);    \
	wake_up_process(sdp->sd_##process);                                 \
	return r;                                                           \
}                                                                           \
TUNE_ATTR_2(name, name##_store)

TUNE_ATTR(ilimit, 0);
TUNE_ATTR(ilimit_tries, 0);
TUNE_ATTR(ilimit_min, 0);
TUNE_ATTR(demote_secs, 0);
TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
TUNE_ATTR(jindex_refresh_secs, 0);
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(reclaim_limit, 0);
TUNE_ATTR(prefetch_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(new_files_directio, 0);
TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(max_atomic_write, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(entries_per_readdir, 1);
TUNE_ATTR(greedy_default, 1);
TUNE_ATTR(greedy_quantum, 1);
TUNE_ATTR(greedy_max, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
TUNE_ATTR_DAEMON(logd_secs, logd_process);
TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
| 460 | |||
| 461 | static struct attribute *tune_attrs[] = { | ||
| 462 | &tune_attr_ilimit.attr, | ||
| 463 | &tune_attr_ilimit_tries.attr, | ||
| 464 | &tune_attr_ilimit_min.attr, | ||
| 465 | &tune_attr_demote_secs.attr, | ||
| 466 | &tune_attr_incore_log_blocks.attr, | ||
| 467 | &tune_attr_log_flush_secs.attr, | ||
| 468 | &tune_attr_jindex_refresh_secs.attr, | ||
| 469 | &tune_attr_quota_warn_period.attr, | ||
| 470 | &tune_attr_quota_quantum.attr, | ||
| 471 | &tune_attr_atime_quantum.attr, | ||
| 472 | &tune_attr_max_readahead.attr, | ||
| 473 | &tune_attr_complain_secs.attr, | ||
| 474 | &tune_attr_reclaim_limit.attr, | ||
| 475 | &tune_attr_prefetch_secs.attr, | ||
| 476 | &tune_attr_statfs_slow.attr, | ||
| 477 | &tune_attr_quota_simul_sync.attr, | ||
| 478 | &tune_attr_quota_cache_secs.attr, | ||
| 479 | &tune_attr_max_atomic_write.attr, | ||
| 480 | &tune_attr_stall_secs.attr, | ||
| 481 | &tune_attr_entries_per_readdir.attr, | ||
| 482 | &tune_attr_greedy_default.attr, | ||
| 483 | &tune_attr_greedy_quantum.attr, | ||
| 484 | &tune_attr_greedy_max.attr, | ||
| 485 | &tune_attr_statfs_quantum.attr, | ||
| 486 | &tune_attr_scand_secs.attr, | ||
| 487 | &tune_attr_recoverd_secs.attr, | ||
| 488 | &tune_attr_logd_secs.attr, | ||
| 489 | &tune_attr_quotad_secs.attr, | ||
| 490 | &tune_attr_quota_scale.attr, | ||
| 491 | &tune_attr_new_files_jdata.attr, | ||
| 492 | &tune_attr_new_files_directio.attr, | ||
| 493 | NULL, | ||
| 494 | }; | ||
| 495 | |||
| 496 | static struct attribute_group lockstruct_group = { | ||
| 497 | .name = "lockstruct", | ||
| 498 | .attrs = lockstruct_attrs, | ||
| 499 | }; | ||
| 500 | |||
| 501 | static struct attribute_group counters_group = { | ||
| 502 | .name = "counters", | ||
| 503 | .attrs = counters_attrs, | ||
| 504 | }; | ||
| 505 | |||
| 506 | static struct attribute_group args_group = { | ||
| 507 | .name = "args", | ||
| 508 | .attrs = args_attrs, | ||
| 509 | }; | ||
| 510 | |||
| 511 | static struct attribute_group tune_group = { | ||
| 512 | .name = "tune", | ||
| 513 | .attrs = tune_attrs, | ||
| 514 | }; | ||
| 515 | |||
| 516 | int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | ||
| 517 | { | ||
| 518 | int error; | ||
| 519 | |||
| 520 | sdp->sd_kobj.kset = &gfs2_kset; | ||
| 521 | sdp->sd_kobj.ktype = &gfs2_ktype; | ||
| 522 | |||
| 523 | error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name); | ||
| 524 | if (error) | ||
| 525 | goto fail; | ||
| 526 | |||
| 527 | error = kobject_register(&sdp->sd_kobj); | ||
| 528 | if (error) | ||
| 529 | goto fail; | ||
| 530 | |||
| 531 | error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group); | ||
| 532 | if (error) | ||
| 533 | goto fail_reg; | ||
| 534 | |||
| 535 | error = sysfs_create_group(&sdp->sd_kobj, &counters_group); | ||
| 536 | if (error) | ||
| 537 | goto fail_lockstruct; | ||
| 538 | |||
| 539 | error = sysfs_create_group(&sdp->sd_kobj, &args_group); | ||
| 540 | if (error) | ||
| 541 | goto fail_counters; | ||
| 542 | |||
| 543 | error = sysfs_create_group(&sdp->sd_kobj, &tune_group); | ||
| 544 | if (error) | ||
| 545 | goto fail_args; | ||
| 546 | |||
| 547 | return 0; | ||
| 548 | |||
| 549 | fail_args: | ||
| 550 | sysfs_remove_group(&sdp->sd_kobj, &args_group); | ||
| 551 | fail_counters: | ||
| 552 | sysfs_remove_group(&sdp->sd_kobj, &counters_group); | ||
| 553 | fail_lockstruct: | ||
| 554 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | ||
| 555 | fail_reg: | ||
| 556 | kobject_unregister(&sdp->sd_kobj); | ||
| 557 | fail: | ||
| 558 | fs_err(sdp, "error %d adding sysfs files", error); | ||
| 559 | return error; | ||
| 560 | } | ||
| 561 | |||
| 562 | void gfs2_sys_fs_del(struct gfs2_sbd *sdp) | ||
| 563 | { | ||
| 564 | sysfs_remove_group(&sdp->sd_kobj, &tune_group); | ||
| 565 | sysfs_remove_group(&sdp->sd_kobj, &args_group); | ||
| 566 | sysfs_remove_group(&sdp->sd_kobj, &counters_group); | ||
| 567 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | ||
| 568 | kobject_unregister(&sdp->sd_kobj); | ||
| 569 | } | ||
| 570 | |||
| 571 | int gfs2_sys_init(void) | ||
| 572 | { | ||
| 573 | gfs2_sys_margs = NULL; | ||
| 574 | spin_lock_init(&gfs2_sys_margs_lock); | ||
| 575 | return kset_register(&gfs2_kset); | ||
| 576 | } | ||
| 577 | |||
| 578 | void gfs2_sys_uninit(void) | ||
| 579 | { | ||
| 580 | kfree(gfs2_sys_margs); | ||
| 581 | kset_unregister(&gfs2_kset); | ||
| 582 | } | ||
| 583 | |||
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h new file mode 100644 index 000000000000..1ca8cdac5304 --- /dev/null +++ b/fs/gfs2/sys.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __SYS_DOT_H__ | ||
| 11 | #define __SYS_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/spinlock.h> | ||
| 14 | struct gfs2_sbd; | ||
| 15 | |||
| 16 | /* Allow args to be passed to GFS2 when using an initial ram disk */ | ||
| 17 | extern char *gfs2_sys_margs; | ||
| 18 | extern spinlock_t gfs2_sys_margs_lock; | ||
| 19 | |||
| 20 | int gfs2_sys_fs_add(struct gfs2_sbd *sdp); | ||
| 21 | void gfs2_sys_fs_del(struct gfs2_sbd *sdp); | ||
| 22 | |||
| 23 | int gfs2_sys_init(void); | ||
| 24 | void gfs2_sys_uninit(void); | ||
| 25 | |||
| 26 | #endif /* __SYS_DOT_H__ */ | ||
| 27 | |||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c new file mode 100644 index 000000000000..f8dabf8446bb --- /dev/null +++ b/fs/gfs2/trans.c | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/kallsyms.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | |||
| 19 | #include "gfs2.h" | ||
| 20 | #include "incore.h" | ||
| 21 | #include "glock.h" | ||
| 22 | #include "log.h" | ||
| 23 | #include "lops.h" | ||
| 24 | #include "meta_io.h" | ||
| 25 | #include "trans.h" | ||
| 26 | #include "util.h" | ||
| 27 | |||
| 28 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | ||
| 29 | unsigned int revokes) | ||
| 30 | { | ||
| 31 | struct gfs2_trans *tr; | ||
| 32 | int error; | ||
| 33 | |||
| 34 | BUG_ON(current->journal_info); | ||
| 35 | BUG_ON(blocks == 0 && revokes == 0); | ||
| 36 | |||
| 37 | tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); | ||
| 38 | if (!tr) | ||
| 39 | return -ENOMEM; | ||
| 40 | |||
| 41 | tr->tr_ip = (unsigned long)__builtin_return_address(0); | ||
| 42 | tr->tr_blocks = blocks; | ||
| 43 | tr->tr_revokes = revokes; | ||
| 44 | tr->tr_reserved = 1; | ||
| 45 | if (blocks) | ||
| 46 | tr->tr_reserved += 6 + blocks; | ||
| 47 | if (revokes) | ||
| 48 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, | ||
| 49 | sizeof(u64)); | ||
| 50 | INIT_LIST_HEAD(&tr->tr_list_buf); | ||
| 51 | |||
| 52 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); | ||
| 53 | |||
| 54 | error = gfs2_glock_nq(&tr->tr_t_gh); | ||
| 55 | if (error) | ||
| 56 | goto fail_holder_uninit; | ||
| 57 | |||
| 58 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
| 59 | tr->tr_t_gh.gh_flags |= GL_NOCACHE; | ||
| 60 | error = -EROFS; | ||
| 61 | goto fail_gunlock; | ||
| 62 | } | ||
| 63 | |||
| 64 | error = gfs2_log_reserve(sdp, tr->tr_reserved); | ||
| 65 | if (error) | ||
| 66 | goto fail_gunlock; | ||
| 67 | |||
| 68 | current->journal_info = tr; | ||
| 69 | |||
| 70 | return 0; | ||
| 71 | |||
| 72 | fail_gunlock: | ||
| 73 | gfs2_glock_dq(&tr->tr_t_gh); | ||
| 74 | |||
| 75 | fail_holder_uninit: | ||
| 76 | gfs2_holder_uninit(&tr->tr_t_gh); | ||
| 77 | kfree(tr); | ||
| 78 | |||
| 79 | return error; | ||
| 80 | } | ||
| 81 | |||
| 82 | void gfs2_trans_end(struct gfs2_sbd *sdp) | ||
| 83 | { | ||
| 84 | struct gfs2_trans *tr = current->journal_info; | ||
| 85 | |||
| 86 | BUG_ON(!tr); | ||
| 87 | current->journal_info = NULL; | ||
| 88 | |||
| 89 | if (!tr->tr_touched) { | ||
| 90 | gfs2_log_release(sdp, tr->tr_reserved); | ||
| 91 | gfs2_glock_dq(&tr->tr_t_gh); | ||
| 92 | gfs2_holder_uninit(&tr->tr_t_gh); | ||
| 93 | kfree(tr); | ||
| 94 | return; | ||
| 95 | } | ||
| 96 | |||
| 97 | if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) { | ||
| 98 | fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ", | ||
| 99 | tr->tr_num_buf, tr->tr_blocks); | ||
| 100 | print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); | ||
| 101 | } | ||
| 102 | if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) { | ||
| 103 | fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ", | ||
| 104 | tr->tr_num_revoke, tr->tr_revokes); | ||
| 105 | print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); | ||
| 106 | } | ||
| 107 | |||
| 108 | gfs2_log_commit(sdp, tr); | ||
| 109 | gfs2_glock_dq(&tr->tr_t_gh); | ||
| 110 | gfs2_holder_uninit(&tr->tr_t_gh); | ||
| 111 | kfree(tr); | ||
| 112 | |||
| 113 | if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) | ||
| 114 | gfs2_log_flush(sdp, NULL); | ||
| 115 | } | ||
| 116 | |||
| 117 | void gfs2_trans_add_gl(struct gfs2_glock *gl) | ||
| 118 | { | ||
| 119 | lops_add(gl->gl_sbd, &gl->gl_le); | ||
| 120 | } | ||
| 121 | |||
| 122 | /** | ||
| 123 | * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction | ||
| 124 | * @gl: the glock the buffer belongs to | ||
| 125 | * @bh: The buffer to add | ||
| 126 | * @meta: True in the case of adding metadata | ||
| 127 | * | ||
| 128 | */ | ||
| 129 | |||
| 130 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) | ||
| 131 | { | ||
| 132 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 133 | struct gfs2_bufdata *bd; | ||
| 134 | |||
| 135 | bd = bh->b_private; | ||
| 136 | if (bd) | ||
| 137 | gfs2_assert(sdp, bd->bd_gl == gl); | ||
| 138 | else { | ||
| 139 | gfs2_attach_bufdata(gl, bh, meta); | ||
| 140 | bd = bh->b_private; | ||
| 141 | } | ||
| 142 | lops_add(sdp, &bd->bd_le); | ||
| 143 | } | ||
| 144 | |||
| 145 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno) | ||
| 146 | { | ||
| 147 | struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), | ||
| 148 | GFP_NOFS | __GFP_NOFAIL); | ||
| 149 | lops_init_le(&rv->rv_le, &gfs2_revoke_lops); | ||
| 150 | rv->rv_blkno = blkno; | ||
| 151 | lops_add(sdp, &rv->rv_le); | ||
| 152 | } | ||
| 153 | |||
| 154 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) | ||
| 155 | { | ||
| 156 | struct gfs2_revoke *rv; | ||
| 157 | int found = 0; | ||
| 158 | |||
| 159 | gfs2_log_lock(sdp); | ||
| 160 | |||
| 161 | list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { | ||
| 162 | if (rv->rv_blkno == blkno) { | ||
| 163 | list_del(&rv->rv_le.le_list); | ||
| 164 | gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); | ||
| 165 | sdp->sd_log_num_revoke--; | ||
| 166 | found = 1; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | gfs2_log_unlock(sdp); | ||
| 172 | |||
| 173 | if (found) { | ||
| 174 | struct gfs2_trans *tr = current->journal_info; | ||
| 175 | kfree(rv); | ||
| 176 | tr->tr_num_revoke_rm++; | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) | ||
| 181 | { | ||
| 182 | lops_add(rgd->rd_sbd, &rgd->rd_le); | ||
| 183 | } | ||
| 184 | |||
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h new file mode 100644 index 000000000000..23d4cbe1de5b --- /dev/null +++ b/fs/gfs2/trans.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __TRANS_DOT_H__ | ||
| 11 | #define __TRANS_DOT_H__ | ||
| 12 | |||
| 13 | #include <linux/buffer_head.h> | ||
| 14 | struct gfs2_sbd; | ||
| 15 | struct gfs2_rgrpd; | ||
| 16 | struct gfs2_glock; | ||
| 17 | |||
| 18 | #define RES_DINODE 1 | ||
| 19 | #define RES_INDIRECT 1 | ||
| 20 | #define RES_JDATA 1 | ||
| 21 | #define RES_DATA 1 | ||
| 22 | #define RES_LEAF 1 | ||
| 23 | #define RES_RG_BIT 2 | ||
| 24 | #define RES_EATTR 1 | ||
| 25 | #define RES_STATFS 1 | ||
| 26 | #define RES_QUOTA 2 | ||
| 27 | |||
| 28 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | ||
| 29 | unsigned int revokes); | ||
| 30 | |||
| 31 | void gfs2_trans_end(struct gfs2_sbd *sdp); | ||
| 32 | |||
| 33 | void gfs2_trans_add_gl(struct gfs2_glock *gl); | ||
| 34 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | ||
| 35 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno); | ||
| 36 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); | ||
| 37 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); | ||
| 38 | |||
| 39 | #endif /* __TRANS_DOT_H__ */ | ||
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c new file mode 100644 index 000000000000..196c604faadc --- /dev/null +++ b/fs/gfs2/util.c | |||
| @@ -0,0 +1,245 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/completion.h> | ||
| 14 | #include <linux/buffer_head.h> | ||
| 15 | #include <linux/crc32.h> | ||
| 16 | #include <linux/gfs2_ondisk.h> | ||
| 17 | #include <linux/lm_interface.h> | ||
| 18 | #include <asm/uaccess.h> | ||
| 19 | |||
| 20 | #include "gfs2.h" | ||
| 21 | #include "incore.h" | ||
| 22 | #include "glock.h" | ||
| 23 | #include "lm.h" | ||
| 24 | #include "util.h" | ||
| 25 | |||
| 26 | kmem_cache_t *gfs2_glock_cachep __read_mostly; | ||
| 27 | kmem_cache_t *gfs2_inode_cachep __read_mostly; | ||
| 28 | kmem_cache_t *gfs2_bufdata_cachep __read_mostly; | ||
| 29 | |||
| 30 | void gfs2_assert_i(struct gfs2_sbd *sdp) | ||
| 31 | { | ||
| 32 | printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n", | ||
| 33 | sdp->sd_fsname); | ||
| 34 | } | ||
| 35 | |||
| 36 | /** | ||
| 37 | * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false | ||
| 38 | * Returns: -1 if this call withdrew the machine, | ||
| 39 | * -2 if it was already withdrawn | ||
| 40 | */ | ||
| 41 | |||
| 42 | int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, | ||
| 43 | const char *function, char *file, unsigned int line) | ||
| 44 | { | ||
| 45 | int me; | ||
| 46 | me = gfs2_lm_withdraw(sdp, | ||
| 47 | "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" | ||
| 48 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 49 | sdp->sd_fsname, assertion, | ||
| 50 | sdp->sd_fsname, function, file, line); | ||
| 51 | dump_stack(); | ||
| 52 | return (me) ? -1 : -2; | ||
| 53 | } | ||
| 54 | |||
| 55 | /** | ||
| 56 | * gfs2_assert_warn_i - Print a message to the console if @assertion is false | ||
| 57 | * Returns: -1 if we printed something | ||
| 58 | * -2 if we didn't | ||
| 59 | */ | ||
| 60 | |||
| 61 | int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, | ||
| 62 | const char *function, char *file, unsigned int line) | ||
| 63 | { | ||
| 64 | if (time_before(jiffies, | ||
| 65 | sdp->sd_last_warning + | ||
| 66 | gfs2_tune_get(sdp, gt_complain_secs) * HZ)) | ||
| 67 | return -2; | ||
| 68 | |||
| 69 | printk(KERN_WARNING | ||
| 70 | "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" | ||
| 71 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 72 | sdp->sd_fsname, assertion, | ||
| 73 | sdp->sd_fsname, function, file, line); | ||
| 74 | |||
| 75 | if (sdp->sd_args.ar_debug) | ||
| 76 | BUG(); | ||
| 77 | else | ||
| 78 | dump_stack(); | ||
| 79 | |||
| 80 | sdp->sd_last_warning = jiffies; | ||
| 81 | |||
| 82 | return -1; | ||
| 83 | } | ||
| 84 | |||
| 85 | /** | ||
| 86 | * gfs2_consist_i - Flag a filesystem consistency error and withdraw | ||
| 87 | * Returns: -1 if this call withdrew the machine, | ||
| 88 | * 0 if it was already withdrawn | ||
| 89 | */ | ||
| 90 | |||
| 91 | int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, | ||
| 92 | char *file, unsigned int line) | ||
| 93 | { | ||
| 94 | int rv; | ||
| 95 | rv = gfs2_lm_withdraw(sdp, | ||
| 96 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | ||
| 97 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 98 | sdp->sd_fsname, | ||
| 99 | sdp->sd_fsname, function, file, line); | ||
| 100 | return rv; | ||
| 101 | } | ||
| 102 | |||
| 103 | /** | ||
| 104 | * gfs2_consist_inode_i - Flag an inode consistency error and withdraw | ||
| 105 | * Returns: -1 if this call withdrew the machine, | ||
| 106 | * 0 if it was already withdrawn | ||
| 107 | */ | ||
| 108 | |||
| 109 | int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, | ||
| 110 | const char *function, char *file, unsigned int line) | ||
| 111 | { | ||
| 112 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 113 | int rv; | ||
| 114 | rv = gfs2_lm_withdraw(sdp, | ||
| 115 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | ||
| 116 | "GFS2: fsid=%s: inode = %llu %llu\n" | ||
| 117 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 118 | sdp->sd_fsname, | ||
| 119 | sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino, | ||
| 120 | (unsigned long long)ip->i_num.no_addr, | ||
| 121 | sdp->sd_fsname, function, file, line); | ||
| 122 | return rv; | ||
| 123 | } | ||
| 124 | |||
| 125 | /** | ||
| 126 | * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw | ||
| 127 | * Returns: -1 if this call withdrew the machine, | ||
| 128 | * 0 if it was already withdrawn | ||
| 129 | */ | ||
| 130 | |||
| 131 | int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, | ||
| 132 | const char *function, char *file, unsigned int line) | ||
| 133 | { | ||
| 134 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
| 135 | int rv; | ||
| 136 | rv = gfs2_lm_withdraw(sdp, | ||
| 137 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | ||
| 138 | "GFS2: fsid=%s: RG = %llu\n" | ||
| 139 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 140 | sdp->sd_fsname, | ||
| 141 | sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr, | ||
| 142 | sdp->sd_fsname, function, file, line); | ||
| 143 | return rv; | ||
| 144 | } | ||
| 145 | |||
| 146 | /** | ||
| 147 | * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw | ||
| 148 | * Returns: -1 if this call withdrew the machine, | ||
| 149 | * -2 if it was already withdrawn | ||
| 150 | */ | ||
| 151 | |||
| 152 | int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | ||
| 153 | const char *type, const char *function, char *file, | ||
| 154 | unsigned int line) | ||
| 155 | { | ||
| 156 | int me; | ||
| 157 | me = gfs2_lm_withdraw(sdp, | ||
| 158 | "GFS2: fsid=%s: fatal: invalid metadata block\n" | ||
| 159 | "GFS2: fsid=%s: bh = %llu (%s)\n" | ||
| 160 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 161 | sdp->sd_fsname, | ||
| 162 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, | ||
| 163 | sdp->sd_fsname, function, file, line); | ||
| 164 | return (me) ? -1 : -2; | ||
| 165 | } | ||
| 166 | |||
| 167 | /** | ||
| 168 | * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw | ||
| 169 | * Returns: -1 if this call withdrew the machine, | ||
| 170 | * -2 if it was already withdrawn | ||
| 171 | */ | ||
| 172 | |||
| 173 | int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | ||
| 174 | u16 type, u16 t, const char *function, | ||
| 175 | char *file, unsigned int line) | ||
| 176 | { | ||
| 177 | int me; | ||
| 178 | me = gfs2_lm_withdraw(sdp, | ||
| 179 | "GFS2: fsid=%s: fatal: invalid metadata block\n" | ||
| 180 | "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n" | ||
| 181 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 182 | sdp->sd_fsname, | ||
| 183 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t, | ||
| 184 | sdp->sd_fsname, function, file, line); | ||
| 185 | return (me) ? -1 : -2; | ||
| 186 | } | ||
| 187 | |||
| 188 | /** | ||
| 189 | * gfs2_io_error_i - Flag an I/O error and withdraw | ||
| 190 | * Returns: -1 if this call withdrew the machine, | ||
| 191 | * 0 if it was already withdrawn | ||
| 192 | */ | ||
| 193 | |||
| 194 | int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, | ||
| 195 | unsigned int line) | ||
| 196 | { | ||
| 197 | int rv; | ||
| 198 | rv = gfs2_lm_withdraw(sdp, | ||
| 199 | "GFS2: fsid=%s: fatal: I/O error\n" | ||
| 200 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 201 | sdp->sd_fsname, | ||
| 202 | sdp->sd_fsname, function, file, line); | ||
| 203 | return rv; | ||
| 204 | } | ||
| 205 | |||
| 206 | /** | ||
| 207 | * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw | ||
| 208 | * Returns: -1 if this call withdrew the machine, | ||
| 209 | * 0 if it was already withdrawn | ||
| 210 | */ | ||
| 211 | |||
| 212 | int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, | ||
| 213 | const char *function, char *file, unsigned int line) | ||
| 214 | { | ||
| 215 | int rv; | ||
| 216 | rv = gfs2_lm_withdraw(sdp, | ||
| 217 | "GFS2: fsid=%s: fatal: I/O error\n" | ||
| 218 | "GFS2: fsid=%s: block = %llu\n" | ||
| 219 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
| 220 | sdp->sd_fsname, | ||
| 221 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, | ||
| 222 | sdp->sd_fsname, function, file, line); | ||
| 223 | return rv; | ||
| 224 | } | ||
| 225 | |||
| 226 | void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, | ||
| 227 | unsigned int bit, int new_value) | ||
| 228 | { | ||
| 229 | unsigned int c, o, b = bit; | ||
| 230 | int old_value; | ||
| 231 | |||
| 232 | c = b / (8 * PAGE_SIZE); | ||
| 233 | b %= 8 * PAGE_SIZE; | ||
| 234 | o = b / 8; | ||
| 235 | b %= 8; | ||
| 236 | |||
| 237 | old_value = (bitmap[c][o] & (1 << b)); | ||
| 238 | gfs2_assert_withdraw(sdp, !old_value != !new_value); | ||
| 239 | |||
| 240 | if (new_value) | ||
| 241 | bitmap[c][o] |= 1 << b; | ||
| 242 | else | ||
| 243 | bitmap[c][o] &= ~(1 << b); | ||
| 244 | } | ||
| 245 | |||
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h new file mode 100644 index 000000000000..76a50899fe9e --- /dev/null +++ b/fs/gfs2/util.h | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __UTIL_DOT_H__ | ||
| 11 | #define __UTIL_DOT_H__ | ||
| 12 | |||
| 13 | #include "incore.h" | ||
| 14 | |||
| 15 | #define fs_printk(level, fs, fmt, arg...) \ | ||
| 16 | printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg) | ||
| 17 | |||
| 18 | #define fs_info(fs, fmt, arg...) \ | ||
| 19 | fs_printk(KERN_INFO , fs , fmt , ## arg) | ||
| 20 | |||
| 21 | #define fs_warn(fs, fmt, arg...) \ | ||
| 22 | fs_printk(KERN_WARNING , fs , fmt , ## arg) | ||
| 23 | |||
| 24 | #define fs_err(fs, fmt, arg...) \ | ||
| 25 | fs_printk(KERN_ERR, fs , fmt , ## arg) | ||
| 26 | |||
| 27 | |||
| 28 | void gfs2_assert_i(struct gfs2_sbd *sdp); | ||
| 29 | |||
| 30 | #define gfs2_assert(sdp, assertion) \ | ||
| 31 | do { \ | ||
| 32 | if (unlikely(!(assertion))) { \ | ||
| 33 | gfs2_assert_i(sdp); \ | ||
| 34 | BUG(); \ | ||
| 35 | } \ | ||
| 36 | } while (0) | ||
| 37 | |||
| 38 | |||
| 39 | int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, | ||
| 40 | const char *function, char *file, unsigned int line); | ||
| 41 | |||
| 42 | #define gfs2_assert_withdraw(sdp, assertion) \ | ||
| 43 | ((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ | ||
| 44 | __FUNCTION__, __FILE__, __LINE__)) | ||
| 45 | |||
| 46 | |||
| 47 | int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, | ||
| 48 | const char *function, char *file, unsigned int line); | ||
| 49 | |||
| 50 | #define gfs2_assert_warn(sdp, assertion) \ | ||
| 51 | ((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ | ||
| 52 | __FUNCTION__, __FILE__, __LINE__)) | ||
| 53 | |||
| 54 | |||
| 55 | int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, | ||
| 56 | const char *function, char *file, unsigned int line); | ||
| 57 | |||
| 58 | #define gfs2_consist(sdp) \ | ||
| 59 | gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) | ||
| 60 | |||
| 61 | |||
| 62 | int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, | ||
| 63 | const char *function, char *file, unsigned int line); | ||
| 64 | |||
| 65 | #define gfs2_consist_inode(ip) \ | ||
| 66 | gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) | ||
| 67 | |||
| 68 | |||
| 69 | int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, | ||
| 70 | const char *function, char *file, unsigned int line); | ||
| 71 | |||
| 72 | #define gfs2_consist_rgrpd(rgd) \ | ||
| 73 | gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) | ||
| 74 | |||
| 75 | |||
| 76 | int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | ||
| 77 | const char *type, const char *function, | ||
| 78 | char *file, unsigned int line); | ||
| 79 | |||
| 80 | static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, | ||
| 81 | struct buffer_head *bh, | ||
| 82 | const char *function, | ||
| 83 | char *file, unsigned int line) | ||
| 84 | { | ||
| 85 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; | ||
| 86 | u32 magic = mh->mh_magic; | ||
| 87 | magic = be32_to_cpu(magic); | ||
| 88 | if (unlikely(magic != GFS2_MAGIC)) | ||
| 89 | return gfs2_meta_check_ii(sdp, bh, "magic number", function, | ||
| 90 | file, line); | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | #define gfs2_meta_check(sdp, bh) \ | ||
| 95 | gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) | ||
| 96 | |||
| 97 | |||
| 98 | int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | ||
| 99 | u16 type, u16 t, | ||
| 100 | const char *function, | ||
| 101 | char *file, unsigned int line); | ||
| 102 | |||
| 103 | static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, | ||
| 104 | struct buffer_head *bh, | ||
| 105 | u16 type, | ||
| 106 | const char *function, | ||
| 107 | char *file, unsigned int line) | ||
| 108 | { | ||
| 109 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; | ||
| 110 | u32 magic = mh->mh_magic; | ||
| 111 | u16 t = be32_to_cpu(mh->mh_type); | ||
| 112 | magic = be32_to_cpu(magic); | ||
| 113 | if (unlikely(magic != GFS2_MAGIC)) | ||
| 114 | return gfs2_meta_check_ii(sdp, bh, "magic number", function, | ||
| 115 | file, line); | ||
| 116 | if (unlikely(t != type)) | ||
| 117 | return gfs2_metatype_check_ii(sdp, bh, type, t, function, | ||
| 118 | file, line); | ||
| 119 | return 0; | ||
| 120 | } | ||
| 121 | |||
| 122 | #define gfs2_metatype_check(sdp, bh, type) \ | ||
| 123 | gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) | ||
| 124 | |||
| 125 | static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, | ||
| 126 | u16 format) | ||
| 127 | { | ||
| 128 | struct gfs2_meta_header *mh; | ||
| 129 | mh = (struct gfs2_meta_header *)bh->b_data; | ||
| 130 | mh->mh_type = cpu_to_be32(type); | ||
| 131 | mh->mh_format = cpu_to_be32(format); | ||
| 132 | } | ||
| 133 | |||
| 134 | |||
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);

/* No trailing semicolon in the expansions below: callers supply their
 * own, so the macros behave like function calls inside if/else. */
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)


int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);

#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
| 147 | |||
| 148 | |||
| 149 | extern kmem_cache_t *gfs2_glock_cachep; | ||
| 150 | extern kmem_cache_t *gfs2_inode_cachep; | ||
| 151 | extern kmem_cache_t *gfs2_bufdata_cachep; | ||
| 152 | |||
| 153 | static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, | ||
| 154 | unsigned int *p) | ||
| 155 | { | ||
| 156 | unsigned int x; | ||
| 157 | spin_lock(>->gt_spin); | ||
| 158 | x = *p; | ||
| 159 | spin_unlock(>->gt_spin); | ||
| 160 | return x; | ||
| 161 | } | ||
| 162 | |||
| 163 | #define gfs2_tune_get(sdp, field) \ | ||
| 164 | gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) | ||
| 165 | |||
| 166 | void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, | ||
| 167 | unsigned int bit, int new_value); | ||
| 168 | |||
| 169 | #endif /* __UTIL_DOT_H__ */ | ||
| 170 | |||
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 7d564b6fc98f..ea005c0a79fd 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
| @@ -46,6 +46,7 @@ header-y += coff.h | |||
| 46 | header-y += comstats.h | 46 | header-y += comstats.h |
| 47 | header-y += consolemap.h | 47 | header-y += consolemap.h |
| 48 | header-y += cycx_cfm.h | 48 | header-y += cycx_cfm.h |
| 49 | header-y += dlm_device.h | ||
| 49 | header-y += dm-ioctl.h | 50 | header-y += dm-ioctl.h |
| 50 | header-y += dn.h | 51 | header-y += dn.h |
| 51 | header-y += dqblk_v1.h | 52 | header-y += dqblk_v1.h |
| @@ -104,6 +105,7 @@ header-y += ixjuser.h | |||
| 104 | header-y += jffs2.h | 105 | header-y += jffs2.h |
| 105 | header-y += keyctl.h | 106 | header-y += keyctl.h |
| 106 | header-y += limits.h | 107 | header-y += limits.h |
| 108 | header-y += lock_dlm_plock.h | ||
| 107 | header-y += magic.h | 109 | header-y += magic.h |
| 108 | header-y += major.h | 110 | header-y += major.h |
| 109 | header-y += matroxfb.h | 111 | header-y += matroxfb.h |
| @@ -192,6 +194,7 @@ unifdef-y += cyclades.h | |||
| 192 | unifdef-y += dccp.h | 194 | unifdef-y += dccp.h |
| 193 | unifdef-y += dirent.h | 195 | unifdef-y += dirent.h |
| 194 | unifdef-y += divert.h | 196 | unifdef-y += divert.h |
| 197 | unifdef-y += dlm.h | ||
| 195 | unifdef-y += elfcore.h | 198 | unifdef-y += elfcore.h |
| 196 | unifdef-y += errno.h | 199 | unifdef-y += errno.h |
| 197 | unifdef-y += errqueue.h | 200 | unifdef-y += errqueue.h |
| @@ -208,6 +211,7 @@ unifdef-y += ftape.h | |||
| 208 | unifdef-y += gameport.h | 211 | unifdef-y += gameport.h |
| 209 | unifdef-y += generic_serial.h | 212 | unifdef-y += generic_serial.h |
| 210 | unifdef-y += genhd.h | 213 | unifdef-y += genhd.h |
| 214 | unifdef-y += gfs2_ondisk.h | ||
| 211 | unifdef-y += hayesesp.h | 215 | unifdef-y += hayesesp.h |
| 212 | unifdef-y += hdlcdrv.h | 216 | unifdef-y += hdlcdrv.h |
| 213 | unifdef-y += hdlc.h | 217 | unifdef-y += hdlc.h |
diff --git a/include/linux/dlm.h b/include/linux/dlm.h new file mode 100644 index 000000000000..1b1dcb9a40bb --- /dev/null +++ b/include/linux/dlm.h | |||
| @@ -0,0 +1,302 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | #ifndef __DLM_DOT_H__ | ||
| 15 | #define __DLM_DOT_H__ | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Interface to Distributed Lock Manager (DLM) | ||
| 19 | * routines and structures to use DLM lockspaces | ||
| 20 | */ | ||
| 21 | |||
| 22 | /* | ||
| 23 | * Lock Modes | ||
| 24 | */ | ||
| 25 | |||
| 26 | #define DLM_LOCK_IV -1 /* invalid */ | ||
| 27 | #define DLM_LOCK_NL 0 /* null */ | ||
| 28 | #define DLM_LOCK_CR 1 /* concurrent read */ | ||
| 29 | #define DLM_LOCK_CW 2 /* concurrent write */ | ||
| 30 | #define DLM_LOCK_PR 3 /* protected read */ | ||
| 31 | #define DLM_LOCK_PW 4 /* protected write */ | ||
| 32 | #define DLM_LOCK_EX 5 /* exclusive */ | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Maximum size in bytes of a dlm_lock name | ||
| 36 | */ | ||
| 37 | |||
| 38 | #define DLM_RESNAME_MAXLEN 64 | ||
| 39 | |||
| 40 | /* | ||
| 41 | * Flags to dlm_lock | ||
| 42 | * | ||
| 43 | * DLM_LKF_NOQUEUE | ||
| 44 | * | ||
| 45 | * Do not queue the lock request on the wait queue if it cannot be granted | ||
| 46 | * immediately. If the lock cannot be granted because of this flag, DLM will | ||
| 47 | * either return -EAGAIN from the dlm_lock call or will return 0 from | ||
| 48 | * dlm_lock and -EAGAIN in the lock status block when the AST is executed. | ||
| 49 | * | ||
| 50 | * DLM_LKF_CANCEL | ||
| 51 | * | ||
| 52 | * Used to cancel a pending lock request or conversion. A converting lock is | ||
| 53 | * returned to its previously granted mode. | ||
| 54 | * | ||
| 55 | * DLM_LKF_CONVERT | ||
| 56 | * | ||
| 57 | * Indicates a lock conversion request. For conversions the name and namelen | ||
| 58 | * are ignored and the lock ID in the LKSB is used to identify the lock. | ||
| 59 | * | ||
| 60 | * DLM_LKF_VALBLK | ||
| 61 | * | ||
| 62 | * Requests DLM to return the current contents of the lock value block in the | ||
| 63 | * lock status block. When this flag is set in a lock conversion from PW or EX | ||
| 64 | * modes, DLM assigns the value specified in the lock status block to the lock | ||
| 65 | * value block of the lock resource. The LVB is a DLM_LVB_LEN size array | ||
| 66 | * containing application-specific information. | ||
| 67 | * | ||
| 68 | * DLM_LKF_QUECVT | ||
| 69 | * | ||
| 70 | * Force a conversion request to be queued, even if it is compatible with | ||
| 71 | * the granted modes of other locks on the same resource. | ||
| 72 | * | ||
| 73 | * DLM_LKF_IVVALBLK | ||
| 74 | * | ||
| 75 | * Invalidate the lock value block. | ||
| 76 | * | ||
| 77 | * DLM_LKF_CONVDEADLK | ||
| 78 | * | ||
| 79 | * Allows the dlm to resolve conversion deadlocks internally by demoting the | ||
| 80 | * granted mode of a converting lock to NL. The DLM_SBF_DEMOTED flag is | ||
| 81 | * returned for a conversion that's been effected by this. | ||
| 82 | * | ||
| 83 | * DLM_LKF_PERSISTENT | ||
| 84 | * | ||
| 85 | * Only relevant to locks originating in userspace. A persistent lock will not | ||
| 86 | * be removed if the process holding the lock exits. | ||
| 87 | * | ||
| 88 | * DLM_LKF_NODLCKWT | ||
| 89 | * DLM_LKF_NODLCKBLK | ||
| 90 | * | ||
| 91 | * not yet implemented | ||
| 92 | * | ||
| 93 | * DLM_LKF_EXPEDITE | ||
| 94 | * | ||
| 95 | * Used only with new requests for NL mode locks. Tells the lock manager | ||
| 96 | * to grant the lock, ignoring other locks in convert and wait queues. | ||
| 97 | * | ||
| 98 | * DLM_LKF_NOQUEUEBAST | ||
| 99 | * | ||
| 100 | * Send blocking AST's before returning -EAGAIN to the caller. It is only | ||
| 101 | * used along with the NOQUEUE flag. Blocking AST's are not sent for failed | ||
| 102 | * NOQUEUE requests otherwise. | ||
| 103 | * | ||
| 104 | * DLM_LKF_HEADQUE | ||
| 105 | * | ||
| 106 | * Add a lock to the head of the convert or wait queue rather than the tail. | ||
| 107 | * | ||
| 108 | * DLM_LKF_NOORDER | ||
| 109 | * | ||
| 110 | * Disregard the standard grant order rules and grant a lock as soon as it | ||
| 111 | * is compatible with other granted locks. | ||
| 112 | * | ||
| 113 | * DLM_LKF_ORPHAN | ||
| 114 | * | ||
| 115 | * not yet implemented | ||
| 116 | * | ||
| 117 | * DLM_LKF_ALTPR | ||
| 118 | * | ||
| 119 | * If the requested mode cannot be granted immediately, try to grant the lock | ||
| 120 | * in PR mode instead. If this alternate mode is granted instead of the | ||
| 121 | * requested mode, DLM_SBF_ALTMODE is returned in the lksb. | ||
| 122 | * | ||
| 123 | * DLM_LKF_ALTCW | ||
| 124 | * | ||
| 125 | * The same as ALTPR, but the alternate mode is CW. | ||
| 126 | * | ||
| 127 | * DLM_LKF_FORCEUNLOCK | ||
| 128 | * | ||
| 129 | * Unlock the lock even if it is converting or waiting or has sublocks. | ||
| 130 | * Only really for use by the userland device.c code. | ||
| 131 | * | ||
| 132 | */ | ||
| 133 | |||
| 134 | #define DLM_LKF_NOQUEUE 0x00000001 | ||
| 135 | #define DLM_LKF_CANCEL 0x00000002 | ||
| 136 | #define DLM_LKF_CONVERT 0x00000004 | ||
| 137 | #define DLM_LKF_VALBLK 0x00000008 | ||
| 138 | #define DLM_LKF_QUECVT 0x00000010 | ||
| 139 | #define DLM_LKF_IVVALBLK 0x00000020 | ||
| 140 | #define DLM_LKF_CONVDEADLK 0x00000040 | ||
| 141 | #define DLM_LKF_PERSISTENT 0x00000080 | ||
| 142 | #define DLM_LKF_NODLCKWT 0x00000100 | ||
| 143 | #define DLM_LKF_NODLCKBLK 0x00000200 | ||
| 144 | #define DLM_LKF_EXPEDITE 0x00000400 | ||
| 145 | #define DLM_LKF_NOQUEUEBAST 0x00000800 | ||
| 146 | #define DLM_LKF_HEADQUE 0x00001000 | ||
| 147 | #define DLM_LKF_NOORDER 0x00002000 | ||
| 148 | #define DLM_LKF_ORPHAN 0x00004000 | ||
| 149 | #define DLM_LKF_ALTPR 0x00008000 | ||
| 150 | #define DLM_LKF_ALTCW 0x00010000 | ||
| 151 | #define DLM_LKF_FORCEUNLOCK 0x00020000 | ||
| 152 | |||
| 153 | /* | ||
| 154 | * Some return codes that are not in errno.h | ||
| 155 | */ | ||
| 156 | |||
| 157 | #define DLM_ECANCEL 0x10001 | ||
| 158 | #define DLM_EUNLOCK 0x10002 | ||
| 159 | |||
| 160 | typedef void dlm_lockspace_t; | ||
| 161 | |||
| 162 | /* | ||
| 163 | * Lock status block | ||
| 164 | * | ||
| 165 | * Use this structure to specify the contents of the lock value block. For a | ||
| 166 | * conversion request, this structure is used to specify the lock ID of the | ||
| 167 | * lock. DLM writes the status of the lock request and the lock ID assigned | ||
| 168 | * to the request in the lock status block. | ||
| 169 | * | ||
| 170 | * sb_lkid: the returned lock ID. It is set on new (non-conversion) requests. | ||
| 171 | * It is available when dlm_lock returns. | ||
| 172 | * | ||
| 173 | * sb_lvbptr: saves or returns the contents of the lock's LVB according to rules | ||
| 174 | * shown for the DLM_LKF_VALBLK flag. | ||
| 175 | * | ||
| 176 | * sb_flags: DLM_SBF_DEMOTED is returned if in the process of promoting a lock, | ||
| 177 | * it was first demoted to NL to avoid conversion deadlock. | ||
| 178 | * DLM_SBF_VALNOTVALID is returned if the resource's LVB is marked invalid. | ||
| 179 | * | ||
| 180 | * sb_status: the returned status of the lock request set prior to AST | ||
| 181 | * execution. Possible return values: | ||
| 182 | * | ||
| 183 | * 0 if lock request was successful | ||
| 184 | * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | ||
| 185 | * -ENOMEM if there is no memory to process request | ||
| 186 | * -EINVAL if there are invalid parameters | ||
| 187 | * -DLM_EUNLOCK if unlock request was successful | ||
| 188 | * -DLM_ECANCEL if a cancel completed successfully | ||
| 189 | */ | ||
| 190 | |||
| 191 | #define DLM_SBF_DEMOTED 0x01 | ||
| 192 | #define DLM_SBF_VALNOTVALID 0x02 | ||
| 193 | #define DLM_SBF_ALTMODE 0x04 | ||
| 194 | |||
| 195 | struct dlm_lksb { | ||
| 196 | int sb_status; | ||
| 197 | uint32_t sb_lkid; | ||
| 198 | char sb_flags; | ||
| 199 | char * sb_lvbptr; | ||
| 200 | }; | ||
| 201 | |||
| 202 | |||
| 203 | #ifdef __KERNEL__ | ||
| 204 | |||
| 205 | #define DLM_LSFL_NODIR 0x00000001 | ||
| 206 | |||
| 207 | /* | ||
| 208 | * dlm_new_lockspace | ||
| 209 | * | ||
| 210 | * Starts a lockspace with the given name. If the named lockspace exists in | ||
| 211 | * the cluster, the calling node joins it. | ||
| 212 | */ | ||
| 213 | |||
| 214 | int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace, | ||
| 215 | uint32_t flags, int lvblen); | ||
| 216 | |||
| 217 | /* | ||
| 218 | * dlm_release_lockspace | ||
| 219 | * | ||
| 220 | * Stop a lockspace. | ||
| 221 | */ | ||
| 222 | |||
| 223 | int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); | ||
| 224 | |||
| 225 | /* | ||
| 226 | * dlm_lock | ||
| 227 | * | ||
| 228 | * Make an asynchronous request to acquire or convert a lock on a named | ||
| 229 | * resource. | ||
| 230 | * | ||
| 231 | * lockspace: context for the request | ||
| 232 | * mode: the requested mode of the lock (DLM_LOCK_) | ||
| 233 | * lksb: lock status block for input and async return values | ||
| 234 | * flags: input flags (DLM_LKF_) | ||
| 235 | * name: name of the resource to lock, can be binary | ||
| 236 | * namelen: the length in bytes of the resource name (DLM_RESNAME_MAXLEN) | ||
| 237 | * parent: the lock ID of a parent lock or 0 if none | ||
| 238 | * lockast: function DLM executes when it completes processing the request | ||
| 239 | * astarg: argument passed to lockast and bast functions | ||
| 240 | * bast: function DLM executes when this lock later blocks another request | ||
| 241 | * | ||
| 242 | * Returns: | ||
| 243 | * 0 if request is successfully queued for processing | ||
| 244 | * -EINVAL if any input parameters are invalid | ||
| 245 | * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | ||
| 246 | * -ENOMEM if there is no memory to process request | ||
| 247 | * -ENOTCONN if there is a communication error | ||
| 248 | * | ||
| 249 | * If the call to dlm_lock returns an error then the operation has failed and | ||
| 250 | * the AST routine will not be called. If dlm_lock returns 0 it is still | ||
| 251 | * possible that the lock operation will fail. The AST routine will be called | ||
| 252 | * when the locking is complete and the status is returned in the lksb. | ||
| 253 | * | ||
| 254 | * If the AST routines or parameter are passed to a conversion operation then | ||
| 255 | * they will overwrite those values that were passed to a previous dlm_lock | ||
| 256 | * call. | ||
| 257 | * | ||
| 258 | * AST routines should not block (at least not for long), but may make | ||
| 259 | * any locking calls they please. | ||
| 260 | */ | ||
| 261 | |||
| 262 | int dlm_lock(dlm_lockspace_t *lockspace, | ||
| 263 | int mode, | ||
| 264 | struct dlm_lksb *lksb, | ||
| 265 | uint32_t flags, | ||
| 266 | void *name, | ||
| 267 | unsigned int namelen, | ||
| 268 | uint32_t parent_lkid, | ||
| 269 | void (*lockast) (void *astarg), | ||
| 270 | void *astarg, | ||
| 271 | void (*bast) (void *astarg, int mode)); | ||
| 272 | |||
| 273 | /* | ||
| 274 | * dlm_unlock | ||
| 275 | * | ||
| 276 | * Asynchronously release a lock on a resource. The AST routine is called | ||
| 277 | * when the resource is successfully unlocked. | ||
| 278 | * | ||
| 279 | * lockspace: context for the request | ||
| 280 | * lkid: the lock ID as returned in the lksb | ||
| 281 | * flags: input flags (DLM_LKF_) | ||
| 282 | * lksb: if NULL the lksb parameter passed to last lock request is used | ||
| 283 | * astarg: the arg used with the completion ast for the unlock | ||
| 284 | * | ||
| 285 | * Returns: | ||
| 286 | * 0 if request is successfully queued for processing | ||
| 287 | * -EINVAL if any input parameters are invalid | ||
| 288 | * -ENOTEMPTY if the lock still has sublocks | ||
| 289 | * -EBUSY if the lock is waiting for a remote lock operation | ||
| 290 | * -ENOTCONN if there is a communication error | ||
| 291 | */ | ||
| 292 | |||
| 293 | int dlm_unlock(dlm_lockspace_t *lockspace, | ||
| 294 | uint32_t lkid, | ||
| 295 | uint32_t flags, | ||
| 296 | struct dlm_lksb *lksb, | ||
| 297 | void *astarg); | ||
| 298 | |||
| 299 | #endif /* __KERNEL__ */ | ||
| 300 | |||
| 301 | #endif /* __DLM_DOT_H__ */ | ||
| 302 | |||
diff --git a/include/linux/dlm_device.h b/include/linux/dlm_device.h new file mode 100644 index 000000000000..2a2dd189b9fd --- /dev/null +++ b/include/linux/dlm_device.h | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | /* This is the device interface for dlm, most users will use a library | ||
| 15 | * interface. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #define DLM_USER_LVB_LEN 32 | ||
| 19 | |||
| 20 | /* Version of the device interface */ | ||
| 21 | #define DLM_DEVICE_VERSION_MAJOR 5 | ||
| 22 | #define DLM_DEVICE_VERSION_MINOR 0 | ||
| 23 | #define DLM_DEVICE_VERSION_PATCH 0 | ||
| 24 | |||
| 25 | /* struct passed to the lock write */ | ||
| 26 | struct dlm_lock_params { | ||
| 27 | __u8 mode; | ||
| 28 | __u8 namelen; | ||
| 29 | __u16 flags; | ||
| 30 | __u32 lkid; | ||
| 31 | __u32 parent; | ||
| 32 | void __user *castparam; | ||
| 33 | void __user *castaddr; | ||
| 34 | void __user *bastparam; | ||
| 35 | void __user *bastaddr; | ||
| 36 | struct dlm_lksb __user *lksb; | ||
| 37 | char lvb[DLM_USER_LVB_LEN]; | ||
| 38 | char name[0]; | ||
| 39 | }; | ||
| 40 | |||
| 41 | struct dlm_lspace_params { | ||
| 42 | __u32 flags; | ||
| 43 | __u32 minor; | ||
| 44 | char name[0]; | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct dlm_write_request { | ||
| 48 | __u32 version[3]; | ||
| 49 | __u8 cmd; | ||
| 50 | __u8 is64bit; | ||
| 51 | __u8 unused[2]; | ||
| 52 | |||
| 53 | union { | ||
| 54 | struct dlm_lock_params lock; | ||
| 55 | struct dlm_lspace_params lspace; | ||
| 56 | } i; | ||
| 57 | }; | ||
| 58 | |||
| 59 | /* struct read from the "device" fd, | ||
| 60 | consists mainly of userspace pointers for the library to use */ | ||
| 61 | struct dlm_lock_result { | ||
| 62 | __u32 length; | ||
| 63 | void __user * user_astaddr; | ||
| 64 | void __user * user_astparam; | ||
| 65 | struct dlm_lksb __user * user_lksb; | ||
| 66 | struct dlm_lksb lksb; | ||
| 67 | __u8 bast_mode; | ||
| 68 | __u8 unused[3]; | ||
| 69 | /* Offsets may be zero if no data is present */ | ||
| 70 | __u32 lvb_offset; | ||
| 71 | }; | ||
| 72 | |||
| 73 | /* Commands passed to the device */ | ||
| 74 | #define DLM_USER_LOCK 1 | ||
| 75 | #define DLM_USER_UNLOCK 2 | ||
| 76 | #define DLM_USER_QUERY 3 | ||
| 77 | #define DLM_USER_CREATE_LOCKSPACE 4 | ||
| 78 | #define DLM_USER_REMOVE_LOCKSPACE 5 | ||
| 79 | |||
| 80 | /* Arbitrary length restriction */ | ||
| 81 | #define MAX_LS_NAME_LEN 64 | ||
| 82 | |||
| 83 | /* Lockspace flags */ | ||
| 84 | #define DLM_USER_LSFLG_AUTOFREE 1 | ||
| 85 | #define DLM_USER_LSFLG_FORCEFREE 2 | ||
| 86 | |||
diff --git a/include/linux/fs.h b/include/linux/fs.h index f53bf4ff1955..34406ed467c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -250,6 +250,8 @@ extern int dir_notify_enable; | |||
| 250 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ | 250 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ |
| 251 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ | 251 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ |
| 252 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 252 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
| 253 | #define FS_EXTENT_FL 0x00080000 /* Extents */ | ||
| 254 | #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ | ||
| 253 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ | 255 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ |
| 254 | 256 | ||
| 255 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ | 257 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ |
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h new file mode 100644 index 000000000000..a7ae7c177cac --- /dev/null +++ b/include/linux/gfs2_ondisk.h | |||
| @@ -0,0 +1,443 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License v.2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __GFS2_ONDISK_DOT_H__ | ||
| 11 | #define __GFS2_ONDISK_DOT_H__ | ||
| 12 | |||
| 13 | #define GFS2_MAGIC 0x01161970 | ||
| 14 | #define GFS2_BASIC_BLOCK 512 | ||
| 15 | #define GFS2_BASIC_BLOCK_SHIFT 9 | ||
| 16 | |||
| 17 | /* Lock numbers of the LM_TYPE_NONDISK type */ | ||
| 18 | |||
| 19 | #define GFS2_MOUNT_LOCK 0 | ||
| 20 | #define GFS2_LIVE_LOCK 1 | ||
| 21 | #define GFS2_TRANS_LOCK 2 | ||
| 22 | #define GFS2_RENAME_LOCK 3 | ||
| 23 | |||
| 24 | /* Format numbers for various metadata types */ | ||
| 25 | |||
| 26 | #define GFS2_FORMAT_NONE 0 | ||
| 27 | #define GFS2_FORMAT_SB 100 | ||
| 28 | #define GFS2_FORMAT_RG 200 | ||
| 29 | #define GFS2_FORMAT_RB 300 | ||
| 30 | #define GFS2_FORMAT_DI 400 | ||
| 31 | #define GFS2_FORMAT_IN 500 | ||
| 32 | #define GFS2_FORMAT_LF 600 | ||
| 33 | #define GFS2_FORMAT_JD 700 | ||
| 34 | #define GFS2_FORMAT_LH 800 | ||
| 35 | #define GFS2_FORMAT_LD 900 | ||
| 36 | #define GFS2_FORMAT_LB 1000 | ||
| 37 | #define GFS2_FORMAT_EA 1600 | ||
| 38 | #define GFS2_FORMAT_ED 1700 | ||
| 39 | #define GFS2_FORMAT_QC 1400 | ||
| 40 | /* These are format numbers for entities contained in files */ | ||
| 41 | #define GFS2_FORMAT_RI 1100 | ||
| 42 | #define GFS2_FORMAT_DE 1200 | ||
| 43 | #define GFS2_FORMAT_QU 1500 | ||
| 44 | /* These are part of the superblock */ | ||
| 45 | #define GFS2_FORMAT_FS 1801 | ||
| 46 | #define GFS2_FORMAT_MULTI 1900 | ||
| 47 | |||
| 48 | /* | ||
| 49 | * An on-disk inode number | ||
| 50 | */ | ||
| 51 | |||
| 52 | struct gfs2_inum { | ||
| 53 | __be64 no_formal_ino; | ||
| 54 | __be64 no_addr; | ||
| 55 | }; | ||
| 56 | |||
| 57 | static inline int gfs2_inum_equal(const struct gfs2_inum *ino1, | ||
| 58 | const struct gfs2_inum *ino2) | ||
| 59 | { | ||
| 60 | return ino1->no_formal_ino == ino2->no_formal_ino && | ||
| 61 | ino1->no_addr == ino2->no_addr; | ||
| 62 | } | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Generic metadata head structure | ||
| 66 | * Every inplace buffer logged in the journal must start with this. | ||
| 67 | */ | ||
| 68 | |||
| 69 | #define GFS2_METATYPE_NONE 0 | ||
| 70 | #define GFS2_METATYPE_SB 1 | ||
| 71 | #define GFS2_METATYPE_RG 2 | ||
| 72 | #define GFS2_METATYPE_RB 3 | ||
| 73 | #define GFS2_METATYPE_DI 4 | ||
| 74 | #define GFS2_METATYPE_IN 5 | ||
| 75 | #define GFS2_METATYPE_LF 6 | ||
| 76 | #define GFS2_METATYPE_JD 7 | ||
| 77 | #define GFS2_METATYPE_LH 8 | ||
| 78 | #define GFS2_METATYPE_LD 9 | ||
| 79 | #define GFS2_METATYPE_LB 12 | ||
| 80 | #define GFS2_METATYPE_EA 10 | ||
| 81 | #define GFS2_METATYPE_ED 11 | ||
| 82 | #define GFS2_METATYPE_QC 14 | ||
| 83 | |||
| 84 | struct gfs2_meta_header { | ||
| 85 | __be32 mh_magic; | ||
| 86 | __be32 mh_type; | ||
| 87 | __be64 __pad0; /* Was generation number in gfs1 */ | ||
| 88 | __be32 mh_format; | ||
| 89 | __be32 __pad1; /* Was incarnation number in gfs1 */ | ||
| 90 | }; | ||
| 91 | |||
| 92 | /* | ||
| 93 | * super-block structure | ||
| 94 | * | ||
| 95 | * It's probably good if SIZEOF_SB <= GFS2_BASIC_BLOCK (512 bytes) | ||
| 96 | * | ||
| 97 | * Order is important, need to be able to read old superblocks to do on-disk | ||
| 98 | * version upgrades. | ||
| 99 | */ | ||
| 100 | |||
| 101 | /* Address of superblock in GFS2 basic blocks */ | ||
| 102 | #define GFS2_SB_ADDR 128 | ||
| 103 | |||
| 104 | /* The lock number for the superblock (must be zero) */ | ||
| 105 | #define GFS2_SB_LOCK 0 | ||
| 106 | |||
| 107 | /* Requirement: GFS2_LOCKNAME_LEN % 8 == 0 | ||
| 108 | Includes: the fencing zero at the end */ | ||
| 109 | #define GFS2_LOCKNAME_LEN 64 | ||
| 110 | |||
| 111 | struct gfs2_sb { | ||
| 112 | struct gfs2_meta_header sb_header; | ||
| 113 | |||
| 114 | __be32 sb_fs_format; | ||
| 115 | __be32 sb_multihost_format; | ||
| 116 | __u32 __pad0; /* Was superblock flags in gfs1 */ | ||
| 117 | |||
| 118 | __be32 sb_bsize; | ||
| 119 | __be32 sb_bsize_shift; | ||
| 120 | __u32 __pad1; /* Was journal segment size in gfs1 */ | ||
| 121 | |||
| 122 | struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */ | ||
| 123 | struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */ | ||
| 124 | struct gfs2_inum sb_root_dir; | ||
| 125 | |||
| 126 | char sb_lockproto[GFS2_LOCKNAME_LEN]; | ||
| 127 | char sb_locktable[GFS2_LOCKNAME_LEN]; | ||
| 128 | /* In gfs1, quota and license dinodes followed */ | ||
| 129 | }; | ||
| 130 | |||
| 131 | /* | ||
| 132 | * resource index structure | ||
| 133 | */ | ||
| 134 | |||
| 135 | struct gfs2_rindex { | ||
| 136 | __be64 ri_addr; /* grp block disk address */ | ||
| 137 | __be32 ri_length; /* length of rgrp header in fs blocks */ | ||
| 138 | __u32 __pad; | ||
| 139 | |||
| 140 | __be64 ri_data0; /* first data location */ | ||
| 141 | __be32 ri_data; /* num of data blocks in rgrp */ | ||
| 142 | |||
| 143 | __be32 ri_bitbytes; /* number of bytes in data bitmaps */ | ||
| 144 | |||
| 145 | __u8 ri_reserved[64]; | ||
| 146 | }; | ||
| 147 | |||
| 148 | /* | ||
| 149 | * resource group header structure | ||
| 150 | */ | ||
| 151 | |||
| 152 | /* Number of blocks per byte in rgrp */ | ||
| 153 | #define GFS2_NBBY 4 | ||
| 154 | #define GFS2_BIT_SIZE 2 | ||
| 155 | #define GFS2_BIT_MASK 0x00000003 | ||
| 156 | |||
| 157 | #define GFS2_BLKST_FREE 0 | ||
| 158 | #define GFS2_BLKST_USED 1 | ||
| 159 | #define GFS2_BLKST_UNLINKED 2 | ||
| 160 | #define GFS2_BLKST_DINODE 3 | ||
| 161 | |||
| 162 | #define GFS2_RGF_JOURNAL 0x00000001 | ||
| 163 | #define GFS2_RGF_METAONLY 0x00000002 | ||
| 164 | #define GFS2_RGF_DATAONLY 0x00000004 | ||
| 165 | #define GFS2_RGF_NOALLOC 0x00000008 | ||
| 166 | |||
| 167 | struct gfs2_rgrp { | ||
| 168 | struct gfs2_meta_header rg_header; | ||
| 169 | |||
| 170 | __be32 rg_flags; | ||
| 171 | __be32 rg_free; | ||
| 172 | __be32 rg_dinodes; | ||
| 173 | __be32 __pad; | ||
| 174 | __be64 rg_igeneration; | ||
| 175 | |||
| 176 | __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ | ||
| 177 | }; | ||
| 178 | |||
| 179 | /* | ||
| 180 | * quota structure | ||
| 181 | */ | ||
| 182 | |||
| 183 | struct gfs2_quota { | ||
| 184 | __be64 qu_limit; | ||
| 185 | __be64 qu_warn; | ||
| 186 | __be64 qu_value; | ||
| 187 | __u8 qu_reserved[64]; | ||
| 188 | }; | ||
| 189 | |||
| 190 | /* | ||
| 191 | * dinode structure | ||
| 192 | */ | ||
| 193 | |||
| 194 | #define GFS2_MAX_META_HEIGHT 10 | ||
| 195 | #define GFS2_DIR_MAX_DEPTH 17 | ||
| 196 | |||
| 197 | #define DT2IF(dt) (((dt) << 12) & S_IFMT) | ||
| 198 | #define IF2DT(sif) (((sif) & S_IFMT) >> 12) | ||
| 199 | |||
| 200 | enum { | ||
| 201 | gfs2fl_Jdata = 0, | ||
| 202 | gfs2fl_ExHash = 1, | ||
| 203 | gfs2fl_Unused = 2, | ||
| 204 | gfs2fl_EaIndirect = 3, | ||
| 205 | gfs2fl_Directio = 4, | ||
| 206 | gfs2fl_Immutable = 5, | ||
| 207 | gfs2fl_AppendOnly = 6, | ||
| 208 | gfs2fl_NoAtime = 7, | ||
| 209 | gfs2fl_Sync = 8, | ||
| 210 | gfs2fl_System = 9, | ||
| 211 | gfs2fl_TruncInProg = 29, | ||
| 212 | gfs2fl_InheritDirectio = 30, | ||
| 213 | gfs2fl_InheritJdata = 31, | ||
| 214 | }; | ||
| 215 | |||
| 216 | /* Dinode flags */ | ||
| 217 | #define GFS2_DIF_JDATA 0x00000001 | ||
| 218 | #define GFS2_DIF_EXHASH 0x00000002 | ||
| 219 | #define GFS2_DIF_UNUSED 0x00000004 /* only in gfs1 */ | ||
| 220 | #define GFS2_DIF_EA_INDIRECT 0x00000008 | ||
| 221 | #define GFS2_DIF_DIRECTIO 0x00000010 | ||
| 222 | #define GFS2_DIF_IMMUTABLE 0x00000020 | ||
| 223 | #define GFS2_DIF_APPENDONLY 0x00000040 | ||
| 224 | #define GFS2_DIF_NOATIME 0x00000080 | ||
| 225 | #define GFS2_DIF_SYNC 0x00000100 | ||
| 226 | #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */ | ||
| 227 | #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */ | ||
| 228 | #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 | ||
| 229 | #define GFS2_DIF_INHERIT_JDATA 0x80000000 | ||
| 230 | |||
| 231 | struct gfs2_dinode { | ||
| 232 | struct gfs2_meta_header di_header; | ||
| 233 | |||
| 234 | struct gfs2_inum di_num; | ||
| 235 | |||
| 236 | __be32 di_mode; /* mode of file */ | ||
| 237 | __be32 di_uid; /* owner's user id */ | ||
| 238 | __be32 di_gid; /* owner's group id */ | ||
| 239 | __be32 di_nlink; /* number of links to this file */ | ||
| 240 | __be64 di_size; /* number of bytes in file */ | ||
| 241 | __be64 di_blocks; /* number of blocks in file */ | ||
| 242 | __be64 di_atime; /* time last accessed */ | ||
| 243 | __be64 di_mtime; /* time last modified */ | ||
| 244 | __be64 di_ctime; /* time last changed */ | ||
| 245 | __be32 di_major; /* device major number */ | ||
| 246 | __be32 di_minor; /* device minor number */ | ||
| 247 | |||
| 248 | /* This section varies from gfs1. Padding added to align with | ||
| 249 | * remainder of dinode | ||
| 250 | */ | ||
| 251 | __be64 di_goal_meta; /* rgrp to alloc from next */ | ||
| 252 | __be64 di_goal_data; /* data block goal */ | ||
| 253 | __be64 di_generation; /* generation number for NFS */ | ||
| 254 | |||
| 255 | __be32 di_flags; /* GFS2_DIF_... */ | ||
| 256 | __be32 di_payload_format; /* GFS2_FORMAT_... */ | ||
| 257 | __u16 __pad1; /* Was ditype in gfs1 */ | ||
| 258 | __be16 di_height; /* height of metadata */ | ||
| 259 | __u32 __pad2; /* Unused incarnation number from gfs1 */ | ||
| 260 | |||
| 261 | /* These only apply to directories */ | ||
| 262 | __u16 __pad3; /* Padding */ | ||
| 263 | __be16 di_depth; /* Number of bits in the table */ | ||
| 264 | __be32 di_entries; /* The number of entries in the directory */ | ||
| 265 | |||
| 266 | struct gfs2_inum __pad4; /* Unused even in current gfs1 */ | ||
| 267 | |||
| 268 | __be64 di_eattr; /* extended attribute block number */ | ||
| 269 | |||
| 270 | __u8 di_reserved[56]; | ||
| 271 | }; | ||
| 272 | |||
| 273 | /* | ||
| 274 | * directory structure - many of these per directory file | ||
| 275 | */ | ||
| 276 | |||
| 277 | #define GFS2_FNAMESIZE 255 | ||
| 278 | #define GFS2_DIRENT_SIZE(name_len) ((sizeof(struct gfs2_dirent) + (name_len) + 7) & ~7) | ||
| 279 | |||
| 280 | struct gfs2_dirent { | ||
| 281 | struct gfs2_inum de_inum; | ||
| 282 | __be32 de_hash; | ||
| 283 | __be16 de_rec_len; | ||
| 284 | __be16 de_name_len; | ||
| 285 | __be16 de_type; | ||
| 286 | __u8 __pad[14]; | ||
| 287 | }; | ||
| 288 | |||
| 289 | /* | ||
| 290 | * Header of leaf directory nodes | ||
| 291 | */ | ||
| 292 | |||
| 293 | struct gfs2_leaf { | ||
| 294 | struct gfs2_meta_header lf_header; | ||
| 295 | |||
| 296 | __be16 lf_depth; /* Depth of leaf */ | ||
| 297 | __be16 lf_entries; /* Number of dirents in leaf */ | ||
| 298 | __be32 lf_dirent_format; /* Format of the dirents */ | ||
| 299 | __be64 lf_next; /* Next leaf, if overflow */ | ||
| 300 | |||
| 301 | __u8 lf_reserved[64]; | ||
| 302 | }; | ||
| 303 | |||
| 304 | /* | ||
| 305 | * Extended attribute header format | ||
| 306 | */ | ||
| 307 | |||
| 308 | #define GFS2_EA_MAX_NAME_LEN 255 | ||
| 309 | #define GFS2_EA_MAX_DATA_LEN 65536 | ||
| 310 | |||
| 311 | #define GFS2_EATYPE_UNUSED 0 | ||
| 312 | #define GFS2_EATYPE_USR 1 | ||
| 313 | #define GFS2_EATYPE_SYS 2 | ||
| 314 | #define GFS2_EATYPE_SECURITY 3 | ||
| 315 | |||
| 316 | #define GFS2_EATYPE_LAST 3 | ||
| 317 | #define GFS2_EATYPE_VALID(x) ((x) <= GFS2_EATYPE_LAST) | ||
| 318 | |||
| 319 | #define GFS2_EAFLAG_LAST 0x01 /* last ea in block */ | ||
| 320 | |||
| 321 | struct gfs2_ea_header { | ||
| 322 | __be32 ea_rec_len; | ||
| 323 | __be32 ea_data_len; | ||
| 324 | __u8 ea_name_len; /* no NULL pointer after the string */ | ||
| 325 | __u8 ea_type; /* GFS2_EATYPE_... */ | ||
| 326 | __u8 ea_flags; /* GFS2_EAFLAG_... */ | ||
| 327 | __u8 ea_num_ptrs; | ||
| 328 | __u32 __pad; | ||
| 329 | }; | ||
| 330 | |||
| 331 | /* | ||
| 332 | * Log header structure | ||
| 333 | */ | ||
| 334 | |||
| 335 | #define GFS2_LOG_HEAD_UNMOUNT 0x00000001 /* log is clean */ | ||
| 336 | |||
| 337 | struct gfs2_log_header { | ||
| 338 | struct gfs2_meta_header lh_header; | ||
| 339 | |||
| 340 | __be64 lh_sequence; /* Sequence number of this transaction */ | ||
| 341 | __be32 lh_flags; /* GFS2_LOG_HEAD_... */ | ||
| 342 | __be32 lh_tail; /* Block number of log tail */ | ||
| 343 | __be32 lh_blkno; | ||
| 344 | __be32 lh_hash; | ||
| 345 | }; | ||
| 346 | |||
| 347 | /* | ||
| 348 | * Log type descriptor | ||
| 349 | */ | ||
| 350 | |||
| 351 | #define GFS2_LOG_DESC_METADATA 300 | ||
| 352 | /* ld_data1 is the number of metadata blocks in the descriptor. | ||
| 353 | ld_data2 is unused. */ | ||
| 354 | |||
| 355 | #define GFS2_LOG_DESC_REVOKE 301 | ||
| 356 | /* ld_data1 is the number of revoke blocks in the descriptor. | ||
| 357 | ld_data2 is unused. */ | ||
| 358 | |||
| 359 | #define GFS2_LOG_DESC_JDATA 302 | ||
| 360 | /* ld_data1 is the number of data blocks in the descriptor. | ||
| 361 | ld_data2 is unused. */ | ||
| 362 | |||
| 363 | struct gfs2_log_descriptor { | ||
| 364 | struct gfs2_meta_header ld_header; | ||
| 365 | |||
| 366 | __be32 ld_type; /* GFS2_LOG_DESC_... */ | ||
| 367 | __be32 ld_length; /* Number of buffers in this chunk */ | ||
| 368 | __be32 ld_data1; /* descriptor-specific field */ | ||
| 369 | __be32 ld_data2; /* descriptor-specific field */ | ||
| 370 | |||
| 371 | __u8 ld_reserved[32]; | ||
| 372 | }; | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Inum Range | ||
| 376 | * Describe a range of formal inode numbers allocated to | ||
| 377 | * one machine to assign to inodes. | ||
| 378 | */ | ||
| 379 | |||
| 380 | #define GFS2_INUM_QUANTUM 1048576 | ||
| 381 | |||
| 382 | struct gfs2_inum_range { | ||
| 383 | __be64 ir_start; | ||
| 384 | __be64 ir_length; | ||
| 385 | }; | ||
| 386 | |||
| 387 | /* | ||
| 388 | * Statfs change | ||
| 389 | * Describes an change to the pool of free and allocated | ||
| 390 | * blocks. | ||
| 391 | */ | ||
| 392 | |||
| 393 | struct gfs2_statfs_change { | ||
| 394 | __be64 sc_total; | ||
| 395 | __be64 sc_free; | ||
| 396 | __be64 sc_dinodes; | ||
| 397 | }; | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Quota change | ||
| 401 | * Describes an allocation change for a particular | ||
| 402 | * user or group. | ||
| 403 | */ | ||
| 404 | |||
| 405 | #define GFS2_QCF_USER 0x00000001 | ||
| 406 | |||
| 407 | struct gfs2_quota_change { | ||
| 408 | __be64 qc_change; | ||
| 409 | __be32 qc_flags; /* GFS2_QCF_... */ | ||
| 410 | __be32 qc_id; | ||
| 411 | }; | ||
| 412 | |||
| 413 | #ifdef __KERNEL__ | ||
| 414 | /* Translation functions */ | ||
| 415 | |||
| 416 | extern void gfs2_inum_in(struct gfs2_inum *no, const void *buf); | ||
| 417 | extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf); | ||
| 418 | extern void gfs2_sb_in(struct gfs2_sb *sb, const void *buf); | ||
| 419 | extern void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf); | ||
| 420 | extern void gfs2_rindex_out(const struct gfs2_rindex *ri, void *buf); | ||
| 421 | extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf); | ||
| 422 | extern void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf); | ||
| 423 | extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf); | ||
| 424 | extern void gfs2_quota_out(const struct gfs2_quota *qu, void *buf); | ||
| 425 | extern void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf); | ||
| 426 | extern void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf); | ||
| 427 | extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); | ||
| 428 | extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); | ||
| 429 | extern void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf); | ||
| 430 | extern void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf); | ||
| 431 | extern void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf); | ||
| 432 | extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf); | ||
| 433 | extern void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf); | ||
| 434 | extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); | ||
| 435 | |||
| 436 | /* Printing functions */ | ||
| 437 | |||
| 438 | extern void gfs2_rindex_print(const struct gfs2_rindex *ri); | ||
| 439 | extern void gfs2_dinode_print(const struct gfs2_dinode *di); | ||
| 440 | |||
| 441 | #endif /* __KERNEL__ */ | ||
| 442 | |||
| 443 | #endif /* __GFS2_ONDISK_DOT_H__ */ | ||
diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h new file mode 100644 index 000000000000..1418fdc9ac02 --- /dev/null +++ b/include/linux/lm_interface.h | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __LM_INTERFACE_DOT_H__ | ||
| 11 | #define __LM_INTERFACE_DOT_H__ | ||
| 12 | |||
| 13 | |||
| 14 | typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); | ||
| 15 | |||
| 16 | /* | ||
| 17 | * lm_mount() flags | ||
| 18 | * | ||
| 19 | * LM_MFLAG_SPECTATOR | ||
| 20 | * GFS is asking to join the filesystem's lockspace, but it doesn't want to | ||
| 21 | * modify the filesystem. The lock module shouldn't assign a journal to the FS | ||
| 22 | * mount. It shouldn't send recovery callbacks to the FS mount. If the node | ||
| 23 | * dies or withdraws, all locks can be wiped immediately. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #define LM_MFLAG_SPECTATOR 0x00000001 | ||
| 27 | |||
| 28 | /* | ||
| 29 | * lm_lockstruct flags | ||
| 30 | * | ||
| 31 | * LM_LSFLAG_LOCAL | ||
| 32 | * The lock_nolock module returns LM_LSFLAG_LOCAL to GFS, indicating that GFS | ||
| 33 | * can make single-node optimizations. | ||
| 34 | */ | ||
| 35 | |||
| 36 | #define LM_LSFLAG_LOCAL 0x00000001 | ||
| 37 | |||
| 38 | /* | ||
| 39 | * lm_lockname types | ||
| 40 | */ | ||
| 41 | |||
| 42 | #define LM_TYPE_RESERVED 0x00 | ||
| 43 | #define LM_TYPE_NONDISK 0x01 | ||
| 44 | #define LM_TYPE_INODE 0x02 | ||
| 45 | #define LM_TYPE_RGRP 0x03 | ||
| 46 | #define LM_TYPE_META 0x04 | ||
| 47 | #define LM_TYPE_IOPEN 0x05 | ||
| 48 | #define LM_TYPE_FLOCK 0x06 | ||
| 49 | #define LM_TYPE_PLOCK 0x07 | ||
| 50 | #define LM_TYPE_QUOTA 0x08 | ||
| 51 | #define LM_TYPE_JOURNAL 0x09 | ||
| 52 | |||
| 53 | /* | ||
| 54 | * lm_lock() states | ||
| 55 | * | ||
| 56 | * SHARED is compatible with SHARED, not with DEFERRED or EX. | ||
| 57 | * DEFERRED is compatible with DEFERRED, not with SHARED or EX. | ||
| 58 | */ | ||
| 59 | |||
| 60 | #define LM_ST_UNLOCKED 0 | ||
| 61 | #define LM_ST_EXCLUSIVE 1 | ||
| 62 | #define LM_ST_DEFERRED 2 | ||
| 63 | #define LM_ST_SHARED 3 | ||
| 64 | |||
| 65 | /* | ||
| 66 | * lm_lock() flags | ||
| 67 | * | ||
| 68 | * LM_FLAG_TRY | ||
| 69 | * Don't wait to acquire the lock if it can't be granted immediately. | ||
| 70 | * | ||
| 71 | * LM_FLAG_TRY_1CB | ||
| 72 | * Send one blocking callback if TRY is set and the lock is not granted. | ||
| 73 | * | ||
| 74 | * LM_FLAG_NOEXP | ||
| 75 | * GFS sets this flag on lock requests it makes while doing journal recovery. | ||
| 76 | * These special requests should not be blocked due to the recovery like | ||
| 77 | * ordinary locks would be. | ||
| 78 | * | ||
| 79 | * LM_FLAG_ANY | ||
| 80 | * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may | ||
| 81 | * also be granted in SHARED. The preferred state is whichever is compatible | ||
| 82 | * with other granted locks, or the specified state if no other locks exist. | ||
| 83 | * | ||
| 84 | * LM_FLAG_PRIORITY | ||
| 85 | * Override fairness considerations. Suppose a lock is held in a shared state | ||
| 86 | * and there is a pending request for the deferred state. A shared lock | ||
| 87 | * request with the priority flag would be allowed to bypass the deferred | ||
| 88 | * request and directly join the other shared lock. A shared lock request | ||
| 89 | * without the priority flag might be forced to wait until the deferred | ||
| 90 | * requested had acquired and released the lock. | ||
| 91 | */ | ||
| 92 | |||
| 93 | #define LM_FLAG_TRY 0x00000001 | ||
| 94 | #define LM_FLAG_TRY_1CB 0x00000002 | ||
| 95 | #define LM_FLAG_NOEXP 0x00000004 | ||
| 96 | #define LM_FLAG_ANY 0x00000008 | ||
| 97 | #define LM_FLAG_PRIORITY 0x00000010 | ||
| 98 | |||
| 99 | /* | ||
| 100 | * lm_lock() and lm_async_cb return flags | ||
| 101 | * | ||
| 102 | * LM_OUT_ST_MASK | ||
| 103 | * Masks the lower two bits of lock state in the returned value. | ||
| 104 | * | ||
| 105 | * LM_OUT_CACHEABLE | ||
| 106 | * The lock hasn't been released so GFS can continue to cache data for it. | ||
| 107 | * | ||
| 108 | * LM_OUT_CANCELED | ||
| 109 | * The lock request was canceled. | ||
| 110 | * | ||
| 111 | * LM_OUT_ASYNC | ||
| 112 | * The result of the request will be returned in an LM_CB_ASYNC callback. | ||
| 113 | */ | ||
| 114 | |||
| 115 | #define LM_OUT_ST_MASK 0x00000003 | ||
| 116 | #define LM_OUT_CACHEABLE 0x00000004 | ||
| 117 | #define LM_OUT_CANCELED 0x00000008 | ||
| 118 | #define LM_OUT_ASYNC 0x00000080 | ||
| 119 | #define LM_OUT_ERROR 0x00000100 | ||
| 120 | |||
| 121 | /* | ||
| 122 | * lm_callback_t types | ||
| 123 | * | ||
| 124 | * LM_CB_NEED_E LM_CB_NEED_D LM_CB_NEED_S | ||
| 125 | * Blocking callback, a remote node is requesting the given lock in | ||
| 126 | * EXCLUSIVE, DEFERRED, or SHARED. | ||
| 127 | * | ||
| 128 | * LM_CB_NEED_RECOVERY | ||
| 129 | * The given journal needs to be recovered. | ||
| 130 | * | ||
| 131 | * LM_CB_DROPLOCKS | ||
| 132 | * Reduce the number of cached locks. | ||
| 133 | * | ||
| 134 | * LM_CB_ASYNC | ||
| 135 | * The given lock has been granted. | ||
| 136 | */ | ||
| 137 | |||
| 138 | #define LM_CB_NEED_E 257 | ||
| 139 | #define LM_CB_NEED_D 258 | ||
| 140 | #define LM_CB_NEED_S 259 | ||
| 141 | #define LM_CB_NEED_RECOVERY 260 | ||
| 142 | #define LM_CB_DROPLOCKS 261 | ||
| 143 | #define LM_CB_ASYNC 262 | ||
| 144 | |||
| 145 | /* | ||
| 146 | * lm_recovery_done() messages | ||
| 147 | */ | ||
| 148 | |||
| 149 | #define LM_RD_GAVEUP 308 | ||
| 150 | #define LM_RD_SUCCESS 309 | ||
| 151 | |||
| 152 | |||
| 153 | struct lm_lockname { | ||
| 154 | u64 ln_number; | ||
| 155 | unsigned int ln_type; | ||
| 156 | }; | ||
| 157 | |||
| 158 | #define lm_name_equal(name1, name2) \ | ||
| 159 | (((name1)->ln_number == (name2)->ln_number) && \ | ||
| 160 | ((name1)->ln_type == (name2)->ln_type)) \ | ||
| 161 | |||
| 162 | struct lm_async_cb { | ||
| 163 | struct lm_lockname lc_name; | ||
| 164 | int lc_ret; | ||
| 165 | }; | ||
| 166 | |||
| 167 | struct lm_lockstruct; | ||
| 168 | |||
| 169 | struct lm_lockops { | ||
| 170 | const char *lm_proto_name; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Mount/Unmount | ||
| 174 | */ | ||
| 175 | |||
| 176 | int (*lm_mount) (char *table_name, char *host_data, | ||
| 177 | lm_callback_t cb, void *cb_data, | ||
| 178 | unsigned int min_lvb_size, int flags, | ||
| 179 | struct lm_lockstruct *lockstruct, | ||
| 180 | struct kobject *fskobj); | ||
| 181 | |||
| 182 | void (*lm_others_may_mount) (void *lockspace); | ||
| 183 | |||
| 184 | void (*lm_unmount) (void *lockspace); | ||
| 185 | |||
| 186 | void (*lm_withdraw) (void *lockspace); | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Lock oriented operations | ||
| 190 | */ | ||
| 191 | |||
| 192 | int (*lm_get_lock) (void *lockspace, struct lm_lockname *name, void **lockp); | ||
| 193 | |||
| 194 | void (*lm_put_lock) (void *lock); | ||
| 195 | |||
| 196 | unsigned int (*lm_lock) (void *lock, unsigned int cur_state, | ||
| 197 | unsigned int req_state, unsigned int flags); | ||
| 198 | |||
| 199 | unsigned int (*lm_unlock) (void *lock, unsigned int cur_state); | ||
| 200 | |||
| 201 | void (*lm_cancel) (void *lock); | ||
| 202 | |||
| 203 | int (*lm_hold_lvb) (void *lock, char **lvbp); | ||
| 204 | void (*lm_unhold_lvb) (void *lock, char *lvb); | ||
| 205 | |||
| 206 | /* | ||
| 207 | * Posix Lock oriented operations | ||
| 208 | */ | ||
| 209 | |||
| 210 | int (*lm_plock_get) (void *lockspace, struct lm_lockname *name, | ||
| 211 | struct file *file, struct file_lock *fl); | ||
| 212 | |||
| 213 | int (*lm_plock) (void *lockspace, struct lm_lockname *name, | ||
| 214 | struct file *file, int cmd, struct file_lock *fl); | ||
| 215 | |||
| 216 | int (*lm_punlock) (void *lockspace, struct lm_lockname *name, | ||
| 217 | struct file *file, struct file_lock *fl); | ||
| 218 | |||
| 219 | /* | ||
| 220 | * Client oriented operations | ||
| 221 | */ | ||
| 222 | |||
| 223 | void (*lm_recovery_done) (void *lockspace, unsigned int jid, | ||
| 224 | unsigned int message); | ||
| 225 | |||
| 226 | struct module *lm_owner; | ||
| 227 | }; | ||
| 228 | |||
| 229 | /* | ||
| 230 | * lm_mount() return values | ||
| 231 | * | ||
| 232 | * ls_jid - the journal ID this node should use | ||
| 233 | * ls_first - this node is the first to mount the file system | ||
| 234 | * ls_lvb_size - size in bytes of lock value blocks | ||
| 235 | * ls_lockspace - lock module's context for this file system | ||
| 236 | * ls_ops - lock module's functions | ||
| 237 | * ls_flags - lock module features | ||
| 238 | */ | ||
| 239 | |||
| 240 | struct lm_lockstruct { | ||
| 241 | unsigned int ls_jid; | ||
| 242 | unsigned int ls_first; | ||
| 243 | unsigned int ls_lvb_size; | ||
| 244 | void *ls_lockspace; | ||
| 245 | const struct lm_lockops *ls_ops; | ||
| 246 | int ls_flags; | ||
| 247 | }; | ||
| 248 | |||
| 249 | /* | ||
| 250 | * Lock module bottom interface. A lock module makes itself available to GFS | ||
| 251 | * with these functions. | ||
| 252 | */ | ||
| 253 | |||
| 254 | int gfs2_register_lockproto(const struct lm_lockops *proto); | ||
| 255 | void gfs2_unregister_lockproto(const struct lm_lockops *proto); | ||
| 256 | |||
| 257 | /* | ||
| 258 | * Lock module top interface. GFS calls these functions when mounting or | ||
| 259 | * unmounting a file system. | ||
| 260 | */ | ||
| 261 | |||
| 262 | int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data, | ||
| 263 | lm_callback_t cb, void *cb_data, | ||
| 264 | unsigned int min_lvb_size, int flags, | ||
| 265 | struct lm_lockstruct *lockstruct, | ||
| 266 | struct kobject *fskobj); | ||
| 267 | |||
| 268 | void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct); | ||
| 269 | |||
| 270 | void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct); | ||
| 271 | |||
| 272 | #endif /* __LM_INTERFACE_DOT_H__ */ | ||
| 273 | |||
diff --git a/include/linux/lock_dlm_plock.h b/include/linux/lock_dlm_plock.h new file mode 100644 index 000000000000..fc3415113973 --- /dev/null +++ b/include/linux/lock_dlm_plock.h | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This copyrighted material is made available to anyone wishing to use, | ||
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 6 | * of the GNU General Public License v.2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #ifndef __LOCK_DLM_PLOCK_DOT_H__ | ||
| 10 | #define __LOCK_DLM_PLOCK_DOT_H__ | ||
| 11 | |||
| 12 | #define GDLM_PLOCK_MISC_NAME "lock_dlm_plock" | ||
| 13 | |||
| 14 | #define GDLM_PLOCK_VERSION_MAJOR 1 | ||
| 15 | #define GDLM_PLOCK_VERSION_MINOR 1 | ||
| 16 | #define GDLM_PLOCK_VERSION_PATCH 0 | ||
| 17 | |||
| 18 | enum { | ||
| 19 | GDLM_PLOCK_OP_LOCK = 1, | ||
| 20 | GDLM_PLOCK_OP_UNLOCK, | ||
| 21 | GDLM_PLOCK_OP_GET, | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct gdlm_plock_info { | ||
| 25 | __u32 version[3]; | ||
| 26 | __u8 optype; | ||
| 27 | __u8 ex; | ||
| 28 | __u8 wait; | ||
| 29 | __u8 pad; | ||
| 30 | __u32 pid; | ||
| 31 | __s32 nodeid; | ||
| 32 | __s32 rv; | ||
| 33 | __u32 fsid; | ||
| 34 | __u64 number; | ||
| 35 | __u64 start; | ||
| 36 | __u64 end; | ||
| 37 | __u64 owner; | ||
| 38 | }; | ||
| 39 | |||
| 40 | #endif | ||
| 41 | |||
diff --git a/mm/filemap.c b/mm/filemap.c index f789500406fe..3464b681f844 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -1198,8 +1198,10 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 1198 | if (retval > 0) | 1198 | if (retval > 0) |
| 1199 | *ppos = pos + retval; | 1199 | *ppos = pos + retval; |
| 1200 | } | 1200 | } |
| 1201 | file_accessed(filp); | 1201 | if (likely(retval != 0)) { |
| 1202 | goto out; | 1202 | file_accessed(filp); |
| 1203 | goto out; | ||
| 1204 | } | ||
| 1203 | } | 1205 | } |
| 1204 | 1206 | ||
| 1205 | retval = 0; | 1207 | retval = 0; |
diff --git a/mm/readahead.c b/mm/readahead.c index aa7ec424656a..1ba736ac0367 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
| @@ -38,6 +38,7 @@ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) | |||
| 38 | ra->ra_pages = mapping->backing_dev_info->ra_pages; | 38 | ra->ra_pages = mapping->backing_dev_info->ra_pages; |
| 39 | ra->prev_page = -1; | 39 | ra->prev_page = -1; |
| 40 | } | 40 | } |
| 41 | EXPORT_SYMBOL_GPL(file_ra_state_init); | ||
| 41 | 42 | ||
| 42 | /* | 43 | /* |
| 43 | * Return max readahead size for this inode in number-of-pages. | 44 | * Return max readahead size for this inode in number-of-pages. |
