diff options
author | David Teigland <teigland@redhat.com> | 2006-01-18 04:30:29 -0500 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-01-18 04:30:29 -0500 |
commit | e7fd41792fc0ee52a05fcaac87511f118328d147 (patch) | |
tree | eee5227088ba97daef795e385b7548d2a1cc4cb6 | |
parent | e47314207032cfd1157b8c377df162839b32ea6f (diff) |
[DLM] The core of the DLM for GFS2/CLVM
This is the core of the distributed lock manager which is required
to use GFS2 as a cluster filesystem. It is also used by CLVM and
can be used as a standalone lock manager independantly of either
of these two projects.
It implements VAX-style locking modes.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steve Whitehouse <swhiteho@redhat.com>
39 files changed, 12178 insertions, 0 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index bec8afa5704d..3a32f3f97e9b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -1831,6 +1831,7 @@ source "fs/partitions/Kconfig" | |||
1831 | endmenu | 1831 | endmenu |
1832 | 1832 | ||
1833 | source "fs/nls/Kconfig" | 1833 | source "fs/nls/Kconfig" |
1834 | source "fs/dlm/Kconfig" | ||
1834 | 1835 | ||
1835 | endmenu | 1836 | endmenu |
1836 | 1837 | ||
diff --git a/fs/Makefile b/fs/Makefile index 0922727732c5..b298f4fdc6f2 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -48,6 +48,7 @@ obj-$(CONFIG_SYSFS) += sysfs/ | |||
48 | obj-y += devpts/ | 48 | obj-y += devpts/ |
49 | 49 | ||
50 | obj-$(CONFIG_PROFILING) += dcookies.o | 50 | obj-$(CONFIG_PROFILING) += dcookies.o |
51 | obj-$(CONFIG_DLM) += dlm/ | ||
51 | 52 | ||
52 | # Do not add any filesystems before this line | 53 | # Do not add any filesystems before this line |
53 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 54 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig new file mode 100644 index 000000000000..d01f735e6e06 --- /dev/null +++ b/fs/dlm/Kconfig | |||
@@ -0,0 +1,30 @@ | |||
1 | menu "Distributed Lock Manager" | ||
2 | depends on INET && EXPERIMENTAL | ||
3 | |||
4 | config DLM | ||
5 | tristate "Distributed Lock Manager (DLM)" | ||
6 | depends on SYSFS | ||
7 | depends on IPV6 || IPV6=n | ||
8 | select IP_SCTP | ||
9 | select CONFIGFS_FS | ||
10 | help | ||
11 | A general purpose distributed lock manager for kernel or userspace | ||
12 | applications. | ||
13 | |||
14 | config DLM_DEVICE | ||
15 | tristate "DLM device for userspace access" | ||
16 | depends on DLM | ||
17 | help | ||
18 | This module creates a misc device through which the dlm lockspace | ||
19 | and locking functions become available to userspace applications | ||
20 | (usually through the libdlm library). | ||
21 | |||
22 | config DLM_DEBUG | ||
23 | bool "DLM debugging" | ||
24 | depends on DLM | ||
25 | help | ||
26 | Under the debugfs mount point, the name of each lockspace will | ||
27 | appear as a file in the "dlm" directory. The output is the | ||
28 | list of resource and locks the local node knows about. | ||
29 | |||
30 | endmenu | ||
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile new file mode 100644 index 000000000000..1e6232e7d8e5 --- /dev/null +++ b/fs/dlm/Makefile | |||
@@ -0,0 +1,21 @@ | |||
1 | obj-$(CONFIG_DLM) += dlm.o | ||
2 | obj-$(CONFIG_DLM_DEVICE) += dlm_device.o | ||
3 | |||
4 | dlm-y := ast.o \ | ||
5 | config.o \ | ||
6 | dir.o \ | ||
7 | lock.o \ | ||
8 | lockspace.o \ | ||
9 | lowcomms.o \ | ||
10 | main.o \ | ||
11 | member.o \ | ||
12 | memory.o \ | ||
13 | midcomms.o \ | ||
14 | rcom.o \ | ||
15 | recover.o \ | ||
16 | recoverd.o \ | ||
17 | requestqueue.o \ | ||
18 | util.o | ||
19 | dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o | ||
20 | |||
21 | dlm_device-y := device.o | ||
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c new file mode 100644 index 000000000000..2bd1c5e1a72c --- /dev/null +++ b/fs/dlm/ast.c | |||
@@ -0,0 +1,167 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lock.h" | ||
16 | #include "ast.h" | ||
17 | |||
18 | #define WAKE_ASTS 0 | ||
19 | |||
20 | static struct list_head ast_queue; | ||
21 | static spinlock_t ast_queue_lock; | ||
22 | static struct task_struct * astd_task; | ||
23 | static unsigned long astd_wakeflags; | ||
24 | static struct semaphore astd_running; | ||
25 | |||
26 | |||
27 | void dlm_del_ast(struct dlm_lkb *lkb) | ||
28 | { | ||
29 | spin_lock(&ast_queue_lock); | ||
30 | if (lkb->lkb_ast_type & (AST_COMP | AST_BAST)) | ||
31 | list_del(&lkb->lkb_astqueue); | ||
32 | spin_unlock(&ast_queue_lock); | ||
33 | } | ||
34 | |||
35 | void dlm_add_ast(struct dlm_lkb *lkb, int type) | ||
36 | { | ||
37 | spin_lock(&ast_queue_lock); | ||
38 | if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { | ||
39 | kref_get(&lkb->lkb_ref); | ||
40 | list_add_tail(&lkb->lkb_astqueue, &ast_queue); | ||
41 | } | ||
42 | lkb->lkb_ast_type |= type; | ||
43 | spin_unlock(&ast_queue_lock); | ||
44 | |||
45 | set_bit(WAKE_ASTS, &astd_wakeflags); | ||
46 | wake_up_process(astd_task); | ||
47 | } | ||
48 | |||
49 | static void process_asts(void) | ||
50 | { | ||
51 | struct dlm_ls *ls = NULL; | ||
52 | struct dlm_rsb *r = NULL; | ||
53 | struct dlm_lkb *lkb; | ||
54 | void (*cast) (long param); | ||
55 | void (*bast) (long param, int mode); | ||
56 | int type = 0, found, bmode; | ||
57 | |||
58 | for (;;) { | ||
59 | found = FALSE; | ||
60 | spin_lock(&ast_queue_lock); | ||
61 | list_for_each_entry(lkb, &ast_queue, lkb_astqueue) { | ||
62 | r = lkb->lkb_resource; | ||
63 | ls = r->res_ls; | ||
64 | |||
65 | if (dlm_locking_stopped(ls)) | ||
66 | continue; | ||
67 | |||
68 | list_del(&lkb->lkb_astqueue); | ||
69 | type = lkb->lkb_ast_type; | ||
70 | lkb->lkb_ast_type = 0; | ||
71 | found = TRUE; | ||
72 | break; | ||
73 | } | ||
74 | spin_unlock(&ast_queue_lock); | ||
75 | |||
76 | if (!found) | ||
77 | break; | ||
78 | |||
79 | cast = lkb->lkb_astaddr; | ||
80 | bast = lkb->lkb_bastaddr; | ||
81 | bmode = lkb->lkb_bastmode; | ||
82 | |||
83 | if ((type & AST_COMP) && cast) | ||
84 | cast(lkb->lkb_astparam); | ||
85 | |||
86 | /* FIXME: Is it safe to look at lkb_grmode here | ||
87 | without doing a lock_rsb() ? | ||
88 | Look at other checks in v1 to avoid basts. */ | ||
89 | |||
90 | if ((type & AST_BAST) && bast) | ||
91 | if (!dlm_modes_compat(lkb->lkb_grmode, bmode)) | ||
92 | bast(lkb->lkb_astparam, bmode); | ||
93 | |||
94 | /* this removes the reference added by dlm_add_ast | ||
95 | and may result in the lkb being freed */ | ||
96 | dlm_put_lkb(lkb); | ||
97 | |||
98 | schedule(); | ||
99 | } | ||
100 | } | ||
101 | |||
102 | static inline int no_asts(void) | ||
103 | { | ||
104 | int ret; | ||
105 | |||
106 | spin_lock(&ast_queue_lock); | ||
107 | ret = list_empty(&ast_queue); | ||
108 | spin_unlock(&ast_queue_lock); | ||
109 | return ret; | ||
110 | } | ||
111 | |||
112 | static int dlm_astd(void *data) | ||
113 | { | ||
114 | while (!kthread_should_stop()) { | ||
115 | set_current_state(TASK_INTERRUPTIBLE); | ||
116 | if (!test_bit(WAKE_ASTS, &astd_wakeflags)) | ||
117 | schedule(); | ||
118 | set_current_state(TASK_RUNNING); | ||
119 | |||
120 | down(&astd_running); | ||
121 | if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) | ||
122 | process_asts(); | ||
123 | up(&astd_running); | ||
124 | } | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | void dlm_astd_wake(void) | ||
129 | { | ||
130 | if (!no_asts()) { | ||
131 | set_bit(WAKE_ASTS, &astd_wakeflags); | ||
132 | wake_up_process(astd_task); | ||
133 | } | ||
134 | } | ||
135 | |||
136 | int dlm_astd_start(void) | ||
137 | { | ||
138 | struct task_struct *p; | ||
139 | int error = 0; | ||
140 | |||
141 | INIT_LIST_HEAD(&ast_queue); | ||
142 | spin_lock_init(&ast_queue_lock); | ||
143 | init_MUTEX(&astd_running); | ||
144 | |||
145 | p = kthread_run(dlm_astd, NULL, "dlm_astd"); | ||
146 | if (IS_ERR(p)) | ||
147 | error = PTR_ERR(p); | ||
148 | else | ||
149 | astd_task = p; | ||
150 | return error; | ||
151 | } | ||
152 | |||
153 | void dlm_astd_stop(void) | ||
154 | { | ||
155 | kthread_stop(astd_task); | ||
156 | } | ||
157 | |||
158 | void dlm_astd_suspend(void) | ||
159 | { | ||
160 | down(&astd_running); | ||
161 | } | ||
162 | |||
163 | void dlm_astd_resume(void) | ||
164 | { | ||
165 | up(&astd_running); | ||
166 | } | ||
167 | |||
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h new file mode 100644 index 000000000000..6ee276c74c52 --- /dev/null +++ b/fs/dlm/ast.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #ifndef __ASTD_DOT_H__ | ||
14 | #define __ASTD_DOT_H__ | ||
15 | |||
16 | void dlm_add_ast(struct dlm_lkb *lkb, int type); | ||
17 | void dlm_del_ast(struct dlm_lkb *lkb); | ||
18 | |||
19 | void dlm_astd_wake(void); | ||
20 | int dlm_astd_start(void); | ||
21 | void dlm_astd_stop(void); | ||
22 | void dlm_astd_suspend(void); | ||
23 | void dlm_astd_resume(void); | ||
24 | |||
25 | #endif | ||
26 | |||
diff --git a/fs/dlm/config.c b/fs/dlm/config.c new file mode 100644 index 000000000000..024ace9973a8 --- /dev/null +++ b/fs/dlm/config.c | |||
@@ -0,0 +1,787 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/configfs.h> | ||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "config.h" | ||
20 | |||
21 | /* | ||
22 | * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/nodeid | ||
23 | * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight | ||
24 | * /config/dlm/<cluster>/comms/<comm>/nodeid | ||
25 | * /config/dlm/<cluster>/comms/<comm>/local | ||
26 | * /config/dlm/<cluster>/comms/<comm>/addr | ||
27 | * The <cluster> level is useless, but I haven't figured out how to avoid it. | ||
28 | */ | ||
29 | |||
30 | static struct config_group *space_list; | ||
31 | static struct config_group *comm_list; | ||
32 | static struct comm *local_comm; | ||
33 | |||
34 | struct clusters; | ||
35 | struct cluster; | ||
36 | struct spaces; | ||
37 | struct space; | ||
38 | struct comms; | ||
39 | struct comm; | ||
40 | struct nodes; | ||
41 | struct node; | ||
42 | |||
43 | static struct config_group *make_cluster(struct config_group *, const char *); | ||
44 | static void drop_cluster(struct config_group *, struct config_item *); | ||
45 | static void release_cluster(struct config_item *); | ||
46 | static struct config_group *make_space(struct config_group *, const char *); | ||
47 | static void drop_space(struct config_group *, struct config_item *); | ||
48 | static void release_space(struct config_item *); | ||
49 | static struct config_item *make_comm(struct config_group *, const char *); | ||
50 | static void drop_comm(struct config_group *, struct config_item *); | ||
51 | static void release_comm(struct config_item *); | ||
52 | static struct config_item *make_node(struct config_group *, const char *); | ||
53 | static void drop_node(struct config_group *, struct config_item *); | ||
54 | static void release_node(struct config_item *); | ||
55 | |||
56 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | ||
57 | char *buf); | ||
58 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | ||
59 | const char *buf, size_t len); | ||
60 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | ||
61 | char *buf); | ||
62 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | ||
63 | const char *buf, size_t len); | ||
64 | |||
65 | static ssize_t comm_nodeid_read(struct comm *cm, char *buf); | ||
66 | static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len); | ||
67 | static ssize_t comm_local_read(struct comm *cm, char *buf); | ||
68 | static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len); | ||
69 | static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len); | ||
70 | static ssize_t node_nodeid_read(struct node *nd, char *buf); | ||
71 | static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); | ||
72 | static ssize_t node_weight_read(struct node *nd, char *buf); | ||
73 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); | ||
74 | |||
75 | enum { | ||
76 | COMM_ATTR_NODEID = 0, | ||
77 | COMM_ATTR_LOCAL, | ||
78 | COMM_ATTR_ADDR, | ||
79 | }; | ||
80 | |||
81 | struct comm_attribute { | ||
82 | struct configfs_attribute attr; | ||
83 | ssize_t (*show)(struct comm *, char *); | ||
84 | ssize_t (*store)(struct comm *, const char *, size_t); | ||
85 | }; | ||
86 | |||
87 | static struct comm_attribute comm_attr_nodeid = { | ||
88 | .attr = { .ca_owner = THIS_MODULE, | ||
89 | .ca_name = "nodeid", | ||
90 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
91 | .show = comm_nodeid_read, | ||
92 | .store = comm_nodeid_write, | ||
93 | }; | ||
94 | |||
95 | static struct comm_attribute comm_attr_local = { | ||
96 | .attr = { .ca_owner = THIS_MODULE, | ||
97 | .ca_name = "local", | ||
98 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
99 | .show = comm_local_read, | ||
100 | .store = comm_local_write, | ||
101 | }; | ||
102 | |||
103 | static struct comm_attribute comm_attr_addr = { | ||
104 | .attr = { .ca_owner = THIS_MODULE, | ||
105 | .ca_name = "addr", | ||
106 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
107 | .store = comm_addr_write, | ||
108 | }; | ||
109 | |||
110 | static struct configfs_attribute *comm_attrs[] = { | ||
111 | [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, | ||
112 | [COMM_ATTR_LOCAL] = &comm_attr_local.attr, | ||
113 | [COMM_ATTR_ADDR] = &comm_attr_addr.attr, | ||
114 | NULL, | ||
115 | }; | ||
116 | |||
117 | enum { | ||
118 | NODE_ATTR_NODEID = 0, | ||
119 | NODE_ATTR_WEIGHT, | ||
120 | }; | ||
121 | |||
122 | struct node_attribute { | ||
123 | struct configfs_attribute attr; | ||
124 | ssize_t (*show)(struct node *, char *); | ||
125 | ssize_t (*store)(struct node *, const char *, size_t); | ||
126 | }; | ||
127 | |||
128 | static struct node_attribute node_attr_nodeid = { | ||
129 | .attr = { .ca_owner = THIS_MODULE, | ||
130 | .ca_name = "nodeid", | ||
131 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
132 | .show = node_nodeid_read, | ||
133 | .store = node_nodeid_write, | ||
134 | }; | ||
135 | |||
136 | static struct node_attribute node_attr_weight = { | ||
137 | .attr = { .ca_owner = THIS_MODULE, | ||
138 | .ca_name = "weight", | ||
139 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
140 | .show = node_weight_read, | ||
141 | .store = node_weight_write, | ||
142 | }; | ||
143 | |||
144 | static struct configfs_attribute *node_attrs[] = { | ||
145 | [NODE_ATTR_NODEID] = &node_attr_nodeid.attr, | ||
146 | [NODE_ATTR_WEIGHT] = &node_attr_weight.attr, | ||
147 | NULL, | ||
148 | }; | ||
149 | |||
150 | struct clusters { | ||
151 | struct configfs_subsystem subsys; | ||
152 | }; | ||
153 | |||
154 | struct cluster { | ||
155 | struct config_group group; | ||
156 | }; | ||
157 | |||
158 | struct spaces { | ||
159 | struct config_group ss_group; | ||
160 | }; | ||
161 | |||
162 | struct space { | ||
163 | struct config_group group; | ||
164 | struct list_head members; | ||
165 | struct semaphore members_lock; | ||
166 | int members_count; | ||
167 | }; | ||
168 | |||
169 | struct comms { | ||
170 | struct config_group cs_group; | ||
171 | }; | ||
172 | |||
173 | struct comm { | ||
174 | struct config_item item; | ||
175 | int nodeid; | ||
176 | int local; | ||
177 | int addr_count; | ||
178 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | ||
179 | }; | ||
180 | |||
181 | struct nodes { | ||
182 | struct config_group ns_group; | ||
183 | }; | ||
184 | |||
185 | struct node { | ||
186 | struct config_item item; | ||
187 | struct list_head list; /* space->members */ | ||
188 | int nodeid; | ||
189 | int weight; | ||
190 | }; | ||
191 | |||
192 | static struct configfs_group_operations clusters_ops = { | ||
193 | .make_group = make_cluster, | ||
194 | .drop_item = drop_cluster, | ||
195 | }; | ||
196 | |||
197 | static struct configfs_item_operations cluster_ops = { | ||
198 | .release = release_cluster, | ||
199 | }; | ||
200 | |||
201 | static struct configfs_group_operations spaces_ops = { | ||
202 | .make_group = make_space, | ||
203 | .drop_item = drop_space, | ||
204 | }; | ||
205 | |||
206 | static struct configfs_item_operations space_ops = { | ||
207 | .release = release_space, | ||
208 | }; | ||
209 | |||
210 | static struct configfs_group_operations comms_ops = { | ||
211 | .make_item = make_comm, | ||
212 | .drop_item = drop_comm, | ||
213 | }; | ||
214 | |||
215 | static struct configfs_item_operations comm_ops = { | ||
216 | .release = release_comm, | ||
217 | .show_attribute = show_comm, | ||
218 | .store_attribute = store_comm, | ||
219 | }; | ||
220 | |||
221 | static struct configfs_group_operations nodes_ops = { | ||
222 | .make_item = make_node, | ||
223 | .drop_item = drop_node, | ||
224 | }; | ||
225 | |||
226 | static struct configfs_item_operations node_ops = { | ||
227 | .release = release_node, | ||
228 | .show_attribute = show_node, | ||
229 | .store_attribute = store_node, | ||
230 | }; | ||
231 | |||
232 | static struct config_item_type clusters_type = { | ||
233 | .ct_group_ops = &clusters_ops, | ||
234 | .ct_owner = THIS_MODULE, | ||
235 | }; | ||
236 | |||
237 | static struct config_item_type cluster_type = { | ||
238 | .ct_item_ops = &cluster_ops, | ||
239 | .ct_owner = THIS_MODULE, | ||
240 | }; | ||
241 | |||
242 | static struct config_item_type spaces_type = { | ||
243 | .ct_group_ops = &spaces_ops, | ||
244 | .ct_owner = THIS_MODULE, | ||
245 | }; | ||
246 | |||
247 | static struct config_item_type space_type = { | ||
248 | .ct_item_ops = &space_ops, | ||
249 | .ct_owner = THIS_MODULE, | ||
250 | }; | ||
251 | |||
252 | static struct config_item_type comms_type = { | ||
253 | .ct_group_ops = &comms_ops, | ||
254 | .ct_owner = THIS_MODULE, | ||
255 | }; | ||
256 | |||
257 | static struct config_item_type comm_type = { | ||
258 | .ct_item_ops = &comm_ops, | ||
259 | .ct_attrs = comm_attrs, | ||
260 | .ct_owner = THIS_MODULE, | ||
261 | }; | ||
262 | |||
263 | static struct config_item_type nodes_type = { | ||
264 | .ct_group_ops = &nodes_ops, | ||
265 | .ct_owner = THIS_MODULE, | ||
266 | }; | ||
267 | |||
268 | static struct config_item_type node_type = { | ||
269 | .ct_item_ops = &node_ops, | ||
270 | .ct_attrs = node_attrs, | ||
271 | .ct_owner = THIS_MODULE, | ||
272 | }; | ||
273 | |||
274 | static struct cluster *to_cluster(struct config_item *i) | ||
275 | { | ||
276 | return i ? container_of(to_config_group(i), struct cluster, group):NULL; | ||
277 | } | ||
278 | |||
279 | static struct space *to_space(struct config_item *i) | ||
280 | { | ||
281 | return i ? container_of(to_config_group(i), struct space, group) : NULL; | ||
282 | } | ||
283 | |||
284 | static struct comm *to_comm(struct config_item *i) | ||
285 | { | ||
286 | return i ? container_of(i, struct comm, item) : NULL; | ||
287 | } | ||
288 | |||
289 | static struct node *to_node(struct config_item *i) | ||
290 | { | ||
291 | return i ? container_of(i, struct node, item) : NULL; | ||
292 | } | ||
293 | |||
294 | static struct config_group *make_cluster(struct config_group *g, | ||
295 | const char *name) | ||
296 | { | ||
297 | struct cluster *cl = NULL; | ||
298 | struct spaces *sps = NULL; | ||
299 | struct comms *cms = NULL; | ||
300 | void *gps = NULL; | ||
301 | |||
302 | cl = kzalloc(sizeof(struct cluster), GFP_KERNEL); | ||
303 | gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); | ||
304 | sps = kzalloc(sizeof(struct spaces), GFP_KERNEL); | ||
305 | cms = kzalloc(sizeof(struct comms), GFP_KERNEL); | ||
306 | |||
307 | if (!cl || !gps || !sps || !cms) | ||
308 | goto fail; | ||
309 | |||
310 | config_group_init_type_name(&cl->group, name, &cluster_type); | ||
311 | config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type); | ||
312 | config_group_init_type_name(&cms->cs_group, "comms", &comms_type); | ||
313 | |||
314 | cl->group.default_groups = gps; | ||
315 | cl->group.default_groups[0] = &sps->ss_group; | ||
316 | cl->group.default_groups[1] = &cms->cs_group; | ||
317 | cl->group.default_groups[2] = NULL; | ||
318 | |||
319 | space_list = &sps->ss_group; | ||
320 | comm_list = &cms->cs_group; | ||
321 | return &cl->group; | ||
322 | |||
323 | fail: | ||
324 | kfree(cl); | ||
325 | kfree(gps); | ||
326 | kfree(sps); | ||
327 | kfree(cms); | ||
328 | return NULL; | ||
329 | } | ||
330 | |||
331 | static void drop_cluster(struct config_group *g, struct config_item *i) | ||
332 | { | ||
333 | struct cluster *cl = to_cluster(i); | ||
334 | struct config_item *tmp; | ||
335 | int j; | ||
336 | |||
337 | for (j = 0; cl->group.default_groups[j]; j++) { | ||
338 | tmp = &cl->group.default_groups[j]->cg_item; | ||
339 | cl->group.default_groups[j] = NULL; | ||
340 | config_item_put(tmp); | ||
341 | } | ||
342 | |||
343 | space_list = NULL; | ||
344 | comm_list = NULL; | ||
345 | |||
346 | config_item_put(i); | ||
347 | } | ||
348 | |||
349 | static void release_cluster(struct config_item *i) | ||
350 | { | ||
351 | struct cluster *cl = to_cluster(i); | ||
352 | kfree(cl->group.default_groups); | ||
353 | kfree(cl); | ||
354 | } | ||
355 | |||
356 | static struct config_group *make_space(struct config_group *g, const char *name) | ||
357 | { | ||
358 | struct space *sp = NULL; | ||
359 | struct nodes *nds = NULL; | ||
360 | void *gps = NULL; | ||
361 | |||
362 | sp = kzalloc(sizeof(struct space), GFP_KERNEL); | ||
363 | gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); | ||
364 | nds = kzalloc(sizeof(struct nodes), GFP_KERNEL); | ||
365 | |||
366 | if (!sp || !gps || !nds) | ||
367 | goto fail; | ||
368 | |||
369 | config_group_init_type_name(&sp->group, name, &space_type); | ||
370 | config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type); | ||
371 | |||
372 | sp->group.default_groups = gps; | ||
373 | sp->group.default_groups[0] = &nds->ns_group; | ||
374 | sp->group.default_groups[1] = NULL; | ||
375 | |||
376 | INIT_LIST_HEAD(&sp->members); | ||
377 | init_MUTEX(&sp->members_lock); | ||
378 | sp->members_count = 0; | ||
379 | return &sp->group; | ||
380 | |||
381 | fail: | ||
382 | kfree(sp); | ||
383 | kfree(gps); | ||
384 | kfree(nds); | ||
385 | return NULL; | ||
386 | } | ||
387 | |||
388 | static void drop_space(struct config_group *g, struct config_item *i) | ||
389 | { | ||
390 | struct space *sp = to_space(i); | ||
391 | struct config_item *tmp; | ||
392 | int j; | ||
393 | |||
394 | /* assert list_empty(&sp->members) */ | ||
395 | |||
396 | for (j = 0; sp->group.default_groups[j]; j++) { | ||
397 | tmp = &sp->group.default_groups[j]->cg_item; | ||
398 | sp->group.default_groups[j] = NULL; | ||
399 | config_item_put(tmp); | ||
400 | } | ||
401 | |||
402 | config_item_put(i); | ||
403 | } | ||
404 | |||
405 | static void release_space(struct config_item *i) | ||
406 | { | ||
407 | struct space *sp = to_space(i); | ||
408 | kfree(sp->group.default_groups); | ||
409 | kfree(sp); | ||
410 | } | ||
411 | |||
412 | static struct config_item *make_comm(struct config_group *g, const char *name) | ||
413 | { | ||
414 | struct comm *cm; | ||
415 | |||
416 | cm = kzalloc(sizeof(struct comm), GFP_KERNEL); | ||
417 | if (!cm) | ||
418 | return NULL; | ||
419 | |||
420 | config_item_init_type_name(&cm->item, name, &comm_type); | ||
421 | cm->nodeid = -1; | ||
422 | cm->local = 0; | ||
423 | cm->addr_count = 0; | ||
424 | return &cm->item; | ||
425 | } | ||
426 | |||
427 | static void drop_comm(struct config_group *g, struct config_item *i) | ||
428 | { | ||
429 | struct comm *cm = to_comm(i); | ||
430 | if (local_comm == cm) | ||
431 | local_comm = NULL; | ||
432 | while (cm->addr_count--) | ||
433 | kfree(cm->addr[cm->addr_count]); | ||
434 | config_item_put(i); | ||
435 | } | ||
436 | |||
437 | static void release_comm(struct config_item *i) | ||
438 | { | ||
439 | struct comm *cm = to_comm(i); | ||
440 | kfree(cm); | ||
441 | } | ||
442 | |||
443 | static struct config_item *make_node(struct config_group *g, const char *name) | ||
444 | { | ||
445 | struct space *sp = to_space(g->cg_item.ci_parent); | ||
446 | struct node *nd; | ||
447 | |||
448 | nd = kzalloc(sizeof(struct node), GFP_KERNEL); | ||
449 | if (!nd) | ||
450 | return NULL; | ||
451 | |||
452 | config_item_init_type_name(&nd->item, name, &node_type); | ||
453 | nd->nodeid = -1; | ||
454 | nd->weight = 1; /* default weight of 1 if none is set */ | ||
455 | |||
456 | down(&sp->members_lock); | ||
457 | list_add(&nd->list, &sp->members); | ||
458 | sp->members_count++; | ||
459 | up(&sp->members_lock); | ||
460 | |||
461 | return &nd->item; | ||
462 | } | ||
463 | |||
464 | static void drop_node(struct config_group *g, struct config_item *i) | ||
465 | { | ||
466 | struct space *sp = to_space(g->cg_item.ci_parent); | ||
467 | struct node *nd = to_node(i); | ||
468 | |||
469 | down(&sp->members_lock); | ||
470 | list_del(&nd->list); | ||
471 | sp->members_count--; | ||
472 | up(&sp->members_lock); | ||
473 | |||
474 | config_item_put(i); | ||
475 | } | ||
476 | |||
477 | static void release_node(struct config_item *i) | ||
478 | { | ||
479 | struct node *nd = to_node(i); | ||
480 | kfree(nd); | ||
481 | } | ||
482 | |||
483 | static struct clusters clusters_root = { | ||
484 | .subsys = { | ||
485 | .su_group = { | ||
486 | .cg_item = { | ||
487 | .ci_namebuf = "dlm", | ||
488 | .ci_type = &clusters_type, | ||
489 | }, | ||
490 | }, | ||
491 | }, | ||
492 | }; | ||
493 | |||
494 | int dlm_config_init(void) | ||
495 | { | ||
496 | config_group_init(&clusters_root.subsys.su_group); | ||
497 | init_MUTEX(&clusters_root.subsys.su_sem); | ||
498 | return configfs_register_subsystem(&clusters_root.subsys); | ||
499 | } | ||
500 | |||
501 | void dlm_config_exit(void) | ||
502 | { | ||
503 | configfs_unregister_subsystem(&clusters_root.subsys); | ||
504 | } | ||
505 | |||
506 | /* | ||
507 | * Functions for user space to read/write attributes | ||
508 | */ | ||
509 | |||
510 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | ||
511 | char *buf) | ||
512 | { | ||
513 | struct comm *cm = to_comm(i); | ||
514 | struct comm_attribute *cma = | ||
515 | container_of(a, struct comm_attribute, attr); | ||
516 | return cma->show ? cma->show(cm, buf) : 0; | ||
517 | } | ||
518 | |||
519 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | ||
520 | const char *buf, size_t len) | ||
521 | { | ||
522 | struct comm *cm = to_comm(i); | ||
523 | struct comm_attribute *cma = | ||
524 | container_of(a, struct comm_attribute, attr); | ||
525 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; | ||
526 | } | ||
527 | |||
528 | static ssize_t comm_nodeid_read(struct comm *cm, char *buf) | ||
529 | { | ||
530 | return sprintf(buf, "%d\n", cm->nodeid); | ||
531 | } | ||
532 | |||
533 | static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len) | ||
534 | { | ||
535 | cm->nodeid = simple_strtol(buf, NULL, 0); | ||
536 | return len; | ||
537 | } | ||
538 | |||
539 | static ssize_t comm_local_read(struct comm *cm, char *buf) | ||
540 | { | ||
541 | return sprintf(buf, "%d\n", cm->local); | ||
542 | } | ||
543 | |||
544 | static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) | ||
545 | { | ||
546 | cm->local= simple_strtol(buf, NULL, 0); | ||
547 | if (cm->local && !local_comm) | ||
548 | local_comm = cm; | ||
549 | return len; | ||
550 | } | ||
551 | |||
552 | static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) | ||
553 | { | ||
554 | struct sockaddr_storage *addr; | ||
555 | |||
556 | if (len != sizeof(struct sockaddr_storage)) | ||
557 | return -EINVAL; | ||
558 | |||
559 | if (cm->addr_count >= DLM_MAX_ADDR_COUNT) | ||
560 | return -ENOSPC; | ||
561 | |||
562 | addr = kzalloc(sizeof(*addr), GFP_KERNEL); | ||
563 | if (!addr) | ||
564 | return -ENOMEM; | ||
565 | |||
566 | memcpy(addr, buf, len); | ||
567 | cm->addr[cm->addr_count++] = addr; | ||
568 | return len; | ||
569 | } | ||
570 | |||
571 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | ||
572 | char *buf) | ||
573 | { | ||
574 | struct node *nd = to_node(i); | ||
575 | struct node_attribute *nda = | ||
576 | container_of(a, struct node_attribute, attr); | ||
577 | return nda->show ? nda->show(nd, buf) : 0; | ||
578 | } | ||
579 | |||
580 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | ||
581 | const char *buf, size_t len) | ||
582 | { | ||
583 | struct node *nd = to_node(i); | ||
584 | struct node_attribute *nda = | ||
585 | container_of(a, struct node_attribute, attr); | ||
586 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; | ||
587 | } | ||
588 | |||
589 | static ssize_t node_nodeid_read(struct node *nd, char *buf) | ||
590 | { | ||
591 | return sprintf(buf, "%d\n", nd->nodeid); | ||
592 | } | ||
593 | |||
594 | static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len) | ||
595 | { | ||
596 | nd->nodeid = simple_strtol(buf, NULL, 0); | ||
597 | return len; | ||
598 | } | ||
599 | |||
600 | static ssize_t node_weight_read(struct node *nd, char *buf) | ||
601 | { | ||
602 | return sprintf(buf, "%d\n", nd->weight); | ||
603 | } | ||
604 | |||
605 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) | ||
606 | { | ||
607 | nd->weight = simple_strtol(buf, NULL, 0); | ||
608 | return len; | ||
609 | } | ||
610 | |||
611 | /* | ||
612 | * Functions for the dlm to get the info that's been configured | ||
613 | */ | ||
614 | |||
615 | static struct space *get_space(char *name) | ||
616 | { | ||
617 | if (!space_list) | ||
618 | return NULL; | ||
619 | return to_space(config_group_find_obj(space_list, name)); | ||
620 | } | ||
621 | |||
622 | static void put_space(struct space *sp) | ||
623 | { | ||
624 | config_item_put(&sp->group.cg_item); | ||
625 | } | ||
626 | |||
627 | static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | ||
628 | { | ||
629 | struct config_item *i; | ||
630 | struct comm *cm = NULL; | ||
631 | int found = 0; | ||
632 | |||
633 | if (!comm_list) | ||
634 | return NULL; | ||
635 | |||
636 | down(&clusters_root.subsys.su_sem); | ||
637 | |||
638 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | ||
639 | cm = to_comm(i); | ||
640 | |||
641 | if (nodeid) { | ||
642 | if (cm->nodeid != nodeid) | ||
643 | continue; | ||
644 | found = 1; | ||
645 | break; | ||
646 | } else { | ||
647 | if (!cm->addr_count || | ||
648 | memcmp(cm->addr[0], addr, sizeof(*addr))) | ||
649 | continue; | ||
650 | found = 1; | ||
651 | break; | ||
652 | } | ||
653 | } | ||
654 | up(&clusters_root.subsys.su_sem); | ||
655 | |||
656 | if (found) | ||
657 | config_item_get(i); | ||
658 | else | ||
659 | cm = NULL; | ||
660 | return cm; | ||
661 | } | ||
662 | |||
663 | static void put_comm(struct comm *cm) | ||
664 | { | ||
665 | config_item_put(&cm->item); | ||
666 | } | ||
667 | |||
668 | /* caller must free mem */ | ||
669 | int dlm_nodeid_list(char *lsname, int **ids_out) | ||
670 | { | ||
671 | struct space *sp; | ||
672 | struct node *nd; | ||
673 | int i = 0, rv = 0; | ||
674 | int *ids; | ||
675 | |||
676 | sp = get_space(lsname); | ||
677 | if (!sp) | ||
678 | return -EEXIST; | ||
679 | |||
680 | down(&sp->members_lock); | ||
681 | if (!sp->members_count) { | ||
682 | rv = 0; | ||
683 | goto out; | ||
684 | } | ||
685 | |||
686 | ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); | ||
687 | if (!ids) { | ||
688 | rv = -ENOMEM; | ||
689 | goto out; | ||
690 | } | ||
691 | |||
692 | rv = sp->members_count; | ||
693 | list_for_each_entry(nd, &sp->members, list) | ||
694 | ids[i++] = nd->nodeid; | ||
695 | |||
696 | if (rv != i) | ||
697 | printk("bad nodeid count %d %d\n", rv, i); | ||
698 | |||
699 | *ids_out = ids; | ||
700 | out: | ||
701 | up(&sp->members_lock); | ||
702 | put_space(sp); | ||
703 | return rv; | ||
704 | } | ||
705 | |||
706 | int dlm_node_weight(char *lsname, int nodeid) | ||
707 | { | ||
708 | struct space *sp; | ||
709 | struct node *nd; | ||
710 | int w = -EEXIST; | ||
711 | |||
712 | sp = get_space(lsname); | ||
713 | if (!sp) | ||
714 | goto out; | ||
715 | |||
716 | down(&sp->members_lock); | ||
717 | list_for_each_entry(nd, &sp->members, list) { | ||
718 | if (nd->nodeid != nodeid) | ||
719 | continue; | ||
720 | w = nd->weight; | ||
721 | break; | ||
722 | } | ||
723 | up(&sp->members_lock); | ||
724 | put_space(sp); | ||
725 | out: | ||
726 | return w; | ||
727 | } | ||
728 | |||
729 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | ||
730 | { | ||
731 | struct comm *cm = get_comm(nodeid, NULL); | ||
732 | if (!cm) | ||
733 | return -EEXIST; | ||
734 | if (!cm->addr_count) | ||
735 | return -ENOENT; | ||
736 | memcpy(addr, cm->addr[0], sizeof(*addr)); | ||
737 | put_comm(cm); | ||
738 | return 0; | ||
739 | } | ||
740 | |||
741 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | ||
742 | { | ||
743 | struct comm *cm = get_comm(0, addr); | ||
744 | if (!cm) | ||
745 | return -EEXIST; | ||
746 | *nodeid = cm->nodeid; | ||
747 | put_comm(cm); | ||
748 | return 0; | ||
749 | } | ||
750 | |||
751 | int dlm_our_nodeid(void) | ||
752 | { | ||
753 | return local_comm ? local_comm->nodeid : 0; | ||
754 | } | ||
755 | |||
756 | /* num 0 is first addr, num 1 is second addr */ | ||
757 | int dlm_our_addr(struct sockaddr_storage *addr, int num) | ||
758 | { | ||
759 | if (!local_comm) | ||
760 | return -1; | ||
761 | if (num + 1 > local_comm->addr_count) | ||
762 | return -1; | ||
763 | memcpy(addr, local_comm->addr[num], sizeof(*addr)); | ||
764 | return 0; | ||
765 | } | ||
766 | |||
767 | /* Config file defaults */ | ||
768 | #define DEFAULT_TCP_PORT 21064 | ||
769 | #define DEFAULT_BUFFER_SIZE 4096 | ||
770 | #define DEFAULT_RSBTBL_SIZE 256 | ||
771 | #define DEFAULT_LKBTBL_SIZE 1024 | ||
772 | #define DEFAULT_DIRTBL_SIZE 512 | ||
773 | #define DEFAULT_RECOVER_TIMER 5 | ||
774 | #define DEFAULT_TOSS_SECS 10 | ||
775 | #define DEFAULT_SCAN_SECS 5 | ||
776 | |||
777 | struct dlm_config_info dlm_config = { | ||
778 | .tcp_port = DEFAULT_TCP_PORT, | ||
779 | .buffer_size = DEFAULT_BUFFER_SIZE, | ||
780 | .rsbtbl_size = DEFAULT_RSBTBL_SIZE, | ||
781 | .lkbtbl_size = DEFAULT_LKBTBL_SIZE, | ||
782 | .dirtbl_size = DEFAULT_DIRTBL_SIZE, | ||
783 | .recover_timer = DEFAULT_RECOVER_TIMER, | ||
784 | .toss_secs = DEFAULT_TOSS_SECS, | ||
785 | .scan_secs = DEFAULT_SCAN_SECS | ||
786 | }; | ||
787 | |||
diff --git a/fs/dlm/config.h b/fs/dlm/config.h new file mode 100644 index 000000000000..9da7839958a9 --- /dev/null +++ b/fs/dlm/config.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __CONFIG_DOT_H__ | ||
15 | #define __CONFIG_DOT_H__ | ||
16 | |||
17 | #define DLM_MAX_ADDR_COUNT 3 | ||
18 | |||
19 | struct dlm_config_info { | ||
20 | int tcp_port; | ||
21 | int buffer_size; | ||
22 | int rsbtbl_size; | ||
23 | int lkbtbl_size; | ||
24 | int dirtbl_size; | ||
25 | int recover_timer; | ||
26 | int toss_secs; | ||
27 | int scan_secs; | ||
28 | }; | ||
29 | |||
30 | extern struct dlm_config_info dlm_config; | ||
31 | |||
32 | int dlm_config_init(void); | ||
33 | void dlm_config_exit(void); | ||
34 | int dlm_node_weight(char *lsname, int nodeid); | ||
35 | int dlm_nodeid_list(char *lsname, int **ids_out); | ||
36 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); | ||
37 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); | ||
38 | int dlm_our_nodeid(void); | ||
39 | int dlm_our_addr(struct sockaddr_storage *addr, int num); | ||
40 | |||
41 | #endif /* __CONFIG_DOT_H__ */ | ||
42 | |||
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c new file mode 100644 index 000000000000..98b49a1ece47 --- /dev/null +++ b/fs/dlm/debug_fs.c | |||
@@ -0,0 +1,310 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #include <linux/pagemap.h> | ||
14 | #include <linux/seq_file.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/ctype.h> | ||
17 | #include <linux/debugfs.h> | ||
18 | |||
19 | #include "dlm_internal.h" | ||
20 | |||
21 | |||
22 | static struct dentry *dlm_root; | ||
23 | |||
24 | struct rsb_iter { | ||
25 | int entry; | ||
26 | struct dlm_ls *ls; | ||
27 | struct list_head *next; | ||
28 | struct dlm_rsb *rsb; | ||
29 | }; | ||
30 | |||
31 | static char *print_lockmode(int mode) | ||
32 | { | ||
33 | switch (mode) { | ||
34 | case DLM_LOCK_IV: | ||
35 | return "--"; | ||
36 | case DLM_LOCK_NL: | ||
37 | return "NL"; | ||
38 | case DLM_LOCK_CR: | ||
39 | return "CR"; | ||
40 | case DLM_LOCK_CW: | ||
41 | return "CW"; | ||
42 | case DLM_LOCK_PR: | ||
43 | return "PR"; | ||
44 | case DLM_LOCK_PW: | ||
45 | return "PW"; | ||
46 | case DLM_LOCK_EX: | ||
47 | return "EX"; | ||
48 | default: | ||
49 | return "??"; | ||
50 | } | ||
51 | } | ||
52 | |||
53 | static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, | ||
54 | struct dlm_rsb *res) | ||
55 | { | ||
56 | seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); | ||
57 | |||
58 | if (lkb->lkb_status == DLM_LKSTS_CONVERT | ||
59 | || lkb->lkb_status == DLM_LKSTS_WAITING) | ||
60 | seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode)); | ||
61 | |||
62 | if (lkb->lkb_range) { | ||
63 | /* FIXME: this warns on Alpha */ | ||
64 | if (lkb->lkb_status == DLM_LKSTS_CONVERT | ||
65 | || lkb->lkb_status == DLM_LKSTS_GRANTED) | ||
66 | seq_printf(s, " %" PRIx64 "-%" PRIx64, | ||
67 | lkb->lkb_range[GR_RANGE_START], | ||
68 | lkb->lkb_range[GR_RANGE_END]); | ||
69 | if (lkb->lkb_status == DLM_LKSTS_CONVERT | ||
70 | || lkb->lkb_status == DLM_LKSTS_WAITING) | ||
71 | seq_printf(s, " (%" PRIx64 "-%" PRIx64 ")", | ||
72 | lkb->lkb_range[RQ_RANGE_START], | ||
73 | lkb->lkb_range[RQ_RANGE_END]); | ||
74 | } | ||
75 | |||
76 | if (lkb->lkb_nodeid) { | ||
77 | if (lkb->lkb_nodeid != res->res_nodeid) | ||
78 | seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid, | ||
79 | lkb->lkb_remid); | ||
80 | else | ||
81 | seq_printf(s, " Master: %08x", lkb->lkb_remid); | ||
82 | } | ||
83 | |||
84 | if (lkb->lkb_wait_type) | ||
85 | seq_printf(s, " wait_type: %d", lkb->lkb_wait_type); | ||
86 | |||
87 | seq_printf(s, "\n"); | ||
88 | } | ||
89 | |||
90 | static int print_resource(struct dlm_rsb *res, struct seq_file *s) | ||
91 | { | ||
92 | struct dlm_lkb *lkb; | ||
93 | int i, lvblen = res->res_ls->ls_lvblen; | ||
94 | |||
95 | seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); | ||
96 | for (i = 0; i < res->res_length; i++) { | ||
97 | if (isprint(res->res_name[i])) | ||
98 | seq_printf(s, "%c", res->res_name[i]); | ||
99 | else | ||
100 | seq_printf(s, "%c", '.'); | ||
101 | } | ||
102 | if (res->res_nodeid > 0) | ||
103 | seq_printf(s, "\" \nLocal Copy, Master is node %d\n", | ||
104 | res->res_nodeid); | ||
105 | else if (res->res_nodeid == 0) | ||
106 | seq_printf(s, "\" \nMaster Copy\n"); | ||
107 | else if (res->res_nodeid == -1) | ||
108 | seq_printf(s, "\" \nLooking up master (lkid %x)\n", | ||
109 | res->res_first_lkid); | ||
110 | else | ||
111 | seq_printf(s, "\" \nInvalid master %d\n", res->res_nodeid); | ||
112 | |||
113 | /* Print the LVB: */ | ||
114 | if (res->res_lvbptr) { | ||
115 | seq_printf(s, "LVB: "); | ||
116 | for (i = 0; i < lvblen; i++) { | ||
117 | if (i == lvblen / 2) | ||
118 | seq_printf(s, "\n "); | ||
119 | seq_printf(s, "%02x ", | ||
120 | (unsigned char) res->res_lvbptr[i]); | ||
121 | } | ||
122 | if (rsb_flag(res, RSB_VALNOTVALID)) | ||
123 | seq_printf(s, " (INVALID)"); | ||
124 | seq_printf(s, "\n"); | ||
125 | } | ||
126 | |||
127 | /* Print the locks attached to this resource */ | ||
128 | seq_printf(s, "Granted Queue\n"); | ||
129 | list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) | ||
130 | print_lock(s, lkb, res); | ||
131 | |||
132 | seq_printf(s, "Conversion Queue\n"); | ||
133 | list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) | ||
134 | print_lock(s, lkb, res); | ||
135 | |||
136 | seq_printf(s, "Waiting Queue\n"); | ||
137 | list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) | ||
138 | print_lock(s, lkb, res); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int rsb_iter_next(struct rsb_iter *ri) | ||
144 | { | ||
145 | struct dlm_ls *ls = ri->ls; | ||
146 | int i; | ||
147 | |||
148 | if (!ri->next) { | ||
149 | top: | ||
150 | /* Find the next non-empty hash bucket */ | ||
151 | for (i = ri->entry; i < ls->ls_rsbtbl_size; i++) { | ||
152 | read_lock(&ls->ls_rsbtbl[i].lock); | ||
153 | if (!list_empty(&ls->ls_rsbtbl[i].list)) { | ||
154 | ri->next = ls->ls_rsbtbl[i].list.next; | ||
155 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
156 | break; | ||
157 | } | ||
158 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
159 | } | ||
160 | ri->entry = i; | ||
161 | |||
162 | if (ri->entry >= ls->ls_rsbtbl_size) | ||
163 | return 1; | ||
164 | } else { | ||
165 | i = ri->entry; | ||
166 | read_lock(&ls->ls_rsbtbl[i].lock); | ||
167 | ri->next = ri->next->next; | ||
168 | if (ri->next->next == ls->ls_rsbtbl[i].list.next) { | ||
169 | /* End of list - move to next bucket */ | ||
170 | ri->next = NULL; | ||
171 | ri->entry++; | ||
172 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
173 | goto top; | ||
174 | } | ||
175 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
176 | } | ||
177 | ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain); | ||
178 | |||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | static void rsb_iter_free(struct rsb_iter *ri) | ||
183 | { | ||
184 | kfree(ri); | ||
185 | } | ||
186 | |||
187 | static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls) | ||
188 | { | ||
189 | struct rsb_iter *ri; | ||
190 | |||
191 | ri = kmalloc(sizeof *ri, GFP_KERNEL); | ||
192 | if (!ri) | ||
193 | return NULL; | ||
194 | |||
195 | ri->ls = ls; | ||
196 | ri->entry = 0; | ||
197 | ri->next = NULL; | ||
198 | |||
199 | if (rsb_iter_next(ri)) { | ||
200 | rsb_iter_free(ri); | ||
201 | return NULL; | ||
202 | } | ||
203 | |||
204 | return ri; | ||
205 | } | ||
206 | |||
207 | static void *seq_start(struct seq_file *file, loff_t *pos) | ||
208 | { | ||
209 | struct rsb_iter *ri; | ||
210 | loff_t n = *pos; | ||
211 | |||
212 | ri = rsb_iter_init(file->private); | ||
213 | if (!ri) | ||
214 | return NULL; | ||
215 | |||
216 | while (n--) { | ||
217 | if (rsb_iter_next(ri)) { | ||
218 | rsb_iter_free(ri); | ||
219 | return NULL; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | return ri; | ||
224 | } | ||
225 | |||
226 | static void *seq_next(struct seq_file *file, void *iter_ptr, loff_t *pos) | ||
227 | { | ||
228 | struct rsb_iter *ri = iter_ptr; | ||
229 | |||
230 | (*pos)++; | ||
231 | |||
232 | if (rsb_iter_next(ri)) { | ||
233 | rsb_iter_free(ri); | ||
234 | return NULL; | ||
235 | } | ||
236 | |||
237 | return ri; | ||
238 | } | ||
239 | |||
240 | static void seq_stop(struct seq_file *file, void *iter_ptr) | ||
241 | { | ||
242 | /* nothing for now */ | ||
243 | } | ||
244 | |||
245 | static int seq_show(struct seq_file *file, void *iter_ptr) | ||
246 | { | ||
247 | struct rsb_iter *ri = iter_ptr; | ||
248 | |||
249 | print_resource(ri->rsb, file); | ||
250 | |||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | static struct seq_operations dlm_seq_ops = { | ||
255 | .start = seq_start, | ||
256 | .next = seq_next, | ||
257 | .stop = seq_stop, | ||
258 | .show = seq_show, | ||
259 | }; | ||
260 | |||
261 | static int do_open(struct inode *inode, struct file *file) | ||
262 | { | ||
263 | struct seq_file *seq; | ||
264 | int ret; | ||
265 | |||
266 | ret = seq_open(file, &dlm_seq_ops); | ||
267 | if (ret) | ||
268 | return ret; | ||
269 | |||
270 | seq = file->private_data; | ||
271 | seq->private = inode->u.generic_ip; | ||
272 | |||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | static struct file_operations dlm_fops = { | ||
277 | .owner = THIS_MODULE, | ||
278 | .open = do_open, | ||
279 | .read = seq_read, | ||
280 | .llseek = seq_lseek, | ||
281 | .release = seq_release | ||
282 | }; | ||
283 | |||
284 | int dlm_create_debug_file(struct dlm_ls *ls) | ||
285 | { | ||
286 | ls->ls_debug_dentry = debugfs_create_file(ls->ls_name, | ||
287 | S_IFREG | S_IRUGO, | ||
288 | dlm_root, | ||
289 | ls, | ||
290 | &dlm_fops); | ||
291 | return ls->ls_debug_dentry ? 0 : -ENOMEM; | ||
292 | } | ||
293 | |||
294 | void dlm_delete_debug_file(struct dlm_ls *ls) | ||
295 | { | ||
296 | if (ls->ls_debug_dentry) | ||
297 | debugfs_remove(ls->ls_debug_dentry); | ||
298 | } | ||
299 | |||
300 | int dlm_register_debugfs(void) | ||
301 | { | ||
302 | dlm_root = debugfs_create_dir("dlm", NULL); | ||
303 | return dlm_root ? 0 : -ENOMEM; | ||
304 | } | ||
305 | |||
306 | void dlm_unregister_debugfs(void) | ||
307 | { | ||
308 | debugfs_remove(dlm_root); | ||
309 | } | ||
310 | |||
diff --git a/fs/dlm/device.c b/fs/dlm/device.c new file mode 100644 index 000000000000..a8bf600ed13d --- /dev/null +++ b/fs/dlm/device.c | |||
@@ -0,0 +1,1084 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | /* | ||
15 | * device.c | ||
16 | * | ||
17 | * This is the userland interface to the DLM. | ||
18 | * | ||
19 | * The locking is done via a misc char device (find the | ||
20 | * registered minor number in /proc/misc). | ||
21 | * | ||
22 | * User code should not use this interface directly but | ||
23 | * call the library routines in libdlm.a instead. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/miscdevice.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/wait.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/file.h> | ||
32 | #include <linux/fs.h> | ||
33 | #include <linux/poll.h> | ||
34 | #include <linux/signal.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/idr.h> | ||
37 | |||
38 | #include <linux/dlm.h> | ||
39 | #include <linux/dlm_device.h> | ||
40 | |||
41 | #include "lvb_table.h" | ||
42 | |||
43 | static struct file_operations _dlm_fops; | ||
44 | static const char *name_prefix="dlm"; | ||
45 | static struct list_head user_ls_list; | ||
46 | static struct semaphore user_ls_lock; | ||
47 | |||
48 | /* Lock infos are stored in here indexed by lock ID */ | ||
49 | static DEFINE_IDR(lockinfo_idr); | ||
50 | static rwlock_t lockinfo_lock; | ||
51 | |||
52 | /* Flags in li_flags */ | ||
53 | #define LI_FLAG_COMPLETE 1 | ||
54 | #define LI_FLAG_FIRSTLOCK 2 | ||
55 | #define LI_FLAG_PERSISTENT 3 | ||
56 | |||
57 | /* flags in ls_flags*/ | ||
58 | #define LS_FLAG_DELETED 1 | ||
59 | #define LS_FLAG_AUTOFREE 2 | ||
60 | |||
61 | |||
62 | #define LOCKINFO_MAGIC 0x53595324 | ||
63 | |||
64 | struct lock_info { | ||
65 | uint32_t li_magic; | ||
66 | uint8_t li_cmd; | ||
67 | int8_t li_grmode; | ||
68 | int8_t li_rqmode; | ||
69 | struct dlm_lksb li_lksb; | ||
70 | wait_queue_head_t li_waitq; | ||
71 | unsigned long li_flags; | ||
72 | void __user *li_castparam; | ||
73 | void __user *li_castaddr; | ||
74 | void __user *li_bastparam; | ||
75 | void __user *li_bastaddr; | ||
76 | void __user *li_pend_bastparam; | ||
77 | void __user *li_pend_bastaddr; | ||
78 | struct list_head li_ownerqueue; | ||
79 | struct file_info *li_file; | ||
80 | struct dlm_lksb __user *li_user_lksb; | ||
81 | struct semaphore li_firstlock; | ||
82 | }; | ||
83 | |||
84 | /* A queued AST no less */ | ||
85 | struct ast_info { | ||
86 | struct dlm_lock_result result; | ||
87 | struct list_head list; | ||
88 | uint32_t lvb_updated; | ||
89 | uint32_t progress; /* How much has been read */ | ||
90 | }; | ||
91 | |||
92 | /* One of these per userland lockspace */ | ||
93 | struct user_ls { | ||
94 | void *ls_lockspace; | ||
95 | atomic_t ls_refcnt; | ||
96 | long ls_flags; | ||
97 | |||
98 | /* Passed into misc_register() */ | ||
99 | struct miscdevice ls_miscinfo; | ||
100 | struct list_head ls_list; | ||
101 | }; | ||
102 | |||
103 | /* misc_device info for the control device */ | ||
104 | static struct miscdevice ctl_device; | ||
105 | |||
106 | /* | ||
107 | * Stuff we hang off the file struct. | ||
108 | * The first two are to cope with unlocking all the | ||
109 | * locks help by a process when it dies. | ||
110 | */ | ||
111 | struct file_info { | ||
112 | struct list_head fi_li_list; /* List of active lock_infos */ | ||
113 | spinlock_t fi_li_lock; | ||
114 | struct list_head fi_ast_list; /* Queue of ASTs to be delivered */ | ||
115 | spinlock_t fi_ast_lock; | ||
116 | wait_queue_head_t fi_wait; | ||
117 | struct user_ls *fi_ls; | ||
118 | atomic_t fi_refcnt; /* Number of users */ | ||
119 | unsigned long fi_flags; /* Bit 1 means the device is open */ | ||
120 | }; | ||
121 | |||
122 | |||
123 | /* get and put ops for file_info. | ||
124 | Actually I don't really like "get" and "put", but everyone | ||
125 | else seems to use them and I can't think of anything | ||
126 | nicer at the moment */ | ||
127 | static void get_file_info(struct file_info *f) | ||
128 | { | ||
129 | atomic_inc(&f->fi_refcnt); | ||
130 | } | ||
131 | |||
132 | static void put_file_info(struct file_info *f) | ||
133 | { | ||
134 | if (atomic_dec_and_test(&f->fi_refcnt)) | ||
135 | kfree(f); | ||
136 | } | ||
137 | |||
138 | static void release_lockinfo(struct lock_info *li) | ||
139 | { | ||
140 | put_file_info(li->li_file); | ||
141 | |||
142 | write_lock(&lockinfo_lock); | ||
143 | idr_remove(&lockinfo_idr, li->li_lksb.sb_lkid); | ||
144 | write_unlock(&lockinfo_lock); | ||
145 | |||
146 | if (li->li_lksb.sb_lvbptr) | ||
147 | kfree(li->li_lksb.sb_lvbptr); | ||
148 | kfree(li); | ||
149 | |||
150 | module_put(THIS_MODULE); | ||
151 | } | ||
152 | |||
153 | static struct lock_info *get_lockinfo(uint32_t lockid) | ||
154 | { | ||
155 | struct lock_info *li; | ||
156 | |||
157 | read_lock(&lockinfo_lock); | ||
158 | li = idr_find(&lockinfo_idr, lockid); | ||
159 | read_unlock(&lockinfo_lock); | ||
160 | |||
161 | return li; | ||
162 | } | ||
163 | |||
164 | static int add_lockinfo(struct lock_info *li) | ||
165 | { | ||
166 | int n; | ||
167 | int r; | ||
168 | int ret = -EINVAL; | ||
169 | |||
170 | write_lock(&lockinfo_lock); | ||
171 | |||
172 | if (idr_find(&lockinfo_idr, li->li_lksb.sb_lkid)) | ||
173 | goto out_up; | ||
174 | |||
175 | ret = -ENOMEM; | ||
176 | r = idr_pre_get(&lockinfo_idr, GFP_KERNEL); | ||
177 | if (!r) | ||
178 | goto out_up; | ||
179 | |||
180 | r = idr_get_new_above(&lockinfo_idr, li, li->li_lksb.sb_lkid, &n); | ||
181 | if (r) | ||
182 | goto out_up; | ||
183 | |||
184 | if (n != li->li_lksb.sb_lkid) { | ||
185 | idr_remove(&lockinfo_idr, n); | ||
186 | goto out_up; | ||
187 | } | ||
188 | |||
189 | ret = 0; | ||
190 | |||
191 | out_up: | ||
192 | write_unlock(&lockinfo_lock); | ||
193 | |||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | |||
198 | static struct user_ls *__find_lockspace(int minor) | ||
199 | { | ||
200 | struct user_ls *lsinfo; | ||
201 | |||
202 | list_for_each_entry(lsinfo, &user_ls_list, ls_list) { | ||
203 | if (lsinfo->ls_miscinfo.minor == minor) | ||
204 | return lsinfo; | ||
205 | } | ||
206 | return NULL; | ||
207 | } | ||
208 | |||
209 | /* Find a lockspace struct given the device minor number */ | ||
210 | static struct user_ls *find_lockspace(int minor) | ||
211 | { | ||
212 | struct user_ls *lsinfo; | ||
213 | |||
214 | down(&user_ls_lock); | ||
215 | lsinfo = __find_lockspace(minor); | ||
216 | up(&user_ls_lock); | ||
217 | |||
218 | return lsinfo; | ||
219 | } | ||
220 | |||
221 | static void add_lockspace_to_list(struct user_ls *lsinfo) | ||
222 | { | ||
223 | down(&user_ls_lock); | ||
224 | list_add(&lsinfo->ls_list, &user_ls_list); | ||
225 | up(&user_ls_lock); | ||
226 | } | ||
227 | |||
228 | /* Register a lockspace with the DLM and create a misc | ||
229 | device for userland to access it */ | ||
230 | static int register_lockspace(char *name, struct user_ls **ls, int flags) | ||
231 | { | ||
232 | struct user_ls *newls; | ||
233 | int status; | ||
234 | int namelen; | ||
235 | |||
236 | namelen = strlen(name)+strlen(name_prefix)+2; | ||
237 | |||
238 | newls = kmalloc(sizeof(struct user_ls), GFP_KERNEL); | ||
239 | if (!newls) | ||
240 | return -ENOMEM; | ||
241 | memset(newls, 0, sizeof(struct user_ls)); | ||
242 | |||
243 | newls->ls_miscinfo.name = kmalloc(namelen, GFP_KERNEL); | ||
244 | if (!newls->ls_miscinfo.name) { | ||
245 | kfree(newls); | ||
246 | return -ENOMEM; | ||
247 | } | ||
248 | |||
249 | status = dlm_new_lockspace(name, strlen(name), &newls->ls_lockspace, 0, | ||
250 | DLM_USER_LVB_LEN); | ||
251 | if (status != 0) { | ||
252 | kfree(newls->ls_miscinfo.name); | ||
253 | kfree(newls); | ||
254 | return status; | ||
255 | } | ||
256 | |||
257 | snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s", | ||
258 | name_prefix, name); | ||
259 | |||
260 | newls->ls_miscinfo.fops = &_dlm_fops; | ||
261 | newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR; | ||
262 | |||
263 | status = misc_register(&newls->ls_miscinfo); | ||
264 | if (status) { | ||
265 | printk(KERN_ERR "dlm: misc register failed for %s\n", name); | ||
266 | dlm_release_lockspace(newls->ls_lockspace, 0); | ||
267 | kfree(newls->ls_miscinfo.name); | ||
268 | kfree(newls); | ||
269 | return status; | ||
270 | } | ||
271 | |||
272 | if (flags & DLM_USER_LSFLG_AUTOFREE) | ||
273 | set_bit(LS_FLAG_AUTOFREE, &newls->ls_flags); | ||
274 | |||
275 | add_lockspace_to_list(newls); | ||
276 | *ls = newls; | ||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | /* Called with the user_ls_lock semaphore held */ | ||
281 | static int unregister_lockspace(struct user_ls *lsinfo, int force) | ||
282 | { | ||
283 | int status; | ||
284 | |||
285 | status = dlm_release_lockspace(lsinfo->ls_lockspace, force); | ||
286 | if (status) | ||
287 | return status; | ||
288 | |||
289 | status = misc_deregister(&lsinfo->ls_miscinfo); | ||
290 | if (status) | ||
291 | return status; | ||
292 | |||
293 | list_del(&lsinfo->ls_list); | ||
294 | set_bit(LS_FLAG_DELETED, &lsinfo->ls_flags); | ||
295 | lsinfo->ls_lockspace = NULL; | ||
296 | if (atomic_read(&lsinfo->ls_refcnt) == 0) { | ||
297 | kfree(lsinfo->ls_miscinfo.name); | ||
298 | kfree(lsinfo); | ||
299 | } | ||
300 | |||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | /* Add it to userland's AST queue */ | ||
305 | static void add_to_astqueue(struct lock_info *li, void *astaddr, void *astparam, | ||
306 | int lvb_updated) | ||
307 | { | ||
308 | struct ast_info *ast = kmalloc(sizeof(struct ast_info), GFP_KERNEL); | ||
309 | if (!ast) | ||
310 | return; | ||
311 | |||
312 | memset(ast, 0, sizeof(*ast)); | ||
313 | ast->result.user_astparam = astparam; | ||
314 | ast->result.user_astaddr = astaddr; | ||
315 | ast->result.user_lksb = li->li_user_lksb; | ||
316 | memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb)); | ||
317 | ast->lvb_updated = lvb_updated; | ||
318 | |||
319 | spin_lock(&li->li_file->fi_ast_lock); | ||
320 | list_add_tail(&ast->list, &li->li_file->fi_ast_list); | ||
321 | spin_unlock(&li->li_file->fi_ast_lock); | ||
322 | wake_up_interruptible(&li->li_file->fi_wait); | ||
323 | } | ||
324 | |||
325 | static void bast_routine(void *param, int mode) | ||
326 | { | ||
327 | struct lock_info *li = param; | ||
328 | |||
329 | if (li && li->li_bastaddr) | ||
330 | add_to_astqueue(li, li->li_bastaddr, li->li_bastparam, 0); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * This is the kernel's AST routine. | ||
335 | * All lock, unlock & query operations complete here. | ||
336 | * The only syncronous ops are those done during device close. | ||
337 | */ | ||
338 | static void ast_routine(void *param) | ||
339 | { | ||
340 | struct lock_info *li = param; | ||
341 | |||
342 | /* Param may be NULL if a persistent lock is unlocked by someone else */ | ||
343 | if (!li) | ||
344 | return; | ||
345 | |||
346 | /* If this is a succesful conversion then activate the blocking ast | ||
347 | * args from the conversion request */ | ||
348 | if (!test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && | ||
349 | li->li_lksb.sb_status == 0) { | ||
350 | |||
351 | li->li_bastparam = li->li_pend_bastparam; | ||
352 | li->li_bastaddr = li->li_pend_bastaddr; | ||
353 | li->li_pend_bastaddr = NULL; | ||
354 | } | ||
355 | |||
356 | /* If it's an async request then post data to the user's AST queue. */ | ||
357 | if (li->li_castaddr) { | ||
358 | int lvb_updated = 0; | ||
359 | |||
360 | /* See if the lvb has been updated */ | ||
361 | if (dlm_lvb_operations[li->li_grmode+1][li->li_rqmode+1] == 1) | ||
362 | lvb_updated = 1; | ||
363 | |||
364 | if (li->li_lksb.sb_status == 0) | ||
365 | li->li_grmode = li->li_rqmode; | ||
366 | |||
367 | /* Only queue AST if the device is still open */ | ||
368 | if (test_bit(1, &li->li_file->fi_flags)) | ||
369 | add_to_astqueue(li, li->li_castaddr, li->li_castparam, | ||
370 | lvb_updated); | ||
371 | |||
372 | /* If it's a new lock operation that failed, then | ||
373 | * remove it from the owner queue and free the | ||
374 | * lock_info. | ||
375 | */ | ||
376 | if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && | ||
377 | li->li_lksb.sb_status != 0) { | ||
378 | |||
379 | /* Wait till dlm_lock() has finished */ | ||
380 | down(&li->li_firstlock); | ||
381 | up(&li->li_firstlock); | ||
382 | |||
383 | spin_lock(&li->li_file->fi_li_lock); | ||
384 | list_del(&li->li_ownerqueue); | ||
385 | spin_unlock(&li->li_file->fi_li_lock); | ||
386 | release_lockinfo(li); | ||
387 | return; | ||
388 | } | ||
389 | /* Free unlocks & queries */ | ||
390 | if (li->li_lksb.sb_status == -DLM_EUNLOCK || | ||
391 | li->li_cmd == DLM_USER_QUERY) { | ||
392 | release_lockinfo(li); | ||
393 | } | ||
394 | } else { | ||
395 | /* Synchronous request, just wake up the caller */ | ||
396 | set_bit(LI_FLAG_COMPLETE, &li->li_flags); | ||
397 | wake_up_interruptible(&li->li_waitq); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * Wait for the lock op to complete and return the status. | ||
403 | */ | ||
404 | static int wait_for_ast(struct lock_info *li) | ||
405 | { | ||
406 | /* Wait for the AST routine to complete */ | ||
407 | set_task_state(current, TASK_INTERRUPTIBLE); | ||
408 | while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags)) | ||
409 | schedule(); | ||
410 | |||
411 | set_task_state(current, TASK_RUNNING); | ||
412 | |||
413 | return li->li_lksb.sb_status; | ||
414 | } | ||
415 | |||
416 | |||
417 | /* Open on control device */ | ||
418 | static int dlm_ctl_open(struct inode *inode, struct file *file) | ||
419 | { | ||
420 | file->private_data = NULL; | ||
421 | return 0; | ||
422 | } | ||
423 | |||
424 | /* Close on control device */ | ||
425 | static int dlm_ctl_close(struct inode *inode, struct file *file) | ||
426 | { | ||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | /* Open on lockspace device */ | ||
431 | static int dlm_open(struct inode *inode, struct file *file) | ||
432 | { | ||
433 | struct file_info *f; | ||
434 | struct user_ls *lsinfo; | ||
435 | |||
436 | lsinfo = find_lockspace(iminor(inode)); | ||
437 | if (!lsinfo) | ||
438 | return -ENOENT; | ||
439 | |||
440 | f = kmalloc(sizeof(struct file_info), GFP_KERNEL); | ||
441 | if (!f) | ||
442 | return -ENOMEM; | ||
443 | |||
444 | atomic_inc(&lsinfo->ls_refcnt); | ||
445 | INIT_LIST_HEAD(&f->fi_li_list); | ||
446 | INIT_LIST_HEAD(&f->fi_ast_list); | ||
447 | spin_lock_init(&f->fi_li_lock); | ||
448 | spin_lock_init(&f->fi_ast_lock); | ||
449 | init_waitqueue_head(&f->fi_wait); | ||
450 | f->fi_ls = lsinfo; | ||
451 | f->fi_flags = 0; | ||
452 | get_file_info(f); | ||
453 | set_bit(1, &f->fi_flags); | ||
454 | |||
455 | file->private_data = f; | ||
456 | |||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | /* Check the user's version matches ours */ | ||
461 | static int check_version(struct dlm_write_request *req) | ||
462 | { | ||
463 | if (req->version[0] != DLM_DEVICE_VERSION_MAJOR || | ||
464 | (req->version[0] == DLM_DEVICE_VERSION_MAJOR && | ||
465 | req->version[1] > DLM_DEVICE_VERSION_MINOR)) { | ||
466 | |||
467 | printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " | ||
468 | "user (%d.%d.%d) kernel (%d.%d.%d)\n", | ||
469 | current->comm, | ||
470 | current->pid, | ||
471 | req->version[0], | ||
472 | req->version[1], | ||
473 | req->version[2], | ||
474 | DLM_DEVICE_VERSION_MAJOR, | ||
475 | DLM_DEVICE_VERSION_MINOR, | ||
476 | DLM_DEVICE_VERSION_PATCH); | ||
477 | return -EINVAL; | ||
478 | } | ||
479 | return 0; | ||
480 | } | ||
481 | |||
482 | /* Close on lockspace device */ | ||
483 | static int dlm_close(struct inode *inode, struct file *file) | ||
484 | { | ||
485 | struct file_info *f = file->private_data; | ||
486 | struct lock_info li; | ||
487 | struct lock_info *old_li, *safe; | ||
488 | sigset_t tmpsig; | ||
489 | sigset_t allsigs; | ||
490 | struct user_ls *lsinfo; | ||
491 | DECLARE_WAITQUEUE(wq, current); | ||
492 | |||
493 | lsinfo = find_lockspace(iminor(inode)); | ||
494 | if (!lsinfo) | ||
495 | return -ENOENT; | ||
496 | |||
497 | /* Mark this closed so that ASTs will not be delivered any more */ | ||
498 | clear_bit(1, &f->fi_flags); | ||
499 | |||
500 | /* Block signals while we are doing this */ | ||
501 | sigfillset(&allsigs); | ||
502 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | ||
503 | |||
504 | /* We use our own lock_info struct here, so that any | ||
505 | * outstanding "real" ASTs will be delivered with the | ||
506 | * corresponding "real" params, thus freeing the lock_info | ||
507 | * that belongs the lock. This catches the corner case where | ||
508 | * a lock is BUSY when we try to unlock it here | ||
509 | */ | ||
510 | memset(&li, 0, sizeof(li)); | ||
511 | clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | ||
512 | init_waitqueue_head(&li.li_waitq); | ||
513 | add_wait_queue(&li.li_waitq, &wq); | ||
514 | |||
515 | /* | ||
516 | * Free any outstanding locks, they are on the | ||
517 | * list in LIFO order so there should be no problems | ||
518 | * about unlocking parents before children. | ||
519 | */ | ||
520 | list_for_each_entry_safe(old_li, safe, &f->fi_li_list, li_ownerqueue) { | ||
521 | int status; | ||
522 | int flags = 0; | ||
523 | |||
524 | /* Don't unlock persistent locks, just mark them orphaned */ | ||
525 | if (test_bit(LI_FLAG_PERSISTENT, &old_li->li_flags)) { | ||
526 | list_del(&old_li->li_ownerqueue); | ||
527 | |||
528 | /* Update master copy */ | ||
529 | /* TODO: Check locking core updates the local and | ||
530 | remote ORPHAN flags */ | ||
531 | li.li_lksb.sb_lkid = old_li->li_lksb.sb_lkid; | ||
532 | status = dlm_lock(f->fi_ls->ls_lockspace, | ||
533 | old_li->li_grmode, &li.li_lksb, | ||
534 | DLM_LKF_CONVERT|DLM_LKF_ORPHAN, | ||
535 | NULL, 0, 0, ast_routine, NULL, | ||
536 | NULL, NULL); | ||
537 | if (status != 0) | ||
538 | printk("dlm: Error orphaning lock %x: %d\n", | ||
539 | old_li->li_lksb.sb_lkid, status); | ||
540 | |||
541 | /* But tidy our references in it */ | ||
542 | release_lockinfo(old_li); | ||
543 | continue; | ||
544 | } | ||
545 | |||
546 | clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | ||
547 | |||
548 | flags = DLM_LKF_FORCEUNLOCK; | ||
549 | if (old_li->li_grmode >= DLM_LOCK_PW) | ||
550 | flags |= DLM_LKF_IVVALBLK; | ||
551 | |||
552 | status = dlm_unlock(f->fi_ls->ls_lockspace, | ||
553 | old_li->li_lksb.sb_lkid, flags, | ||
554 | &li.li_lksb, &li); | ||
555 | |||
556 | /* Must wait for it to complete as the next lock could be its | ||
557 | * parent */ | ||
558 | if (status == 0) | ||
559 | wait_for_ast(&li); | ||
560 | |||
561 | /* Unlock suceeded, free the lock_info struct. */ | ||
562 | if (status == 0) | ||
563 | release_lockinfo(old_li); | ||
564 | } | ||
565 | |||
566 | remove_wait_queue(&li.li_waitq, &wq); | ||
567 | |||
568 | /* | ||
569 | * If this is the last reference to the lockspace | ||
570 | * then free the struct. If it's an AUTOFREE lockspace | ||
571 | * then free the whole thing. | ||
572 | */ | ||
573 | down(&user_ls_lock); | ||
574 | if (atomic_dec_and_test(&lsinfo->ls_refcnt)) { | ||
575 | |||
576 | if (lsinfo->ls_lockspace) { | ||
577 | if (test_bit(LS_FLAG_AUTOFREE, &lsinfo->ls_flags)) { | ||
578 | unregister_lockspace(lsinfo, 1); | ||
579 | } | ||
580 | } else { | ||
581 | kfree(lsinfo->ls_miscinfo.name); | ||
582 | kfree(lsinfo); | ||
583 | } | ||
584 | } | ||
585 | up(&user_ls_lock); | ||
586 | put_file_info(f); | ||
587 | |||
588 | /* Restore signals */ | ||
589 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | ||
590 | recalc_sigpending(); | ||
591 | |||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | static int do_user_create_lockspace(struct file_info *fi, uint8_t cmd, | ||
596 | struct dlm_lspace_params *kparams) | ||
597 | { | ||
598 | int status; | ||
599 | struct user_ls *lsinfo; | ||
600 | |||
601 | if (!capable(CAP_SYS_ADMIN)) | ||
602 | return -EPERM; | ||
603 | |||
604 | status = register_lockspace(kparams->name, &lsinfo, kparams->flags); | ||
605 | |||
606 | /* If it succeeded then return the minor number */ | ||
607 | if (status == 0) | ||
608 | status = lsinfo->ls_miscinfo.minor; | ||
609 | |||
610 | return status; | ||
611 | } | ||
612 | |||
613 | static int do_user_remove_lockspace(struct file_info *fi, uint8_t cmd, | ||
614 | struct dlm_lspace_params *kparams) | ||
615 | { | ||
616 | int status; | ||
617 | int force = 1; | ||
618 | struct user_ls *lsinfo; | ||
619 | |||
620 | if (!capable(CAP_SYS_ADMIN)) | ||
621 | return -EPERM; | ||
622 | |||
623 | down(&user_ls_lock); | ||
624 | lsinfo = __find_lockspace(kparams->minor); | ||
625 | if (!lsinfo) { | ||
626 | up(&user_ls_lock); | ||
627 | return -EINVAL; | ||
628 | } | ||
629 | |||
630 | if (kparams->flags & DLM_USER_LSFLG_FORCEFREE) | ||
631 | force = 2; | ||
632 | |||
633 | status = unregister_lockspace(lsinfo, force); | ||
634 | up(&user_ls_lock); | ||
635 | |||
636 | return status; | ||
637 | } | ||
638 | |||
639 | /* Read call, might block if no ASTs are waiting. | ||
640 | * It will only ever return one message at a time, regardless | ||
641 | * of how many are pending. | ||
642 | */ | ||
643 | static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count, | ||
644 | loff_t *ppos) | ||
645 | { | ||
646 | struct file_info *fi = file->private_data; | ||
647 | struct ast_info *ast; | ||
648 | int data_size; | ||
649 | int offset; | ||
650 | DECLARE_WAITQUEUE(wait, current); | ||
651 | |||
652 | if (count < sizeof(struct dlm_lock_result)) | ||
653 | return -EINVAL; | ||
654 | |||
655 | spin_lock(&fi->fi_ast_lock); | ||
656 | if (list_empty(&fi->fi_ast_list)) { | ||
657 | |||
658 | /* No waiting ASTs. | ||
659 | * Return EOF if the lockspace been deleted. | ||
660 | */ | ||
661 | if (test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags)) | ||
662 | return 0; | ||
663 | |||
664 | if (file->f_flags & O_NONBLOCK) { | ||
665 | spin_unlock(&fi->fi_ast_lock); | ||
666 | return -EAGAIN; | ||
667 | } | ||
668 | |||
669 | add_wait_queue(&fi->fi_wait, &wait); | ||
670 | |||
671 | repeat: | ||
672 | set_current_state(TASK_INTERRUPTIBLE); | ||
673 | if (list_empty(&fi->fi_ast_list) && | ||
674 | !signal_pending(current)) { | ||
675 | |||
676 | spin_unlock(&fi->fi_ast_lock); | ||
677 | schedule(); | ||
678 | spin_lock(&fi->fi_ast_lock); | ||
679 | goto repeat; | ||
680 | } | ||
681 | |||
682 | current->state = TASK_RUNNING; | ||
683 | remove_wait_queue(&fi->fi_wait, &wait); | ||
684 | |||
685 | if (signal_pending(current)) { | ||
686 | spin_unlock(&fi->fi_ast_lock); | ||
687 | return -ERESTARTSYS; | ||
688 | } | ||
689 | } | ||
690 | |||
691 | ast = list_entry(fi->fi_ast_list.next, struct ast_info, list); | ||
692 | list_del(&ast->list); | ||
693 | spin_unlock(&fi->fi_ast_lock); | ||
694 | |||
695 | /* Work out the size of the returned data */ | ||
696 | data_size = sizeof(struct dlm_lock_result); | ||
697 | if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) | ||
698 | data_size += DLM_USER_LVB_LEN; | ||
699 | |||
700 | offset = sizeof(struct dlm_lock_result); | ||
701 | |||
702 | /* Room for the extended data ? */ | ||
703 | if (count >= data_size) { | ||
704 | |||
705 | if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) { | ||
706 | if (copy_to_user(buffer+offset, | ||
707 | ast->result.lksb.sb_lvbptr, | ||
708 | DLM_USER_LVB_LEN)) | ||
709 | return -EFAULT; | ||
710 | ast->result.lvb_offset = offset; | ||
711 | offset += DLM_USER_LVB_LEN; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | ast->result.length = data_size; | ||
716 | /* Copy the header now it has all the offsets in it */ | ||
717 | if (copy_to_user(buffer, &ast->result, sizeof(struct dlm_lock_result))) | ||
718 | offset = -EFAULT; | ||
719 | |||
720 | /* If we only returned a header and there's more to come then put it | ||
721 | back on the list */ | ||
722 | if (count < data_size) { | ||
723 | spin_lock(&fi->fi_ast_lock); | ||
724 | list_add(&ast->list, &fi->fi_ast_list); | ||
725 | spin_unlock(&fi->fi_ast_lock); | ||
726 | } else | ||
727 | kfree(ast); | ||
728 | return offset; | ||
729 | } | ||
730 | |||
731 | static unsigned int dlm_poll(struct file *file, poll_table *wait) | ||
732 | { | ||
733 | struct file_info *fi = file->private_data; | ||
734 | |||
735 | poll_wait(file, &fi->fi_wait, wait); | ||
736 | |||
737 | spin_lock(&fi->fi_ast_lock); | ||
738 | if (!list_empty(&fi->fi_ast_list)) { | ||
739 | spin_unlock(&fi->fi_ast_lock); | ||
740 | return POLLIN | POLLRDNORM; | ||
741 | } | ||
742 | |||
743 | spin_unlock(&fi->fi_ast_lock); | ||
744 | return 0; | ||
745 | } | ||
746 | |||
747 | static struct lock_info *allocate_lockinfo(struct file_info *fi, uint8_t cmd, | ||
748 | struct dlm_lock_params *kparams) | ||
749 | { | ||
750 | struct lock_info *li; | ||
751 | |||
752 | if (!try_module_get(THIS_MODULE)) | ||
753 | return NULL; | ||
754 | |||
755 | li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); | ||
756 | if (li) { | ||
757 | li->li_magic = LOCKINFO_MAGIC; | ||
758 | li->li_file = fi; | ||
759 | li->li_cmd = cmd; | ||
760 | li->li_flags = 0; | ||
761 | li->li_grmode = -1; | ||
762 | li->li_rqmode = -1; | ||
763 | li->li_pend_bastparam = NULL; | ||
764 | li->li_pend_bastaddr = NULL; | ||
765 | li->li_castaddr = NULL; | ||
766 | li->li_castparam = NULL; | ||
767 | li->li_lksb.sb_lvbptr = NULL; | ||
768 | li->li_bastaddr = kparams->bastaddr; | ||
769 | li->li_bastparam = kparams->bastparam; | ||
770 | |||
771 | get_file_info(fi); | ||
772 | } | ||
773 | return li; | ||
774 | } | ||
775 | |||
776 | static int do_user_lock(struct file_info *fi, uint8_t cmd, | ||
777 | struct dlm_lock_params *kparams) | ||
778 | { | ||
779 | struct lock_info *li; | ||
780 | int status; | ||
781 | |||
782 | /* | ||
783 | * Validate things that we need to have correct. | ||
784 | */ | ||
785 | if (!kparams->castaddr) | ||
786 | return -EINVAL; | ||
787 | |||
788 | if (!kparams->lksb) | ||
789 | return -EINVAL; | ||
790 | |||
791 | /* Persistent child locks are not available yet */ | ||
792 | if ((kparams->flags & DLM_LKF_PERSISTENT) && kparams->parent) | ||
793 | return -EINVAL; | ||
794 | |||
795 | /* For conversions, there should already be a lockinfo struct, | ||
796 | unless we are adopting an orphaned persistent lock */ | ||
797 | if (kparams->flags & DLM_LKF_CONVERT) { | ||
798 | |||
799 | li = get_lockinfo(kparams->lkid); | ||
800 | |||
801 | /* If this is a persistent lock we will have to create a | ||
802 | lockinfo again */ | ||
803 | if (!li && DLM_LKF_PERSISTENT) { | ||
804 | li = allocate_lockinfo(fi, cmd, kparams); | ||
805 | |||
806 | li->li_lksb.sb_lkid = kparams->lkid; | ||
807 | li->li_castaddr = kparams->castaddr; | ||
808 | li->li_castparam = kparams->castparam; | ||
809 | |||
810 | /* OK, this isn;t exactly a FIRSTLOCK but it is the | ||
811 | first time we've used this lockinfo, and if things | ||
812 | fail we want rid of it */ | ||
813 | init_MUTEX_LOCKED(&li->li_firstlock); | ||
814 | set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | ||
815 | add_lockinfo(li); | ||
816 | |||
817 | /* TODO: do a query to get the current state ?? */ | ||
818 | } | ||
819 | if (!li) | ||
820 | return -EINVAL; | ||
821 | |||
822 | if (li->li_magic != LOCKINFO_MAGIC) | ||
823 | return -EINVAL; | ||
824 | |||
825 | /* For conversions don't overwrite the current blocking AST | ||
826 | info so that: | ||
827 | a) if a blocking AST fires before the conversion is queued | ||
828 | it runs the current handler | ||
829 | b) if the conversion is cancelled, the original blocking AST | ||
830 | declaration is active | ||
831 | The pend_ info is made active when the conversion | ||
832 | completes. | ||
833 | */ | ||
834 | li->li_pend_bastaddr = kparams->bastaddr; | ||
835 | li->li_pend_bastparam = kparams->bastparam; | ||
836 | } else { | ||
837 | li = allocate_lockinfo(fi, cmd, kparams); | ||
838 | if (!li) | ||
839 | return -ENOMEM; | ||
840 | |||
841 | /* semaphore to allow us to complete our work before | ||
842 | the AST routine runs. In fact we only need (and use) this | ||
843 | when the initial lock fails */ | ||
844 | init_MUTEX_LOCKED(&li->li_firstlock); | ||
845 | set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | ||
846 | } | ||
847 | |||
848 | li->li_user_lksb = kparams->lksb; | ||
849 | li->li_castaddr = kparams->castaddr; | ||
850 | li->li_castparam = kparams->castparam; | ||
851 | li->li_lksb.sb_lkid = kparams->lkid; | ||
852 | li->li_rqmode = kparams->mode; | ||
853 | if (kparams->flags & DLM_LKF_PERSISTENT) | ||
854 | set_bit(LI_FLAG_PERSISTENT, &li->li_flags); | ||
855 | |||
856 | /* Copy in the value block */ | ||
857 | if (kparams->flags & DLM_LKF_VALBLK) { | ||
858 | if (!li->li_lksb.sb_lvbptr) { | ||
859 | li->li_lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, | ||
860 | GFP_KERNEL); | ||
861 | if (!li->li_lksb.sb_lvbptr) { | ||
862 | status = -ENOMEM; | ||
863 | goto out_err; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | memcpy(li->li_lksb.sb_lvbptr, kparams->lvb, DLM_USER_LVB_LEN); | ||
868 | } | ||
869 | |||
870 | /* Lock it ... */ | ||
871 | status = dlm_lock(fi->fi_ls->ls_lockspace, | ||
872 | kparams->mode, &li->li_lksb, | ||
873 | kparams->flags, | ||
874 | kparams->name, kparams->namelen, | ||
875 | kparams->parent, | ||
876 | ast_routine, | ||
877 | li, | ||
878 | (li->li_pend_bastaddr || li->li_bastaddr) ? | ||
879 | bast_routine : NULL, | ||
880 | kparams->range.ra_end ? &kparams->range : NULL); | ||
881 | if (status) | ||
882 | goto out_err; | ||
883 | |||
884 | /* If it succeeded (this far) with a new lock then keep track of | ||
885 | it on the file's lockinfo list */ | ||
886 | if (!status && test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) { | ||
887 | |||
888 | spin_lock(&fi->fi_li_lock); | ||
889 | list_add(&li->li_ownerqueue, &fi->fi_li_list); | ||
890 | spin_unlock(&fi->fi_li_lock); | ||
891 | if (add_lockinfo(li)) | ||
892 | printk(KERN_WARNING "Add lockinfo failed\n"); | ||
893 | |||
894 | up(&li->li_firstlock); | ||
895 | } | ||
896 | |||
897 | /* Return the lockid as the user needs it /now/ */ | ||
898 | return li->li_lksb.sb_lkid; | ||
899 | |||
900 | out_err: | ||
901 | if (test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) | ||
902 | release_lockinfo(li); | ||
903 | return status; | ||
904 | |||
905 | } | ||
906 | |||
907 | static int do_user_unlock(struct file_info *fi, uint8_t cmd, | ||
908 | struct dlm_lock_params *kparams) | ||
909 | { | ||
910 | struct lock_info *li; | ||
911 | int status; | ||
912 | int convert_cancel = 0; | ||
913 | |||
914 | li = get_lockinfo(kparams->lkid); | ||
915 | if (!li) { | ||
916 | li = allocate_lockinfo(fi, cmd, kparams); | ||
917 | spin_lock(&fi->fi_li_lock); | ||
918 | list_add(&li->li_ownerqueue, &fi->fi_li_list); | ||
919 | spin_unlock(&fi->fi_li_lock); | ||
920 | } | ||
921 | if (!li) | ||
922 | return -ENOMEM; | ||
923 | |||
924 | if (li->li_magic != LOCKINFO_MAGIC) | ||
925 | return -EINVAL; | ||
926 | |||
927 | li->li_user_lksb = kparams->lksb; | ||
928 | li->li_castparam = kparams->castparam; | ||
929 | li->li_cmd = cmd; | ||
930 | |||
931 | /* Cancelling a conversion doesn't remove the lock...*/ | ||
932 | if (kparams->flags & DLM_LKF_CANCEL && li->li_grmode != -1) | ||
933 | convert_cancel = 1; | ||
934 | |||
935 | /* dlm_unlock() passes a 0 for castaddr which means don't overwrite | ||
936 | the existing li_castaddr as that's the completion routine for | ||
937 | unlocks. dlm_unlock_wait() specifies a new AST routine to be | ||
938 | executed when the unlock completes. */ | ||
939 | if (kparams->castaddr) | ||
940 | li->li_castaddr = kparams->castaddr; | ||
941 | |||
942 | /* Use existing lksb & astparams */ | ||
943 | status = dlm_unlock(fi->fi_ls->ls_lockspace, | ||
944 | kparams->lkid, | ||
945 | kparams->flags, &li->li_lksb, li); | ||
946 | |||
947 | if (!status && !convert_cancel) { | ||
948 | spin_lock(&fi->fi_li_lock); | ||
949 | list_del(&li->li_ownerqueue); | ||
950 | spin_unlock(&fi->fi_li_lock); | ||
951 | } | ||
952 | |||
953 | return status; | ||
954 | } | ||
955 | |||
956 | /* Write call, submit a locking request */ | ||
957 | static ssize_t dlm_write(struct file *file, const char __user *buffer, | ||
958 | size_t count, loff_t *ppos) | ||
959 | { | ||
960 | struct file_info *fi = file->private_data; | ||
961 | struct dlm_write_request *kparams; | ||
962 | sigset_t tmpsig; | ||
963 | sigset_t allsigs; | ||
964 | int status; | ||
965 | |||
966 | /* -1 because lock name is optional */ | ||
967 | if (count < sizeof(struct dlm_write_request)-1) | ||
968 | return -EINVAL; | ||
969 | |||
970 | /* Has the lockspace been deleted */ | ||
971 | if (fi && test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags)) | ||
972 | return -ENOENT; | ||
973 | |||
974 | kparams = kmalloc(count, GFP_KERNEL); | ||
975 | if (!kparams) | ||
976 | return -ENOMEM; | ||
977 | |||
978 | status = -EFAULT; | ||
979 | /* Get the command info */ | ||
980 | if (copy_from_user(kparams, buffer, count)) | ||
981 | goto out_free; | ||
982 | |||
983 | status = -EBADE; | ||
984 | if (check_version(kparams)) | ||
985 | goto out_free; | ||
986 | |||
987 | /* Block signals while we are doing this */ | ||
988 | sigfillset(&allsigs); | ||
989 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | ||
990 | |||
991 | status = -EINVAL; | ||
992 | switch (kparams->cmd) | ||
993 | { | ||
994 | case DLM_USER_LOCK: | ||
995 | if (!fi) goto out_sig; | ||
996 | status = do_user_lock(fi, kparams->cmd, &kparams->i.lock); | ||
997 | break; | ||
998 | |||
999 | case DLM_USER_UNLOCK: | ||
1000 | if (!fi) goto out_sig; | ||
1001 | status = do_user_unlock(fi, kparams->cmd, &kparams->i.lock); | ||
1002 | break; | ||
1003 | |||
1004 | case DLM_USER_CREATE_LOCKSPACE: | ||
1005 | if (fi) goto out_sig; | ||
1006 | status = do_user_create_lockspace(fi, kparams->cmd, | ||
1007 | &kparams->i.lspace); | ||
1008 | break; | ||
1009 | |||
1010 | case DLM_USER_REMOVE_LOCKSPACE: | ||
1011 | if (fi) goto out_sig; | ||
1012 | status = do_user_remove_lockspace(fi, kparams->cmd, | ||
1013 | &kparams->i.lspace); | ||
1014 | break; | ||
1015 | default: | ||
1016 | printk("Unknown command passed to DLM device : %d\n", | ||
1017 | kparams->cmd); | ||
1018 | break; | ||
1019 | } | ||
1020 | |||
1021 | out_sig: | ||
1022 | /* Restore signals */ | ||
1023 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | ||
1024 | recalc_sigpending(); | ||
1025 | |||
1026 | out_free: | ||
1027 | kfree(kparams); | ||
1028 | if (status == 0) | ||
1029 | return count; | ||
1030 | else | ||
1031 | return status; | ||
1032 | } | ||
1033 | |||
1034 | static struct file_operations _dlm_fops = { | ||
1035 | .open = dlm_open, | ||
1036 | .release = dlm_close, | ||
1037 | .read = dlm_read, | ||
1038 | .write = dlm_write, | ||
1039 | .poll = dlm_poll, | ||
1040 | .owner = THIS_MODULE, | ||
1041 | }; | ||
1042 | |||
1043 | static struct file_operations _dlm_ctl_fops = { | ||
1044 | .open = dlm_ctl_open, | ||
1045 | .release = dlm_ctl_close, | ||
1046 | .write = dlm_write, | ||
1047 | .owner = THIS_MODULE, | ||
1048 | }; | ||
1049 | |||
1050 | /* | ||
1051 | * Create control device | ||
1052 | */ | ||
1053 | static int __init dlm_device_init(void) | ||
1054 | { | ||
1055 | int r; | ||
1056 | |||
1057 | INIT_LIST_HEAD(&user_ls_list); | ||
1058 | init_MUTEX(&user_ls_lock); | ||
1059 | rwlock_init(&lockinfo_lock); | ||
1060 | |||
1061 | ctl_device.name = "dlm-control"; | ||
1062 | ctl_device.fops = &_dlm_ctl_fops; | ||
1063 | ctl_device.minor = MISC_DYNAMIC_MINOR; | ||
1064 | |||
1065 | r = misc_register(&ctl_device); | ||
1066 | if (r) { | ||
1067 | printk(KERN_ERR "dlm: misc_register failed for control dev\n"); | ||
1068 | return r; | ||
1069 | } | ||
1070 | |||
1071 | return 0; | ||
1072 | } | ||
1073 | |||
1074 | static void __exit dlm_device_exit(void) | ||
1075 | { | ||
1076 | misc_deregister(&ctl_device); | ||
1077 | } | ||
1078 | |||
1079 | MODULE_DESCRIPTION("Distributed Lock Manager device interface"); | ||
1080 | MODULE_AUTHOR("Red Hat, Inc."); | ||
1081 | MODULE_LICENSE("GPL"); | ||
1082 | |||
1083 | module_init(dlm_device_init); | ||
1084 | module_exit(dlm_device_exit); | ||
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c new file mode 100644 index 000000000000..0f1dde54bcd2 --- /dev/null +++ b/fs/dlm/dir.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lockspace.h" | ||
16 | #include "member.h" | ||
17 | #include "lowcomms.h" | ||
18 | #include "rcom.h" | ||
19 | #include "config.h" | ||
20 | #include "memory.h" | ||
21 | #include "recover.h" | ||
22 | #include "util.h" | ||
23 | #include "lock.h" | ||
24 | #include "dir.h" | ||
25 | |||
26 | |||
27 | static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) | ||
28 | { | ||
29 | spin_lock(&ls->ls_recover_list_lock); | ||
30 | list_add(&de->list, &ls->ls_recover_list); | ||
31 | spin_unlock(&ls->ls_recover_list_lock); | ||
32 | } | ||
33 | |||
34 | static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | ||
35 | { | ||
36 | int found = FALSE; | ||
37 | struct dlm_direntry *de; | ||
38 | |||
39 | spin_lock(&ls->ls_recover_list_lock); | ||
40 | list_for_each_entry(de, &ls->ls_recover_list, list) { | ||
41 | if (de->length == len) { | ||
42 | list_del(&de->list); | ||
43 | de->master_nodeid = 0; | ||
44 | memset(de->name, 0, len); | ||
45 | found = TRUE; | ||
46 | break; | ||
47 | } | ||
48 | } | ||
49 | spin_unlock(&ls->ls_recover_list_lock); | ||
50 | |||
51 | if (!found) | ||
52 | de = allocate_direntry(ls, len); | ||
53 | return de; | ||
54 | } | ||
55 | |||
56 | void dlm_clear_free_entries(struct dlm_ls *ls) | ||
57 | { | ||
58 | struct dlm_direntry *de; | ||
59 | |||
60 | spin_lock(&ls->ls_recover_list_lock); | ||
61 | while (!list_empty(&ls->ls_recover_list)) { | ||
62 | de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, | ||
63 | list); | ||
64 | list_del(&de->list); | ||
65 | free_direntry(de); | ||
66 | } | ||
67 | spin_unlock(&ls->ls_recover_list_lock); | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * We use the upper 16 bits of the hash value to select the directory node. | ||
72 | * Low bits are used for distribution of rsb's among hash buckets on each node. | ||
73 | * | ||
74 | * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of | ||
75 | * num_nodes to the hash value. This value in the desired range is used as an | ||
76 | * offset into the sorted list of nodeid's to give the particular nodeid. | ||
77 | */ | ||
78 | |||
79 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) | ||
80 | { | ||
81 | struct list_head *tmp; | ||
82 | struct dlm_member *memb = NULL; | ||
83 | uint32_t node, n = 0; | ||
84 | int nodeid; | ||
85 | |||
86 | if (ls->ls_num_nodes == 1) { | ||
87 | nodeid = dlm_our_nodeid(); | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | if (ls->ls_node_array) { | ||
92 | node = (hash >> 16) % ls->ls_total_weight; | ||
93 | nodeid = ls->ls_node_array[node]; | ||
94 | goto out; | ||
95 | } | ||
96 | |||
97 | /* make_member_array() failed to kmalloc ls_node_array... */ | ||
98 | |||
99 | node = (hash >> 16) % ls->ls_num_nodes; | ||
100 | |||
101 | list_for_each(tmp, &ls->ls_nodes) { | ||
102 | if (n++ != node) | ||
103 | continue; | ||
104 | memb = list_entry(tmp, struct dlm_member, list); | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n", | ||
109 | ls->ls_num_nodes, n, node);); | ||
110 | nodeid = memb->nodeid; | ||
111 | out: | ||
112 | return nodeid; | ||
113 | } | ||
114 | |||
115 | int dlm_dir_nodeid(struct dlm_rsb *r) | ||
116 | { | ||
117 | return dlm_hash2nodeid(r->res_ls, r->res_hash); | ||
118 | } | ||
119 | |||
120 | static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) | ||
121 | { | ||
122 | uint32_t val; | ||
123 | |||
124 | val = jhash(name, len, 0); | ||
125 | val &= (ls->ls_dirtbl_size - 1); | ||
126 | |||
127 | return val; | ||
128 | } | ||
129 | |||
130 | static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) | ||
131 | { | ||
132 | uint32_t bucket; | ||
133 | |||
134 | bucket = dir_hash(ls, de->name, de->length); | ||
135 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | ||
136 | } | ||
137 | |||
138 | static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, | ||
139 | int namelen, uint32_t bucket) | ||
140 | { | ||
141 | struct dlm_direntry *de; | ||
142 | |||
143 | list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { | ||
144 | if (de->length == namelen && !memcmp(name, de->name, namelen)) | ||
145 | goto out; | ||
146 | } | ||
147 | de = NULL; | ||
148 | out: | ||
149 | return de; | ||
150 | } | ||
151 | |||
152 | void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen) | ||
153 | { | ||
154 | struct dlm_direntry *de; | ||
155 | uint32_t bucket; | ||
156 | |||
157 | bucket = dir_hash(ls, name, namelen); | ||
158 | |||
159 | write_lock(&ls->ls_dirtbl[bucket].lock); | ||
160 | |||
161 | de = search_bucket(ls, name, namelen, bucket); | ||
162 | |||
163 | if (!de) { | ||
164 | log_error(ls, "remove fr %u none", nodeid); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | if (de->master_nodeid != nodeid) { | ||
169 | log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); | ||
170 | goto out; | ||
171 | } | ||
172 | |||
173 | list_del(&de->list); | ||
174 | free_direntry(de); | ||
175 | out: | ||
176 | write_unlock(&ls->ls_dirtbl[bucket].lock); | ||
177 | } | ||
178 | |||
179 | void dlm_dir_clear(struct dlm_ls *ls) | ||
180 | { | ||
181 | struct list_head *head; | ||
182 | struct dlm_direntry *de; | ||
183 | int i; | ||
184 | |||
185 | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); | ||
186 | |||
187 | for (i = 0; i < ls->ls_dirtbl_size; i++) { | ||
188 | write_lock(&ls->ls_dirtbl[i].lock); | ||
189 | head = &ls->ls_dirtbl[i].list; | ||
190 | while (!list_empty(head)) { | ||
191 | de = list_entry(head->next, struct dlm_direntry, list); | ||
192 | list_del(&de->list); | ||
193 | put_free_de(ls, de); | ||
194 | } | ||
195 | write_unlock(&ls->ls_dirtbl[i].lock); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | int dlm_recover_directory(struct dlm_ls *ls) | ||
200 | { | ||
201 | struct dlm_member *memb; | ||
202 | struct dlm_direntry *de; | ||
203 | char *b, *last_name = NULL; | ||
204 | int error = -ENOMEM, last_len, count = 0; | ||
205 | uint16_t namelen; | ||
206 | |||
207 | log_debug(ls, "dlm_recover_directory"); | ||
208 | |||
209 | if (dlm_no_directory(ls)) | ||
210 | goto out_status; | ||
211 | |||
212 | dlm_dir_clear(ls); | ||
213 | |||
214 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); | ||
215 | if (!last_name) | ||
216 | goto out; | ||
217 | |||
218 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
219 | memset(last_name, 0, DLM_RESNAME_MAXLEN); | ||
220 | last_len = 0; | ||
221 | |||
222 | for (;;) { | ||
223 | error = dlm_recovery_stopped(ls); | ||
224 | if (error) | ||
225 | goto out_free; | ||
226 | |||
227 | error = dlm_rcom_names(ls, memb->nodeid, | ||
228 | last_name, last_len); | ||
229 | if (error) | ||
230 | goto out_free; | ||
231 | |||
232 | schedule(); | ||
233 | |||
234 | /* | ||
235 | * pick namelen/name pairs out of received buffer | ||
236 | */ | ||
237 | |||
238 | b = ls->ls_recover_buf + sizeof(struct dlm_rcom); | ||
239 | |||
240 | for (;;) { | ||
241 | memcpy(&namelen, b, sizeof(uint16_t)); | ||
242 | namelen = be16_to_cpu(namelen); | ||
243 | b += sizeof(uint16_t); | ||
244 | |||
245 | /* namelen of 0xFFFFF marks end of names for | ||
246 | this node; namelen of 0 marks end of the | ||
247 | buffer */ | ||
248 | |||
249 | if (namelen == 0xFFFF) | ||
250 | goto done; | ||
251 | if (!namelen) | ||
252 | break; | ||
253 | |||
254 | error = -ENOMEM; | ||
255 | de = get_free_de(ls, namelen); | ||
256 | if (!de) | ||
257 | goto out_free; | ||
258 | |||
259 | de->master_nodeid = memb->nodeid; | ||
260 | de->length = namelen; | ||
261 | last_len = namelen; | ||
262 | memcpy(de->name, b, namelen); | ||
263 | memcpy(last_name, b, namelen); | ||
264 | b += namelen; | ||
265 | |||
266 | add_entry_to_hash(ls, de); | ||
267 | count++; | ||
268 | } | ||
269 | } | ||
270 | done: | ||
271 | ; | ||
272 | } | ||
273 | |||
274 | out_status: | ||
275 | error = 0; | ||
276 | dlm_set_recover_status(ls, DLM_RS_DIR); | ||
277 | log_debug(ls, "dlm_recover_directory %d entries", count); | ||
278 | out_free: | ||
279 | kfree(last_name); | ||
280 | out: | ||
281 | dlm_clear_free_entries(ls); | ||
282 | return error; | ||
283 | } | ||
284 | |||
285 | static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | ||
286 | int namelen, int *r_nodeid) | ||
287 | { | ||
288 | struct dlm_direntry *de, *tmp; | ||
289 | uint32_t bucket; | ||
290 | |||
291 | bucket = dir_hash(ls, name, namelen); | ||
292 | |||
293 | write_lock(&ls->ls_dirtbl[bucket].lock); | ||
294 | de = search_bucket(ls, name, namelen, bucket); | ||
295 | if (de) { | ||
296 | *r_nodeid = de->master_nodeid; | ||
297 | write_unlock(&ls->ls_dirtbl[bucket].lock); | ||
298 | if (*r_nodeid == nodeid) | ||
299 | return -EEXIST; | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | write_unlock(&ls->ls_dirtbl[bucket].lock); | ||
304 | |||
305 | de = allocate_direntry(ls, namelen); | ||
306 | if (!de) | ||
307 | return -ENOMEM; | ||
308 | |||
309 | de->master_nodeid = nodeid; | ||
310 | de->length = namelen; | ||
311 | memcpy(de->name, name, namelen); | ||
312 | |||
313 | write_lock(&ls->ls_dirtbl[bucket].lock); | ||
314 | tmp = search_bucket(ls, name, namelen, bucket); | ||
315 | if (tmp) { | ||
316 | free_direntry(de); | ||
317 | de = tmp; | ||
318 | } else { | ||
319 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | ||
320 | } | ||
321 | *r_nodeid = de->master_nodeid; | ||
322 | write_unlock(&ls->ls_dirtbl[bucket].lock); | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, | ||
327 | int *r_nodeid) | ||
328 | { | ||
329 | return get_entry(ls, nodeid, name, namelen, r_nodeid); | ||
330 | } | ||
331 | |||
332 | /* Copy the names of master rsb's into the buffer provided. | ||
333 | Only select names whose dir node is the given nodeid. */ | ||
334 | |||
335 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | ||
336 | char *outbuf, int outlen, int nodeid) | ||
337 | { | ||
338 | struct list_head *list; | ||
339 | struct dlm_rsb *start_r = NULL, *r = NULL; | ||
340 | int offset = 0, start_namelen, error, dir_nodeid; | ||
341 | char *start_name; | ||
342 | uint16_t be_namelen; | ||
343 | |||
344 | /* | ||
345 | * Find the rsb where we left off (or start again) | ||
346 | */ | ||
347 | |||
348 | start_namelen = inlen; | ||
349 | start_name = inbuf; | ||
350 | |||
351 | if (start_namelen > 1) { | ||
352 | /* | ||
353 | * We could also use a find_rsb_root() function here that | ||
354 | * searched the ls_root_list. | ||
355 | */ | ||
356 | error = dlm_find_rsb(ls, start_name, start_namelen, R_MASTER, | ||
357 | &start_r); | ||
358 | DLM_ASSERT(!error && start_r, | ||
359 | printk("error %d\n", error);); | ||
360 | DLM_ASSERT(!list_empty(&start_r->res_root_list), | ||
361 | dlm_print_rsb(start_r);); | ||
362 | dlm_put_rsb(start_r); | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * Send rsb names for rsb's we're master of and whose directory node | ||
367 | * matches the requesting node. | ||
368 | */ | ||
369 | |||
370 | down_read(&ls->ls_root_sem); | ||
371 | if (start_r) | ||
372 | list = start_r->res_root_list.next; | ||
373 | else | ||
374 | list = ls->ls_root_list.next; | ||
375 | |||
376 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { | ||
377 | r = list_entry(list, struct dlm_rsb, res_root_list); | ||
378 | if (r->res_nodeid) | ||
379 | continue; | ||
380 | |||
381 | dir_nodeid = dlm_dir_nodeid(r); | ||
382 | if (dir_nodeid != nodeid) | ||
383 | continue; | ||
384 | |||
385 | /* | ||
386 | * The block ends when we can't fit the following in the | ||
387 | * remaining buffer space: | ||
388 | * namelen (uint16_t) + | ||
389 | * name (r->res_length) + | ||
390 | * end-of-block record 0x0000 (uint16_t) | ||
391 | */ | ||
392 | |||
393 | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { | ||
394 | /* Write end-of-block record */ | ||
395 | be_namelen = 0; | ||
396 | memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t)); | ||
397 | offset += sizeof(uint16_t); | ||
398 | goto out; | ||
399 | } | ||
400 | |||
401 | be_namelen = cpu_to_be16(r->res_length); | ||
402 | memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t)); | ||
403 | offset += sizeof(uint16_t); | ||
404 | memcpy(outbuf + offset, r->res_name, r->res_length); | ||
405 | offset += r->res_length; | ||
406 | } | ||
407 | |||
408 | /* | ||
409 | * If we've reached the end of the list (and there's room) write a | ||
410 | * terminating record. | ||
411 | */ | ||
412 | |||
413 | if ((list == &ls->ls_root_list) && | ||
414 | (offset + sizeof(uint16_t) <= outlen)) { | ||
415 | be_namelen = 0xFFFF; | ||
416 | memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t)); | ||
417 | offset += sizeof(uint16_t); | ||
418 | } | ||
419 | |||
420 | out: | ||
421 | up_read(&ls->ls_root_sem); | ||
422 | } | ||
423 | |||
diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h new file mode 100644 index 000000000000..0b0eb1267b6e --- /dev/null +++ b/fs/dlm/dir.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __DIR_DOT_H__ | ||
15 | #define __DIR_DOT_H__ | ||
16 | |||
17 | |||
18 | int dlm_dir_nodeid(struct dlm_rsb *rsb); | ||
19 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash); | ||
20 | void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len); | ||
21 | void dlm_dir_clear(struct dlm_ls *ls); | ||
22 | void dlm_clear_free_entries(struct dlm_ls *ls); | ||
23 | int dlm_recover_directory(struct dlm_ls *ls); | ||
24 | int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, | ||
25 | int *r_nodeid); | ||
26 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | ||
27 | char *outbuf, int outlen, int nodeid); | ||
28 | |||
29 | #endif /* __DIR_DOT_H__ */ | ||
30 | |||
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h new file mode 100644 index 000000000000..0020cd07baf7 --- /dev/null +++ b/fs/dlm/dlm_internal.h | |||
@@ -0,0 +1,518 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __DLM_INTERNAL_DOT_H__ | ||
15 | #define __DLM_INTERNAL_DOT_H__ | ||
16 | |||
17 | /* | ||
18 | * This is the main header file to be included in each DLM source file. | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/ctype.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | #include <linux/list.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/random.h> | ||
31 | #include <linux/delay.h> | ||
32 | #include <linux/socket.h> | ||
33 | #include <linux/kthread.h> | ||
34 | #include <linux/kobject.h> | ||
35 | #include <linux/kref.h> | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/jhash.h> | ||
38 | #include <asm/semaphore.h> | ||
39 | #include <asm/uaccess.h> | ||
40 | |||
41 | #include <linux/dlm.h> | ||
42 | |||
43 | #define DLM_LOCKSPACE_LEN 64 | ||
44 | |||
45 | #ifndef TRUE | ||
46 | #define TRUE 1 | ||
47 | #endif | ||
48 | |||
49 | #ifndef FALSE | ||
50 | #define FALSE 0 | ||
51 | #endif | ||
52 | |||
53 | #if (BITS_PER_LONG == 64) | ||
54 | #define PRIx64 "lx" | ||
55 | #else | ||
56 | #define PRIx64 "Lx" | ||
57 | #endif | ||
58 | |||
59 | /* Size of the temp buffer midcomms allocates on the stack. | ||
60 | We try to make this large enough so most messages fit. | ||
61 | FIXME: should sctp make this unnecessary? */ | ||
62 | |||
63 | #define DLM_INBUF_LEN 148 | ||
64 | |||
65 | struct dlm_ls; | ||
66 | struct dlm_lkb; | ||
67 | struct dlm_rsb; | ||
68 | struct dlm_member; | ||
69 | struct dlm_lkbtable; | ||
70 | struct dlm_rsbtable; | ||
71 | struct dlm_dirtable; | ||
72 | struct dlm_direntry; | ||
73 | struct dlm_recover; | ||
74 | struct dlm_header; | ||
75 | struct dlm_message; | ||
76 | struct dlm_rcom; | ||
77 | struct dlm_mhandle; | ||
78 | |||
79 | #define log_print(fmt, args...) \ | ||
80 | printk(KERN_ERR "dlm: "fmt"\n" , ##args) | ||
81 | #define log_error(ls, fmt, args...) \ | ||
82 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) | ||
83 | |||
84 | #ifdef DLM_LOG_DEBUG | ||
85 | #define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) | ||
86 | #else | ||
87 | #define log_debug(ls, fmt, args...) | ||
88 | #endif | ||
89 | |||
90 | #define DLM_ASSERT(x, do) \ | ||
91 | { \ | ||
92 | if (!(x)) \ | ||
93 | { \ | ||
94 | printk(KERN_ERR "\nDLM: Assertion failed on line %d of file %s\n" \ | ||
95 | "DLM: assertion: \"%s\"\n" \ | ||
96 | "DLM: time = %lu\n", \ | ||
97 | __LINE__, __FILE__, #x, jiffies); \ | ||
98 | {do} \ | ||
99 | printk("\n"); \ | ||
100 | BUG(); \ | ||
101 | panic("DLM: Record message above and reboot.\n"); \ | ||
102 | } \ | ||
103 | } | ||
104 | |||
105 | |||
106 | struct dlm_direntry { | ||
107 | struct list_head list; | ||
108 | uint32_t master_nodeid; | ||
109 | uint16_t length; | ||
110 | char name[1]; | ||
111 | }; | ||
112 | |||
113 | struct dlm_dirtable { | ||
114 | struct list_head list; | ||
115 | rwlock_t lock; | ||
116 | }; | ||
117 | |||
118 | struct dlm_rsbtable { | ||
119 | struct list_head list; | ||
120 | struct list_head toss; | ||
121 | rwlock_t lock; | ||
122 | }; | ||
123 | |||
124 | struct dlm_lkbtable { | ||
125 | struct list_head list; | ||
126 | rwlock_t lock; | ||
127 | uint16_t counter; | ||
128 | }; | ||
129 | |||
130 | /* | ||
131 | * Lockspace member (per node in a ls) | ||
132 | */ | ||
133 | |||
134 | struct dlm_member { | ||
135 | struct list_head list; | ||
136 | int nodeid; | ||
137 | int weight; | ||
138 | }; | ||
139 | |||
140 | /* | ||
141 | * Save and manage recovery state for a lockspace. | ||
142 | */ | ||
143 | |||
144 | struct dlm_recover { | ||
145 | struct list_head list; | ||
146 | int *nodeids; | ||
147 | int node_count; | ||
148 | uint64_t seq; | ||
149 | }; | ||
150 | |||
151 | /* | ||
152 | * Pass input args to second stage locking function. | ||
153 | */ | ||
154 | |||
155 | struct dlm_args { | ||
156 | uint32_t flags; | ||
157 | void *astaddr; | ||
158 | long astparam; | ||
159 | void *bastaddr; | ||
160 | int mode; | ||
161 | struct dlm_lksb *lksb; | ||
162 | struct dlm_range *range; | ||
163 | }; | ||
164 | |||
165 | |||
166 | /* | ||
167 | * Lock block | ||
168 | * | ||
169 | * A lock can be one of three types: | ||
170 | * | ||
171 | * local copy lock is mastered locally | ||
172 | * (lkb_nodeid is zero and DLM_LKF_MSTCPY is not set) | ||
173 | * process copy lock is mastered on a remote node | ||
174 | * (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is not set) | ||
175 | * master copy master node's copy of a lock owned by remote node | ||
176 | * (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is set) | ||
177 | * | ||
178 | * lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or | ||
179 | * dlm_unlock. The dlm does not modify these or use any private flags in | ||
180 | * this field; it only contains DLM_LKF_ flags from dlm.h. These flags | ||
181 | * are sent as-is to the remote master when the lock is remote. | ||
182 | * | ||
183 | * lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h. | ||
184 | * Some internal flags are shared between the master and process nodes; | ||
185 | * these shared flags are kept in the lower two bytes. One of these | ||
186 | * flags set on the master copy will be propagated to the process copy | ||
187 | * and v.v. Other internal flags are private to the master or process | ||
188 | * node (e.g. DLM_IFL_MSTCPY). These are kept in the high two bytes. | ||
189 | * | ||
190 | * lkb_sbflags: status block flags. These flags are copied directly into | ||
191 | * the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion | ||
192 | * ast. All defined in dlm.h with DLM_SBF_ prefix. | ||
193 | * | ||
194 | * lkb_status: the lock status indicates which rsb queue the lock is | ||
195 | * on, grant, convert, or wait. DLM_LKSTS_ WAITING/GRANTED/CONVERT | ||
196 | * | ||
197 | * lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a | ||
198 | * reply is needed. Only set when the lkb is on the lockspace waiters | ||
199 | * list awaiting a reply from a remote node. | ||
200 | * | ||
201 | * lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb | ||
202 | * is a master copy, nodeid specifies the remote lock holder, when the | ||
203 | * lkb is a process copy, the nodeid specifies the lock master. | ||
204 | */ | ||
205 | |||
206 | /* lkb_ast_type */ | ||
207 | |||
208 | #define AST_COMP 1 | ||
209 | #define AST_BAST 2 | ||
210 | |||
211 | /* lkb_range[] */ | ||
212 | |||
213 | #define GR_RANGE_START 0 | ||
214 | #define GR_RANGE_END 1 | ||
215 | #define RQ_RANGE_START 2 | ||
216 | #define RQ_RANGE_END 3 | ||
217 | |||
218 | /* lkb_status */ | ||
219 | |||
220 | #define DLM_LKSTS_WAITING 1 | ||
221 | #define DLM_LKSTS_GRANTED 2 | ||
222 | #define DLM_LKSTS_CONVERT 3 | ||
223 | |||
224 | /* lkb_flags */ | ||
225 | |||
226 | #define DLM_IFL_MSTCPY 0x00010000 | ||
227 | #define DLM_IFL_RESEND 0x00020000 | ||
228 | #define DLM_IFL_RANGE 0x00000001 | ||
229 | |||
230 | struct dlm_lkb { | ||
231 | struct dlm_rsb *lkb_resource; /* the rsb */ | ||
232 | struct kref lkb_ref; | ||
233 | int lkb_nodeid; /* copied from rsb */ | ||
234 | int lkb_ownpid; /* pid of lock owner */ | ||
235 | uint32_t lkb_id; /* our lock ID */ | ||
236 | uint32_t lkb_remid; /* lock ID on remote partner */ | ||
237 | uint32_t lkb_exflags; /* external flags from caller */ | ||
238 | uint32_t lkb_sbflags; /* lksb flags */ | ||
239 | uint32_t lkb_flags; /* internal flags */ | ||
240 | uint32_t lkb_lvbseq; /* lvb sequence number */ | ||
241 | |||
242 | int8_t lkb_status; /* granted, waiting, convert */ | ||
243 | int8_t lkb_rqmode; /* requested lock mode */ | ||
244 | int8_t lkb_grmode; /* granted lock mode */ | ||
245 | int8_t lkb_bastmode; /* requested mode */ | ||
246 | int8_t lkb_highbast; /* highest mode bast sent for */ | ||
247 | |||
248 | int8_t lkb_wait_type; /* type of reply waiting for */ | ||
249 | int8_t lkb_ast_type; /* type of ast queued for */ | ||
250 | |||
251 | struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ | ||
252 | struct list_head lkb_statequeue; /* rsb g/c/w list */ | ||
253 | struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ | ||
254 | struct list_head lkb_wait_reply; /* waiting for remote reply */ | ||
255 | struct list_head lkb_astqueue; /* need ast to be sent */ | ||
256 | |||
257 | uint64_t *lkb_range; /* array of gr/rq ranges */ | ||
258 | char *lkb_lvbptr; | ||
259 | struct dlm_lksb *lkb_lksb; /* caller's status block */ | ||
260 | void *lkb_astaddr; /* caller's ast function */ | ||
261 | void *lkb_bastaddr; /* caller's bast function */ | ||
262 | long lkb_astparam; /* caller's ast arg */ | ||
263 | }; | ||
264 | |||
265 | |||
266 | struct dlm_rsb { | ||
267 | struct dlm_ls *res_ls; /* the lockspace */ | ||
268 | struct kref res_ref; | ||
269 | struct semaphore res_sem; | ||
270 | unsigned long res_flags; | ||
271 | int res_length; /* length of rsb name */ | ||
272 | int res_nodeid; | ||
273 | uint32_t res_lvbseq; | ||
274 | uint32_t res_hash; | ||
275 | uint32_t res_bucket; /* rsbtbl */ | ||
276 | unsigned long res_toss_time; | ||
277 | uint32_t res_first_lkid; | ||
278 | struct list_head res_lookup; /* lkbs waiting on first */ | ||
279 | struct list_head res_hashchain; /* rsbtbl */ | ||
280 | struct list_head res_grantqueue; | ||
281 | struct list_head res_convertqueue; | ||
282 | struct list_head res_waitqueue; | ||
283 | |||
284 | struct list_head res_root_list; /* used for recovery */ | ||
285 | struct list_head res_recover_list; /* used for recovery */ | ||
286 | int res_recover_locks_count; | ||
287 | |||
288 | char *res_lvbptr; | ||
289 | char res_name[1]; | ||
290 | }; | ||
291 | |||
292 | /* find_rsb() flags */ | ||
293 | |||
294 | #define R_MASTER 1 /* only return rsb if it's a master */ | ||
295 | #define R_CREATE 2 /* create/add rsb if not found */ | ||
296 | |||
297 | /* rsb_flags */ | ||
298 | |||
299 | enum rsb_flags { | ||
300 | RSB_MASTER_UNCERTAIN, | ||
301 | RSB_VALNOTVALID, | ||
302 | RSB_VALNOTVALID_PREV, | ||
303 | RSB_NEW_MASTER, | ||
304 | RSB_NEW_MASTER2, | ||
305 | RSB_RECOVER_CONVERT, | ||
306 | }; | ||
307 | |||
308 | static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) | ||
309 | { | ||
310 | __set_bit(flag, &r->res_flags); | ||
311 | } | ||
312 | |||
313 | static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag) | ||
314 | { | ||
315 | __clear_bit(flag, &r->res_flags); | ||
316 | } | ||
317 | |||
318 | static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) | ||
319 | { | ||
320 | return test_bit(flag, &r->res_flags); | ||
321 | } | ||
322 | |||
323 | |||
324 | /* dlm_header is first element of all structs sent between nodes */ | ||
325 | |||
326 | #define DLM_HEADER_MAJOR 0x00020000 | ||
327 | #define DLM_HEADER_MINOR 0x00000001 | ||
328 | |||
329 | #define DLM_MSG 1 | ||
330 | #define DLM_RCOM 2 | ||
331 | |||
332 | struct dlm_header { | ||
333 | uint32_t h_version; | ||
334 | uint32_t h_lockspace; | ||
335 | uint32_t h_nodeid; /* nodeid of sender */ | ||
336 | uint16_t h_length; | ||
337 | uint8_t h_cmd; /* DLM_MSG, DLM_RCOM */ | ||
338 | uint8_t h_pad; | ||
339 | }; | ||
340 | |||
341 | |||
342 | #define DLM_MSG_REQUEST 1 | ||
343 | #define DLM_MSG_CONVERT 2 | ||
344 | #define DLM_MSG_UNLOCK 3 | ||
345 | #define DLM_MSG_CANCEL 4 | ||
346 | #define DLM_MSG_REQUEST_REPLY 5 | ||
347 | #define DLM_MSG_CONVERT_REPLY 6 | ||
348 | #define DLM_MSG_UNLOCK_REPLY 7 | ||
349 | #define DLM_MSG_CANCEL_REPLY 8 | ||
350 | #define DLM_MSG_GRANT 9 | ||
351 | #define DLM_MSG_BAST 10 | ||
352 | #define DLM_MSG_LOOKUP 11 | ||
353 | #define DLM_MSG_REMOVE 12 | ||
354 | #define DLM_MSG_LOOKUP_REPLY 13 | ||
355 | |||
356 | struct dlm_message { | ||
357 | struct dlm_header m_header; | ||
358 | uint32_t m_type; /* DLM_MSG_ */ | ||
359 | uint32_t m_nodeid; | ||
360 | uint32_t m_pid; | ||
361 | uint32_t m_lkid; /* lkid on sender */ | ||
362 | uint32_t m_remid; /* lkid on receiver */ | ||
363 | uint32_t m_parent_lkid; | ||
364 | uint32_t m_parent_remid; | ||
365 | uint32_t m_exflags; | ||
366 | uint32_t m_sbflags; | ||
367 | uint32_t m_flags; | ||
368 | uint32_t m_lvbseq; | ||
369 | uint32_t m_hash; | ||
370 | int m_status; | ||
371 | int m_grmode; | ||
372 | int m_rqmode; | ||
373 | int m_bastmode; | ||
374 | int m_asts; | ||
375 | int m_result; /* 0 or -EXXX */ | ||
376 | uint64_t m_range[2]; | ||
377 | char m_extra[0]; /* name or lvb */ | ||
378 | }; | ||
379 | |||
380 | |||
381 | #define DLM_RS_NODES 0x00000001 | ||
382 | #define DLM_RS_NODES_ALL 0x00000002 | ||
383 | #define DLM_RS_DIR 0x00000004 | ||
384 | #define DLM_RS_DIR_ALL 0x00000008 | ||
385 | #define DLM_RS_LOCKS 0x00000010 | ||
386 | #define DLM_RS_LOCKS_ALL 0x00000020 | ||
387 | #define DLM_RS_DONE 0x00000040 | ||
388 | #define DLM_RS_DONE_ALL 0x00000080 | ||
389 | |||
390 | #define DLM_RCOM_STATUS 1 | ||
391 | #define DLM_RCOM_NAMES 2 | ||
392 | #define DLM_RCOM_LOOKUP 3 | ||
393 | #define DLM_RCOM_LOCK 4 | ||
394 | #define DLM_RCOM_STATUS_REPLY 5 | ||
395 | #define DLM_RCOM_NAMES_REPLY 6 | ||
396 | #define DLM_RCOM_LOOKUP_REPLY 7 | ||
397 | #define DLM_RCOM_LOCK_REPLY 8 | ||
398 | |||
399 | struct dlm_rcom { | ||
400 | struct dlm_header rc_header; | ||
401 | uint32_t rc_type; /* DLM_RCOM_ */ | ||
402 | int rc_result; /* multi-purpose */ | ||
403 | uint64_t rc_id; /* match reply with request */ | ||
404 | char rc_buf[0]; | ||
405 | }; | ||
406 | |||
407 | struct rcom_config { | ||
408 | uint32_t rf_lvblen; | ||
409 | uint32_t rf_lsflags; | ||
410 | uint64_t rf_unused; | ||
411 | }; | ||
412 | |||
413 | struct rcom_lock { | ||
414 | uint32_t rl_ownpid; | ||
415 | uint32_t rl_lkid; | ||
416 | uint32_t rl_remid; | ||
417 | uint32_t rl_parent_lkid; | ||
418 | uint32_t rl_parent_remid; | ||
419 | uint32_t rl_exflags; | ||
420 | uint32_t rl_flags; | ||
421 | uint32_t rl_lvbseq; | ||
422 | int rl_result; | ||
423 | int8_t rl_rqmode; | ||
424 | int8_t rl_grmode; | ||
425 | int8_t rl_status; | ||
426 | int8_t rl_asts; | ||
427 | uint16_t rl_wait_type; | ||
428 | uint16_t rl_namelen; | ||
429 | uint64_t rl_range[4]; | ||
430 | char rl_name[DLM_RESNAME_MAXLEN]; | ||
431 | char rl_lvb[0]; | ||
432 | }; | ||
433 | |||
434 | struct dlm_ls { | ||
435 | struct list_head ls_list; /* list of lockspaces */ | ||
436 | uint32_t ls_global_id; /* global unique lockspace ID */ | ||
437 | uint32_t ls_exflags; | ||
438 | int ls_lvblen; | ||
439 | int ls_count; /* reference count */ | ||
440 | unsigned long ls_flags; /* LSFL_ */ | ||
441 | struct kobject ls_kobj; | ||
442 | |||
443 | struct dlm_rsbtable *ls_rsbtbl; | ||
444 | uint32_t ls_rsbtbl_size; | ||
445 | |||
446 | struct dlm_lkbtable *ls_lkbtbl; | ||
447 | uint32_t ls_lkbtbl_size; | ||
448 | |||
449 | struct dlm_dirtable *ls_dirtbl; | ||
450 | uint32_t ls_dirtbl_size; | ||
451 | |||
452 | struct semaphore ls_waiters_sem; | ||
453 | struct list_head ls_waiters; /* lkbs needing a reply */ | ||
454 | |||
455 | struct list_head ls_nodes; /* current nodes in ls */ | ||
456 | struct list_head ls_nodes_gone; /* dead node list, recovery */ | ||
457 | int ls_num_nodes; /* number of nodes in ls */ | ||
458 | int ls_low_nodeid; | ||
459 | int ls_total_weight; | ||
460 | int *ls_node_array; | ||
461 | |||
462 | struct dlm_rsb ls_stub_rsb; /* for returning errors */ | ||
463 | struct dlm_lkb ls_stub_lkb; /* for returning errors */ | ||
464 | struct dlm_message ls_stub_ms; /* for faking a reply */ | ||
465 | |||
466 | struct dentry *ls_debug_dentry; /* debugfs */ | ||
467 | |||
468 | wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ | ||
469 | int ls_uevent_result; | ||
470 | |||
471 | /* recovery related */ | ||
472 | |||
473 | struct timer_list ls_timer; | ||
474 | struct task_struct *ls_recoverd_task; | ||
475 | struct semaphore ls_recoverd_active; | ||
476 | spinlock_t ls_recover_lock; | ||
477 | uint32_t ls_recover_status; /* DLM_RS_ */ | ||
478 | uint64_t ls_recover_seq; | ||
479 | struct dlm_recover *ls_recover_args; | ||
480 | struct rw_semaphore ls_in_recovery; /* block local requests */ | ||
481 | struct list_head ls_requestqueue;/* queue remote requests */ | ||
482 | struct semaphore ls_requestqueue_lock; | ||
483 | char *ls_recover_buf; | ||
484 | struct list_head ls_recover_list; | ||
485 | spinlock_t ls_recover_list_lock; | ||
486 | int ls_recover_list_count; | ||
487 | wait_queue_head_t ls_wait_general; | ||
488 | |||
489 | struct list_head ls_root_list; /* root resources */ | ||
490 | struct rw_semaphore ls_root_sem; /* protect root_list */ | ||
491 | |||
492 | int ls_namelen; | ||
493 | char ls_name[1]; | ||
494 | }; | ||
495 | |||
496 | #define LSFL_WORK 0 | ||
497 | #define LSFL_RUNNING 1 | ||
498 | #define LSFL_RECOVERY_STOP 2 | ||
499 | #define LSFL_RCOM_READY 3 | ||
500 | #define LSFL_UEVENT_WAIT 4 | ||
501 | |||
502 | static inline int dlm_locking_stopped(struct dlm_ls *ls) | ||
503 | { | ||
504 | return !test_bit(LSFL_RUNNING, &ls->ls_flags); | ||
505 | } | ||
506 | |||
507 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) | ||
508 | { | ||
509 | return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | ||
510 | } | ||
511 | |||
512 | static inline int dlm_no_directory(struct dlm_ls *ls) | ||
513 | { | ||
514 | return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0; | ||
515 | } | ||
516 | |||
517 | #endif /* __DLM_INTERNAL_DOT_H__ */ | ||
518 | |||
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c new file mode 100644 index 000000000000..81efb361f95d --- /dev/null +++ b/fs/dlm/lock.c | |||
@@ -0,0 +1,3610 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | /* Central locking logic has four stages: | ||
14 | |||
15 | dlm_lock() | ||
16 | dlm_unlock() | ||
17 | |||
18 | request_lock(ls, lkb) | ||
19 | convert_lock(ls, lkb) | ||
20 | unlock_lock(ls, lkb) | ||
21 | cancel_lock(ls, lkb) | ||
22 | |||
23 | _request_lock(r, lkb) | ||
24 | _convert_lock(r, lkb) | ||
25 | _unlock_lock(r, lkb) | ||
26 | _cancel_lock(r, lkb) | ||
27 | |||
28 | do_request(r, lkb) | ||
29 | do_convert(r, lkb) | ||
30 | do_unlock(r, lkb) | ||
31 | do_cancel(r, lkb) | ||
32 | |||
33 | Stage 1 (lock, unlock) is mainly about checking input args and | ||
34 | splitting into one of the four main operations: | ||
35 | |||
36 | dlm_lock = request_lock | ||
37 | dlm_lock+CONVERT = convert_lock | ||
38 | dlm_unlock = unlock_lock | ||
39 | dlm_unlock+CANCEL = cancel_lock | ||
40 | |||
41 | Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is | ||
42 | provided to the next stage. | ||
43 | |||
44 | Stage 3, _xxxx_lock(), determines if the operation is local or remote. | ||
45 | When remote, it calls send_xxxx(), when local it calls do_xxxx(). | ||
46 | |||
47 | Stage 4, do_xxxx(), is the guts of the operation. It manipulates the | ||
48 | given rsb and lkb and queues callbacks. | ||
49 | |||
50 | For remote operations, send_xxxx() results in the corresponding do_xxxx() | ||
51 | function being executed on the remote node. The connecting send/receive | ||
52 | calls on local (L) and remote (R) nodes: | ||
53 | |||
54 | L: send_xxxx() -> R: receive_xxxx() | ||
55 | R: do_xxxx() | ||
56 | L: receive_xxxx_reply() <- R: send_xxxx_reply() | ||
57 | */ | ||
58 | |||
59 | #include "dlm_internal.h" | ||
60 | #include "memory.h" | ||
61 | #include "lowcomms.h" | ||
62 | #include "requestqueue.h" | ||
63 | #include "util.h" | ||
64 | #include "dir.h" | ||
65 | #include "member.h" | ||
66 | #include "lockspace.h" | ||
67 | #include "ast.h" | ||
68 | #include "lock.h" | ||
69 | #include "rcom.h" | ||
70 | #include "recover.h" | ||
71 | #include "lvb_table.h" | ||
72 | #include "config.h" | ||
73 | |||
74 | static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
75 | static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
76 | static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
77 | static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
78 | static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
79 | static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode); | ||
80 | static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
81 | static int send_remove(struct dlm_rsb *r); | ||
82 | static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
83 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
84 | struct dlm_message *ms); | ||
85 | static int receive_extralen(struct dlm_message *ms); | ||
86 | |||
87 | /* | ||
88 | * Lock compatibilty matrix - thanks Steve | ||
89 | * UN = Unlocked state. Not really a state, used as a flag | ||
90 | * PD = Padding. Used to make the matrix a nice power of two in size | ||
91 | * Other states are the same as the VMS DLM. | ||
92 | * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same) | ||
93 | */ | ||
94 | |||
95 | static const int __dlm_compat_matrix[8][8] = { | ||
96 | /* UN NL CR CW PR PW EX PD */ | ||
97 | {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */ | ||
98 | {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */ | ||
99 | {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */ | ||
100 | {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */ | ||
101 | {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */ | ||
102 | {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */ | ||
103 | {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */ | ||
104 | {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | ||
105 | }; | ||
106 | |||
107 | /* | ||
108 | * This defines the direction of transfer of LVB data. | ||
109 | * Granted mode is the row; requested mode is the column. | ||
110 | * Usage: matrix[grmode+1][rqmode+1] | ||
111 | * 1 = LVB is returned to the caller | ||
112 | * 0 = LVB is written to the resource | ||
113 | * -1 = nothing happens to the LVB | ||
114 | */ | ||
115 | |||
116 | const int dlm_lvb_operations[8][8] = { | ||
117 | /* UN NL CR CW PR PW EX PD*/ | ||
118 | { -1, 1, 1, 1, 1, 1, 1, -1 }, /* UN */ | ||
119 | { -1, 1, 1, 1, 1, 1, 1, 0 }, /* NL */ | ||
120 | { -1, -1, 1, 1, 1, 1, 1, 0 }, /* CR */ | ||
121 | { -1, -1, -1, 1, 1, 1, 1, 0 }, /* CW */ | ||
122 | { -1, -1, -1, -1, 1, 1, 1, 0 }, /* PR */ | ||
123 | { -1, 0, 0, 0, 0, 0, 1, 0 }, /* PW */ | ||
124 | { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */ | ||
125 | { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */ | ||
126 | }; | ||
127 | EXPORT_SYMBOL_GPL(dlm_lvb_operations); | ||
128 | |||
129 | #define modes_compat(gr, rq) \ | ||
130 | __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1] | ||
131 | |||
132 | int dlm_modes_compat(int mode1, int mode2) | ||
133 | { | ||
134 | return __dlm_compat_matrix[mode1 + 1][mode2 + 1]; | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * Compatibility matrix for conversions with QUECVT set. | ||
139 | * Granted mode is the row; requested mode is the column. | ||
140 | * Usage: matrix[grmode+1][rqmode+1] | ||
141 | */ | ||
142 | |||
143 | static const int __quecvt_compat_matrix[8][8] = { | ||
144 | /* UN NL CR CW PR PW EX PD */ | ||
145 | {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */ | ||
146 | {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */ | ||
147 | {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */ | ||
148 | {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */ | ||
149 | {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */ | ||
150 | {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */ | ||
151 | {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */ | ||
152 | {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | ||
153 | }; | ||
154 | |||
155 | static void dlm_print_lkb(struct dlm_lkb *lkb) | ||
156 | { | ||
157 | printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" | ||
158 | " status %d rqmode %d grmode %d wait_type %d ast_type %d\n", | ||
159 | lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, | ||
160 | lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, | ||
161 | lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); | ||
162 | } | ||
163 | |||
164 | void dlm_print_rsb(struct dlm_rsb *r) | ||
165 | { | ||
166 | printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", | ||
167 | r->res_nodeid, r->res_flags, r->res_first_lkid, | ||
168 | r->res_recover_locks_count, r->res_name); | ||
169 | } | ||
170 | |||
171 | /* Threads cannot use the lockspace while it's being recovered */ | ||
172 | |||
173 | static inline void lock_recovery(struct dlm_ls *ls) | ||
174 | { | ||
175 | down_read(&ls->ls_in_recovery); | ||
176 | } | ||
177 | |||
178 | static inline void unlock_recovery(struct dlm_ls *ls) | ||
179 | { | ||
180 | up_read(&ls->ls_in_recovery); | ||
181 | } | ||
182 | |||
183 | static inline int lock_recovery_try(struct dlm_ls *ls) | ||
184 | { | ||
185 | return down_read_trylock(&ls->ls_in_recovery); | ||
186 | } | ||
187 | |||
188 | static inline int can_be_queued(struct dlm_lkb *lkb) | ||
189 | { | ||
190 | return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE); | ||
191 | } | ||
192 | |||
193 | static inline int force_blocking_asts(struct dlm_lkb *lkb) | ||
194 | { | ||
195 | return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST); | ||
196 | } | ||
197 | |||
198 | static inline int is_demoted(struct dlm_lkb *lkb) | ||
199 | { | ||
200 | return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); | ||
201 | } | ||
202 | |||
203 | static inline int is_remote(struct dlm_rsb *r) | ||
204 | { | ||
205 | DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); | ||
206 | return !!r->res_nodeid; | ||
207 | } | ||
208 | |||
209 | static inline int is_process_copy(struct dlm_lkb *lkb) | ||
210 | { | ||
211 | return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY)); | ||
212 | } | ||
213 | |||
214 | static inline int is_master_copy(struct dlm_lkb *lkb) | ||
215 | { | ||
216 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) | ||
217 | DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb);); | ||
218 | return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? TRUE : FALSE; | ||
219 | } | ||
220 | |||
221 | static inline int middle_conversion(struct dlm_lkb *lkb) | ||
222 | { | ||
223 | if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) || | ||
224 | (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW)) | ||
225 | return TRUE; | ||
226 | return FALSE; | ||
227 | } | ||
228 | |||
229 | static inline int down_conversion(struct dlm_lkb *lkb) | ||
230 | { | ||
231 | return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); | ||
232 | } | ||
233 | |||
234 | static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | ||
235 | { | ||
236 | if (is_master_copy(lkb)) | ||
237 | return; | ||
238 | |||
239 | DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); | ||
240 | |||
241 | lkb->lkb_lksb->sb_status = rv; | ||
242 | lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; | ||
243 | |||
244 | dlm_add_ast(lkb, AST_COMP); | ||
245 | } | ||
246 | |||
247 | static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) | ||
248 | { | ||
249 | if (is_master_copy(lkb)) | ||
250 | send_bast(r, lkb, rqmode); | ||
251 | else { | ||
252 | lkb->lkb_bastmode = rqmode; | ||
253 | dlm_add_ast(lkb, AST_BAST); | ||
254 | } | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * Basic operations on rsb's and lkb's | ||
259 | */ | ||
260 | |||
261 | static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) | ||
262 | { | ||
263 | struct dlm_rsb *r; | ||
264 | |||
265 | r = allocate_rsb(ls, len); | ||
266 | if (!r) | ||
267 | return NULL; | ||
268 | |||
269 | r->res_ls = ls; | ||
270 | r->res_length = len; | ||
271 | memcpy(r->res_name, name, len); | ||
272 | init_MUTEX(&r->res_sem); | ||
273 | |||
274 | INIT_LIST_HEAD(&r->res_lookup); | ||
275 | INIT_LIST_HEAD(&r->res_grantqueue); | ||
276 | INIT_LIST_HEAD(&r->res_convertqueue); | ||
277 | INIT_LIST_HEAD(&r->res_waitqueue); | ||
278 | INIT_LIST_HEAD(&r->res_root_list); | ||
279 | INIT_LIST_HEAD(&r->res_recover_list); | ||
280 | |||
281 | return r; | ||
282 | } | ||
283 | |||
284 | static int search_rsb_list(struct list_head *head, char *name, int len, | ||
285 | unsigned int flags, struct dlm_rsb **r_ret) | ||
286 | { | ||
287 | struct dlm_rsb *r; | ||
288 | int error = 0; | ||
289 | |||
290 | list_for_each_entry(r, head, res_hashchain) { | ||
291 | if (len == r->res_length && !memcmp(name, r->res_name, len)) | ||
292 | goto found; | ||
293 | } | ||
294 | return -ENOENT; | ||
295 | |||
296 | found: | ||
297 | if (r->res_nodeid && (flags & R_MASTER)) | ||
298 | error = -ENOTBLK; | ||
299 | *r_ret = r; | ||
300 | return error; | ||
301 | } | ||
302 | |||
303 | static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, | ||
304 | unsigned int flags, struct dlm_rsb **r_ret) | ||
305 | { | ||
306 | struct dlm_rsb *r; | ||
307 | int error; | ||
308 | |||
309 | error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); | ||
310 | if (!error) { | ||
311 | kref_get(&r->res_ref); | ||
312 | goto out; | ||
313 | } | ||
314 | error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); | ||
315 | if (error) | ||
316 | goto out; | ||
317 | |||
318 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); | ||
319 | |||
320 | if (dlm_no_directory(ls)) | ||
321 | goto out; | ||
322 | |||
323 | if (r->res_nodeid == -1) { | ||
324 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | ||
325 | r->res_first_lkid = 0; | ||
326 | } else if (r->res_nodeid > 0) { | ||
327 | rsb_set_flag(r, RSB_MASTER_UNCERTAIN); | ||
328 | r->res_first_lkid = 0; | ||
329 | } else { | ||
330 | DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r);); | ||
331 | DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),); | ||
332 | } | ||
333 | out: | ||
334 | *r_ret = r; | ||
335 | return error; | ||
336 | } | ||
337 | |||
338 | static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, | ||
339 | unsigned int flags, struct dlm_rsb **r_ret) | ||
340 | { | ||
341 | int error; | ||
342 | write_lock(&ls->ls_rsbtbl[b].lock); | ||
343 | error = _search_rsb(ls, name, len, b, flags, r_ret); | ||
344 | write_unlock(&ls->ls_rsbtbl[b].lock); | ||
345 | return error; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * Find rsb in rsbtbl and potentially create/add one | ||
350 | * | ||
351 | * Delaying the release of rsb's has a similar benefit to applications keeping | ||
352 | * NL locks on an rsb, but without the guarantee that the cached master value | ||
353 | * will still be valid when the rsb is reused. Apps aren't always smart enough | ||
354 | * to keep NL locks on an rsb that they may lock again shortly; this can lead | ||
355 | * to excessive master lookups and removals if we don't delay the release. | ||
356 | * | ||
357 | * Searching for an rsb means looking through both the normal list and toss | ||
358 | * list. When found on the toss list the rsb is moved to the normal list with | ||
359 | * ref count of 1; when found on normal list the ref count is incremented. | ||
360 | */ | ||
361 | |||
362 | static int find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
363 | unsigned int flags, struct dlm_rsb **r_ret) | ||
364 | { | ||
365 | struct dlm_rsb *r, *tmp; | ||
366 | uint32_t hash, bucket; | ||
367 | int error = 0; | ||
368 | |||
369 | if (dlm_no_directory(ls)) | ||
370 | flags |= R_CREATE; | ||
371 | |||
372 | hash = jhash(name, namelen, 0); | ||
373 | bucket = hash & (ls->ls_rsbtbl_size - 1); | ||
374 | |||
375 | error = search_rsb(ls, name, namelen, bucket, flags, &r); | ||
376 | if (!error) | ||
377 | goto out; | ||
378 | |||
379 | if (error == -ENOENT && !(flags & R_CREATE)) | ||
380 | goto out; | ||
381 | |||
382 | /* the rsb was found but wasn't a master copy */ | ||
383 | if (error == -ENOTBLK) | ||
384 | goto out; | ||
385 | |||
386 | error = -ENOMEM; | ||
387 | r = create_rsb(ls, name, namelen); | ||
388 | if (!r) | ||
389 | goto out; | ||
390 | |||
391 | r->res_hash = hash; | ||
392 | r->res_bucket = bucket; | ||
393 | r->res_nodeid = -1; | ||
394 | kref_init(&r->res_ref); | ||
395 | |||
396 | /* With no directory, the master can be set immediately */ | ||
397 | if (dlm_no_directory(ls)) { | ||
398 | int nodeid = dlm_dir_nodeid(r); | ||
399 | if (nodeid == dlm_our_nodeid()) | ||
400 | nodeid = 0; | ||
401 | r->res_nodeid = nodeid; | ||
402 | } | ||
403 | |||
404 | write_lock(&ls->ls_rsbtbl[bucket].lock); | ||
405 | error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); | ||
406 | if (!error) { | ||
407 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
408 | free_rsb(r); | ||
409 | r = tmp; | ||
410 | goto out; | ||
411 | } | ||
412 | list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); | ||
413 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
414 | error = 0; | ||
415 | out: | ||
416 | *r_ret = r; | ||
417 | return error; | ||
418 | } | ||
419 | |||
420 | int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
421 | unsigned int flags, struct dlm_rsb **r_ret) | ||
422 | { | ||
423 | return find_rsb(ls, name, namelen, flags, r_ret); | ||
424 | } | ||
425 | |||
426 | /* This is only called to add a reference when the code already holds | ||
427 | a valid reference to the rsb, so there's no need for locking. */ | ||
428 | |||
429 | static inline void hold_rsb(struct dlm_rsb *r) | ||
430 | { | ||
431 | kref_get(&r->res_ref); | ||
432 | } | ||
433 | |||
434 | void dlm_hold_rsb(struct dlm_rsb *r) | ||
435 | { | ||
436 | hold_rsb(r); | ||
437 | } | ||
438 | |||
439 | static void toss_rsb(struct kref *kref) | ||
440 | { | ||
441 | struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); | ||
442 | struct dlm_ls *ls = r->res_ls; | ||
443 | |||
444 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); | ||
445 | kref_init(&r->res_ref); | ||
446 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); | ||
447 | r->res_toss_time = jiffies; | ||
448 | if (r->res_lvbptr) { | ||
449 | free_lvb(r->res_lvbptr); | ||
450 | r->res_lvbptr = NULL; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | /* When all references to the rsb are gone it's transfered to | ||
455 | the tossed list for later disposal. */ | ||
456 | |||
457 | static void put_rsb(struct dlm_rsb *r) | ||
458 | { | ||
459 | struct dlm_ls *ls = r->res_ls; | ||
460 | uint32_t bucket = r->res_bucket; | ||
461 | |||
462 | write_lock(&ls->ls_rsbtbl[bucket].lock); | ||
463 | kref_put(&r->res_ref, toss_rsb); | ||
464 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
465 | } | ||
466 | |||
467 | void dlm_put_rsb(struct dlm_rsb *r) | ||
468 | { | ||
469 | put_rsb(r); | ||
470 | } | ||
471 | |||
472 | /* See comment for unhold_lkb */ | ||
473 | |||
474 | static void unhold_rsb(struct dlm_rsb *r) | ||
475 | { | ||
476 | int rv; | ||
477 | rv = kref_put(&r->res_ref, toss_rsb); | ||
478 | DLM_ASSERT(!rv, dlm_print_rsb(r);); | ||
479 | } | ||
480 | |||
481 | static void kill_rsb(struct kref *kref) | ||
482 | { | ||
483 | struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); | ||
484 | |||
485 | /* All work is done after the return from kref_put() so we | ||
486 | can release the write_lock before the remove and free. */ | ||
487 | |||
488 | DLM_ASSERT(list_empty(&r->res_lookup),); | ||
489 | DLM_ASSERT(list_empty(&r->res_grantqueue),); | ||
490 | DLM_ASSERT(list_empty(&r->res_convertqueue),); | ||
491 | DLM_ASSERT(list_empty(&r->res_waitqueue),); | ||
492 | DLM_ASSERT(list_empty(&r->res_root_list),); | ||
493 | DLM_ASSERT(list_empty(&r->res_recover_list),); | ||
494 | } | ||
495 | |||
496 | /* Attaching/detaching lkb's from rsb's is for rsb reference counting. | ||
497 | The rsb must exist as long as any lkb's for it do. */ | ||
498 | |||
499 | static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
500 | { | ||
501 | hold_rsb(r); | ||
502 | lkb->lkb_resource = r; | ||
503 | } | ||
504 | |||
505 | static void detach_lkb(struct dlm_lkb *lkb) | ||
506 | { | ||
507 | if (lkb->lkb_resource) { | ||
508 | put_rsb(lkb->lkb_resource); | ||
509 | lkb->lkb_resource = NULL; | ||
510 | } | ||
511 | } | ||
512 | |||
513 | static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | ||
514 | { | ||
515 | struct dlm_lkb *lkb, *tmp; | ||
516 | uint32_t lkid = 0; | ||
517 | uint16_t bucket; | ||
518 | |||
519 | lkb = allocate_lkb(ls); | ||
520 | if (!lkb) | ||
521 | return -ENOMEM; | ||
522 | |||
523 | lkb->lkb_nodeid = -1; | ||
524 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
525 | kref_init(&lkb->lkb_ref); | ||
526 | |||
527 | get_random_bytes(&bucket, sizeof(bucket)); | ||
528 | bucket &= (ls->ls_lkbtbl_size - 1); | ||
529 | |||
530 | write_lock(&ls->ls_lkbtbl[bucket].lock); | ||
531 | |||
532 | /* counter can roll over so we must verify lkid is not in use */ | ||
533 | |||
534 | while (lkid == 0) { | ||
535 | lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); | ||
536 | |||
537 | list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, | ||
538 | lkb_idtbl_list) { | ||
539 | if (tmp->lkb_id != lkid) | ||
540 | continue; | ||
541 | lkid = 0; | ||
542 | break; | ||
543 | } | ||
544 | } | ||
545 | |||
546 | lkb->lkb_id = lkid; | ||
547 | list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); | ||
548 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
549 | |||
550 | *lkb_ret = lkb; | ||
551 | return 0; | ||
552 | } | ||
553 | |||
554 | static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) | ||
555 | { | ||
556 | uint16_t bucket = lkid & 0xFFFF; | ||
557 | struct dlm_lkb *lkb; | ||
558 | |||
559 | list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { | ||
560 | if (lkb->lkb_id == lkid) | ||
561 | return lkb; | ||
562 | } | ||
563 | return NULL; | ||
564 | } | ||
565 | |||
566 | static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) | ||
567 | { | ||
568 | struct dlm_lkb *lkb; | ||
569 | uint16_t bucket = lkid & 0xFFFF; | ||
570 | |||
571 | if (bucket >= ls->ls_lkbtbl_size) | ||
572 | return -EBADSLT; | ||
573 | |||
574 | read_lock(&ls->ls_lkbtbl[bucket].lock); | ||
575 | lkb = __find_lkb(ls, lkid); | ||
576 | if (lkb) | ||
577 | kref_get(&lkb->lkb_ref); | ||
578 | read_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
579 | |||
580 | *lkb_ret = lkb; | ||
581 | return lkb ? 0 : -ENOENT; | ||
582 | } | ||
583 | |||
584 | static void kill_lkb(struct kref *kref) | ||
585 | { | ||
586 | struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref); | ||
587 | |||
588 | /* All work is done after the return from kref_put() so we | ||
589 | can release the write_lock before the detach_lkb */ | ||
590 | |||
591 | DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); | ||
592 | } | ||
593 | |||
594 | static int put_lkb(struct dlm_lkb *lkb) | ||
595 | { | ||
596 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
597 | uint16_t bucket = lkb->lkb_id & 0xFFFF; | ||
598 | |||
599 | write_lock(&ls->ls_lkbtbl[bucket].lock); | ||
600 | if (kref_put(&lkb->lkb_ref, kill_lkb)) { | ||
601 | list_del(&lkb->lkb_idtbl_list); | ||
602 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
603 | |||
604 | detach_lkb(lkb); | ||
605 | |||
606 | /* for local/process lkbs, lvbptr points to caller's lksb */ | ||
607 | if (lkb->lkb_lvbptr && is_master_copy(lkb)) | ||
608 | free_lvb(lkb->lkb_lvbptr); | ||
609 | if (lkb->lkb_range) | ||
610 | free_range(lkb->lkb_range); | ||
611 | free_lkb(lkb); | ||
612 | return 1; | ||
613 | } else { | ||
614 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | ||
615 | return 0; | ||
616 | } | ||
617 | } | ||
618 | |||
619 | int dlm_put_lkb(struct dlm_lkb *lkb) | ||
620 | { | ||
621 | return put_lkb(lkb); | ||
622 | } | ||
623 | |||
624 | /* This is only called to add a reference when the code already holds | ||
625 | a valid reference to the lkb, so there's no need for locking. */ | ||
626 | |||
627 | static inline void hold_lkb(struct dlm_lkb *lkb) | ||
628 | { | ||
629 | kref_get(&lkb->lkb_ref); | ||
630 | } | ||
631 | |||
632 | /* This is called when we need to remove a reference and are certain | ||
633 | it's not the last ref. e.g. del_lkb is always called between a | ||
634 | find_lkb/put_lkb and is always the inverse of a previous add_lkb. | ||
635 | put_lkb would work fine, but would involve unnecessary locking */ | ||
636 | |||
637 | static inline void unhold_lkb(struct dlm_lkb *lkb) | ||
638 | { | ||
639 | int rv; | ||
640 | rv = kref_put(&lkb->lkb_ref, kill_lkb); | ||
641 | DLM_ASSERT(!rv, dlm_print_lkb(lkb);); | ||
642 | } | ||
643 | |||
644 | static void lkb_add_ordered(struct list_head *new, struct list_head *head, | ||
645 | int mode) | ||
646 | { | ||
647 | struct dlm_lkb *lkb = NULL; | ||
648 | |||
649 | list_for_each_entry(lkb, head, lkb_statequeue) | ||
650 | if (lkb->lkb_rqmode < mode) | ||
651 | break; | ||
652 | |||
653 | if (!lkb) | ||
654 | list_add_tail(new, head); | ||
655 | else | ||
656 | __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); | ||
657 | } | ||
658 | |||
659 | /* add/remove lkb to rsb's grant/convert/wait queue */ | ||
660 | |||
661 | static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status) | ||
662 | { | ||
663 | kref_get(&lkb->lkb_ref); | ||
664 | |||
665 | DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); | ||
666 | |||
667 | lkb->lkb_status = status; | ||
668 | |||
669 | switch (status) { | ||
670 | case DLM_LKSTS_WAITING: | ||
671 | if (lkb->lkb_exflags & DLM_LKF_HEADQUE) | ||
672 | list_add(&lkb->lkb_statequeue, &r->res_waitqueue); | ||
673 | else | ||
674 | list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue); | ||
675 | break; | ||
676 | case DLM_LKSTS_GRANTED: | ||
677 | /* convention says granted locks kept in order of grmode */ | ||
678 | lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue, | ||
679 | lkb->lkb_grmode); | ||
680 | break; | ||
681 | case DLM_LKSTS_CONVERT: | ||
682 | if (lkb->lkb_exflags & DLM_LKF_HEADQUE) | ||
683 | list_add(&lkb->lkb_statequeue, &r->res_convertqueue); | ||
684 | else | ||
685 | list_add_tail(&lkb->lkb_statequeue, | ||
686 | &r->res_convertqueue); | ||
687 | break; | ||
688 | default: | ||
689 | DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status);); | ||
690 | } | ||
691 | } | ||
692 | |||
693 | static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
694 | { | ||
695 | lkb->lkb_status = 0; | ||
696 | list_del(&lkb->lkb_statequeue); | ||
697 | unhold_lkb(lkb); | ||
698 | } | ||
699 | |||
700 | static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts) | ||
701 | { | ||
702 | hold_lkb(lkb); | ||
703 | del_lkb(r, lkb); | ||
704 | add_lkb(r, lkb, sts); | ||
705 | unhold_lkb(lkb); | ||
706 | } | ||
707 | |||
708 | /* add/remove lkb from global waiters list of lkb's waiting for | ||
709 | a reply from a remote node */ | ||
710 | |||
711 | static void add_to_waiters(struct dlm_lkb *lkb, int mstype) | ||
712 | { | ||
713 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
714 | |||
715 | down(&ls->ls_waiters_sem); | ||
716 | if (lkb->lkb_wait_type) { | ||
717 | log_print("add_to_waiters error %d", lkb->lkb_wait_type); | ||
718 | goto out; | ||
719 | } | ||
720 | lkb->lkb_wait_type = mstype; | ||
721 | kref_get(&lkb->lkb_ref); | ||
722 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | ||
723 | out: | ||
724 | up(&ls->ls_waiters_sem); | ||
725 | } | ||
726 | |||
727 | static int _remove_from_waiters(struct dlm_lkb *lkb) | ||
728 | { | ||
729 | int error = 0; | ||
730 | |||
731 | if (!lkb->lkb_wait_type) { | ||
732 | log_print("remove_from_waiters error"); | ||
733 | error = -EINVAL; | ||
734 | goto out; | ||
735 | } | ||
736 | lkb->lkb_wait_type = 0; | ||
737 | list_del(&lkb->lkb_wait_reply); | ||
738 | unhold_lkb(lkb); | ||
739 | out: | ||
740 | return error; | ||
741 | } | ||
742 | |||
743 | static int remove_from_waiters(struct dlm_lkb *lkb) | ||
744 | { | ||
745 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
746 | int error; | ||
747 | |||
748 | down(&ls->ls_waiters_sem); | ||
749 | error = _remove_from_waiters(lkb); | ||
750 | up(&ls->ls_waiters_sem); | ||
751 | return error; | ||
752 | } | ||
753 | |||
754 | static void dir_remove(struct dlm_rsb *r) | ||
755 | { | ||
756 | int to_nodeid; | ||
757 | |||
758 | if (dlm_no_directory(r->res_ls)) | ||
759 | return; | ||
760 | |||
761 | to_nodeid = dlm_dir_nodeid(r); | ||
762 | if (to_nodeid != dlm_our_nodeid()) | ||
763 | send_remove(r); | ||
764 | else | ||
765 | dlm_dir_remove_entry(r->res_ls, to_nodeid, | ||
766 | r->res_name, r->res_length); | ||
767 | } | ||
768 | |||
769 | /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is | ||
770 | found since they are in order of newest to oldest? */ | ||
771 | |||
772 | static int shrink_bucket(struct dlm_ls *ls, int b) | ||
773 | { | ||
774 | struct dlm_rsb *r; | ||
775 | int count = 0, found; | ||
776 | |||
777 | for (;;) { | ||
778 | found = FALSE; | ||
779 | write_lock(&ls->ls_rsbtbl[b].lock); | ||
780 | list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, | ||
781 | res_hashchain) { | ||
782 | if (!time_after_eq(jiffies, r->res_toss_time + | ||
783 | dlm_config.toss_secs * HZ)) | ||
784 | continue; | ||
785 | found = TRUE; | ||
786 | break; | ||
787 | } | ||
788 | |||
789 | if (!found) { | ||
790 | write_unlock(&ls->ls_rsbtbl[b].lock); | ||
791 | break; | ||
792 | } | ||
793 | |||
794 | if (kref_put(&r->res_ref, kill_rsb)) { | ||
795 | list_del(&r->res_hashchain); | ||
796 | write_unlock(&ls->ls_rsbtbl[b].lock); | ||
797 | |||
798 | if (is_master(r)) | ||
799 | dir_remove(r); | ||
800 | free_rsb(r); | ||
801 | count++; | ||
802 | } else { | ||
803 | write_unlock(&ls->ls_rsbtbl[b].lock); | ||
804 | log_error(ls, "tossed rsb in use %s", r->res_name); | ||
805 | } | ||
806 | } | ||
807 | |||
808 | return count; | ||
809 | } | ||
810 | |||
811 | void dlm_scan_rsbs(struct dlm_ls *ls) | ||
812 | { | ||
813 | int i; | ||
814 | |||
815 | if (dlm_locking_stopped(ls)) | ||
816 | return; | ||
817 | |||
818 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
819 | shrink_bucket(ls, i); | ||
820 | cond_resched(); | ||
821 | } | ||
822 | } | ||
823 | |||
824 | /* lkb is master or local copy */ | ||
825 | |||
826 | static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
827 | { | ||
828 | int b, len = r->res_ls->ls_lvblen; | ||
829 | |||
830 | /* b=1 lvb returned to caller | ||
831 | b=0 lvb written to rsb or invalidated | ||
832 | b=-1 do nothing */ | ||
833 | |||
834 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | ||
835 | |||
836 | if (b == 1) { | ||
837 | if (!lkb->lkb_lvbptr) | ||
838 | return; | ||
839 | |||
840 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
841 | return; | ||
842 | |||
843 | if (!r->res_lvbptr) | ||
844 | return; | ||
845 | |||
846 | memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len); | ||
847 | lkb->lkb_lvbseq = r->res_lvbseq; | ||
848 | |||
849 | } else if (b == 0) { | ||
850 | if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) { | ||
851 | rsb_set_flag(r, RSB_VALNOTVALID); | ||
852 | return; | ||
853 | } | ||
854 | |||
855 | if (!lkb->lkb_lvbptr) | ||
856 | return; | ||
857 | |||
858 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
859 | return; | ||
860 | |||
861 | if (!r->res_lvbptr) | ||
862 | r->res_lvbptr = allocate_lvb(r->res_ls); | ||
863 | |||
864 | if (!r->res_lvbptr) | ||
865 | return; | ||
866 | |||
867 | memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len); | ||
868 | r->res_lvbseq++; | ||
869 | lkb->lkb_lvbseq = r->res_lvbseq; | ||
870 | rsb_clear_flag(r, RSB_VALNOTVALID); | ||
871 | } | ||
872 | |||
873 | if (rsb_flag(r, RSB_VALNOTVALID)) | ||
874 | lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID; | ||
875 | } | ||
876 | |||
877 | static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
878 | { | ||
879 | if (lkb->lkb_grmode < DLM_LOCK_PW) | ||
880 | return; | ||
881 | |||
882 | if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) { | ||
883 | rsb_set_flag(r, RSB_VALNOTVALID); | ||
884 | return; | ||
885 | } | ||
886 | |||
887 | if (!lkb->lkb_lvbptr) | ||
888 | return; | ||
889 | |||
890 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
891 | return; | ||
892 | |||
893 | if (!r->res_lvbptr) | ||
894 | r->res_lvbptr = allocate_lvb(r->res_ls); | ||
895 | |||
896 | if (!r->res_lvbptr) | ||
897 | return; | ||
898 | |||
899 | memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); | ||
900 | r->res_lvbseq++; | ||
901 | rsb_clear_flag(r, RSB_VALNOTVALID); | ||
902 | } | ||
903 | |||
904 | /* lkb is process copy (pc) */ | ||
905 | |||
906 | static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
907 | struct dlm_message *ms) | ||
908 | { | ||
909 | int b; | ||
910 | |||
911 | if (!lkb->lkb_lvbptr) | ||
912 | return; | ||
913 | |||
914 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
915 | return; | ||
916 | |||
917 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | ||
918 | if (b == 1) { | ||
919 | int len = receive_extralen(ms); | ||
920 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | ||
921 | lkb->lkb_lvbseq = ms->m_lvbseq; | ||
922 | } | ||
923 | } | ||
924 | |||
925 | /* Manipulate lkb's on rsb's convert/granted/waiting queues | ||
926 | remove_lock -- used for unlock, removes lkb from granted | ||
927 | revert_lock -- used for cancel, moves lkb from convert to granted | ||
928 | grant_lock -- used for request and convert, adds lkb to granted or | ||
929 | moves lkb from convert or waiting to granted | ||
930 | |||
931 | Each of these is used for master or local copy lkb's. There is | ||
932 | also a _pc() variation used to make the corresponding change on | ||
933 | a process copy (pc) lkb. */ | ||
934 | |||
935 | static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
936 | { | ||
937 | del_lkb(r, lkb); | ||
938 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
939 | /* this unhold undoes the original ref from create_lkb() | ||
940 | so this leads to the lkb being freed */ | ||
941 | unhold_lkb(lkb); | ||
942 | } | ||
943 | |||
944 | static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
945 | { | ||
946 | set_lvb_unlock(r, lkb); | ||
947 | _remove_lock(r, lkb); | ||
948 | } | ||
949 | |||
950 | static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
951 | { | ||
952 | _remove_lock(r, lkb); | ||
953 | } | ||
954 | |||
955 | static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
956 | { | ||
957 | lkb->lkb_rqmode = DLM_LOCK_IV; | ||
958 | |||
959 | switch (lkb->lkb_status) { | ||
960 | case DLM_LKSTS_CONVERT: | ||
961 | move_lkb(r, lkb, DLM_LKSTS_GRANTED); | ||
962 | break; | ||
963 | case DLM_LKSTS_WAITING: | ||
964 | del_lkb(r, lkb); | ||
965 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
966 | /* this unhold undoes the original ref from create_lkb() | ||
967 | so this leads to the lkb being freed */ | ||
968 | unhold_lkb(lkb); | ||
969 | break; | ||
970 | default: | ||
971 | log_print("invalid status for revert %d", lkb->lkb_status); | ||
972 | } | ||
973 | } | ||
974 | |||
975 | static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
976 | { | ||
977 | revert_lock(r, lkb); | ||
978 | } | ||
979 | |||
980 | static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
981 | { | ||
982 | if (lkb->lkb_grmode != lkb->lkb_rqmode) { | ||
983 | lkb->lkb_grmode = lkb->lkb_rqmode; | ||
984 | if (lkb->lkb_status) | ||
985 | move_lkb(r, lkb, DLM_LKSTS_GRANTED); | ||
986 | else | ||
987 | add_lkb(r, lkb, DLM_LKSTS_GRANTED); | ||
988 | } | ||
989 | |||
990 | lkb->lkb_rqmode = DLM_LOCK_IV; | ||
991 | |||
992 | if (lkb->lkb_range) { | ||
993 | lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START]; | ||
994 | lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END]; | ||
995 | } | ||
996 | } | ||
997 | |||
998 | static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
999 | { | ||
1000 | set_lvb_lock(r, lkb); | ||
1001 | _grant_lock(r, lkb); | ||
1002 | lkb->lkb_highbast = 0; | ||
1003 | } | ||
1004 | |||
1005 | static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
1006 | struct dlm_message *ms) | ||
1007 | { | ||
1008 | set_lvb_lock_pc(r, lkb, ms); | ||
1009 | _grant_lock(r, lkb); | ||
1010 | } | ||
1011 | |||
1012 | /* called by grant_pending_locks() which means an async grant message must | ||
1013 | be sent to the requesting node in addition to granting the lock if the | ||
1014 | lkb belongs to a remote node. */ | ||
1015 | |||
1016 | static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1017 | { | ||
1018 | grant_lock(r, lkb); | ||
1019 | if (is_master_copy(lkb)) | ||
1020 | send_grant(r, lkb); | ||
1021 | else | ||
1022 | queue_cast(r, lkb, 0); | ||
1023 | } | ||
1024 | |||
1025 | static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) | ||
1026 | { | ||
1027 | struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, | ||
1028 | lkb_statequeue); | ||
1029 | if (lkb->lkb_id == first->lkb_id) | ||
1030 | return TRUE; | ||
1031 | |||
1032 | return FALSE; | ||
1033 | } | ||
1034 | |||
1035 | /* Return 1 if the locks' ranges overlap. If the lkb has no range then it is | ||
1036 | assumed to cover 0-ffffffff.ffffffff */ | ||
1037 | |||
1038 | static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) | ||
1039 | { | ||
1040 | if (!lkb1->lkb_range || !lkb2->lkb_range) | ||
1041 | return TRUE; | ||
1042 | |||
1043 | if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || | ||
1044 | lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) | ||
1045 | return FALSE; | ||
1046 | |||
1047 | return TRUE; | ||
1048 | } | ||
1049 | |||
1050 | /* Check if the given lkb conflicts with another lkb on the queue. */ | ||
1051 | |||
1052 | static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) | ||
1053 | { | ||
1054 | struct dlm_lkb *this; | ||
1055 | |||
1056 | list_for_each_entry(this, head, lkb_statequeue) { | ||
1057 | if (this == lkb) | ||
1058 | continue; | ||
1059 | if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) | ||
1060 | return TRUE; | ||
1061 | } | ||
1062 | return FALSE; | ||
1063 | } | ||
1064 | |||
1065 | /* | ||
1066 | * "A conversion deadlock arises with a pair of lock requests in the converting | ||
1067 | * queue for one resource. The granted mode of each lock blocks the requested | ||
1068 | * mode of the other lock." | ||
1069 | * | ||
1070 | * Part 2: if the granted mode of lkb is preventing the first lkb in the | ||
1071 | * convert queue from being granted, then demote lkb (set grmode to NL). | ||
1072 | * This second form requires that we check for conv-deadlk even when | ||
1073 | * now == 0 in _can_be_granted(). | ||
1074 | * | ||
1075 | * Example: | ||
1076 | * Granted Queue: empty | ||
1077 | * Convert Queue: NL->EX (first lock) | ||
1078 | * PR->EX (second lock) | ||
1079 | * | ||
1080 | * The first lock can't be granted because of the granted mode of the second | ||
1081 | * lock and the second lock can't be granted because it's not first in the | ||
1082 | * list. We demote the granted mode of the second lock (the lkb passed to this | ||
1083 | * function). | ||
1084 | * | ||
1085 | * After the resolution, the "grant pending" function needs to go back and try | ||
1086 | * to grant locks on the convert queue again since the first lock can now be | ||
1087 | * granted. | ||
1088 | */ | ||
1089 | |||
1090 | static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) | ||
1091 | { | ||
1092 | struct dlm_lkb *this, *first = NULL, *self = NULL; | ||
1093 | |||
1094 | list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { | ||
1095 | if (!first) | ||
1096 | first = this; | ||
1097 | if (this == lkb) { | ||
1098 | self = lkb; | ||
1099 | continue; | ||
1100 | } | ||
1101 | |||
1102 | if (!ranges_overlap(lkb, this)) | ||
1103 | continue; | ||
1104 | |||
1105 | if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) | ||
1106 | return TRUE; | ||
1107 | } | ||
1108 | |||
1109 | /* if lkb is on the convert queue and is preventing the first | ||
1110 | from being granted, then there's deadlock and we demote lkb. | ||
1111 | multiple converting locks may need to do this before the first | ||
1112 | converting lock can be granted. */ | ||
1113 | |||
1114 | if (self && self != first) { | ||
1115 | if (!modes_compat(lkb, first) && | ||
1116 | !queue_conflict(&rsb->res_grantqueue, first)) | ||
1117 | return TRUE; | ||
1118 | } | ||
1119 | |||
1120 | return FALSE; | ||
1121 | } | ||
1122 | |||
1123 | /* | ||
1124 | * Return 1 if the lock can be granted, 0 otherwise. | ||
1125 | * Also detect and resolve conversion deadlocks. | ||
1126 | * | ||
1127 | * lkb is the lock to be granted | ||
1128 | * | ||
1129 | * now is 1 if the function is being called in the context of the | ||
1130 | * immediate request, it is 0 if called later, after the lock has been | ||
1131 | * queued. | ||
1132 | * | ||
1133 | * References are from chapter 6 of "VAXcluster Principles" by Roy Davis | ||
1134 | */ | ||
1135 | |||
1136 | static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) | ||
1137 | { | ||
1138 | int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV); | ||
1139 | |||
1140 | /* | ||
1141 | * 6-10: Version 5.4 introduced an option to address the phenomenon of | ||
1142 | * a new request for a NL mode lock being blocked. | ||
1143 | * | ||
1144 | * 6-11: If the optional EXPEDITE flag is used with the new NL mode | ||
1145 | * request, then it would be granted. In essence, the use of this flag | ||
1146 | * tells the Lock Manager to expedite theis request by not considering | ||
1147 | * what may be in the CONVERTING or WAITING queues... As of this | ||
1148 | * writing, the EXPEDITE flag can be used only with new requests for NL | ||
1149 | * mode locks. This flag is not valid for conversion requests. | ||
1150 | * | ||
1151 | * A shortcut. Earlier checks return an error if EXPEDITE is used in a | ||
1152 | * conversion or used with a non-NL requested mode. We also know an | ||
1153 | * EXPEDITE request is always granted immediately, so now must always | ||
1154 | * be 1. The full condition to grant an expedite request: (now && | ||
1155 | * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can | ||
1156 | * therefore be shortened to just checking the flag. | ||
1157 | */ | ||
1158 | |||
1159 | if (lkb->lkb_exflags & DLM_LKF_EXPEDITE) | ||
1160 | return TRUE; | ||
1161 | |||
1162 | /* | ||
1163 | * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be | ||
1164 | * added to the remaining conditions. | ||
1165 | */ | ||
1166 | |||
1167 | if (queue_conflict(&r->res_grantqueue, lkb)) | ||
1168 | goto out; | ||
1169 | |||
1170 | /* | ||
1171 | * 6-3: By default, a conversion request is immediately granted if the | ||
1172 | * requested mode is compatible with the modes of all other granted | ||
1173 | * locks | ||
1174 | */ | ||
1175 | |||
1176 | if (queue_conflict(&r->res_convertqueue, lkb)) | ||
1177 | goto out; | ||
1178 | |||
1179 | /* | ||
1180 | * 6-5: But the default algorithm for deciding whether to grant or | ||
1181 | * queue conversion requests does not by itself guarantee that such | ||
1182 | * requests are serviced on a "first come first serve" basis. This, in | ||
1183 | * turn, can lead to a phenomenon known as "indefinate postponement". | ||
1184 | * | ||
1185 | * 6-7: This issue is dealt with by using the optional QUECVT flag with | ||
1186 | * the system service employed to request a lock conversion. This flag | ||
1187 | * forces certain conversion requests to be queued, even if they are | ||
1188 | * compatible with the granted modes of other locks on the same | ||
1189 | * resource. Thus, the use of this flag results in conversion requests | ||
1190 | * being ordered on a "first come first servce" basis. | ||
1191 | * | ||
1192 | * DCT: This condition is all about new conversions being able to occur | ||
1193 | * "in place" while the lock remains on the granted queue (assuming | ||
1194 | * nothing else conflicts.) IOW if QUECVT isn't set, a conversion | ||
1195 | * doesn't _have_ to go onto the convert queue where it's processed in | ||
1196 | * order. The "now" variable is necessary to distinguish converts | ||
1197 | * being received and processed for the first time now, because once a | ||
1198 | * convert is moved to the conversion queue the condition below applies | ||
1199 | * requiring fifo granting. | ||
1200 | */ | ||
1201 | |||
1202 | if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT)) | ||
1203 | return TRUE; | ||
1204 | |||
1205 | /* | ||
1206 | * When using range locks the NOORDER flag is set to avoid the standard | ||
1207 | * vms rules on grant order. | ||
1208 | */ | ||
1209 | |||
1210 | if (lkb->lkb_exflags & DLM_LKF_NOORDER) | ||
1211 | return TRUE; | ||
1212 | |||
1213 | /* | ||
1214 | * 6-3: Once in that queue [CONVERTING], a conversion request cannot be | ||
1215 | * granted until all other conversion requests ahead of it are granted | ||
1216 | * and/or canceled. | ||
1217 | */ | ||
1218 | |||
1219 | if (!now && conv && first_in_list(lkb, &r->res_convertqueue)) | ||
1220 | return TRUE; | ||
1221 | |||
1222 | /* | ||
1223 | * 6-4: By default, a new request is immediately granted only if all | ||
1224 | * three of the following conditions are satisfied when the request is | ||
1225 | * issued: | ||
1226 | * - The queue of ungranted conversion requests for the resource is | ||
1227 | * empty. | ||
1228 | * - The queue of ungranted new requests for the resource is empty. | ||
1229 | * - The mode of the new request is compatible with the most | ||
1230 | * restrictive mode of all granted locks on the resource. | ||
1231 | */ | ||
1232 | |||
1233 | if (now && !conv && list_empty(&r->res_convertqueue) && | ||
1234 | list_empty(&r->res_waitqueue)) | ||
1235 | return TRUE; | ||
1236 | |||
1237 | /* | ||
1238 | * 6-4: Once a lock request is in the queue of ungranted new requests, | ||
1239 | * it cannot be granted until the queue of ungranted conversion | ||
1240 | * requests is empty, all ungranted new requests ahead of it are | ||
1241 | * granted and/or canceled, and it is compatible with the granted mode | ||
1242 | * of the most restrictive lock granted on the resource. | ||
1243 | */ | ||
1244 | |||
1245 | if (!now && !conv && list_empty(&r->res_convertqueue) && | ||
1246 | first_in_list(lkb, &r->res_waitqueue)) | ||
1247 | return TRUE; | ||
1248 | |||
1249 | out: | ||
1250 | /* | ||
1251 | * The following, enabled by CONVDEADLK, departs from VMS. | ||
1252 | */ | ||
1253 | |||
1254 | if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) && | ||
1255 | conversion_deadlock_detect(r, lkb)) { | ||
1256 | lkb->lkb_grmode = DLM_LOCK_NL; | ||
1257 | lkb->lkb_sbflags |= DLM_SBF_DEMOTED; | ||
1258 | } | ||
1259 | |||
1260 | return FALSE; | ||
1261 | } | ||
1262 | |||
1263 | /* | ||
1264 | * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a | ||
1265 | * simple way to provide a big optimization to applications that can use them. | ||
1266 | */ | ||
1267 | |||
1268 | static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) | ||
1269 | { | ||
1270 | uint32_t flags = lkb->lkb_exflags; | ||
1271 | int rv; | ||
1272 | int8_t alt = 0, rqmode = lkb->lkb_rqmode; | ||
1273 | |||
1274 | rv = _can_be_granted(r, lkb, now); | ||
1275 | if (rv) | ||
1276 | goto out; | ||
1277 | |||
1278 | if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) | ||
1279 | goto out; | ||
1280 | |||
1281 | if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) | ||
1282 | alt = DLM_LOCK_PR; | ||
1283 | else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) | ||
1284 | alt = DLM_LOCK_CW; | ||
1285 | |||
1286 | if (alt) { | ||
1287 | lkb->lkb_rqmode = alt; | ||
1288 | rv = _can_be_granted(r, lkb, now); | ||
1289 | if (rv) | ||
1290 | lkb->lkb_sbflags |= DLM_SBF_ALTMODE; | ||
1291 | else | ||
1292 | lkb->lkb_rqmode = rqmode; | ||
1293 | } | ||
1294 | out: | ||
1295 | return rv; | ||
1296 | } | ||
1297 | |||
1298 | static int grant_pending_convert(struct dlm_rsb *r, int high) | ||
1299 | { | ||
1300 | struct dlm_lkb *lkb, *s; | ||
1301 | int hi, demoted, quit, grant_restart, demote_restart; | ||
1302 | |||
1303 | quit = 0; | ||
1304 | restart: | ||
1305 | grant_restart = 0; | ||
1306 | demote_restart = 0; | ||
1307 | hi = DLM_LOCK_IV; | ||
1308 | |||
1309 | list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { | ||
1310 | demoted = is_demoted(lkb); | ||
1311 | if (can_be_granted(r, lkb, FALSE)) { | ||
1312 | grant_lock_pending(r, lkb); | ||
1313 | grant_restart = 1; | ||
1314 | } else { | ||
1315 | hi = max_t(int, lkb->lkb_rqmode, hi); | ||
1316 | if (!demoted && is_demoted(lkb)) | ||
1317 | demote_restart = 1; | ||
1318 | } | ||
1319 | } | ||
1320 | |||
1321 | if (grant_restart) | ||
1322 | goto restart; | ||
1323 | if (demote_restart && !quit) { | ||
1324 | quit = 1; | ||
1325 | goto restart; | ||
1326 | } | ||
1327 | |||
1328 | return max_t(int, high, hi); | ||
1329 | } | ||
1330 | |||
1331 | static int grant_pending_wait(struct dlm_rsb *r, int high) | ||
1332 | { | ||
1333 | struct dlm_lkb *lkb, *s; | ||
1334 | |||
1335 | list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { | ||
1336 | if (can_be_granted(r, lkb, FALSE)) | ||
1337 | grant_lock_pending(r, lkb); | ||
1338 | else | ||
1339 | high = max_t(int, lkb->lkb_rqmode, high); | ||
1340 | } | ||
1341 | |||
1342 | return high; | ||
1343 | } | ||
1344 | |||
1345 | static void grant_pending_locks(struct dlm_rsb *r) | ||
1346 | { | ||
1347 | struct dlm_lkb *lkb, *s; | ||
1348 | int high = DLM_LOCK_IV; | ||
1349 | |||
1350 | DLM_ASSERT(is_master(r), dlm_print_rsb(r);); | ||
1351 | |||
1352 | high = grant_pending_convert(r, high); | ||
1353 | high = grant_pending_wait(r, high); | ||
1354 | |||
1355 | if (high == DLM_LOCK_IV) | ||
1356 | return; | ||
1357 | |||
1358 | /* | ||
1359 | * If there are locks left on the wait/convert queue then send blocking | ||
1360 | * ASTs to granted locks based on the largest requested mode (high) | ||
1361 | * found above. This can generate spurious blocking ASTs for range | ||
1362 | * locks. FIXME: highbast < high comparison not valid for PR/CW. | ||
1363 | */ | ||
1364 | |||
1365 | list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { | ||
1366 | if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) && | ||
1367 | !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) { | ||
1368 | queue_bast(r, lkb, high); | ||
1369 | lkb->lkb_highbast = high; | ||
1370 | } | ||
1371 | } | ||
1372 | } | ||
1373 | |||
1374 | static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, | ||
1375 | struct dlm_lkb *lkb) | ||
1376 | { | ||
1377 | struct dlm_lkb *gr; | ||
1378 | |||
1379 | list_for_each_entry(gr, head, lkb_statequeue) { | ||
1380 | if (gr->lkb_bastaddr && | ||
1381 | gr->lkb_highbast < lkb->lkb_rqmode && | ||
1382 | ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) { | ||
1383 | queue_bast(r, gr, lkb->lkb_rqmode); | ||
1384 | gr->lkb_highbast = lkb->lkb_rqmode; | ||
1385 | } | ||
1386 | } | ||
1387 | } | ||
1388 | |||
1389 | static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1390 | { | ||
1391 | send_bast_queue(r, &r->res_grantqueue, lkb); | ||
1392 | } | ||
1393 | |||
1394 | static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1395 | { | ||
1396 | send_bast_queue(r, &r->res_grantqueue, lkb); | ||
1397 | send_bast_queue(r, &r->res_convertqueue, lkb); | ||
1398 | } | ||
1399 | |||
1400 | /* set_master(r, lkb) -- set the master nodeid of a resource | ||
1401 | |||
1402 | The purpose of this function is to set the nodeid field in the given | ||
1403 | lkb using the nodeid field in the given rsb. If the rsb's nodeid is | ||
1404 | known, it can just be copied to the lkb and the function will return | ||
1405 | 0. If the rsb's nodeid is _not_ known, it needs to be looked up | ||
1406 | before it can be copied to the lkb. | ||
1407 | |||
1408 | When the rsb nodeid is being looked up remotely, the initial lkb | ||
1409 | causing the lookup is kept on the ls_waiters list waiting for the | ||
1410 | lookup reply. Other lkb's waiting for the same rsb lookup are kept | ||
1411 | on the rsb's res_lookup list until the master is verified. | ||
1412 | |||
1413 | Return values: | ||
1414 | 0: nodeid is set in rsb/lkb and the caller should go ahead and use it | ||
1415 | 1: the rsb master is not available and the lkb has been placed on | ||
1416 | a wait queue | ||
1417 | */ | ||
1418 | |||
1419 | static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1420 | { | ||
1421 | struct dlm_ls *ls = r->res_ls; | ||
1422 | int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); | ||
1423 | |||
1424 | if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { | ||
1425 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | ||
1426 | r->res_first_lkid = lkb->lkb_id; | ||
1427 | lkb->lkb_nodeid = r->res_nodeid; | ||
1428 | return 0; | ||
1429 | } | ||
1430 | |||
1431 | if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) { | ||
1432 | list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup); | ||
1433 | return 1; | ||
1434 | } | ||
1435 | |||
1436 | if (r->res_nodeid == 0) { | ||
1437 | lkb->lkb_nodeid = 0; | ||
1438 | return 0; | ||
1439 | } | ||
1440 | |||
1441 | if (r->res_nodeid > 0) { | ||
1442 | lkb->lkb_nodeid = r->res_nodeid; | ||
1443 | return 0; | ||
1444 | } | ||
1445 | |||
1446 | DLM_ASSERT(r->res_nodeid == -1, dlm_print_rsb(r);); | ||
1447 | |||
1448 | dir_nodeid = dlm_dir_nodeid(r); | ||
1449 | |||
1450 | if (dir_nodeid != our_nodeid) { | ||
1451 | r->res_first_lkid = lkb->lkb_id; | ||
1452 | send_lookup(r, lkb); | ||
1453 | return 1; | ||
1454 | } | ||
1455 | |||
1456 | for (;;) { | ||
1457 | /* It's possible for dlm_scand to remove an old rsb for | ||
1458 | this same resource from the toss list, us to create | ||
1459 | a new one, look up the master locally, and find it | ||
1460 | already exists just before dlm_scand does the | ||
1461 | dir_remove() on the previous rsb. */ | ||
1462 | |||
1463 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, | ||
1464 | r->res_length, &ret_nodeid); | ||
1465 | if (!error) | ||
1466 | break; | ||
1467 | log_debug(ls, "dir_lookup error %d %s", error, r->res_name); | ||
1468 | schedule(); | ||
1469 | } | ||
1470 | |||
1471 | if (ret_nodeid == our_nodeid) { | ||
1472 | r->res_first_lkid = 0; | ||
1473 | r->res_nodeid = 0; | ||
1474 | lkb->lkb_nodeid = 0; | ||
1475 | } else { | ||
1476 | r->res_first_lkid = lkb->lkb_id; | ||
1477 | r->res_nodeid = ret_nodeid; | ||
1478 | lkb->lkb_nodeid = ret_nodeid; | ||
1479 | } | ||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1483 | static void process_lookup_list(struct dlm_rsb *r) | ||
1484 | { | ||
1485 | struct dlm_lkb *lkb, *safe; | ||
1486 | |||
1487 | list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { | ||
1488 | list_del(&lkb->lkb_rsb_lookup); | ||
1489 | _request_lock(r, lkb); | ||
1490 | schedule(); | ||
1491 | } | ||
1492 | } | ||
1493 | |||
1494 | /* confirm_master -- confirm (or deny) an rsb's master nodeid */ | ||
1495 | |||
1496 | static void confirm_master(struct dlm_rsb *r, int error) | ||
1497 | { | ||
1498 | struct dlm_lkb *lkb; | ||
1499 | |||
1500 | if (!r->res_first_lkid) | ||
1501 | return; | ||
1502 | |||
1503 | switch (error) { | ||
1504 | case 0: | ||
1505 | case -EINPROGRESS: | ||
1506 | r->res_first_lkid = 0; | ||
1507 | process_lookup_list(r); | ||
1508 | break; | ||
1509 | |||
1510 | case -EAGAIN: | ||
1511 | /* the remote master didn't queue our NOQUEUE request; | ||
1512 | make a waiting lkb the first_lkid */ | ||
1513 | |||
1514 | r->res_first_lkid = 0; | ||
1515 | |||
1516 | if (!list_empty(&r->res_lookup)) { | ||
1517 | lkb = list_entry(r->res_lookup.next, struct dlm_lkb, | ||
1518 | lkb_rsb_lookup); | ||
1519 | list_del(&lkb->lkb_rsb_lookup); | ||
1520 | r->res_first_lkid = lkb->lkb_id; | ||
1521 | _request_lock(r, lkb); | ||
1522 | } else | ||
1523 | r->res_nodeid = -1; | ||
1524 | break; | ||
1525 | |||
1526 | default: | ||
1527 | log_error(r->res_ls, "confirm_master unknown error %d", error); | ||
1528 | } | ||
1529 | } | ||
1530 | |||
1531 | static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, | ||
1532 | int namelen, uint32_t parent_lkid, void *ast, | ||
1533 | void *astarg, void *bast, struct dlm_range *range, | ||
1534 | struct dlm_args *args) | ||
1535 | { | ||
1536 | int rv = -EINVAL; | ||
1537 | |||
1538 | /* check for invalid arg usage */ | ||
1539 | |||
1540 | if (mode < 0 || mode > DLM_LOCK_EX) | ||
1541 | goto out; | ||
1542 | |||
1543 | if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN)) | ||
1544 | goto out; | ||
1545 | |||
1546 | if (flags & DLM_LKF_CANCEL) | ||
1547 | goto out; | ||
1548 | |||
1549 | if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT)) | ||
1550 | goto out; | ||
1551 | |||
1552 | if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT)) | ||
1553 | goto out; | ||
1554 | |||
1555 | if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE) | ||
1556 | goto out; | ||
1557 | |||
1558 | if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT) | ||
1559 | goto out; | ||
1560 | |||
1561 | if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT) | ||
1562 | goto out; | ||
1563 | |||
1564 | if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE) | ||
1565 | goto out; | ||
1566 | |||
1567 | if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL) | ||
1568 | goto out; | ||
1569 | |||
1570 | if (!ast || !lksb) | ||
1571 | goto out; | ||
1572 | |||
1573 | if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) | ||
1574 | goto out; | ||
1575 | |||
1576 | /* parent/child locks not yet supported */ | ||
1577 | if (parent_lkid) | ||
1578 | goto out; | ||
1579 | |||
1580 | if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) | ||
1581 | goto out; | ||
1582 | |||
1583 | /* these args will be copied to the lkb in validate_lock_args, | ||
1584 | it cannot be done now because when converting locks, fields in | ||
1585 | an active lkb cannot be modified before locking the rsb */ | ||
1586 | |||
1587 | args->flags = flags; | ||
1588 | args->astaddr = ast; | ||
1589 | args->astparam = (long) astarg; | ||
1590 | args->bastaddr = bast; | ||
1591 | args->mode = mode; | ||
1592 | args->lksb = lksb; | ||
1593 | args->range = range; | ||
1594 | rv = 0; | ||
1595 | out: | ||
1596 | return rv; | ||
1597 | } | ||
1598 | |||
1599 | static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) | ||
1600 | { | ||
1601 | if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK | | ||
1602 | DLM_LKF_FORCEUNLOCK)) | ||
1603 | return -EINVAL; | ||
1604 | |||
1605 | args->flags = flags; | ||
1606 | args->astparam = (long) astarg; | ||
1607 | return 0; | ||
1608 | } | ||
1609 | |||
1610 | static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
1611 | struct dlm_args *args) | ||
1612 | { | ||
1613 | int rv = -EINVAL; | ||
1614 | |||
1615 | if (args->flags & DLM_LKF_CONVERT) { | ||
1616 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) | ||
1617 | goto out; | ||
1618 | |||
1619 | if (args->flags & DLM_LKF_QUECVT && | ||
1620 | !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) | ||
1621 | goto out; | ||
1622 | |||
1623 | rv = -EBUSY; | ||
1624 | if (lkb->lkb_status != DLM_LKSTS_GRANTED) | ||
1625 | goto out; | ||
1626 | |||
1627 | if (lkb->lkb_wait_type) | ||
1628 | goto out; | ||
1629 | } | ||
1630 | |||
1631 | lkb->lkb_exflags = args->flags; | ||
1632 | lkb->lkb_sbflags = 0; | ||
1633 | lkb->lkb_astaddr = args->astaddr; | ||
1634 | lkb->lkb_astparam = args->astparam; | ||
1635 | lkb->lkb_bastaddr = args->bastaddr; | ||
1636 | lkb->lkb_rqmode = args->mode; | ||
1637 | lkb->lkb_lksb = args->lksb; | ||
1638 | lkb->lkb_lvbptr = args->lksb->sb_lvbptr; | ||
1639 | lkb->lkb_ownpid = (int) current->pid; | ||
1640 | |||
1641 | rv = 0; | ||
1642 | if (!args->range) | ||
1643 | goto out; | ||
1644 | |||
1645 | if (!lkb->lkb_range) { | ||
1646 | rv = -ENOMEM; | ||
1647 | lkb->lkb_range = allocate_range(ls); | ||
1648 | if (!lkb->lkb_range) | ||
1649 | goto out; | ||
1650 | /* This is needed for conversions that contain ranges | ||
1651 | where the original lock didn't but it's harmless for | ||
1652 | new locks too. */ | ||
1653 | lkb->lkb_range[GR_RANGE_START] = 0LL; | ||
1654 | lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; | ||
1655 | } | ||
1656 | |||
1657 | lkb->lkb_range[RQ_RANGE_START] = args->range->ra_start; | ||
1658 | lkb->lkb_range[RQ_RANGE_END] = args->range->ra_end; | ||
1659 | lkb->lkb_flags |= DLM_IFL_RANGE; | ||
1660 | rv = 0; | ||
1661 | out: | ||
1662 | return rv; | ||
1663 | } | ||
1664 | |||
1665 | static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) | ||
1666 | { | ||
1667 | int rv = -EINVAL; | ||
1668 | |||
1669 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) | ||
1670 | goto out; | ||
1671 | |||
1672 | if (args->flags & DLM_LKF_FORCEUNLOCK) | ||
1673 | goto out_ok; | ||
1674 | |||
1675 | if (args->flags & DLM_LKF_CANCEL && | ||
1676 | lkb->lkb_status == DLM_LKSTS_GRANTED) | ||
1677 | goto out; | ||
1678 | |||
1679 | if (!(args->flags & DLM_LKF_CANCEL) && | ||
1680 | lkb->lkb_status != DLM_LKSTS_GRANTED) | ||
1681 | goto out; | ||
1682 | |||
1683 | rv = -EBUSY; | ||
1684 | if (lkb->lkb_wait_type) | ||
1685 | goto out; | ||
1686 | |||
1687 | out_ok: | ||
1688 | lkb->lkb_exflags = args->flags; | ||
1689 | lkb->lkb_sbflags = 0; | ||
1690 | lkb->lkb_astparam = args->astparam; | ||
1691 | |||
1692 | rv = 0; | ||
1693 | out: | ||
1694 | return rv; | ||
1695 | } | ||
1696 | |||
1697 | /* | ||
1698 | * Four stage 4 varieties: | ||
1699 | * do_request(), do_convert(), do_unlock(), do_cancel() | ||
1700 | * These are called on the master node for the given lock and | ||
1701 | * from the central locking logic. | ||
1702 | */ | ||
1703 | |||
1704 | static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1705 | { | ||
1706 | int error = 0; | ||
1707 | |||
1708 | if (can_be_granted(r, lkb, TRUE)) { | ||
1709 | grant_lock(r, lkb); | ||
1710 | queue_cast(r, lkb, 0); | ||
1711 | goto out; | ||
1712 | } | ||
1713 | |||
1714 | if (can_be_queued(lkb)) { | ||
1715 | error = -EINPROGRESS; | ||
1716 | add_lkb(r, lkb, DLM_LKSTS_WAITING); | ||
1717 | send_blocking_asts(r, lkb); | ||
1718 | goto out; | ||
1719 | } | ||
1720 | |||
1721 | error = -EAGAIN; | ||
1722 | if (force_blocking_asts(lkb)) | ||
1723 | send_blocking_asts_all(r, lkb); | ||
1724 | queue_cast(r, lkb, -EAGAIN); | ||
1725 | |||
1726 | out: | ||
1727 | return error; | ||
1728 | } | ||
1729 | |||
1730 | static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1731 | { | ||
1732 | int error = 0; | ||
1733 | |||
1734 | /* changing an existing lock may allow others to be granted */ | ||
1735 | |||
1736 | if (can_be_granted(r, lkb, TRUE)) { | ||
1737 | grant_lock(r, lkb); | ||
1738 | queue_cast(r, lkb, 0); | ||
1739 | grant_pending_locks(r); | ||
1740 | goto out; | ||
1741 | } | ||
1742 | |||
1743 | if (can_be_queued(lkb)) { | ||
1744 | if (is_demoted(lkb)) | ||
1745 | grant_pending_locks(r); | ||
1746 | error = -EINPROGRESS; | ||
1747 | del_lkb(r, lkb); | ||
1748 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | ||
1749 | send_blocking_asts(r, lkb); | ||
1750 | goto out; | ||
1751 | } | ||
1752 | |||
1753 | error = -EAGAIN; | ||
1754 | if (force_blocking_asts(lkb)) | ||
1755 | send_blocking_asts_all(r, lkb); | ||
1756 | queue_cast(r, lkb, -EAGAIN); | ||
1757 | |||
1758 | out: | ||
1759 | return error; | ||
1760 | } | ||
1761 | |||
1762 | static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1763 | { | ||
1764 | remove_lock(r, lkb); | ||
1765 | queue_cast(r, lkb, -DLM_EUNLOCK); | ||
1766 | grant_pending_locks(r); | ||
1767 | return -DLM_EUNLOCK; | ||
1768 | } | ||
1769 | |||
1770 | static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1771 | { | ||
1772 | revert_lock(r, lkb); | ||
1773 | queue_cast(r, lkb, -DLM_ECANCEL); | ||
1774 | grant_pending_locks(r); | ||
1775 | return -DLM_ECANCEL; | ||
1776 | } | ||
1777 | |||
1778 | /* | ||
1779 | * Four stage 3 varieties: | ||
1780 | * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock() | ||
1781 | */ | ||
1782 | |||
1783 | /* add a new lkb to a possibly new rsb, called by requesting process */ | ||
1784 | |||
1785 | static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1786 | { | ||
1787 | int error; | ||
1788 | |||
1789 | /* set_master: sets lkb nodeid from r */ | ||
1790 | |||
1791 | error = set_master(r, lkb); | ||
1792 | if (error < 0) | ||
1793 | goto out; | ||
1794 | if (error) { | ||
1795 | error = 0; | ||
1796 | goto out; | ||
1797 | } | ||
1798 | |||
1799 | if (is_remote(r)) | ||
1800 | /* receive_request() calls do_request() on remote node */ | ||
1801 | error = send_request(r, lkb); | ||
1802 | else | ||
1803 | error = do_request(r, lkb); | ||
1804 | out: | ||
1805 | return error; | ||
1806 | } | ||
1807 | |||
1808 | /* change some property of an existing lkb, e.g. mode, range */ | ||
1809 | |||
1810 | static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1811 | { | ||
1812 | int error; | ||
1813 | |||
1814 | if (is_remote(r)) | ||
1815 | /* receive_convert() calls do_convert() on remote node */ | ||
1816 | error = send_convert(r, lkb); | ||
1817 | else | ||
1818 | error = do_convert(r, lkb); | ||
1819 | |||
1820 | return error; | ||
1821 | } | ||
1822 | |||
1823 | /* remove an existing lkb from the granted queue */ | ||
1824 | |||
1825 | static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1826 | { | ||
1827 | int error; | ||
1828 | |||
1829 | if (is_remote(r)) | ||
1830 | /* receive_unlock() calls do_unlock() on remote node */ | ||
1831 | error = send_unlock(r, lkb); | ||
1832 | else | ||
1833 | error = do_unlock(r, lkb); | ||
1834 | |||
1835 | return error; | ||
1836 | } | ||
1837 | |||
1838 | /* remove an existing lkb from the convert or wait queue */ | ||
1839 | |||
1840 | static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
1841 | { | ||
1842 | int error; | ||
1843 | |||
1844 | if (is_remote(r)) | ||
1845 | /* receive_cancel() calls do_cancel() on remote node */ | ||
1846 | error = send_cancel(r, lkb); | ||
1847 | else | ||
1848 | error = do_cancel(r, lkb); | ||
1849 | |||
1850 | return error; | ||
1851 | } | ||
1852 | |||
1853 | /* | ||
1854 | * Four stage 2 varieties: | ||
1855 | * request_lock(), convert_lock(), unlock_lock(), cancel_lock() | ||
1856 | */ | ||
1857 | |||
1858 | static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, | ||
1859 | int len, struct dlm_args *args) | ||
1860 | { | ||
1861 | struct dlm_rsb *r; | ||
1862 | int error; | ||
1863 | |||
1864 | error = validate_lock_args(ls, lkb, args); | ||
1865 | if (error) | ||
1866 | goto out; | ||
1867 | |||
1868 | error = find_rsb(ls, name, len, R_CREATE, &r); | ||
1869 | if (error) | ||
1870 | goto out; | ||
1871 | |||
1872 | lock_rsb(r); | ||
1873 | |||
1874 | attach_lkb(r, lkb); | ||
1875 | lkb->lkb_lksb->sb_lkid = lkb->lkb_id; | ||
1876 | |||
1877 | error = _request_lock(r, lkb); | ||
1878 | |||
1879 | unlock_rsb(r); | ||
1880 | put_rsb(r); | ||
1881 | |||
1882 | out: | ||
1883 | return error; | ||
1884 | } | ||
1885 | |||
1886 | static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
1887 | struct dlm_args *args) | ||
1888 | { | ||
1889 | struct dlm_rsb *r; | ||
1890 | int error; | ||
1891 | |||
1892 | r = lkb->lkb_resource; | ||
1893 | |||
1894 | hold_rsb(r); | ||
1895 | lock_rsb(r); | ||
1896 | |||
1897 | error = validate_lock_args(ls, lkb, args); | ||
1898 | if (error) | ||
1899 | goto out; | ||
1900 | |||
1901 | error = _convert_lock(r, lkb); | ||
1902 | out: | ||
1903 | unlock_rsb(r); | ||
1904 | put_rsb(r); | ||
1905 | return error; | ||
1906 | } | ||
1907 | |||
1908 | static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
1909 | struct dlm_args *args) | ||
1910 | { | ||
1911 | struct dlm_rsb *r; | ||
1912 | int error; | ||
1913 | |||
1914 | r = lkb->lkb_resource; | ||
1915 | |||
1916 | hold_rsb(r); | ||
1917 | lock_rsb(r); | ||
1918 | |||
1919 | error = validate_unlock_args(lkb, args); | ||
1920 | if (error) | ||
1921 | goto out; | ||
1922 | |||
1923 | error = _unlock_lock(r, lkb); | ||
1924 | out: | ||
1925 | unlock_rsb(r); | ||
1926 | put_rsb(r); | ||
1927 | return error; | ||
1928 | } | ||
1929 | |||
1930 | static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
1931 | struct dlm_args *args) | ||
1932 | { | ||
1933 | struct dlm_rsb *r; | ||
1934 | int error; | ||
1935 | |||
1936 | r = lkb->lkb_resource; | ||
1937 | |||
1938 | hold_rsb(r); | ||
1939 | lock_rsb(r); | ||
1940 | |||
1941 | error = validate_unlock_args(lkb, args); | ||
1942 | if (error) | ||
1943 | goto out; | ||
1944 | |||
1945 | error = _cancel_lock(r, lkb); | ||
1946 | out: | ||
1947 | unlock_rsb(r); | ||
1948 | put_rsb(r); | ||
1949 | return error; | ||
1950 | } | ||
1951 | |||
1952 | /* | ||
1953 | * Two stage 1 varieties: dlm_lock() and dlm_unlock() | ||
1954 | */ | ||
1955 | |||
1956 | int dlm_lock(dlm_lockspace_t *lockspace, | ||
1957 | int mode, | ||
1958 | struct dlm_lksb *lksb, | ||
1959 | uint32_t flags, | ||
1960 | void *name, | ||
1961 | unsigned int namelen, | ||
1962 | uint32_t parent_lkid, | ||
1963 | void (*ast) (void *astarg), | ||
1964 | void *astarg, | ||
1965 | void (*bast) (void *astarg, int mode), | ||
1966 | struct dlm_range *range) | ||
1967 | { | ||
1968 | struct dlm_ls *ls; | ||
1969 | struct dlm_lkb *lkb; | ||
1970 | struct dlm_args args; | ||
1971 | int error, convert = flags & DLM_LKF_CONVERT; | ||
1972 | |||
1973 | ls = dlm_find_lockspace_local(lockspace); | ||
1974 | if (!ls) | ||
1975 | return -EINVAL; | ||
1976 | |||
1977 | lock_recovery(ls); | ||
1978 | |||
1979 | if (convert) | ||
1980 | error = find_lkb(ls, lksb->sb_lkid, &lkb); | ||
1981 | else | ||
1982 | error = create_lkb(ls, &lkb); | ||
1983 | |||
1984 | if (error) | ||
1985 | goto out; | ||
1986 | |||
1987 | error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, | ||
1988 | astarg, bast, range, &args); | ||
1989 | if (error) | ||
1990 | goto out_put; | ||
1991 | |||
1992 | if (convert) | ||
1993 | error = convert_lock(ls, lkb, &args); | ||
1994 | else | ||
1995 | error = request_lock(ls, lkb, name, namelen, &args); | ||
1996 | |||
1997 | if (error == -EINPROGRESS) | ||
1998 | error = 0; | ||
1999 | out_put: | ||
2000 | if (convert || error) | ||
2001 | put_lkb(lkb); | ||
2002 | if (error == -EAGAIN) | ||
2003 | error = 0; | ||
2004 | out: | ||
2005 | unlock_recovery(ls); | ||
2006 | dlm_put_lockspace(ls); | ||
2007 | return error; | ||
2008 | } | ||
2009 | |||
2010 | int dlm_unlock(dlm_lockspace_t *lockspace, | ||
2011 | uint32_t lkid, | ||
2012 | uint32_t flags, | ||
2013 | struct dlm_lksb *lksb, | ||
2014 | void *astarg) | ||
2015 | { | ||
2016 | struct dlm_ls *ls; | ||
2017 | struct dlm_lkb *lkb; | ||
2018 | struct dlm_args args; | ||
2019 | int error; | ||
2020 | |||
2021 | ls = dlm_find_lockspace_local(lockspace); | ||
2022 | if (!ls) | ||
2023 | return -EINVAL; | ||
2024 | |||
2025 | lock_recovery(ls); | ||
2026 | |||
2027 | error = find_lkb(ls, lkid, &lkb); | ||
2028 | if (error) | ||
2029 | goto out; | ||
2030 | |||
2031 | error = set_unlock_args(flags, astarg, &args); | ||
2032 | if (error) | ||
2033 | goto out_put; | ||
2034 | |||
2035 | if (flags & DLM_LKF_CANCEL) | ||
2036 | error = cancel_lock(ls, lkb, &args); | ||
2037 | else | ||
2038 | error = unlock_lock(ls, lkb, &args); | ||
2039 | |||
2040 | if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) | ||
2041 | error = 0; | ||
2042 | out_put: | ||
2043 | put_lkb(lkb); | ||
2044 | out: | ||
2045 | unlock_recovery(ls); | ||
2046 | dlm_put_lockspace(ls); | ||
2047 | return error; | ||
2048 | } | ||
2049 | |||
2050 | /* | ||
2051 | * send/receive routines for remote operations and replies | ||
2052 | * | ||
2053 | * send_args | ||
2054 | * send_common | ||
2055 | * send_request receive_request | ||
2056 | * send_convert receive_convert | ||
2057 | * send_unlock receive_unlock | ||
2058 | * send_cancel receive_cancel | ||
2059 | * send_grant receive_grant | ||
2060 | * send_bast receive_bast | ||
2061 | * send_lookup receive_lookup | ||
2062 | * send_remove receive_remove | ||
2063 | * | ||
2064 | * send_common_reply | ||
2065 | * receive_request_reply send_request_reply | ||
2066 | * receive_convert_reply send_convert_reply | ||
2067 | * receive_unlock_reply send_unlock_reply | ||
2068 | * receive_cancel_reply send_cancel_reply | ||
2069 | * receive_lookup_reply send_lookup_reply | ||
2070 | */ | ||
2071 | |||
2072 | static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
2073 | int to_nodeid, int mstype, | ||
2074 | struct dlm_message **ms_ret, | ||
2075 | struct dlm_mhandle **mh_ret) | ||
2076 | { | ||
2077 | struct dlm_message *ms; | ||
2078 | struct dlm_mhandle *mh; | ||
2079 | char *mb; | ||
2080 | int mb_len = sizeof(struct dlm_message); | ||
2081 | |||
2082 | switch (mstype) { | ||
2083 | case DLM_MSG_REQUEST: | ||
2084 | case DLM_MSG_LOOKUP: | ||
2085 | case DLM_MSG_REMOVE: | ||
2086 | mb_len += r->res_length; | ||
2087 | break; | ||
2088 | case DLM_MSG_CONVERT: | ||
2089 | case DLM_MSG_UNLOCK: | ||
2090 | case DLM_MSG_REQUEST_REPLY: | ||
2091 | case DLM_MSG_CONVERT_REPLY: | ||
2092 | case DLM_MSG_GRANT: | ||
2093 | if (lkb && lkb->lkb_lvbptr) | ||
2094 | mb_len += r->res_ls->ls_lvblen; | ||
2095 | break; | ||
2096 | } | ||
2097 | |||
2098 | /* get_buffer gives us a message handle (mh) that we need to | ||
2099 | pass into lowcomms_commit and a message buffer (mb) that we | ||
2100 | write our data into */ | ||
2101 | |||
2102 | mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); | ||
2103 | if (!mh) | ||
2104 | return -ENOBUFS; | ||
2105 | |||
2106 | memset(mb, 0, mb_len); | ||
2107 | |||
2108 | ms = (struct dlm_message *) mb; | ||
2109 | |||
2110 | ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); | ||
2111 | ms->m_header.h_lockspace = r->res_ls->ls_global_id; | ||
2112 | ms->m_header.h_nodeid = dlm_our_nodeid(); | ||
2113 | ms->m_header.h_length = mb_len; | ||
2114 | ms->m_header.h_cmd = DLM_MSG; | ||
2115 | |||
2116 | ms->m_type = mstype; | ||
2117 | |||
2118 | *mh_ret = mh; | ||
2119 | *ms_ret = ms; | ||
2120 | return 0; | ||
2121 | } | ||
2122 | |||
2123 | /* further lowcomms enhancements or alternate implementations may make | ||
2124 | the return value from this function useful at some point */ | ||
2125 | |||
2126 | static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms) | ||
2127 | { | ||
2128 | dlm_message_out(ms); | ||
2129 | dlm_lowcomms_commit_buffer(mh); | ||
2130 | return 0; | ||
2131 | } | ||
2132 | |||
2133 | static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
2134 | struct dlm_message *ms) | ||
2135 | { | ||
2136 | ms->m_nodeid = lkb->lkb_nodeid; | ||
2137 | ms->m_pid = lkb->lkb_ownpid; | ||
2138 | ms->m_lkid = lkb->lkb_id; | ||
2139 | ms->m_remid = lkb->lkb_remid; | ||
2140 | ms->m_exflags = lkb->lkb_exflags; | ||
2141 | ms->m_sbflags = lkb->lkb_sbflags; | ||
2142 | ms->m_flags = lkb->lkb_flags; | ||
2143 | ms->m_lvbseq = lkb->lkb_lvbseq; | ||
2144 | ms->m_status = lkb->lkb_status; | ||
2145 | ms->m_grmode = lkb->lkb_grmode; | ||
2146 | ms->m_rqmode = lkb->lkb_rqmode; | ||
2147 | ms->m_hash = r->res_hash; | ||
2148 | |||
2149 | /* m_result and m_bastmode are set from function args, | ||
2150 | not from lkb fields */ | ||
2151 | |||
2152 | if (lkb->lkb_bastaddr) | ||
2153 | ms->m_asts |= AST_BAST; | ||
2154 | if (lkb->lkb_astaddr) | ||
2155 | ms->m_asts |= AST_COMP; | ||
2156 | |||
2157 | if (lkb->lkb_range) { | ||
2158 | ms->m_range[0] = lkb->lkb_range[RQ_RANGE_START]; | ||
2159 | ms->m_range[1] = lkb->lkb_range[RQ_RANGE_END]; | ||
2160 | } | ||
2161 | |||
2162 | if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) | ||
2163 | memcpy(ms->m_extra, r->res_name, r->res_length); | ||
2164 | |||
2165 | else if (lkb->lkb_lvbptr) | ||
2166 | memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); | ||
2167 | |||
2168 | } | ||
2169 | |||
2170 | static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) | ||
2171 | { | ||
2172 | struct dlm_message *ms; | ||
2173 | struct dlm_mhandle *mh; | ||
2174 | int to_nodeid, error; | ||
2175 | |||
2176 | add_to_waiters(lkb, mstype); | ||
2177 | |||
2178 | to_nodeid = r->res_nodeid; | ||
2179 | |||
2180 | error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); | ||
2181 | if (error) | ||
2182 | goto fail; | ||
2183 | |||
2184 | send_args(r, lkb, ms); | ||
2185 | |||
2186 | error = send_message(mh, ms); | ||
2187 | if (error) | ||
2188 | goto fail; | ||
2189 | return 0; | ||
2190 | |||
2191 | fail: | ||
2192 | remove_from_waiters(lkb); | ||
2193 | return error; | ||
2194 | } | ||
2195 | |||
2196 | static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
2197 | { | ||
2198 | return send_common(r, lkb, DLM_MSG_REQUEST); | ||
2199 | } | ||
2200 | |||
2201 | static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
2202 | { | ||
2203 | int error; | ||
2204 | |||
2205 | error = send_common(r, lkb, DLM_MSG_CONVERT); | ||
2206 | |||
2207 | /* down conversions go without a reply from the master */ | ||
2208 | if (!error && down_conversion(lkb)) { | ||
2209 | remove_from_waiters(lkb); | ||
2210 | r->res_ls->ls_stub_ms.m_result = 0; | ||
2211 | __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); | ||
2212 | } | ||
2213 | |||
2214 | return error; | ||
2215 | } | ||
2216 | |||
2217 | /* FIXME: if this lkb is the only lock we hold on the rsb, then set | ||
2218 | MASTER_UNCERTAIN to force the next request on the rsb to confirm | ||
2219 | that the master is still correct. */ | ||
2220 | |||
2221 | static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
2222 | { | ||
2223 | return send_common(r, lkb, DLM_MSG_UNLOCK); | ||
2224 | } | ||
2225 | |||
2226 | static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
2227 | { | ||
2228 | return send_common(r, lkb, DLM_MSG_CANCEL); | ||
2229 | } | ||
2230 | |||
2231 | static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
2232 | { | ||
2233 | struct dlm_message *ms; | ||
2234 | struct dlm_mhandle *mh; | ||
2235 | int to_nodeid, error; | ||
2236 | |||
2237 | to_nodeid = lkb->lkb_nodeid; | ||
2238 | |||
2239 | error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh); | ||
2240 | if (error) | ||
2241 | goto out; | ||
2242 | |||
2243 | send_args(r, lkb, ms); | ||
2244 | |||
2245 | ms->m_result = 0; | ||
2246 | |||
2247 | error = send_message(mh, ms); | ||
2248 | out: | ||
2249 | return error; | ||
2250 | } | ||
2251 | |||
2252 | static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode) | ||
2253 | { | ||
2254 | struct dlm_message *ms; | ||
2255 | struct dlm_mhandle *mh; | ||
2256 | int to_nodeid, error; | ||
2257 | |||
2258 | to_nodeid = lkb->lkb_nodeid; | ||
2259 | |||
2260 | error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh); | ||
2261 | if (error) | ||
2262 | goto out; | ||
2263 | |||
2264 | send_args(r, lkb, ms); | ||
2265 | |||
2266 | ms->m_bastmode = mode; | ||
2267 | |||
2268 | error = send_message(mh, ms); | ||
2269 | out: | ||
2270 | return error; | ||
2271 | } | ||
2272 | |||
2273 | static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
2274 | { | ||
2275 | struct dlm_message *ms; | ||
2276 | struct dlm_mhandle *mh; | ||
2277 | int to_nodeid, error; | ||
2278 | |||
2279 | add_to_waiters(lkb, DLM_MSG_LOOKUP); | ||
2280 | |||
2281 | to_nodeid = dlm_dir_nodeid(r); | ||
2282 | |||
2283 | error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); | ||
2284 | if (error) | ||
2285 | goto fail; | ||
2286 | |||
2287 | send_args(r, lkb, ms); | ||
2288 | |||
2289 | error = send_message(mh, ms); | ||
2290 | if (error) | ||
2291 | goto fail; | ||
2292 | return 0; | ||
2293 | |||
2294 | fail: | ||
2295 | remove_from_waiters(lkb); | ||
2296 | return error; | ||
2297 | } | ||
2298 | |||
2299 | static int send_remove(struct dlm_rsb *r) | ||
2300 | { | ||
2301 | struct dlm_message *ms; | ||
2302 | struct dlm_mhandle *mh; | ||
2303 | int to_nodeid, error; | ||
2304 | |||
2305 | to_nodeid = dlm_dir_nodeid(r); | ||
2306 | |||
2307 | error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh); | ||
2308 | if (error) | ||
2309 | goto out; | ||
2310 | |||
2311 | memcpy(ms->m_extra, r->res_name, r->res_length); | ||
2312 | ms->m_hash = r->res_hash; | ||
2313 | |||
2314 | error = send_message(mh, ms); | ||
2315 | out: | ||
2316 | return error; | ||
2317 | } | ||
2318 | |||
2319 | static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
2320 | int mstype, int rv) | ||
2321 | { | ||
2322 | struct dlm_message *ms; | ||
2323 | struct dlm_mhandle *mh; | ||
2324 | int to_nodeid, error; | ||
2325 | |||
2326 | to_nodeid = lkb->lkb_nodeid; | ||
2327 | |||
2328 | error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); | ||
2329 | if (error) | ||
2330 | goto out; | ||
2331 | |||
2332 | send_args(r, lkb, ms); | ||
2333 | |||
2334 | ms->m_result = rv; | ||
2335 | |||
2336 | error = send_message(mh, ms); | ||
2337 | out: | ||
2338 | return error; | ||
2339 | } | ||
2340 | |||
2341 | static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | ||
2342 | { | ||
2343 | return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv); | ||
2344 | } | ||
2345 | |||
2346 | static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | ||
2347 | { | ||
2348 | return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv); | ||
2349 | } | ||
2350 | |||
2351 | static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | ||
2352 | { | ||
2353 | return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv); | ||
2354 | } | ||
2355 | |||
2356 | static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | ||
2357 | { | ||
2358 | return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv); | ||
2359 | } | ||
2360 | |||
2361 | static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, | ||
2362 | int ret_nodeid, int rv) | ||
2363 | { | ||
2364 | struct dlm_rsb *r = &ls->ls_stub_rsb; | ||
2365 | struct dlm_message *ms; | ||
2366 | struct dlm_mhandle *mh; | ||
2367 | int error, nodeid = ms_in->m_header.h_nodeid; | ||
2368 | |||
2369 | error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh); | ||
2370 | if (error) | ||
2371 | goto out; | ||
2372 | |||
2373 | ms->m_lkid = ms_in->m_lkid; | ||
2374 | ms->m_result = rv; | ||
2375 | ms->m_nodeid = ret_nodeid; | ||
2376 | |||
2377 | error = send_message(mh, ms); | ||
2378 | out: | ||
2379 | return error; | ||
2380 | } | ||
2381 | |||
2382 | /* which args we save from a received message depends heavily on the type | ||
2383 | of message, unlike the send side where we can safely send everything about | ||
2384 | the lkb for any type of message */ | ||
2385 | |||
2386 | static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
2387 | { | ||
2388 | lkb->lkb_exflags = ms->m_exflags; | ||
2389 | lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | | ||
2390 | (ms->m_flags & 0x0000FFFF); | ||
2391 | } | ||
2392 | |||
2393 | static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
2394 | { | ||
2395 | lkb->lkb_sbflags = ms->m_sbflags; | ||
2396 | lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | | ||
2397 | (ms->m_flags & 0x0000FFFF); | ||
2398 | } | ||
2399 | |||
2400 | static int receive_extralen(struct dlm_message *ms) | ||
2401 | { | ||
2402 | return (ms->m_header.h_length - sizeof(struct dlm_message)); | ||
2403 | } | ||
2404 | |||
2405 | static int receive_range(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
2406 | struct dlm_message *ms) | ||
2407 | { | ||
2408 | if (lkb->lkb_flags & DLM_IFL_RANGE) { | ||
2409 | if (!lkb->lkb_range) | ||
2410 | lkb->lkb_range = allocate_range(ls); | ||
2411 | if (!lkb->lkb_range) | ||
2412 | return -ENOMEM; | ||
2413 | lkb->lkb_range[RQ_RANGE_START] = ms->m_range[0]; | ||
2414 | lkb->lkb_range[RQ_RANGE_END] = ms->m_range[1]; | ||
2415 | } | ||
2416 | return 0; | ||
2417 | } | ||
2418 | |||
2419 | static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
2420 | struct dlm_message *ms) | ||
2421 | { | ||
2422 | int len; | ||
2423 | |||
2424 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { | ||
2425 | if (!lkb->lkb_lvbptr) | ||
2426 | lkb->lkb_lvbptr = allocate_lvb(ls); | ||
2427 | if (!lkb->lkb_lvbptr) | ||
2428 | return -ENOMEM; | ||
2429 | len = receive_extralen(ms); | ||
2430 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | ||
2431 | } | ||
2432 | return 0; | ||
2433 | } | ||
2434 | |||
2435 | static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
2436 | struct dlm_message *ms) | ||
2437 | { | ||
2438 | lkb->lkb_nodeid = ms->m_header.h_nodeid; | ||
2439 | lkb->lkb_ownpid = ms->m_pid; | ||
2440 | lkb->lkb_remid = ms->m_lkid; | ||
2441 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
2442 | lkb->lkb_rqmode = ms->m_rqmode; | ||
2443 | lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); | ||
2444 | lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); | ||
2445 | |||
2446 | DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); | ||
2447 | |||
2448 | if (receive_range(ls, lkb, ms)) | ||
2449 | return -ENOMEM; | ||
2450 | |||
2451 | if (receive_lvb(ls, lkb, ms)) | ||
2452 | return -ENOMEM; | ||
2453 | |||
2454 | return 0; | ||
2455 | } | ||
2456 | |||
2457 | static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
2458 | struct dlm_message *ms) | ||
2459 | { | ||
2460 | if (lkb->lkb_nodeid != ms->m_header.h_nodeid) { | ||
2461 | log_error(ls, "convert_args nodeid %d %d lkid %x %x", | ||
2462 | lkb->lkb_nodeid, ms->m_header.h_nodeid, | ||
2463 | lkb->lkb_id, lkb->lkb_remid); | ||
2464 | return -EINVAL; | ||
2465 | } | ||
2466 | |||
2467 | if (!is_master_copy(lkb)) | ||
2468 | return -EINVAL; | ||
2469 | |||
2470 | if (lkb->lkb_status != DLM_LKSTS_GRANTED) | ||
2471 | return -EBUSY; | ||
2472 | |||
2473 | if (receive_range(ls, lkb, ms)) | ||
2474 | return -ENOMEM; | ||
2475 | if (lkb->lkb_range) { | ||
2476 | lkb->lkb_range[GR_RANGE_START] = 0LL; | ||
2477 | lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; | ||
2478 | } | ||
2479 | |||
2480 | if (receive_lvb(ls, lkb, ms)) | ||
2481 | return -ENOMEM; | ||
2482 | |||
2483 | lkb->lkb_rqmode = ms->m_rqmode; | ||
2484 | lkb->lkb_lvbseq = ms->m_lvbseq; | ||
2485 | |||
2486 | return 0; | ||
2487 | } | ||
2488 | |||
2489 | static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
2490 | struct dlm_message *ms) | ||
2491 | { | ||
2492 | if (!is_master_copy(lkb)) | ||
2493 | return -EINVAL; | ||
2494 | if (receive_lvb(ls, lkb, ms)) | ||
2495 | return -ENOMEM; | ||
2496 | return 0; | ||
2497 | } | ||
2498 | |||
2499 | /* We fill in the stub-lkb fields with the info that send_xxxx_reply() | ||
2500 | uses to send a reply and that the remote end uses to process the reply. */ | ||
2501 | |||
2502 | static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) | ||
2503 | { | ||
2504 | struct dlm_lkb *lkb = &ls->ls_stub_lkb; | ||
2505 | lkb->lkb_nodeid = ms->m_header.h_nodeid; | ||
2506 | lkb->lkb_remid = ms->m_lkid; | ||
2507 | } | ||
2508 | |||
2509 | static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) | ||
2510 | { | ||
2511 | struct dlm_lkb *lkb; | ||
2512 | struct dlm_rsb *r; | ||
2513 | int error, namelen; | ||
2514 | |||
2515 | error = create_lkb(ls, &lkb); | ||
2516 | if (error) | ||
2517 | goto fail; | ||
2518 | |||
2519 | receive_flags(lkb, ms); | ||
2520 | lkb->lkb_flags |= DLM_IFL_MSTCPY; | ||
2521 | error = receive_request_args(ls, lkb, ms); | ||
2522 | if (error) { | ||
2523 | put_lkb(lkb); | ||
2524 | goto fail; | ||
2525 | } | ||
2526 | |||
2527 | namelen = receive_extralen(ms); | ||
2528 | |||
2529 | error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r); | ||
2530 | if (error) { | ||
2531 | put_lkb(lkb); | ||
2532 | goto fail; | ||
2533 | } | ||
2534 | |||
2535 | lock_rsb(r); | ||
2536 | |||
2537 | attach_lkb(r, lkb); | ||
2538 | error = do_request(r, lkb); | ||
2539 | send_request_reply(r, lkb, error); | ||
2540 | |||
2541 | unlock_rsb(r); | ||
2542 | put_rsb(r); | ||
2543 | |||
2544 | if (error == -EINPROGRESS) | ||
2545 | error = 0; | ||
2546 | if (error) | ||
2547 | put_lkb(lkb); | ||
2548 | return; | ||
2549 | |||
2550 | fail: | ||
2551 | setup_stub_lkb(ls, ms); | ||
2552 | send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | ||
2553 | } | ||
2554 | |||
2555 | static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) | ||
2556 | { | ||
2557 | struct dlm_lkb *lkb; | ||
2558 | struct dlm_rsb *r; | ||
2559 | int error, reply = TRUE; | ||
2560 | |||
2561 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2562 | if (error) | ||
2563 | goto fail; | ||
2564 | |||
2565 | r = lkb->lkb_resource; | ||
2566 | |||
2567 | hold_rsb(r); | ||
2568 | lock_rsb(r); | ||
2569 | |||
2570 | receive_flags(lkb, ms); | ||
2571 | error = receive_convert_args(ls, lkb, ms); | ||
2572 | if (error) | ||
2573 | goto out; | ||
2574 | reply = !down_conversion(lkb); | ||
2575 | |||
2576 | error = do_convert(r, lkb); | ||
2577 | out: | ||
2578 | if (reply) | ||
2579 | send_convert_reply(r, lkb, error); | ||
2580 | |||
2581 | unlock_rsb(r); | ||
2582 | put_rsb(r); | ||
2583 | put_lkb(lkb); | ||
2584 | return; | ||
2585 | |||
2586 | fail: | ||
2587 | setup_stub_lkb(ls, ms); | ||
2588 | send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | ||
2589 | } | ||
2590 | |||
2591 | static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) | ||
2592 | { | ||
2593 | struct dlm_lkb *lkb; | ||
2594 | struct dlm_rsb *r; | ||
2595 | int error; | ||
2596 | |||
2597 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2598 | if (error) | ||
2599 | goto fail; | ||
2600 | |||
2601 | r = lkb->lkb_resource; | ||
2602 | |||
2603 | hold_rsb(r); | ||
2604 | lock_rsb(r); | ||
2605 | |||
2606 | receive_flags(lkb, ms); | ||
2607 | error = receive_unlock_args(ls, lkb, ms); | ||
2608 | if (error) | ||
2609 | goto out; | ||
2610 | |||
2611 | error = do_unlock(r, lkb); | ||
2612 | out: | ||
2613 | send_unlock_reply(r, lkb, error); | ||
2614 | |||
2615 | unlock_rsb(r); | ||
2616 | put_rsb(r); | ||
2617 | put_lkb(lkb); | ||
2618 | return; | ||
2619 | |||
2620 | fail: | ||
2621 | setup_stub_lkb(ls, ms); | ||
2622 | send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | ||
2623 | } | ||
2624 | |||
2625 | static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) | ||
2626 | { | ||
2627 | struct dlm_lkb *lkb; | ||
2628 | struct dlm_rsb *r; | ||
2629 | int error; | ||
2630 | |||
2631 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2632 | if (error) | ||
2633 | goto fail; | ||
2634 | |||
2635 | receive_flags(lkb, ms); | ||
2636 | |||
2637 | r = lkb->lkb_resource; | ||
2638 | |||
2639 | hold_rsb(r); | ||
2640 | lock_rsb(r); | ||
2641 | |||
2642 | error = do_cancel(r, lkb); | ||
2643 | send_cancel_reply(r, lkb, error); | ||
2644 | |||
2645 | unlock_rsb(r); | ||
2646 | put_rsb(r); | ||
2647 | put_lkb(lkb); | ||
2648 | return; | ||
2649 | |||
2650 | fail: | ||
2651 | setup_stub_lkb(ls, ms); | ||
2652 | send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | ||
2653 | } | ||
2654 | |||
2655 | static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) | ||
2656 | { | ||
2657 | struct dlm_lkb *lkb; | ||
2658 | struct dlm_rsb *r; | ||
2659 | int error; | ||
2660 | |||
2661 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2662 | if (error) { | ||
2663 | log_error(ls, "receive_grant no lkb"); | ||
2664 | return; | ||
2665 | } | ||
2666 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
2667 | |||
2668 | r = lkb->lkb_resource; | ||
2669 | |||
2670 | hold_rsb(r); | ||
2671 | lock_rsb(r); | ||
2672 | |||
2673 | receive_flags_reply(lkb, ms); | ||
2674 | grant_lock_pc(r, lkb, ms); | ||
2675 | queue_cast(r, lkb, 0); | ||
2676 | |||
2677 | unlock_rsb(r); | ||
2678 | put_rsb(r); | ||
2679 | put_lkb(lkb); | ||
2680 | } | ||
2681 | |||
2682 | static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) | ||
2683 | { | ||
2684 | struct dlm_lkb *lkb; | ||
2685 | struct dlm_rsb *r; | ||
2686 | int error; | ||
2687 | |||
2688 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2689 | if (error) { | ||
2690 | log_error(ls, "receive_bast no lkb"); | ||
2691 | return; | ||
2692 | } | ||
2693 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
2694 | |||
2695 | r = lkb->lkb_resource; | ||
2696 | |||
2697 | hold_rsb(r); | ||
2698 | lock_rsb(r); | ||
2699 | |||
2700 | queue_bast(r, lkb, ms->m_bastmode); | ||
2701 | |||
2702 | unlock_rsb(r); | ||
2703 | put_rsb(r); | ||
2704 | put_lkb(lkb); | ||
2705 | } | ||
2706 | |||
2707 | static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) | ||
2708 | { | ||
2709 | int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid; | ||
2710 | |||
2711 | from_nodeid = ms->m_header.h_nodeid; | ||
2712 | our_nodeid = dlm_our_nodeid(); | ||
2713 | |||
2714 | len = receive_extralen(ms); | ||
2715 | |||
2716 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); | ||
2717 | if (dir_nodeid != our_nodeid) { | ||
2718 | log_error(ls, "lookup dir_nodeid %d from %d", | ||
2719 | dir_nodeid, from_nodeid); | ||
2720 | error = -EINVAL; | ||
2721 | ret_nodeid = -1; | ||
2722 | goto out; | ||
2723 | } | ||
2724 | |||
2725 | error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid); | ||
2726 | |||
2727 | /* Optimization: we're master so treat lookup as a request */ | ||
2728 | if (!error && ret_nodeid == our_nodeid) { | ||
2729 | receive_request(ls, ms); | ||
2730 | return; | ||
2731 | } | ||
2732 | out: | ||
2733 | send_lookup_reply(ls, ms, ret_nodeid, error); | ||
2734 | } | ||
2735 | |||
2736 | static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) | ||
2737 | { | ||
2738 | int len, dir_nodeid, from_nodeid; | ||
2739 | |||
2740 | from_nodeid = ms->m_header.h_nodeid; | ||
2741 | |||
2742 | len = receive_extralen(ms); | ||
2743 | |||
2744 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); | ||
2745 | if (dir_nodeid != dlm_our_nodeid()) { | ||
2746 | log_error(ls, "remove dir entry dir_nodeid %d from %d", | ||
2747 | dir_nodeid, from_nodeid); | ||
2748 | return; | ||
2749 | } | ||
2750 | |||
2751 | dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); | ||
2752 | } | ||
2753 | |||
2754 | static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
2755 | { | ||
2756 | struct dlm_lkb *lkb; | ||
2757 | struct dlm_rsb *r; | ||
2758 | int error, mstype; | ||
2759 | |||
2760 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2761 | if (error) { | ||
2762 | log_error(ls, "receive_request_reply no lkb"); | ||
2763 | return; | ||
2764 | } | ||
2765 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
2766 | |||
2767 | mstype = lkb->lkb_wait_type; | ||
2768 | error = remove_from_waiters(lkb); | ||
2769 | if (error) { | ||
2770 | log_error(ls, "receive_request_reply not on waiters"); | ||
2771 | goto out; | ||
2772 | } | ||
2773 | |||
2774 | /* this is the value returned from do_request() on the master */ | ||
2775 | error = ms->m_result; | ||
2776 | |||
2777 | r = lkb->lkb_resource; | ||
2778 | hold_rsb(r); | ||
2779 | lock_rsb(r); | ||
2780 | |||
2781 | /* Optimization: the dir node was also the master, so it took our | ||
2782 | lookup as a request and sent request reply instead of lookup reply */ | ||
2783 | if (mstype == DLM_MSG_LOOKUP) { | ||
2784 | r->res_nodeid = ms->m_header.h_nodeid; | ||
2785 | lkb->lkb_nodeid = r->res_nodeid; | ||
2786 | } | ||
2787 | |||
2788 | switch (error) { | ||
2789 | case -EAGAIN: | ||
2790 | /* request would block (be queued) on remote master; | ||
2791 | the unhold undoes the original ref from create_lkb() | ||
2792 | so it leads to the lkb being freed */ | ||
2793 | queue_cast(r, lkb, -EAGAIN); | ||
2794 | confirm_master(r, -EAGAIN); | ||
2795 | unhold_lkb(lkb); | ||
2796 | break; | ||
2797 | |||
2798 | case -EINPROGRESS: | ||
2799 | case 0: | ||
2800 | /* request was queued or granted on remote master */ | ||
2801 | receive_flags_reply(lkb, ms); | ||
2802 | lkb->lkb_remid = ms->m_lkid; | ||
2803 | if (error) | ||
2804 | add_lkb(r, lkb, DLM_LKSTS_WAITING); | ||
2805 | else { | ||
2806 | grant_lock_pc(r, lkb, ms); | ||
2807 | queue_cast(r, lkb, 0); | ||
2808 | } | ||
2809 | confirm_master(r, error); | ||
2810 | break; | ||
2811 | |||
2812 | case -ENOENT: | ||
2813 | case -ENOTBLK: | ||
2814 | /* find_rsb failed to find rsb or rsb wasn't master */ | ||
2815 | r->res_nodeid = -1; | ||
2816 | lkb->lkb_nodeid = -1; | ||
2817 | _request_lock(r, lkb); | ||
2818 | break; | ||
2819 | |||
2820 | default: | ||
2821 | log_error(ls, "receive_request_reply error %d", error); | ||
2822 | } | ||
2823 | |||
2824 | unlock_rsb(r); | ||
2825 | put_rsb(r); | ||
2826 | out: | ||
2827 | put_lkb(lkb); | ||
2828 | } | ||
2829 | |||
2830 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
2831 | struct dlm_message *ms) | ||
2832 | { | ||
2833 | int error = ms->m_result; | ||
2834 | |||
2835 | /* this is the value returned from do_convert() on the master */ | ||
2836 | |||
2837 | switch (error) { | ||
2838 | case -EAGAIN: | ||
2839 | /* convert would block (be queued) on remote master */ | ||
2840 | queue_cast(r, lkb, -EAGAIN); | ||
2841 | break; | ||
2842 | |||
2843 | case -EINPROGRESS: | ||
2844 | /* convert was queued on remote master */ | ||
2845 | del_lkb(r, lkb); | ||
2846 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | ||
2847 | break; | ||
2848 | |||
2849 | case 0: | ||
2850 | /* convert was granted on remote master */ | ||
2851 | receive_flags_reply(lkb, ms); | ||
2852 | grant_lock_pc(r, lkb, ms); | ||
2853 | queue_cast(r, lkb, 0); | ||
2854 | break; | ||
2855 | |||
2856 | default: | ||
2857 | log_error(r->res_ls, "receive_convert_reply error %d", error); | ||
2858 | } | ||
2859 | } | ||
2860 | |||
2861 | static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
2862 | { | ||
2863 | struct dlm_rsb *r = lkb->lkb_resource; | ||
2864 | |||
2865 | hold_rsb(r); | ||
2866 | lock_rsb(r); | ||
2867 | |||
2868 | __receive_convert_reply(r, lkb, ms); | ||
2869 | |||
2870 | unlock_rsb(r); | ||
2871 | put_rsb(r); | ||
2872 | } | ||
2873 | |||
2874 | static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
2875 | { | ||
2876 | struct dlm_lkb *lkb; | ||
2877 | int error; | ||
2878 | |||
2879 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2880 | if (error) { | ||
2881 | log_error(ls, "receive_convert_reply no lkb"); | ||
2882 | return; | ||
2883 | } | ||
2884 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
2885 | |||
2886 | error = remove_from_waiters(lkb); | ||
2887 | if (error) { | ||
2888 | log_error(ls, "receive_convert_reply not on waiters"); | ||
2889 | goto out; | ||
2890 | } | ||
2891 | |||
2892 | _receive_convert_reply(lkb, ms); | ||
2893 | out: | ||
2894 | put_lkb(lkb); | ||
2895 | } | ||
2896 | |||
2897 | static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
2898 | { | ||
2899 | struct dlm_rsb *r = lkb->lkb_resource; | ||
2900 | int error = ms->m_result; | ||
2901 | |||
2902 | hold_rsb(r); | ||
2903 | lock_rsb(r); | ||
2904 | |||
2905 | /* this is the value returned from do_unlock() on the master */ | ||
2906 | |||
2907 | switch (error) { | ||
2908 | case -DLM_EUNLOCK: | ||
2909 | receive_flags_reply(lkb, ms); | ||
2910 | remove_lock_pc(r, lkb); | ||
2911 | queue_cast(r, lkb, -DLM_EUNLOCK); | ||
2912 | break; | ||
2913 | default: | ||
2914 | log_error(r->res_ls, "receive_unlock_reply error %d", error); | ||
2915 | } | ||
2916 | |||
2917 | unlock_rsb(r); | ||
2918 | put_rsb(r); | ||
2919 | } | ||
2920 | |||
2921 | static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
2922 | { | ||
2923 | struct dlm_lkb *lkb; | ||
2924 | int error; | ||
2925 | |||
2926 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2927 | if (error) { | ||
2928 | log_error(ls, "receive_unlock_reply no lkb"); | ||
2929 | return; | ||
2930 | } | ||
2931 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
2932 | |||
2933 | error = remove_from_waiters(lkb); | ||
2934 | if (error) { | ||
2935 | log_error(ls, "receive_unlock_reply not on waiters"); | ||
2936 | goto out; | ||
2937 | } | ||
2938 | |||
2939 | _receive_unlock_reply(lkb, ms); | ||
2940 | out: | ||
2941 | put_lkb(lkb); | ||
2942 | } | ||
2943 | |||
2944 | static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
2945 | { | ||
2946 | struct dlm_rsb *r = lkb->lkb_resource; | ||
2947 | int error = ms->m_result; | ||
2948 | |||
2949 | hold_rsb(r); | ||
2950 | lock_rsb(r); | ||
2951 | |||
2952 | /* this is the value returned from do_cancel() on the master */ | ||
2953 | |||
2954 | switch (error) { | ||
2955 | case -DLM_ECANCEL: | ||
2956 | receive_flags_reply(lkb, ms); | ||
2957 | revert_lock_pc(r, lkb); | ||
2958 | queue_cast(r, lkb, -DLM_ECANCEL); | ||
2959 | break; | ||
2960 | default: | ||
2961 | log_error(r->res_ls, "receive_cancel_reply error %d", error); | ||
2962 | } | ||
2963 | |||
2964 | unlock_rsb(r); | ||
2965 | put_rsb(r); | ||
2966 | } | ||
2967 | |||
2968 | static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
2969 | { | ||
2970 | struct dlm_lkb *lkb; | ||
2971 | int error; | ||
2972 | |||
2973 | error = find_lkb(ls, ms->m_remid, &lkb); | ||
2974 | if (error) { | ||
2975 | log_error(ls, "receive_cancel_reply no lkb"); | ||
2976 | return; | ||
2977 | } | ||
2978 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
2979 | |||
2980 | error = remove_from_waiters(lkb); | ||
2981 | if (error) { | ||
2982 | log_error(ls, "receive_cancel_reply not on waiters"); | ||
2983 | goto out; | ||
2984 | } | ||
2985 | |||
2986 | _receive_cancel_reply(lkb, ms); | ||
2987 | out: | ||
2988 | put_lkb(lkb); | ||
2989 | } | ||
2990 | |||
2991 | static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | ||
2992 | { | ||
2993 | struct dlm_lkb *lkb; | ||
2994 | struct dlm_rsb *r; | ||
2995 | int error, ret_nodeid; | ||
2996 | |||
2997 | error = find_lkb(ls, ms->m_lkid, &lkb); | ||
2998 | if (error) { | ||
2999 | log_error(ls, "receive_lookup_reply no lkb"); | ||
3000 | return; | ||
3001 | } | ||
3002 | |||
3003 | error = remove_from_waiters(lkb); | ||
3004 | if (error) { | ||
3005 | log_error(ls, "receive_lookup_reply not on waiters"); | ||
3006 | goto out; | ||
3007 | } | ||
3008 | |||
3009 | /* this is the value returned by dlm_dir_lookup on dir node | ||
3010 | FIXME: will a non-zero error ever be returned? */ | ||
3011 | error = ms->m_result; | ||
3012 | |||
3013 | r = lkb->lkb_resource; | ||
3014 | hold_rsb(r); | ||
3015 | lock_rsb(r); | ||
3016 | |||
3017 | ret_nodeid = ms->m_nodeid; | ||
3018 | if (ret_nodeid == dlm_our_nodeid()) { | ||
3019 | r->res_nodeid = 0; | ||
3020 | ret_nodeid = 0; | ||
3021 | r->res_first_lkid = 0; | ||
3022 | } else { | ||
3023 | /* set_master() will copy res_nodeid to lkb_nodeid */ | ||
3024 | r->res_nodeid = ret_nodeid; | ||
3025 | } | ||
3026 | |||
3027 | _request_lock(r, lkb); | ||
3028 | |||
3029 | if (!ret_nodeid) | ||
3030 | process_lookup_list(r); | ||
3031 | |||
3032 | unlock_rsb(r); | ||
3033 | put_rsb(r); | ||
3034 | out: | ||
3035 | put_lkb(lkb); | ||
3036 | } | ||
3037 | |||
3038 | int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) | ||
3039 | { | ||
3040 | struct dlm_message *ms = (struct dlm_message *) hd; | ||
3041 | struct dlm_ls *ls; | ||
3042 | int error; | ||
3043 | |||
3044 | if (!recovery) | ||
3045 | dlm_message_in(ms); | ||
3046 | |||
3047 | ls = dlm_find_lockspace_global(hd->h_lockspace); | ||
3048 | if (!ls) { | ||
3049 | log_print("drop message %d from %d for unknown lockspace %d", | ||
3050 | ms->m_type, nodeid, hd->h_lockspace); | ||
3051 | return -EINVAL; | ||
3052 | } | ||
3053 | |||
3054 | /* recovery may have just ended leaving a bunch of backed-up requests | ||
3055 | in the requestqueue; wait while dlm_recoverd clears them */ | ||
3056 | |||
3057 | if (!recovery) | ||
3058 | dlm_wait_requestqueue(ls); | ||
3059 | |||
3060 | /* recovery may have just started while there were a bunch of | ||
3061 | in-flight requests -- save them in requestqueue to be processed | ||
3062 | after recovery. we can't let dlm_recvd block on the recovery | ||
3063 | lock. if dlm_recoverd is calling this function to clear the | ||
3064 | requestqueue, it needs to be interrupted (-EINTR) if another | ||
3065 | recovery operation is starting. */ | ||
3066 | |||
3067 | while (1) { | ||
3068 | if (dlm_locking_stopped(ls)) { | ||
3069 | if (!recovery) | ||
3070 | dlm_add_requestqueue(ls, nodeid, hd); | ||
3071 | error = -EINTR; | ||
3072 | goto out; | ||
3073 | } | ||
3074 | |||
3075 | if (lock_recovery_try(ls)) | ||
3076 | break; | ||
3077 | schedule(); | ||
3078 | } | ||
3079 | |||
3080 | switch (ms->m_type) { | ||
3081 | |||
3082 | /* messages sent to a master node */ | ||
3083 | |||
3084 | case DLM_MSG_REQUEST: | ||
3085 | receive_request(ls, ms); | ||
3086 | break; | ||
3087 | |||
3088 | case DLM_MSG_CONVERT: | ||
3089 | receive_convert(ls, ms); | ||
3090 | break; | ||
3091 | |||
3092 | case DLM_MSG_UNLOCK: | ||
3093 | receive_unlock(ls, ms); | ||
3094 | break; | ||
3095 | |||
3096 | case DLM_MSG_CANCEL: | ||
3097 | receive_cancel(ls, ms); | ||
3098 | break; | ||
3099 | |||
3100 | /* messages sent from a master node (replies to above) */ | ||
3101 | |||
3102 | case DLM_MSG_REQUEST_REPLY: | ||
3103 | receive_request_reply(ls, ms); | ||
3104 | break; | ||
3105 | |||
3106 | case DLM_MSG_CONVERT_REPLY: | ||
3107 | receive_convert_reply(ls, ms); | ||
3108 | break; | ||
3109 | |||
3110 | case DLM_MSG_UNLOCK_REPLY: | ||
3111 | receive_unlock_reply(ls, ms); | ||
3112 | break; | ||
3113 | |||
3114 | case DLM_MSG_CANCEL_REPLY: | ||
3115 | receive_cancel_reply(ls, ms); | ||
3116 | break; | ||
3117 | |||
3118 | /* messages sent from a master node (only two types of async msg) */ | ||
3119 | |||
3120 | case DLM_MSG_GRANT: | ||
3121 | receive_grant(ls, ms); | ||
3122 | break; | ||
3123 | |||
3124 | case DLM_MSG_BAST: | ||
3125 | receive_bast(ls, ms); | ||
3126 | break; | ||
3127 | |||
3128 | /* messages sent to a dir node */ | ||
3129 | |||
3130 | case DLM_MSG_LOOKUP: | ||
3131 | receive_lookup(ls, ms); | ||
3132 | break; | ||
3133 | |||
3134 | case DLM_MSG_REMOVE: | ||
3135 | receive_remove(ls, ms); | ||
3136 | break; | ||
3137 | |||
3138 | /* messages sent from a dir node (remove has no reply) */ | ||
3139 | |||
3140 | case DLM_MSG_LOOKUP_REPLY: | ||
3141 | receive_lookup_reply(ls, ms); | ||
3142 | break; | ||
3143 | |||
3144 | default: | ||
3145 | log_error(ls, "unknown message type %d", ms->m_type); | ||
3146 | } | ||
3147 | |||
3148 | unlock_recovery(ls); | ||
3149 | out: | ||
3150 | dlm_put_lockspace(ls); | ||
3151 | dlm_astd_wake(); | ||
3152 | return 0; | ||
3153 | } | ||
3154 | |||
3155 | |||
3156 | /* | ||
3157 | * Recovery related | ||
3158 | */ | ||
3159 | |||
3160 | static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
3161 | { | ||
3162 | if (middle_conversion(lkb)) { | ||
3163 | hold_lkb(lkb); | ||
3164 | ls->ls_stub_ms.m_result = -EINPROGRESS; | ||
3165 | _remove_from_waiters(lkb); | ||
3166 | _receive_convert_reply(lkb, &ls->ls_stub_ms); | ||
3167 | |||
3168 | /* Same special case as in receive_rcom_lock_args() */ | ||
3169 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
3170 | rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT); | ||
3171 | unhold_lkb(lkb); | ||
3172 | |||
3173 | } else if (lkb->lkb_rqmode >= lkb->lkb_grmode) { | ||
3174 | lkb->lkb_flags |= DLM_IFL_RESEND; | ||
3175 | } | ||
3176 | |||
3177 | /* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down | ||
3178 | conversions are async; there's no reply from the remote master */ | ||
3179 | } | ||
3180 | |||
3181 | /* A waiting lkb needs recovery if the master node has failed, or | ||
3182 | the master node is changing (only when no directory is used) */ | ||
3183 | |||
3184 | static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
3185 | { | ||
3186 | if (dlm_is_removed(ls, lkb->lkb_nodeid)) | ||
3187 | return 1; | ||
3188 | |||
3189 | if (!dlm_no_directory(ls)) | ||
3190 | return 0; | ||
3191 | |||
3192 | if (dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid) | ||
3193 | return 1; | ||
3194 | |||
3195 | return 0; | ||
3196 | } | ||
3197 | |||
3198 | /* Recovery for locks that are waiting for replies from nodes that are now | ||
3199 | gone. We can just complete unlocks and cancels by faking a reply from the | ||
3200 | dead node. Requests and up-conversions we flag to be resent after | ||
3201 | recovery. Down-conversions can just be completed with a fake reply like | ||
3202 | unlocks. Conversions between PR and CW need special attention. */ | ||
3203 | |||
3204 | void dlm_recover_waiters_pre(struct dlm_ls *ls) | ||
3205 | { | ||
3206 | struct dlm_lkb *lkb, *safe; | ||
3207 | |||
3208 | down(&ls->ls_waiters_sem); | ||
3209 | |||
3210 | list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { | ||
3211 | log_debug(ls, "pre recover waiter lkid %x type %d flags %x", | ||
3212 | lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); | ||
3213 | |||
3214 | /* all outstanding lookups, regardless of destination will be | ||
3215 | resent after recovery is done */ | ||
3216 | |||
3217 | if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) { | ||
3218 | lkb->lkb_flags |= DLM_IFL_RESEND; | ||
3219 | continue; | ||
3220 | } | ||
3221 | |||
3222 | if (!waiter_needs_recovery(ls, lkb)) | ||
3223 | continue; | ||
3224 | |||
3225 | switch (lkb->lkb_wait_type) { | ||
3226 | |||
3227 | case DLM_MSG_REQUEST: | ||
3228 | lkb->lkb_flags |= DLM_IFL_RESEND; | ||
3229 | break; | ||
3230 | |||
3231 | case DLM_MSG_CONVERT: | ||
3232 | recover_convert_waiter(ls, lkb); | ||
3233 | break; | ||
3234 | |||
3235 | case DLM_MSG_UNLOCK: | ||
3236 | hold_lkb(lkb); | ||
3237 | ls->ls_stub_ms.m_result = -DLM_EUNLOCK; | ||
3238 | _remove_from_waiters(lkb); | ||
3239 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); | ||
3240 | put_lkb(lkb); | ||
3241 | break; | ||
3242 | |||
3243 | case DLM_MSG_CANCEL: | ||
3244 | hold_lkb(lkb); | ||
3245 | ls->ls_stub_ms.m_result = -DLM_ECANCEL; | ||
3246 | _remove_from_waiters(lkb); | ||
3247 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); | ||
3248 | put_lkb(lkb); | ||
3249 | break; | ||
3250 | |||
3251 | default: | ||
3252 | log_error(ls, "invalid lkb wait_type %d", | ||
3253 | lkb->lkb_wait_type); | ||
3254 | } | ||
3255 | } | ||
3256 | up(&ls->ls_waiters_sem); | ||
3257 | } | ||
3258 | |||
3259 | static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | ||
3260 | { | ||
3261 | struct dlm_lkb *lkb; | ||
3262 | int rv = 0; | ||
3263 | |||
3264 | down(&ls->ls_waiters_sem); | ||
3265 | list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { | ||
3266 | if (lkb->lkb_flags & DLM_IFL_RESEND) { | ||
3267 | rv = lkb->lkb_wait_type; | ||
3268 | _remove_from_waiters(lkb); | ||
3269 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | ||
3270 | break; | ||
3271 | } | ||
3272 | } | ||
3273 | up(&ls->ls_waiters_sem); | ||
3274 | |||
3275 | if (!rv) | ||
3276 | lkb = NULL; | ||
3277 | *lkb_ret = lkb; | ||
3278 | return rv; | ||
3279 | } | ||
3280 | |||
3281 | /* Deal with lookups and lkb's marked RESEND from _pre. We may now be the | ||
3282 | master or dir-node for r. Processing the lkb may result in it being placed | ||
3283 | back on waiters. */ | ||
3284 | |||
3285 | int dlm_recover_waiters_post(struct dlm_ls *ls) | ||
3286 | { | ||
3287 | struct dlm_lkb *lkb; | ||
3288 | struct dlm_rsb *r; | ||
3289 | int error = 0, mstype; | ||
3290 | |||
3291 | while (1) { | ||
3292 | if (dlm_locking_stopped(ls)) { | ||
3293 | log_debug(ls, "recover_waiters_post aborted"); | ||
3294 | error = -EINTR; | ||
3295 | break; | ||
3296 | } | ||
3297 | |||
3298 | mstype = remove_resend_waiter(ls, &lkb); | ||
3299 | if (!mstype) | ||
3300 | break; | ||
3301 | |||
3302 | r = lkb->lkb_resource; | ||
3303 | |||
3304 | log_debug(ls, "recover_waiters_post %x type %d flags %x %s", | ||
3305 | lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); | ||
3306 | |||
3307 | switch (mstype) { | ||
3308 | |||
3309 | case DLM_MSG_LOOKUP: | ||
3310 | hold_rsb(r); | ||
3311 | lock_rsb(r); | ||
3312 | _request_lock(r, lkb); | ||
3313 | if (is_master(r)) | ||
3314 | confirm_master(r, 0); | ||
3315 | unlock_rsb(r); | ||
3316 | put_rsb(r); | ||
3317 | break; | ||
3318 | |||
3319 | case DLM_MSG_REQUEST: | ||
3320 | hold_rsb(r); | ||
3321 | lock_rsb(r); | ||
3322 | _request_lock(r, lkb); | ||
3323 | unlock_rsb(r); | ||
3324 | put_rsb(r); | ||
3325 | break; | ||
3326 | |||
3327 | case DLM_MSG_CONVERT: | ||
3328 | hold_rsb(r); | ||
3329 | lock_rsb(r); | ||
3330 | _convert_lock(r, lkb); | ||
3331 | unlock_rsb(r); | ||
3332 | put_rsb(r); | ||
3333 | break; | ||
3334 | |||
3335 | default: | ||
3336 | log_error(ls, "recover_waiters_post type %d", mstype); | ||
3337 | } | ||
3338 | } | ||
3339 | |||
3340 | return error; | ||
3341 | } | ||
3342 | |||
3343 | static void purge_queue(struct dlm_rsb *r, struct list_head *queue, | ||
3344 | int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb)) | ||
3345 | { | ||
3346 | struct dlm_ls *ls = r->res_ls; | ||
3347 | struct dlm_lkb *lkb, *safe; | ||
3348 | |||
3349 | list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { | ||
3350 | if (test(ls, lkb)) { | ||
3351 | del_lkb(r, lkb); | ||
3352 | /* this put should free the lkb */ | ||
3353 | if (!put_lkb(lkb)) | ||
3354 | log_error(ls, "purged lkb not released"); | ||
3355 | } | ||
3356 | } | ||
3357 | } | ||
3358 | |||
3359 | static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
3360 | { | ||
3361 | return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid)); | ||
3362 | } | ||
3363 | |||
3364 | static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb) | ||
3365 | { | ||
3366 | return is_master_copy(lkb); | ||
3367 | } | ||
3368 | |||
3369 | static void purge_dead_locks(struct dlm_rsb *r) | ||
3370 | { | ||
3371 | purge_queue(r, &r->res_grantqueue, &purge_dead_test); | ||
3372 | purge_queue(r, &r->res_convertqueue, &purge_dead_test); | ||
3373 | purge_queue(r, &r->res_waitqueue, &purge_dead_test); | ||
3374 | } | ||
3375 | |||
3376 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r) | ||
3377 | { | ||
3378 | purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test); | ||
3379 | purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test); | ||
3380 | purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test); | ||
3381 | } | ||
3382 | |||
3383 | /* Get rid of locks held by nodes that are gone. */ | ||
3384 | |||
3385 | int dlm_purge_locks(struct dlm_ls *ls) | ||
3386 | { | ||
3387 | struct dlm_rsb *r; | ||
3388 | |||
3389 | log_debug(ls, "dlm_purge_locks"); | ||
3390 | |||
3391 | down_write(&ls->ls_root_sem); | ||
3392 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
3393 | hold_rsb(r); | ||
3394 | lock_rsb(r); | ||
3395 | if (is_master(r)) | ||
3396 | purge_dead_locks(r); | ||
3397 | unlock_rsb(r); | ||
3398 | unhold_rsb(r); | ||
3399 | |||
3400 | schedule(); | ||
3401 | } | ||
3402 | up_write(&ls->ls_root_sem); | ||
3403 | |||
3404 | return 0; | ||
3405 | } | ||
3406 | |||
3407 | int dlm_grant_after_purge(struct dlm_ls *ls) | ||
3408 | { | ||
3409 | struct dlm_rsb *r; | ||
3410 | int i; | ||
3411 | |||
3412 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
3413 | read_lock(&ls->ls_rsbtbl[i].lock); | ||
3414 | list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { | ||
3415 | hold_rsb(r); | ||
3416 | lock_rsb(r); | ||
3417 | if (is_master(r)) { | ||
3418 | grant_pending_locks(r); | ||
3419 | confirm_master(r, 0); | ||
3420 | } | ||
3421 | unlock_rsb(r); | ||
3422 | put_rsb(r); | ||
3423 | } | ||
3424 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
3425 | } | ||
3426 | |||
3427 | return 0; | ||
3428 | } | ||
3429 | |||
3430 | static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid, | ||
3431 | uint32_t remid) | ||
3432 | { | ||
3433 | struct dlm_lkb *lkb; | ||
3434 | |||
3435 | list_for_each_entry(lkb, head, lkb_statequeue) { | ||
3436 | if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid) | ||
3437 | return lkb; | ||
3438 | } | ||
3439 | return NULL; | ||
3440 | } | ||
3441 | |||
3442 | static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid, | ||
3443 | uint32_t remid) | ||
3444 | { | ||
3445 | struct dlm_lkb *lkb; | ||
3446 | |||
3447 | lkb = search_remid_list(&r->res_grantqueue, nodeid, remid); | ||
3448 | if (lkb) | ||
3449 | return lkb; | ||
3450 | lkb = search_remid_list(&r->res_convertqueue, nodeid, remid); | ||
3451 | if (lkb) | ||
3452 | return lkb; | ||
3453 | lkb = search_remid_list(&r->res_waitqueue, nodeid, remid); | ||
3454 | if (lkb) | ||
3455 | return lkb; | ||
3456 | return NULL; | ||
3457 | } | ||
3458 | |||
3459 | static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | ||
3460 | struct dlm_rsb *r, struct dlm_rcom *rc) | ||
3461 | { | ||
3462 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; | ||
3463 | int lvblen; | ||
3464 | |||
3465 | lkb->lkb_nodeid = rc->rc_header.h_nodeid; | ||
3466 | lkb->lkb_ownpid = rl->rl_ownpid; | ||
3467 | lkb->lkb_remid = rl->rl_lkid; | ||
3468 | lkb->lkb_exflags = rl->rl_exflags; | ||
3469 | lkb->lkb_flags = rl->rl_flags & 0x0000FFFF; | ||
3470 | lkb->lkb_flags |= DLM_IFL_MSTCPY; | ||
3471 | lkb->lkb_lvbseq = rl->rl_lvbseq; | ||
3472 | lkb->lkb_rqmode = rl->rl_rqmode; | ||
3473 | lkb->lkb_grmode = rl->rl_grmode; | ||
3474 | /* don't set lkb_status because add_lkb wants to itself */ | ||
3475 | |||
3476 | lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST); | ||
3477 | lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); | ||
3478 | |||
3479 | if (lkb->lkb_flags & DLM_IFL_RANGE) { | ||
3480 | lkb->lkb_range = allocate_range(ls); | ||
3481 | if (!lkb->lkb_range) | ||
3482 | return -ENOMEM; | ||
3483 | memcpy(lkb->lkb_range, rl->rl_range, 4*sizeof(uint64_t)); | ||
3484 | } | ||
3485 | |||
3486 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { | ||
3487 | lkb->lkb_lvbptr = allocate_lvb(ls); | ||
3488 | if (!lkb->lkb_lvbptr) | ||
3489 | return -ENOMEM; | ||
3490 | lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - | ||
3491 | sizeof(struct rcom_lock); | ||
3492 | memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen); | ||
3493 | } | ||
3494 | |||
3495 | /* Conversions between PR and CW (middle modes) need special handling. | ||
3496 | The real granted mode of these converting locks cannot be determined | ||
3497 | until all locks have been rebuilt on the rsb (recover_conversion) */ | ||
3498 | |||
3499 | if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) { | ||
3500 | rl->rl_status = DLM_LKSTS_CONVERT; | ||
3501 | lkb->lkb_grmode = DLM_LOCK_IV; | ||
3502 | rsb_set_flag(r, RSB_RECOVER_CONVERT); | ||
3503 | } | ||
3504 | |||
3505 | return 0; | ||
3506 | } | ||
3507 | |||
3508 | /* This lkb may have been recovered in a previous aborted recovery so we need | ||
3509 | to check if the rsb already has an lkb with the given remote nodeid/lkid. | ||
3510 | If so we just send back a standard reply. If not, we create a new lkb with | ||
3511 | the given values and send back our lkid. We send back our lkid by sending | ||
3512 | back the rcom_lock struct we got but with the remid field filled in. */ | ||
3513 | |||
3514 | int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
3515 | { | ||
3516 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; | ||
3517 | struct dlm_rsb *r; | ||
3518 | struct dlm_lkb *lkb; | ||
3519 | int error; | ||
3520 | |||
3521 | if (rl->rl_parent_lkid) { | ||
3522 | error = -EOPNOTSUPP; | ||
3523 | goto out; | ||
3524 | } | ||
3525 | |||
3526 | error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r); | ||
3527 | if (error) | ||
3528 | goto out; | ||
3529 | |||
3530 | lock_rsb(r); | ||
3531 | |||
3532 | lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid); | ||
3533 | if (lkb) { | ||
3534 | error = -EEXIST; | ||
3535 | goto out_remid; | ||
3536 | } | ||
3537 | |||
3538 | error = create_lkb(ls, &lkb); | ||
3539 | if (error) | ||
3540 | goto out_unlock; | ||
3541 | |||
3542 | error = receive_rcom_lock_args(ls, lkb, r, rc); | ||
3543 | if (error) { | ||
3544 | put_lkb(lkb); | ||
3545 | goto out_unlock; | ||
3546 | } | ||
3547 | |||
3548 | attach_lkb(r, lkb); | ||
3549 | add_lkb(r, lkb, rl->rl_status); | ||
3550 | error = 0; | ||
3551 | |||
3552 | out_remid: | ||
3553 | /* this is the new value returned to the lock holder for | ||
3554 | saving in its process-copy lkb */ | ||
3555 | rl->rl_remid = lkb->lkb_id; | ||
3556 | |||
3557 | out_unlock: | ||
3558 | unlock_rsb(r); | ||
3559 | put_rsb(r); | ||
3560 | out: | ||
3561 | if (error) | ||
3562 | log_print("recover_master_copy %d %x", error, rl->rl_lkid); | ||
3563 | rl->rl_result = error; | ||
3564 | return error; | ||
3565 | } | ||
3566 | |||
3567 | int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
3568 | { | ||
3569 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; | ||
3570 | struct dlm_rsb *r; | ||
3571 | struct dlm_lkb *lkb; | ||
3572 | int error; | ||
3573 | |||
3574 | error = find_lkb(ls, rl->rl_lkid, &lkb); | ||
3575 | if (error) { | ||
3576 | log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid); | ||
3577 | return error; | ||
3578 | } | ||
3579 | |||
3580 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3581 | |||
3582 | error = rl->rl_result; | ||
3583 | |||
3584 | r = lkb->lkb_resource; | ||
3585 | hold_rsb(r); | ||
3586 | lock_rsb(r); | ||
3587 | |||
3588 | switch (error) { | ||
3589 | case -EEXIST: | ||
3590 | log_debug(ls, "master copy exists %x", lkb->lkb_id); | ||
3591 | /* fall through */ | ||
3592 | case 0: | ||
3593 | lkb->lkb_remid = rl->rl_remid; | ||
3594 | break; | ||
3595 | default: | ||
3596 | log_error(ls, "dlm_recover_process_copy unknown error %d %x", | ||
3597 | error, lkb->lkb_id); | ||
3598 | } | ||
3599 | |||
3600 | /* an ack for dlm_recover_locks() which waits for replies from | ||
3601 | all the locks it sends to new masters */ | ||
3602 | dlm_recovered_lock(r); | ||
3603 | |||
3604 | unlock_rsb(r); | ||
3605 | put_rsb(r); | ||
3606 | put_lkb(lkb); | ||
3607 | |||
3608 | return 0; | ||
3609 | } | ||
3610 | |||
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h new file mode 100644 index 000000000000..9e6499f773da --- /dev/null +++ b/fs/dlm/lock.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #ifndef __LOCK_DOT_H__ | ||
14 | #define __LOCK_DOT_H__ | ||
15 | |||
16 | void dlm_print_rsb(struct dlm_rsb *r); | ||
17 | int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); | ||
18 | int dlm_modes_compat(int mode1, int mode2); | ||
19 | int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
20 | unsigned int flags, struct dlm_rsb **r_ret); | ||
21 | void dlm_put_rsb(struct dlm_rsb *r); | ||
22 | void dlm_hold_rsb(struct dlm_rsb *r); | ||
23 | int dlm_put_lkb(struct dlm_lkb *lkb); | ||
24 | void dlm_scan_rsbs(struct dlm_ls *ls); | ||
25 | |||
26 | int dlm_purge_locks(struct dlm_ls *ls); | ||
27 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r); | ||
28 | int dlm_grant_after_purge(struct dlm_ls *ls); | ||
29 | int dlm_recover_waiters_post(struct dlm_ls *ls); | ||
30 | void dlm_recover_waiters_pre(struct dlm_ls *ls); | ||
31 | int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); | ||
32 | int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); | ||
33 | |||
34 | static inline int is_master(struct dlm_rsb *r) | ||
35 | { | ||
36 | return !r->res_nodeid; | ||
37 | } | ||
38 | |||
39 | static inline void lock_rsb(struct dlm_rsb *r) | ||
40 | { | ||
41 | down(&r->res_sem); | ||
42 | } | ||
43 | |||
44 | static inline void unlock_rsb(struct dlm_rsb *r) | ||
45 | { | ||
46 | up(&r->res_sem); | ||
47 | } | ||
48 | |||
49 | #endif | ||
50 | |||
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c new file mode 100644 index 000000000000..fee4659b6582 --- /dev/null +++ b/fs/dlm/lockspace.c | |||
@@ -0,0 +1,666 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lockspace.h" | ||
16 | #include "member.h" | ||
17 | #include "recoverd.h" | ||
18 | #include "ast.h" | ||
19 | #include "dir.h" | ||
20 | #include "lowcomms.h" | ||
21 | #include "config.h" | ||
22 | #include "memory.h" | ||
23 | #include "lock.h" | ||
24 | |||
25 | #ifdef CONFIG_DLM_DEBUG | ||
26 | int dlm_create_debug_file(struct dlm_ls *ls); | ||
27 | void dlm_delete_debug_file(struct dlm_ls *ls); | ||
28 | #else | ||
29 | static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } | ||
30 | static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } | ||
31 | #endif | ||
32 | |||
33 | static int ls_count; | ||
34 | static struct semaphore ls_lock; | ||
35 | static struct list_head lslist; | ||
36 | static spinlock_t lslist_lock; | ||
37 | static struct task_struct * scand_task; | ||
38 | |||
39 | |||
40 | static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) | ||
41 | { | ||
42 | ssize_t ret = len; | ||
43 | int n = simple_strtol(buf, NULL, 0); | ||
44 | |||
45 | switch (n) { | ||
46 | case 0: | ||
47 | dlm_ls_stop(ls); | ||
48 | break; | ||
49 | case 1: | ||
50 | dlm_ls_start(ls); | ||
51 | break; | ||
52 | default: | ||
53 | ret = -EINVAL; | ||
54 | } | ||
55 | return ret; | ||
56 | } | ||
57 | |||
58 | static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len) | ||
59 | { | ||
60 | ls->ls_uevent_result = simple_strtol(buf, NULL, 0); | ||
61 | set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags); | ||
62 | wake_up(&ls->ls_uevent_wait); | ||
63 | return len; | ||
64 | } | ||
65 | |||
66 | static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf) | ||
67 | { | ||
68 | return sprintf(buf, "%u\n", ls->ls_global_id); | ||
69 | } | ||
70 | |||
71 | static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len) | ||
72 | { | ||
73 | ls->ls_global_id = simple_strtoul(buf, NULL, 0); | ||
74 | return len; | ||
75 | } | ||
76 | |||
77 | struct dlm_attr { | ||
78 | struct attribute attr; | ||
79 | ssize_t (*show)(struct dlm_ls *, char *); | ||
80 | ssize_t (*store)(struct dlm_ls *, const char *, size_t); | ||
81 | }; | ||
82 | |||
83 | static struct dlm_attr dlm_attr_control = { | ||
84 | .attr = {.name = "control", .mode = S_IWUSR}, | ||
85 | .store = dlm_control_store | ||
86 | }; | ||
87 | |||
88 | static struct dlm_attr dlm_attr_event = { | ||
89 | .attr = {.name = "event_done", .mode = S_IWUSR}, | ||
90 | .store = dlm_event_store | ||
91 | }; | ||
92 | |||
93 | static struct dlm_attr dlm_attr_id = { | ||
94 | .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR}, | ||
95 | .show = dlm_id_show, | ||
96 | .store = dlm_id_store | ||
97 | }; | ||
98 | |||
99 | static struct attribute *dlm_attrs[] = { | ||
100 | &dlm_attr_control.attr, | ||
101 | &dlm_attr_event.attr, | ||
102 | &dlm_attr_id.attr, | ||
103 | NULL, | ||
104 | }; | ||
105 | |||
106 | static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr, | ||
107 | char *buf) | ||
108 | { | ||
109 | struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); | ||
110 | struct dlm_attr *a = container_of(attr, struct dlm_attr, attr); | ||
111 | return a->show ? a->show(ls, buf) : 0; | ||
112 | } | ||
113 | |||
114 | static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr, | ||
115 | const char *buf, size_t len) | ||
116 | { | ||
117 | struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); | ||
118 | struct dlm_attr *a = container_of(attr, struct dlm_attr, attr); | ||
119 | return a->store ? a->store(ls, buf, len) : len; | ||
120 | } | ||
121 | |||
122 | static struct sysfs_ops dlm_attr_ops = { | ||
123 | .show = dlm_attr_show, | ||
124 | .store = dlm_attr_store, | ||
125 | }; | ||
126 | |||
127 | static struct kobj_type dlm_ktype = { | ||
128 | .default_attrs = dlm_attrs, | ||
129 | .sysfs_ops = &dlm_attr_ops, | ||
130 | }; | ||
131 | |||
132 | static struct kset dlm_kset = { | ||
133 | .subsys = &kernel_subsys, | ||
134 | .kobj = {.name = "dlm",}, | ||
135 | .ktype = &dlm_ktype, | ||
136 | }; | ||
137 | |||
138 | static int kobject_setup(struct dlm_ls *ls) | ||
139 | { | ||
140 | char lsname[DLM_LOCKSPACE_LEN]; | ||
141 | int error; | ||
142 | |||
143 | memset(lsname, 0, DLM_LOCKSPACE_LEN); | ||
144 | snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name); | ||
145 | |||
146 | error = kobject_set_name(&ls->ls_kobj, "%s", lsname); | ||
147 | if (error) | ||
148 | return error; | ||
149 | |||
150 | ls->ls_kobj.kset = &dlm_kset; | ||
151 | ls->ls_kobj.ktype = &dlm_ktype; | ||
152 | return 0; | ||
153 | } | ||
154 | |||
155 | static int do_uevent(struct dlm_ls *ls, int in) | ||
156 | { | ||
157 | int error; | ||
158 | |||
159 | if (in) | ||
160 | kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE); | ||
161 | else | ||
162 | kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); | ||
163 | |||
164 | error = wait_event_interruptible(ls->ls_uevent_wait, | ||
165 | test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); | ||
166 | if (error) | ||
167 | goto out; | ||
168 | |||
169 | error = ls->ls_uevent_result; | ||
170 | out: | ||
171 | return error; | ||
172 | } | ||
173 | |||
174 | |||
175 | int dlm_lockspace_init(void) | ||
176 | { | ||
177 | int error; | ||
178 | |||
179 | ls_count = 0; | ||
180 | init_MUTEX(&ls_lock); | ||
181 | INIT_LIST_HEAD(&lslist); | ||
182 | spin_lock_init(&lslist_lock); | ||
183 | |||
184 | error = kset_register(&dlm_kset); | ||
185 | if (error) | ||
186 | printk("dlm_lockspace_init: cannot register kset %d\n", error); | ||
187 | return error; | ||
188 | } | ||
189 | |||
190 | void dlm_lockspace_exit(void) | ||
191 | { | ||
192 | kset_unregister(&dlm_kset); | ||
193 | } | ||
194 | |||
195 | static int dlm_scand(void *data) | ||
196 | { | ||
197 | struct dlm_ls *ls; | ||
198 | |||
199 | while (!kthread_should_stop()) { | ||
200 | list_for_each_entry(ls, &lslist, ls_list) | ||
201 | dlm_scan_rsbs(ls); | ||
202 | schedule_timeout_interruptible(dlm_config.scan_secs * HZ); | ||
203 | } | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int dlm_scand_start(void) | ||
208 | { | ||
209 | struct task_struct *p; | ||
210 | int error = 0; | ||
211 | |||
212 | p = kthread_run(dlm_scand, NULL, "dlm_scand"); | ||
213 | if (IS_ERR(p)) | ||
214 | error = PTR_ERR(p); | ||
215 | else | ||
216 | scand_task = p; | ||
217 | return error; | ||
218 | } | ||
219 | |||
220 | static void dlm_scand_stop(void) | ||
221 | { | ||
222 | kthread_stop(scand_task); | ||
223 | } | ||
224 | |||
225 | static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen) | ||
226 | { | ||
227 | struct dlm_ls *ls; | ||
228 | |||
229 | spin_lock(&lslist_lock); | ||
230 | |||
231 | list_for_each_entry(ls, &lslist, ls_list) { | ||
232 | if (ls->ls_namelen == namelen && | ||
233 | memcmp(ls->ls_name, name, namelen) == 0) | ||
234 | goto out; | ||
235 | } | ||
236 | ls = NULL; | ||
237 | out: | ||
238 | spin_unlock(&lslist_lock); | ||
239 | return ls; | ||
240 | } | ||
241 | |||
242 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) | ||
243 | { | ||
244 | struct dlm_ls *ls; | ||
245 | |||
246 | spin_lock(&lslist_lock); | ||
247 | |||
248 | list_for_each_entry(ls, &lslist, ls_list) { | ||
249 | if (ls->ls_global_id == id) { | ||
250 | ls->ls_count++; | ||
251 | goto out; | ||
252 | } | ||
253 | } | ||
254 | ls = NULL; | ||
255 | out: | ||
256 | spin_unlock(&lslist_lock); | ||
257 | return ls; | ||
258 | } | ||
259 | |||
260 | struct dlm_ls *dlm_find_lockspace_local(void *id) | ||
261 | { | ||
262 | struct dlm_ls *ls = id; | ||
263 | |||
264 | spin_lock(&lslist_lock); | ||
265 | ls->ls_count++; | ||
266 | spin_unlock(&lslist_lock); | ||
267 | return ls; | ||
268 | } | ||
269 | |||
270 | void dlm_put_lockspace(struct dlm_ls *ls) | ||
271 | { | ||
272 | spin_lock(&lslist_lock); | ||
273 | ls->ls_count--; | ||
274 | spin_unlock(&lslist_lock); | ||
275 | } | ||
276 | |||
277 | static void remove_lockspace(struct dlm_ls *ls) | ||
278 | { | ||
279 | for (;;) { | ||
280 | spin_lock(&lslist_lock); | ||
281 | if (ls->ls_count == 0) { | ||
282 | list_del(&ls->ls_list); | ||
283 | spin_unlock(&lslist_lock); | ||
284 | return; | ||
285 | } | ||
286 | spin_unlock(&lslist_lock); | ||
287 | ssleep(1); | ||
288 | } | ||
289 | } | ||
290 | |||
291 | static int threads_start(void) | ||
292 | { | ||
293 | int error; | ||
294 | |||
295 | /* Thread which process lock requests for all lockspace's */ | ||
296 | error = dlm_astd_start(); | ||
297 | if (error) { | ||
298 | log_print("cannot start dlm_astd thread %d", error); | ||
299 | goto fail; | ||
300 | } | ||
301 | |||
302 | error = dlm_scand_start(); | ||
303 | if (error) { | ||
304 | log_print("cannot start dlm_scand thread %d", error); | ||
305 | goto astd_fail; | ||
306 | } | ||
307 | |||
308 | /* Thread for sending/receiving messages for all lockspace's */ | ||
309 | error = dlm_lowcomms_start(); | ||
310 | if (error) { | ||
311 | log_print("cannot start dlm lowcomms %d", error); | ||
312 | goto scand_fail; | ||
313 | } | ||
314 | |||
315 | return 0; | ||
316 | |||
317 | scand_fail: | ||
318 | dlm_scand_stop(); | ||
319 | astd_fail: | ||
320 | dlm_astd_stop(); | ||
321 | fail: | ||
322 | return error; | ||
323 | } | ||
324 | |||
325 | static void threads_stop(void) | ||
326 | { | ||
327 | dlm_scand_stop(); | ||
328 | dlm_lowcomms_stop(); | ||
329 | dlm_astd_stop(); | ||
330 | } | ||
331 | |||
332 | static int new_lockspace(char *name, int namelen, void **lockspace, | ||
333 | uint32_t flags, int lvblen) | ||
334 | { | ||
335 | struct dlm_ls *ls; | ||
336 | int i, size, error = -ENOMEM; | ||
337 | |||
338 | if (namelen > DLM_LOCKSPACE_LEN) | ||
339 | return -EINVAL; | ||
340 | |||
341 | if (!lvblen || (lvblen % 8)) | ||
342 | return -EINVAL; | ||
343 | |||
344 | if (!try_module_get(THIS_MODULE)) | ||
345 | return -EINVAL; | ||
346 | |||
347 | ls = dlm_find_lockspace_name(name, namelen); | ||
348 | if (ls) { | ||
349 | *lockspace = ls; | ||
350 | module_put(THIS_MODULE); | ||
351 | return -EEXIST; | ||
352 | } | ||
353 | |||
354 | ls = kmalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); | ||
355 | if (!ls) | ||
356 | goto out; | ||
357 | memset(ls, 0, sizeof(struct dlm_ls) + namelen); | ||
358 | memcpy(ls->ls_name, name, namelen); | ||
359 | ls->ls_namelen = namelen; | ||
360 | ls->ls_exflags = flags; | ||
361 | ls->ls_lvblen = lvblen; | ||
362 | ls->ls_count = 0; | ||
363 | ls->ls_flags = 0; | ||
364 | |||
365 | size = dlm_config.rsbtbl_size; | ||
366 | ls->ls_rsbtbl_size = size; | ||
367 | |||
368 | ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); | ||
369 | if (!ls->ls_rsbtbl) | ||
370 | goto out_lsfree; | ||
371 | for (i = 0; i < size; i++) { | ||
372 | INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); | ||
373 | INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); | ||
374 | rwlock_init(&ls->ls_rsbtbl[i].lock); | ||
375 | } | ||
376 | |||
377 | size = dlm_config.lkbtbl_size; | ||
378 | ls->ls_lkbtbl_size = size; | ||
379 | |||
380 | ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); | ||
381 | if (!ls->ls_lkbtbl) | ||
382 | goto out_rsbfree; | ||
383 | for (i = 0; i < size; i++) { | ||
384 | INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list); | ||
385 | rwlock_init(&ls->ls_lkbtbl[i].lock); | ||
386 | ls->ls_lkbtbl[i].counter = 1; | ||
387 | } | ||
388 | |||
389 | size = dlm_config.dirtbl_size; | ||
390 | ls->ls_dirtbl_size = size; | ||
391 | |||
392 | ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); | ||
393 | if (!ls->ls_dirtbl) | ||
394 | goto out_lkbfree; | ||
395 | for (i = 0; i < size; i++) { | ||
396 | INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); | ||
397 | rwlock_init(&ls->ls_dirtbl[i].lock); | ||
398 | } | ||
399 | |||
400 | INIT_LIST_HEAD(&ls->ls_waiters); | ||
401 | init_MUTEX(&ls->ls_waiters_sem); | ||
402 | |||
403 | INIT_LIST_HEAD(&ls->ls_nodes); | ||
404 | INIT_LIST_HEAD(&ls->ls_nodes_gone); | ||
405 | ls->ls_num_nodes = 0; | ||
406 | ls->ls_low_nodeid = 0; | ||
407 | ls->ls_total_weight = 0; | ||
408 | ls->ls_node_array = NULL; | ||
409 | |||
410 | memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb)); | ||
411 | ls->ls_stub_rsb.res_ls = ls; | ||
412 | |||
413 | ls->ls_debug_dentry = NULL; | ||
414 | |||
415 | init_waitqueue_head(&ls->ls_uevent_wait); | ||
416 | ls->ls_uevent_result = 0; | ||
417 | |||
418 | ls->ls_recoverd_task = NULL; | ||
419 | init_MUTEX(&ls->ls_recoverd_active); | ||
420 | spin_lock_init(&ls->ls_recover_lock); | ||
421 | ls->ls_recover_status = 0; | ||
422 | ls->ls_recover_seq = 0; | ||
423 | ls->ls_recover_args = NULL; | ||
424 | init_rwsem(&ls->ls_in_recovery); | ||
425 | INIT_LIST_HEAD(&ls->ls_requestqueue); | ||
426 | init_MUTEX(&ls->ls_requestqueue_lock); | ||
427 | |||
428 | ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL); | ||
429 | if (!ls->ls_recover_buf) | ||
430 | goto out_dirfree; | ||
431 | |||
432 | INIT_LIST_HEAD(&ls->ls_recover_list); | ||
433 | spin_lock_init(&ls->ls_recover_list_lock); | ||
434 | ls->ls_recover_list_count = 0; | ||
435 | init_waitqueue_head(&ls->ls_wait_general); | ||
436 | INIT_LIST_HEAD(&ls->ls_root_list); | ||
437 | init_rwsem(&ls->ls_root_sem); | ||
438 | |||
439 | down_write(&ls->ls_in_recovery); | ||
440 | |||
441 | error = dlm_recoverd_start(ls); | ||
442 | if (error) { | ||
443 | log_error(ls, "can't start dlm_recoverd %d", error); | ||
444 | goto out_rcomfree; | ||
445 | } | ||
446 | |||
447 | spin_lock(&lslist_lock); | ||
448 | list_add(&ls->ls_list, &lslist); | ||
449 | spin_unlock(&lslist_lock); | ||
450 | |||
451 | dlm_create_debug_file(ls); | ||
452 | |||
453 | error = kobject_setup(ls); | ||
454 | if (error) | ||
455 | goto out_del; | ||
456 | |||
457 | error = kobject_register(&ls->ls_kobj); | ||
458 | if (error) | ||
459 | goto out_del; | ||
460 | |||
461 | error = do_uevent(ls, 1); | ||
462 | if (error) | ||
463 | goto out_unreg; | ||
464 | |||
465 | *lockspace = ls; | ||
466 | return 0; | ||
467 | |||
468 | out_unreg: | ||
469 | kobject_unregister(&ls->ls_kobj); | ||
470 | out_del: | ||
471 | dlm_delete_debug_file(ls); | ||
472 | spin_lock(&lslist_lock); | ||
473 | list_del(&ls->ls_list); | ||
474 | spin_unlock(&lslist_lock); | ||
475 | dlm_recoverd_stop(ls); | ||
476 | out_rcomfree: | ||
477 | kfree(ls->ls_recover_buf); | ||
478 | out_dirfree: | ||
479 | kfree(ls->ls_dirtbl); | ||
480 | out_lkbfree: | ||
481 | kfree(ls->ls_lkbtbl); | ||
482 | out_rsbfree: | ||
483 | kfree(ls->ls_rsbtbl); | ||
484 | out_lsfree: | ||
485 | kfree(ls); | ||
486 | out: | ||
487 | module_put(THIS_MODULE); | ||
488 | return error; | ||
489 | } | ||
490 | |||
491 | int dlm_new_lockspace(char *name, int namelen, void **lockspace, | ||
492 | uint32_t flags, int lvblen) | ||
493 | { | ||
494 | int error = 0; | ||
495 | |||
496 | down(&ls_lock); | ||
497 | if (!ls_count) | ||
498 | error = threads_start(); | ||
499 | if (error) | ||
500 | goto out; | ||
501 | |||
502 | error = new_lockspace(name, namelen, lockspace, flags, lvblen); | ||
503 | if (!error) | ||
504 | ls_count++; | ||
505 | out: | ||
506 | up(&ls_lock); | ||
507 | return error; | ||
508 | } | ||
509 | |||
510 | /* Return 1 if the lockspace still has active remote locks, | ||
511 | * 2 if the lockspace still has active local locks. | ||
512 | */ | ||
513 | static int lockspace_busy(struct dlm_ls *ls) | ||
514 | { | ||
515 | int i, lkb_found = 0; | ||
516 | struct dlm_lkb *lkb; | ||
517 | |||
518 | /* NOTE: We check the lockidtbl here rather than the resource table. | ||
519 | This is because there may be LKBs queued as ASTs that have been | ||
520 | unlinked from their RSBs and are pending deletion once the AST has | ||
521 | been delivered */ | ||
522 | |||
523 | for (i = 0; i < ls->ls_lkbtbl_size; i++) { | ||
524 | read_lock(&ls->ls_lkbtbl[i].lock); | ||
525 | if (!list_empty(&ls->ls_lkbtbl[i].list)) { | ||
526 | lkb_found = 1; | ||
527 | list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, | ||
528 | lkb_idtbl_list) { | ||
529 | if (!lkb->lkb_nodeid) { | ||
530 | read_unlock(&ls->ls_lkbtbl[i].lock); | ||
531 | return 2; | ||
532 | } | ||
533 | } | ||
534 | } | ||
535 | read_unlock(&ls->ls_lkbtbl[i].lock); | ||
536 | } | ||
537 | return lkb_found; | ||
538 | } | ||
539 | |||
540 | static int release_lockspace(struct dlm_ls *ls, int force) | ||
541 | { | ||
542 | struct dlm_lkb *lkb; | ||
543 | struct dlm_rsb *rsb; | ||
544 | struct list_head *head; | ||
545 | int i; | ||
546 | int busy = lockspace_busy(ls); | ||
547 | |||
548 | if (busy > force) | ||
549 | return -EBUSY; | ||
550 | |||
551 | if (force < 3) | ||
552 | do_uevent(ls, 0); | ||
553 | |||
554 | dlm_recoverd_stop(ls); | ||
555 | |||
556 | remove_lockspace(ls); | ||
557 | |||
558 | dlm_delete_debug_file(ls); | ||
559 | |||
560 | dlm_astd_suspend(); | ||
561 | |||
562 | kfree(ls->ls_recover_buf); | ||
563 | |||
564 | /* | ||
565 | * Free direntry structs. | ||
566 | */ | ||
567 | |||
568 | dlm_dir_clear(ls); | ||
569 | kfree(ls->ls_dirtbl); | ||
570 | |||
571 | /* | ||
572 | * Free all lkb's on lkbtbl[] lists. | ||
573 | */ | ||
574 | |||
575 | for (i = 0; i < ls->ls_lkbtbl_size; i++) { | ||
576 | head = &ls->ls_lkbtbl[i].list; | ||
577 | while (!list_empty(head)) { | ||
578 | lkb = list_entry(head->next, struct dlm_lkb, | ||
579 | lkb_idtbl_list); | ||
580 | |||
581 | list_del(&lkb->lkb_idtbl_list); | ||
582 | |||
583 | dlm_del_ast(lkb); | ||
584 | |||
585 | if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) | ||
586 | free_lvb(lkb->lkb_lvbptr); | ||
587 | |||
588 | free_lkb(lkb); | ||
589 | } | ||
590 | } | ||
591 | dlm_astd_resume(); | ||
592 | |||
593 | kfree(ls->ls_lkbtbl); | ||
594 | |||
595 | /* | ||
596 | * Free all rsb's on rsbtbl[] lists | ||
597 | */ | ||
598 | |||
599 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
600 | head = &ls->ls_rsbtbl[i].list; | ||
601 | while (!list_empty(head)) { | ||
602 | rsb = list_entry(head->next, struct dlm_rsb, | ||
603 | res_hashchain); | ||
604 | |||
605 | list_del(&rsb->res_hashchain); | ||
606 | free_rsb(rsb); | ||
607 | } | ||
608 | |||
609 | head = &ls->ls_rsbtbl[i].toss; | ||
610 | while (!list_empty(head)) { | ||
611 | rsb = list_entry(head->next, struct dlm_rsb, | ||
612 | res_hashchain); | ||
613 | list_del(&rsb->res_hashchain); | ||
614 | free_rsb(rsb); | ||
615 | } | ||
616 | } | ||
617 | |||
618 | kfree(ls->ls_rsbtbl); | ||
619 | |||
620 | /* | ||
621 | * Free structures on any other lists | ||
622 | */ | ||
623 | |||
624 | kfree(ls->ls_recover_args); | ||
625 | dlm_clear_free_entries(ls); | ||
626 | dlm_clear_members(ls); | ||
627 | dlm_clear_members_gone(ls); | ||
628 | kfree(ls->ls_node_array); | ||
629 | kobject_unregister(&ls->ls_kobj); | ||
630 | kfree(ls); | ||
631 | |||
632 | down(&ls_lock); | ||
633 | ls_count--; | ||
634 | if (!ls_count) | ||
635 | threads_stop(); | ||
636 | up(&ls_lock); | ||
637 | |||
638 | module_put(THIS_MODULE); | ||
639 | return 0; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Called when a system has released all its locks and is not going to use the | ||
644 | * lockspace any longer. We free everything we're managing for this lockspace. | ||
645 | * Remaining nodes will go through the recovery process as if we'd died. The | ||
646 | * lockspace must continue to function as usual, participating in recoveries, | ||
647 | * until this returns. | ||
648 | * | ||
649 | * Force has 4 possible values: | ||
650 | * 0 - don't destroy locksapce if it has any LKBs | ||
651 | * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs | ||
652 | * 2 - destroy lockspace regardless of LKBs | ||
653 | * 3 - destroy lockspace as part of a forced shutdown | ||
654 | */ | ||
655 | |||
656 | int dlm_release_lockspace(void *lockspace, int force) | ||
657 | { | ||
658 | struct dlm_ls *ls; | ||
659 | |||
660 | ls = dlm_find_lockspace_local(lockspace); | ||
661 | if (!ls) | ||
662 | return -EINVAL; | ||
663 | dlm_put_lockspace(ls); | ||
664 | return release_lockspace(ls, force); | ||
665 | } | ||
666 | |||
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h new file mode 100644 index 000000000000..17bd3ba863a9 --- /dev/null +++ b/fs/dlm/lockspace.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __LOCKSPACE_DOT_H__ | ||
15 | #define __LOCKSPACE_DOT_H__ | ||
16 | |||
17 | int dlm_lockspace_init(void); | ||
18 | void dlm_lockspace_exit(void); | ||
19 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id); | ||
20 | struct dlm_ls *dlm_find_lockspace_local(void *id); | ||
21 | void dlm_put_lockspace(struct dlm_ls *ls); | ||
22 | |||
23 | #endif /* __LOCKSPACE_DOT_H__ */ | ||
24 | |||
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c new file mode 100644 index 000000000000..09b0124f7fc4 --- /dev/null +++ b/fs/dlm/lowcomms.c | |||
@@ -0,0 +1,1218 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | /* | ||
15 | * lowcomms.c | ||
16 | * | ||
17 | * This is the "low-level" comms layer. | ||
18 | * | ||
19 | * It is responsible for sending/receiving messages | ||
20 | * from other nodes in the cluster. | ||
21 | * | ||
22 | * Cluster nodes are referred to by their nodeids. nodeids are | ||
23 | * simply 32 bit numbers to the locking module - if they need to | ||
24 | * be expanded for the cluster infrastructure then that is it's | ||
25 | * responsibility. It is this layer's | ||
26 | * responsibility to resolve these into IP address or | ||
27 | * whatever it needs for inter-node communication. | ||
28 | * | ||
29 | * The comms level is two kernel threads that deal mainly with | ||
30 | * the receiving of messages from other nodes and passing them | ||
31 | * up to the mid-level comms layer (which understands the | ||
32 | * message format) for execution by the locking core, and | ||
33 | * a send thread which does all the setting up of connections | ||
34 | * to remote nodes and the sending of data. Threads are not allowed | ||
35 | * to send their own data because it may cause them to wait in times | ||
36 | * of high load. Also, this way, the sending thread can collect together | ||
37 | * messages bound for one node and send them in one block. | ||
38 | * | ||
39 | * I don't see any problem with the recv thread executing the locking | ||
40 | * code on behalf of remote processes as the locking code is | ||
41 | * short, efficient and never (well, hardly ever) waits. | ||
42 | * | ||
43 | */ | ||
44 | |||
45 | #include <asm/ioctls.h> | ||
46 | #include <net/sock.h> | ||
47 | #include <net/tcp.h> | ||
48 | #include <net/sctp/user.h> | ||
49 | #include <linux/pagemap.h> | ||
50 | #include <linux/socket.h> | ||
51 | #include <linux/idr.h> | ||
52 | |||
53 | #include "dlm_internal.h" | ||
54 | #include "lowcomms.h" | ||
55 | #include "config.h" | ||
56 | #include "midcomms.h" | ||
57 | |||
58 | static struct sockaddr_storage *local_addr[DLM_MAX_ADDR_COUNT]; | ||
59 | static int local_count; | ||
60 | static int local_nodeid; | ||
61 | |||
62 | /* One of these per connected node */ | ||
63 | |||
64 | #define NI_INIT_PENDING 1 | ||
65 | #define NI_WRITE_PENDING 2 | ||
66 | |||
67 | struct nodeinfo { | ||
68 | spinlock_t lock; | ||
69 | sctp_assoc_t assoc_id; | ||
70 | unsigned long flags; | ||
71 | struct list_head write_list; /* nodes with pending writes */ | ||
72 | struct list_head writequeue; /* outgoing writequeue_entries */ | ||
73 | spinlock_t writequeue_lock; | ||
74 | int nodeid; | ||
75 | }; | ||
76 | |||
77 | static DEFINE_IDR(nodeinfo_idr); | ||
78 | static struct rw_semaphore nodeinfo_lock; | ||
79 | static int max_nodeid; | ||
80 | |||
81 | struct cbuf { | ||
82 | unsigned base; | ||
83 | unsigned len; | ||
84 | unsigned mask; | ||
85 | }; | ||
86 | |||
87 | /* Just the one of these, now. But this struct keeps | ||
88 | the connection-specific variables together */ | ||
89 | |||
90 | #define CF_READ_PENDING 1 | ||
91 | |||
92 | struct connection { | ||
93 | struct socket *sock; | ||
94 | unsigned long flags; | ||
95 | struct page *rx_page; | ||
96 | atomic_t waiting_requests; | ||
97 | struct cbuf cb; | ||
98 | int eagain_flag; | ||
99 | }; | ||
100 | |||
101 | /* An entry waiting to be sent */ | ||
102 | |||
103 | struct writequeue_entry { | ||
104 | struct list_head list; | ||
105 | struct page *page; | ||
106 | int offset; | ||
107 | int len; | ||
108 | int end; | ||
109 | int users; | ||
110 | struct nodeinfo *ni; | ||
111 | }; | ||
112 | |||
113 | #define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0) | ||
114 | #define CBUF_EMPTY(cb) ((cb)->len == 0) | ||
115 | #define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1)) | ||
116 | #define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask) | ||
117 | |||
118 | #define CBUF_INIT(cb, size) \ | ||
119 | do { \ | ||
120 | (cb)->base = (cb)->len = 0; \ | ||
121 | (cb)->mask = ((size)-1); \ | ||
122 | } while(0) | ||
123 | |||
124 | #define CBUF_EAT(cb, n) \ | ||
125 | do { \ | ||
126 | (cb)->len -= (n); \ | ||
127 | (cb)->base += (n); \ | ||
128 | (cb)->base &= (cb)->mask; \ | ||
129 | } while(0) | ||
130 | |||
131 | |||
132 | /* List of nodes which have writes pending */ | ||
133 | static struct list_head write_nodes; | ||
134 | static spinlock_t write_nodes_lock; | ||
135 | |||
136 | /* Maximum number of incoming messages to process before | ||
137 | * doing a schedule() | ||
138 | */ | ||
139 | #define MAX_RX_MSG_COUNT 25 | ||
140 | |||
141 | /* Manage daemons */ | ||
142 | static struct task_struct *recv_task; | ||
143 | static struct task_struct *send_task; | ||
144 | static wait_queue_head_t lowcomms_recv_wait; | ||
145 | static atomic_t accepting; | ||
146 | |||
147 | /* The SCTP connection */ | ||
148 | static struct connection sctp_con; | ||
149 | |||
150 | |||
151 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | ||
152 | { | ||
153 | struct sockaddr_storage addr; | ||
154 | int error; | ||
155 | |||
156 | if (!local_count) | ||
157 | return -1; | ||
158 | |||
159 | error = dlm_nodeid_to_addr(nodeid, &addr); | ||
160 | if (error) | ||
161 | return error; | ||
162 | |||
163 | if (local_addr[0]->ss_family == AF_INET) { | ||
164 | struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; | ||
165 | struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; | ||
166 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | ||
167 | } else { | ||
168 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | ||
169 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | ||
170 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | ||
171 | sizeof(in6->sin6_addr)); | ||
172 | } | ||
173 | |||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | static struct nodeinfo *nodeid2nodeinfo(int nodeid, int alloc) | ||
178 | { | ||
179 | struct nodeinfo *ni; | ||
180 | int r; | ||
181 | int n; | ||
182 | |||
183 | down_read(&nodeinfo_lock); | ||
184 | ni = idr_find(&nodeinfo_idr, nodeid); | ||
185 | up_read(&nodeinfo_lock); | ||
186 | |||
187 | if (!ni && alloc) { | ||
188 | down_write(&nodeinfo_lock); | ||
189 | |||
190 | ni = idr_find(&nodeinfo_idr, nodeid); | ||
191 | if (ni) | ||
192 | goto out_up; | ||
193 | |||
194 | r = idr_pre_get(&nodeinfo_idr, alloc); | ||
195 | if (!r) | ||
196 | goto out_up; | ||
197 | |||
198 | ni = kmalloc(sizeof(struct nodeinfo), alloc); | ||
199 | if (!ni) | ||
200 | goto out_up; | ||
201 | |||
202 | r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n); | ||
203 | if (r) { | ||
204 | kfree(ni); | ||
205 | ni = NULL; | ||
206 | goto out_up; | ||
207 | } | ||
208 | if (n != nodeid) { | ||
209 | idr_remove(&nodeinfo_idr, n); | ||
210 | kfree(ni); | ||
211 | ni = NULL; | ||
212 | goto out_up; | ||
213 | } | ||
214 | memset(ni, 0, sizeof(struct nodeinfo)); | ||
215 | spin_lock_init(&ni->lock); | ||
216 | INIT_LIST_HEAD(&ni->writequeue); | ||
217 | spin_lock_init(&ni->writequeue_lock); | ||
218 | ni->nodeid = nodeid; | ||
219 | |||
220 | if (nodeid > max_nodeid) | ||
221 | max_nodeid = nodeid; | ||
222 | out_up: | ||
223 | up_write(&nodeinfo_lock); | ||
224 | } | ||
225 | |||
226 | return ni; | ||
227 | } | ||
228 | |||
229 | /* Don't call this too often... */ | ||
230 | static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc) | ||
231 | { | ||
232 | int i; | ||
233 | struct nodeinfo *ni; | ||
234 | |||
235 | for (i=1; i<=max_nodeid; i++) { | ||
236 | ni = nodeid2nodeinfo(i, 0); | ||
237 | if (ni && ni->assoc_id == assoc) | ||
238 | return ni; | ||
239 | } | ||
240 | return NULL; | ||
241 | } | ||
242 | |||
243 | /* Data or notification available on socket */ | ||
244 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | ||
245 | { | ||
246 | atomic_inc(&sctp_con.waiting_requests); | ||
247 | if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) | ||
248 | return; | ||
249 | |||
250 | wake_up_interruptible(&lowcomms_recv_wait); | ||
251 | } | ||
252 | |||
253 | |||
254 | /* Add the port number to an IP6 or 4 sockaddr and return the address length. | ||
255 | Also padd out the struct with zeros to make comparisons meaningful */ | ||
256 | |||
257 | static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, | ||
258 | int *addr_len) | ||
259 | { | ||
260 | struct sockaddr_in *local4_addr; | ||
261 | struct sockaddr_in6 *local6_addr; | ||
262 | |||
263 | if (!local_count) | ||
264 | return; | ||
265 | |||
266 | if (!port) { | ||
267 | if (local_addr[0]->ss_family == AF_INET) { | ||
268 | local4_addr = (struct sockaddr_in *)local_addr[0]; | ||
269 | port = be16_to_cpu(local4_addr->sin_port); | ||
270 | } else { | ||
271 | local6_addr = (struct sockaddr_in6 *)local_addr[0]; | ||
272 | port = be16_to_cpu(local6_addr->sin6_port); | ||
273 | } | ||
274 | } | ||
275 | |||
276 | saddr->ss_family = local_addr[0]->ss_family; | ||
277 | if (local_addr[0]->ss_family == AF_INET) { | ||
278 | struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; | ||
279 | in4_addr->sin_port = cpu_to_be16(port); | ||
280 | memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero)); | ||
281 | memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) - | ||
282 | sizeof(struct sockaddr_in)); | ||
283 | *addr_len = sizeof(struct sockaddr_in); | ||
284 | } else { | ||
285 | struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; | ||
286 | in6_addr->sin6_port = cpu_to_be16(port); | ||
287 | memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) - | ||
288 | sizeof(struct sockaddr_in6)); | ||
289 | *addr_len = sizeof(struct sockaddr_in6); | ||
290 | } | ||
291 | } | ||
292 | |||
293 | /* Close the connection and tidy up */ | ||
294 | static void close_connection(void) | ||
295 | { | ||
296 | if (sctp_con.sock) { | ||
297 | sock_release(sctp_con.sock); | ||
298 | sctp_con.sock = NULL; | ||
299 | } | ||
300 | |||
301 | if (sctp_con.rx_page) { | ||
302 | __free_page(sctp_con.rx_page); | ||
303 | sctp_con.rx_page = NULL; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | /* We only send shutdown messages to nodes that are not part of the cluster */ | ||
308 | static void send_shutdown(sctp_assoc_t associd) | ||
309 | { | ||
310 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
311 | struct msghdr outmessage; | ||
312 | struct cmsghdr *cmsg; | ||
313 | struct sctp_sndrcvinfo *sinfo; | ||
314 | int ret; | ||
315 | |||
316 | outmessage.msg_name = NULL; | ||
317 | outmessage.msg_namelen = 0; | ||
318 | outmessage.msg_control = outcmsg; | ||
319 | outmessage.msg_controllen = sizeof(outcmsg); | ||
320 | outmessage.msg_flags = MSG_EOR; | ||
321 | |||
322 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
323 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
324 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
325 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
326 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
327 | sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); | ||
328 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
329 | |||
330 | sinfo->sinfo_flags |= MSG_EOF; | ||
331 | sinfo->sinfo_assoc_id = associd; | ||
332 | |||
333 | ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0); | ||
334 | |||
335 | if (ret != 0) | ||
336 | log_print("send EOF to node failed: %d", ret); | ||
337 | } | ||
338 | |||
339 | |||
340 | /* INIT failed but we don't know which node... | ||
341 | restart INIT on all pending nodes */ | ||
342 | static void init_failed(void) | ||
343 | { | ||
344 | int i; | ||
345 | struct nodeinfo *ni; | ||
346 | |||
347 | for (i=1; i<=max_nodeid; i++) { | ||
348 | ni = nodeid2nodeinfo(i, 0); | ||
349 | if (!ni) | ||
350 | continue; | ||
351 | |||
352 | if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) { | ||
353 | ni->assoc_id = 0; | ||
354 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
355 | spin_lock_bh(&write_nodes_lock); | ||
356 | list_add_tail(&ni->write_list, &write_nodes); | ||
357 | spin_unlock_bh(&write_nodes_lock); | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | wake_up_process(send_task); | ||
362 | } | ||
363 | |||
364 | /* Something happened to an association */ | ||
365 | static void process_sctp_notification(struct msghdr *msg, char *buf) | ||
366 | { | ||
367 | union sctp_notification *sn = (union sctp_notification *)buf; | ||
368 | |||
369 | if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { | ||
370 | switch (sn->sn_assoc_change.sac_state) { | ||
371 | |||
372 | case SCTP_COMM_UP: | ||
373 | case SCTP_RESTART: | ||
374 | { | ||
375 | /* Check that the new node is in the lockspace */ | ||
376 | struct sctp_prim prim; | ||
377 | mm_segment_t fs; | ||
378 | int nodeid; | ||
379 | int prim_len, ret; | ||
380 | int addr_len; | ||
381 | struct nodeinfo *ni; | ||
382 | |||
383 | /* This seems to happen when we received a connection | ||
384 | * too early... or something... anyway, it happens but | ||
385 | * we always seem to get a real message too, see | ||
386 | * receive_from_sock */ | ||
387 | |||
388 | if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) { | ||
389 | log_print("COMM_UP for invalid assoc ID %d", | ||
390 | (int)sn->sn_assoc_change.sac_assoc_id); | ||
391 | init_failed(); | ||
392 | return; | ||
393 | } | ||
394 | memset(&prim, 0, sizeof(struct sctp_prim)); | ||
395 | prim_len = sizeof(struct sctp_prim); | ||
396 | prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id; | ||
397 | |||
398 | fs = get_fs(); | ||
399 | set_fs(get_ds()); | ||
400 | ret = sctp_con.sock->ops->getsockopt(sctp_con.sock, | ||
401 | IPPROTO_SCTP, SCTP_PRIMARY_ADDR, | ||
402 | (char*)&prim, &prim_len); | ||
403 | set_fs(fs); | ||
404 | if (ret < 0) { | ||
405 | struct nodeinfo *ni; | ||
406 | |||
407 | log_print("getsockopt/sctp_primary_addr on " | ||
408 | "new assoc %d failed : %d", | ||
409 | (int)sn->sn_assoc_change.sac_assoc_id, ret); | ||
410 | |||
411 | /* Retry INIT later */ | ||
412 | ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id); | ||
413 | if (ni) | ||
414 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
415 | return; | ||
416 | } | ||
417 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); | ||
418 | if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { | ||
419 | log_print("reject connect from unknown addr"); | ||
420 | send_shutdown(prim.ssp_assoc_id); | ||
421 | return; | ||
422 | } | ||
423 | |||
424 | ni = nodeid2nodeinfo(nodeid, GFP_KERNEL); | ||
425 | if (!ni) | ||
426 | return; | ||
427 | |||
428 | /* Save the assoc ID */ | ||
429 | spin_lock(&ni->lock); | ||
430 | ni->assoc_id = sn->sn_assoc_change.sac_assoc_id; | ||
431 | spin_unlock(&ni->lock); | ||
432 | |||
433 | log_print("got new/restarted association %d nodeid %d", | ||
434 | (int)sn->sn_assoc_change.sac_assoc_id, nodeid); | ||
435 | |||
436 | /* Send any pending writes */ | ||
437 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
438 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
439 | spin_lock_bh(&write_nodes_lock); | ||
440 | list_add_tail(&ni->write_list, &write_nodes); | ||
441 | spin_unlock_bh(&write_nodes_lock); | ||
442 | } | ||
443 | wake_up_process(send_task); | ||
444 | } | ||
445 | break; | ||
446 | |||
447 | case SCTP_COMM_LOST: | ||
448 | case SCTP_SHUTDOWN_COMP: | ||
449 | { | ||
450 | struct nodeinfo *ni; | ||
451 | |||
452 | ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id); | ||
453 | if (ni) { | ||
454 | spin_lock(&ni->lock); | ||
455 | ni->assoc_id = 0; | ||
456 | spin_unlock(&ni->lock); | ||
457 | } | ||
458 | } | ||
459 | break; | ||
460 | |||
461 | /* We don't know which INIT failed, so clear the PENDING flags | ||
462 | * on them all. if assoc_id is zero then it will then try | ||
463 | * again */ | ||
464 | |||
465 | case SCTP_CANT_STR_ASSOC: | ||
466 | { | ||
467 | log_print("Can't start SCTP association - retrying"); | ||
468 | init_failed(); | ||
469 | } | ||
470 | break; | ||
471 | |||
472 | default: | ||
473 | log_print("unexpected SCTP assoc change id=%d state=%d", | ||
474 | (int)sn->sn_assoc_change.sac_assoc_id, | ||
475 | sn->sn_assoc_change.sac_state); | ||
476 | } | ||
477 | } | ||
478 | } | ||
479 | |||
480 | /* Data received from remote end */ | ||
481 | static int receive_from_sock(void) | ||
482 | { | ||
483 | int ret = 0; | ||
484 | struct msghdr msg; | ||
485 | struct kvec iov[2]; | ||
486 | unsigned len; | ||
487 | int r; | ||
488 | struct sctp_sndrcvinfo *sinfo; | ||
489 | struct cmsghdr *cmsg; | ||
490 | struct nodeinfo *ni; | ||
491 | |||
492 | /* These two are marginally too big for stack allocation, but this | ||
493 | * function is (currently) only called by dlm_recvd so static should be | ||
494 | * OK. | ||
495 | */ | ||
496 | static struct sockaddr_storage msgname; | ||
497 | static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
498 | |||
499 | if (sctp_con.sock == NULL) | ||
500 | goto out; | ||
501 | |||
502 | if (sctp_con.rx_page == NULL) { | ||
503 | /* | ||
504 | * This doesn't need to be atomic, but I think it should | ||
505 | * improve performance if it is. | ||
506 | */ | ||
507 | sctp_con.rx_page = alloc_page(GFP_ATOMIC); | ||
508 | if (sctp_con.rx_page == NULL) | ||
509 | goto out_resched; | ||
510 | CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE); | ||
511 | } | ||
512 | |||
513 | memset(&incmsg, 0, sizeof(incmsg)); | ||
514 | memset(&msgname, 0, sizeof(msgname)); | ||
515 | |||
516 | memset(incmsg, 0, sizeof(incmsg)); | ||
517 | msg.msg_name = &msgname; | ||
518 | msg.msg_namelen = sizeof(msgname); | ||
519 | msg.msg_flags = 0; | ||
520 | msg.msg_control = incmsg; | ||
521 | msg.msg_controllen = sizeof(incmsg); | ||
522 | |||
523 | /* I don't see why this circular buffer stuff is necessary for SCTP | ||
524 | * which is a packet-based protocol, but the whole thing breaks under | ||
525 | * load without it! The overhead is minimal (and is in the TCP lowcomms | ||
526 | * anyway, of course) so I'll leave it in until I can figure out what's | ||
527 | * really happening. | ||
528 | */ | ||
529 | |||
530 | /* | ||
531 | * iov[0] is the bit of the circular buffer between the current end | ||
532 | * point (cb.base + cb.len) and the end of the buffer. | ||
533 | */ | ||
534 | iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb); | ||
535 | iov[0].iov_base = page_address(sctp_con.rx_page) + | ||
536 | CBUF_DATA(&sctp_con.cb); | ||
537 | iov[1].iov_len = 0; | ||
538 | |||
539 | /* | ||
540 | * iov[1] is the bit of the circular buffer between the start of the | ||
541 | * buffer and the start of the currently used section (cb.base) | ||
542 | */ | ||
543 | if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) { | ||
544 | iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb); | ||
545 | iov[1].iov_len = sctp_con.cb.base; | ||
546 | iov[1].iov_base = page_address(sctp_con.rx_page); | ||
547 | msg.msg_iovlen = 2; | ||
548 | } | ||
549 | len = iov[0].iov_len + iov[1].iov_len; | ||
550 | |||
551 | r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, 1, len, | ||
552 | MSG_NOSIGNAL | MSG_DONTWAIT); | ||
553 | if (ret <= 0) | ||
554 | goto out_close; | ||
555 | |||
556 | msg.msg_control = incmsg; | ||
557 | msg.msg_controllen = sizeof(incmsg); | ||
558 | cmsg = CMSG_FIRSTHDR(&msg); | ||
559 | sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); | ||
560 | |||
561 | if (msg.msg_flags & MSG_NOTIFICATION) { | ||
562 | process_sctp_notification(&msg, page_address(sctp_con.rx_page)); | ||
563 | return 0; | ||
564 | } | ||
565 | |||
566 | /* Is this a new association ? */ | ||
567 | ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL); | ||
568 | if (ni) { | ||
569 | ni->assoc_id = sinfo->sinfo_assoc_id; | ||
570 | if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) { | ||
571 | |||
572 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
573 | spin_lock_bh(&write_nodes_lock); | ||
574 | list_add_tail(&ni->write_list, &write_nodes); | ||
575 | spin_unlock_bh(&write_nodes_lock); | ||
576 | } | ||
577 | wake_up_process(send_task); | ||
578 | } | ||
579 | } | ||
580 | |||
581 | /* INIT sends a message with length of 1 - ignore it */ | ||
582 | if (r == 1) | ||
583 | return 0; | ||
584 | |||
585 | CBUF_ADD(&sctp_con.cb, ret); | ||
586 | ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), | ||
587 | page_address(sctp_con.rx_page), | ||
588 | sctp_con.cb.base, sctp_con.cb.len, | ||
589 | PAGE_CACHE_SIZE); | ||
590 | if (ret < 0) | ||
591 | goto out_close; | ||
592 | CBUF_EAT(&sctp_con.cb, ret); | ||
593 | |||
594 | out: | ||
595 | ret = 0; | ||
596 | goto out_ret; | ||
597 | |||
598 | out_resched: | ||
599 | lowcomms_data_ready(sctp_con.sock->sk, 0); | ||
600 | ret = 0; | ||
601 | schedule(); | ||
602 | goto out_ret; | ||
603 | |||
604 | out_close: | ||
605 | if (ret != -EAGAIN) | ||
606 | log_print("error reading from sctp socket: %d", ret); | ||
607 | out_ret: | ||
608 | return ret; | ||
609 | } | ||
610 | |||
611 | /* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */ | ||
612 | static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num) | ||
613 | { | ||
614 | mm_segment_t fs; | ||
615 | int result = 0; | ||
616 | |||
617 | fs = get_fs(); | ||
618 | set_fs(get_ds()); | ||
619 | if (num == 1) | ||
620 | result = sctp_con.sock->ops->bind(sctp_con.sock, | ||
621 | (struct sockaddr *) addr, addr_len); | ||
622 | else | ||
623 | result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP, | ||
624 | SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len); | ||
625 | set_fs(fs); | ||
626 | |||
627 | if (result < 0) | ||
628 | log_print("Can't bind to port %d addr number %d", | ||
629 | dlm_config.tcp_port, num); | ||
630 | |||
631 | return result; | ||
632 | } | ||
633 | |||
634 | static void init_local(void) | ||
635 | { | ||
636 | struct sockaddr_storage sas, *addr; | ||
637 | int i; | ||
638 | |||
639 | local_nodeid = dlm_our_nodeid(); | ||
640 | |||
641 | for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { | ||
642 | if (dlm_our_addr(&sas, i)) | ||
643 | break; | ||
644 | |||
645 | addr = kmalloc(sizeof(*addr), GFP_KERNEL); | ||
646 | if (!addr) | ||
647 | break; | ||
648 | memcpy(addr, &sas, sizeof(*addr)); | ||
649 | local_addr[local_count++] = addr; | ||
650 | } | ||
651 | } | ||
652 | |||
653 | /* Initialise SCTP socket and bind to all interfaces */ | ||
654 | static int init_sock(void) | ||
655 | { | ||
656 | mm_segment_t fs; | ||
657 | struct socket *sock = NULL; | ||
658 | struct sockaddr_storage localaddr; | ||
659 | struct sctp_event_subscribe subscribe; | ||
660 | int result = -EINVAL, num = 1, i, addr_len; | ||
661 | |||
662 | if (!local_count) { | ||
663 | init_local(); | ||
664 | if (!local_count) { | ||
665 | log_print("no local IP address has been set"); | ||
666 | goto out; | ||
667 | } | ||
668 | } | ||
669 | |||
670 | result = sock_create_kern(local_addr[0]->ss_family, SOCK_SEQPACKET, | ||
671 | IPPROTO_SCTP, &sock); | ||
672 | if (result < 0) { | ||
673 | log_print("Can't create comms socket, check SCTP is loaded"); | ||
674 | goto out; | ||
675 | } | ||
676 | |||
677 | /* Listen for events */ | ||
678 | memset(&subscribe, 0, sizeof(subscribe)); | ||
679 | subscribe.sctp_data_io_event = 1; | ||
680 | subscribe.sctp_association_event = 1; | ||
681 | subscribe.sctp_send_failure_event = 1; | ||
682 | subscribe.sctp_shutdown_event = 1; | ||
683 | subscribe.sctp_partial_delivery_event = 1; | ||
684 | |||
685 | fs = get_fs(); | ||
686 | set_fs(get_ds()); | ||
687 | result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS, | ||
688 | (char *)&subscribe, sizeof(subscribe)); | ||
689 | set_fs(fs); | ||
690 | |||
691 | if (result < 0) { | ||
692 | log_print("Failed to set SCTP_EVENTS on socket: result=%d", | ||
693 | result); | ||
694 | goto create_delsock; | ||
695 | } | ||
696 | |||
697 | /* Init con struct */ | ||
698 | sock->sk->sk_user_data = &sctp_con; | ||
699 | sctp_con.sock = sock; | ||
700 | sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready; | ||
701 | |||
702 | /* Bind to all interfaces. */ | ||
703 | for (i = 0; i < local_count; i++) { | ||
704 | memcpy(&localaddr, local_addr[i], sizeof(localaddr)); | ||
705 | make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len); | ||
706 | |||
707 | result = add_bind_addr(&localaddr, addr_len, num); | ||
708 | if (result) | ||
709 | goto create_delsock; | ||
710 | ++num; | ||
711 | } | ||
712 | |||
713 | result = sock->ops->listen(sock, 5); | ||
714 | if (result < 0) { | ||
715 | log_print("Can't set socket listening"); | ||
716 | goto create_delsock; | ||
717 | } | ||
718 | |||
719 | return 0; | ||
720 | |||
721 | create_delsock: | ||
722 | sock_release(sock); | ||
723 | sctp_con.sock = NULL; | ||
724 | out: | ||
725 | return result; | ||
726 | } | ||
727 | |||
728 | |||
729 | static struct writequeue_entry *new_writequeue_entry(int allocation) | ||
730 | { | ||
731 | struct writequeue_entry *entry; | ||
732 | |||
733 | entry = kmalloc(sizeof(struct writequeue_entry), allocation); | ||
734 | if (!entry) | ||
735 | return NULL; | ||
736 | |||
737 | entry->page = alloc_page(allocation); | ||
738 | if (!entry->page) { | ||
739 | kfree(entry); | ||
740 | return NULL; | ||
741 | } | ||
742 | |||
743 | entry->offset = 0; | ||
744 | entry->len = 0; | ||
745 | entry->end = 0; | ||
746 | entry->users = 0; | ||
747 | |||
748 | return entry; | ||
749 | } | ||
750 | |||
751 | void *dlm_lowcomms_get_buffer(int nodeid, int len, int allocation, char **ppc) | ||
752 | { | ||
753 | struct writequeue_entry *e; | ||
754 | int offset = 0; | ||
755 | int users = 0; | ||
756 | struct nodeinfo *ni; | ||
757 | |||
758 | if (!atomic_read(&accepting)) | ||
759 | return NULL; | ||
760 | |||
761 | ni = nodeid2nodeinfo(nodeid, allocation); | ||
762 | if (!ni) | ||
763 | return NULL; | ||
764 | |||
765 | spin_lock(&ni->writequeue_lock); | ||
766 | e = list_entry(ni->writequeue.prev, struct writequeue_entry, list); | ||
767 | if (((struct list_head *) e == &ni->writequeue) || | ||
768 | (PAGE_CACHE_SIZE - e->end < len)) { | ||
769 | e = NULL; | ||
770 | } else { | ||
771 | offset = e->end; | ||
772 | e->end += len; | ||
773 | users = e->users++; | ||
774 | } | ||
775 | spin_unlock(&ni->writequeue_lock); | ||
776 | |||
777 | if (e) { | ||
778 | got_one: | ||
779 | if (users == 0) | ||
780 | kmap(e->page); | ||
781 | *ppc = page_address(e->page) + offset; | ||
782 | return e; | ||
783 | } | ||
784 | |||
785 | e = new_writequeue_entry(allocation); | ||
786 | if (e) { | ||
787 | spin_lock(&ni->writequeue_lock); | ||
788 | offset = e->end; | ||
789 | e->end += len; | ||
790 | e->ni = ni; | ||
791 | users = e->users++; | ||
792 | list_add_tail(&e->list, &ni->writequeue); | ||
793 | spin_unlock(&ni->writequeue_lock); | ||
794 | goto got_one; | ||
795 | } | ||
796 | return NULL; | ||
797 | } | ||
798 | |||
799 | void dlm_lowcomms_commit_buffer(void *arg) | ||
800 | { | ||
801 | struct writequeue_entry *e = (struct writequeue_entry *) arg; | ||
802 | int users; | ||
803 | struct nodeinfo *ni = e->ni; | ||
804 | |||
805 | if (!atomic_read(&accepting)) | ||
806 | return; | ||
807 | |||
808 | spin_lock(&ni->writequeue_lock); | ||
809 | users = --e->users; | ||
810 | if (users) | ||
811 | goto out; | ||
812 | e->len = e->end - e->offset; | ||
813 | kunmap(e->page); | ||
814 | spin_unlock(&ni->writequeue_lock); | ||
815 | |||
816 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
817 | spin_lock_bh(&write_nodes_lock); | ||
818 | list_add_tail(&ni->write_list, &write_nodes); | ||
819 | spin_unlock_bh(&write_nodes_lock); | ||
820 | wake_up_process(send_task); | ||
821 | } | ||
822 | return; | ||
823 | |||
824 | out: | ||
825 | spin_unlock(&ni->writequeue_lock); | ||
826 | return; | ||
827 | } | ||
828 | |||
829 | static void free_entry(struct writequeue_entry *e) | ||
830 | { | ||
831 | __free_page(e->page); | ||
832 | kfree(e); | ||
833 | } | ||
834 | |||
835 | /* Initiate an SCTP association. In theory we could just use sendmsg() on | ||
836 | the first IP address and it should work, but this allows us to set up the | ||
837 | association before sending any valuable data that we can't afford to lose. | ||
838 | It also keeps the send path clean as it can now always use the association ID */ | ||
839 | static void initiate_association(int nodeid) | ||
840 | { | ||
841 | struct sockaddr_storage rem_addr; | ||
842 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
843 | struct msghdr outmessage; | ||
844 | struct cmsghdr *cmsg; | ||
845 | struct sctp_sndrcvinfo *sinfo; | ||
846 | int ret; | ||
847 | int addrlen; | ||
848 | char buf[1]; | ||
849 | struct kvec iov[1]; | ||
850 | struct nodeinfo *ni; | ||
851 | |||
852 | log_print("Initiating association with node %d", nodeid); | ||
853 | |||
854 | ni = nodeid2nodeinfo(nodeid, GFP_KERNEL); | ||
855 | if (!ni) | ||
856 | return; | ||
857 | |||
858 | if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) { | ||
859 | log_print("no address for nodeid %d", nodeid); | ||
860 | return; | ||
861 | } | ||
862 | |||
863 | make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen); | ||
864 | |||
865 | outmessage.msg_name = &rem_addr; | ||
866 | outmessage.msg_namelen = addrlen; | ||
867 | outmessage.msg_control = outcmsg; | ||
868 | outmessage.msg_controllen = sizeof(outcmsg); | ||
869 | outmessage.msg_flags = MSG_EOR; | ||
870 | |||
871 | iov[0].iov_base = buf; | ||
872 | iov[0].iov_len = 1; | ||
873 | |||
874 | /* Real INIT messages seem to cause trouble. Just send a 1 byte message | ||
875 | we can afford to lose */ | ||
876 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
877 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
878 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
879 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
880 | sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); | ||
881 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
882 | sinfo->sinfo_ppid = cpu_to_le32(local_nodeid); | ||
883 | |||
884 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
885 | ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1); | ||
886 | if (ret < 0) { | ||
887 | log_print("send INIT to node failed: %d", ret); | ||
888 | /* Try again later */ | ||
889 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
890 | } | ||
891 | } | ||
892 | |||
893 | /* Send a message */ | ||
894 | static int send_to_sock(struct nodeinfo *ni) | ||
895 | { | ||
896 | int ret = 0; | ||
897 | struct writequeue_entry *e; | ||
898 | int len, offset; | ||
899 | struct msghdr outmsg; | ||
900 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
901 | struct cmsghdr *cmsg; | ||
902 | struct sctp_sndrcvinfo *sinfo; | ||
903 | struct kvec iov; | ||
904 | |||
905 | /* See if we need to init an association before we start | ||
906 | sending precious messages */ | ||
907 | spin_lock(&ni->lock); | ||
908 | if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) { | ||
909 | spin_unlock(&ni->lock); | ||
910 | initiate_association(ni->nodeid); | ||
911 | return 0; | ||
912 | } | ||
913 | spin_unlock(&ni->lock); | ||
914 | |||
915 | outmsg.msg_name = NULL; /* We use assoc_id */ | ||
916 | outmsg.msg_namelen = 0; | ||
917 | outmsg.msg_control = outcmsg; | ||
918 | outmsg.msg_controllen = sizeof(outcmsg); | ||
919 | outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR; | ||
920 | |||
921 | cmsg = CMSG_FIRSTHDR(&outmsg); | ||
922 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
923 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
924 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
925 | sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); | ||
926 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
927 | sinfo->sinfo_ppid = cpu_to_le32(local_nodeid); | ||
928 | sinfo->sinfo_assoc_id = ni->assoc_id; | ||
929 | outmsg.msg_controllen = cmsg->cmsg_len; | ||
930 | |||
931 | spin_lock(&ni->writequeue_lock); | ||
932 | for (;;) { | ||
933 | if (list_empty(&ni->writequeue)) | ||
934 | break; | ||
935 | e = list_entry(ni->writequeue.next, struct writequeue_entry, | ||
936 | list); | ||
937 | kmap(e->page); | ||
938 | len = e->len; | ||
939 | offset = e->offset; | ||
940 | BUG_ON(len == 0 && e->users == 0); | ||
941 | spin_unlock(&ni->writequeue_lock); | ||
942 | |||
943 | ret = 0; | ||
944 | if (len) { | ||
945 | iov.iov_base = page_address(e->page)+offset; | ||
946 | iov.iov_len = len; | ||
947 | |||
948 | ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1, | ||
949 | len); | ||
950 | if (ret == -EAGAIN) { | ||
951 | sctp_con.eagain_flag = 1; | ||
952 | goto out; | ||
953 | } else if (ret < 0) | ||
954 | goto send_error; | ||
955 | } else { | ||
956 | /* Don't starve people filling buffers */ | ||
957 | schedule(); | ||
958 | } | ||
959 | |||
960 | spin_lock(&ni->writequeue_lock); | ||
961 | e->offset += ret; | ||
962 | e->len -= ret; | ||
963 | |||
964 | if (e->len == 0 && e->users == 0) { | ||
965 | list_del(&e->list); | ||
966 | free_entry(e); | ||
967 | continue; | ||
968 | } | ||
969 | } | ||
970 | spin_unlock(&ni->writequeue_lock); | ||
971 | out: | ||
972 | return ret; | ||
973 | |||
974 | send_error: | ||
975 | log_print("Error sending to node %d %d", ni->nodeid, ret); | ||
976 | spin_lock(&ni->lock); | ||
977 | if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) { | ||
978 | ni->assoc_id = 0; | ||
979 | spin_unlock(&ni->lock); | ||
980 | initiate_association(ni->nodeid); | ||
981 | } else | ||
982 | spin_unlock(&ni->lock); | ||
983 | |||
984 | return ret; | ||
985 | } | ||
986 | |||
987 | /* Try to send any messages that are pending */ | ||
988 | static void process_output_queue(void) | ||
989 | { | ||
990 | struct list_head *list; | ||
991 | struct list_head *temp; | ||
992 | |||
993 | spin_lock_bh(&write_nodes_lock); | ||
994 | list_for_each_safe(list, temp, &write_nodes) { | ||
995 | struct nodeinfo *ni = | ||
996 | list_entry(list, struct nodeinfo, write_list); | ||
997 | clear_bit(NI_WRITE_PENDING, &ni->flags); | ||
998 | list_del(&ni->write_list); | ||
999 | |||
1000 | spin_unlock_bh(&write_nodes_lock); | ||
1001 | |||
1002 | send_to_sock(ni); | ||
1003 | spin_lock_bh(&write_nodes_lock); | ||
1004 | } | ||
1005 | spin_unlock_bh(&write_nodes_lock); | ||
1006 | } | ||
1007 | |||
1008 | /* Called after we've had -EAGAIN and been woken up */ | ||
1009 | static void refill_write_queue(void) | ||
1010 | { | ||
1011 | int i; | ||
1012 | |||
1013 | for (i=1; i<=max_nodeid; i++) { | ||
1014 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
1015 | |||
1016 | if (ni) { | ||
1017 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
1018 | spin_lock_bh(&write_nodes_lock); | ||
1019 | list_add_tail(&ni->write_list, &write_nodes); | ||
1020 | spin_unlock_bh(&write_nodes_lock); | ||
1021 | } | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | |||
1026 | static void clean_one_writequeue(struct nodeinfo *ni) | ||
1027 | { | ||
1028 | struct list_head *list; | ||
1029 | struct list_head *temp; | ||
1030 | |||
1031 | spin_lock(&ni->writequeue_lock); | ||
1032 | list_for_each_safe(list, temp, &ni->writequeue) { | ||
1033 | struct writequeue_entry *e = | ||
1034 | list_entry(list, struct writequeue_entry, list); | ||
1035 | list_del(&e->list); | ||
1036 | free_entry(e); | ||
1037 | } | ||
1038 | spin_unlock(&ni->writequeue_lock); | ||
1039 | } | ||
1040 | |||
1041 | static void clean_writequeues(void) | ||
1042 | { | ||
1043 | int i; | ||
1044 | |||
1045 | for (i=1; i<=max_nodeid; i++) { | ||
1046 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
1047 | if (ni) | ||
1048 | clean_one_writequeue(ni); | ||
1049 | } | ||
1050 | } | ||
1051 | |||
1052 | |||
1053 | static void dealloc_nodeinfo(void) | ||
1054 | { | ||
1055 | int i; | ||
1056 | |||
1057 | for (i=1; i<=max_nodeid; i++) { | ||
1058 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
1059 | if (ni) { | ||
1060 | idr_remove(&nodeinfo_idr, i); | ||
1061 | kfree(ni); | ||
1062 | } | ||
1063 | } | ||
1064 | } | ||
1065 | |||
1066 | static int write_list_empty(void) | ||
1067 | { | ||
1068 | int status; | ||
1069 | |||
1070 | spin_lock_bh(&write_nodes_lock); | ||
1071 | status = list_empty(&write_nodes); | ||
1072 | spin_unlock_bh(&write_nodes_lock); | ||
1073 | |||
1074 | return status; | ||
1075 | } | ||
1076 | |||
1077 | static int dlm_recvd(void *data) | ||
1078 | { | ||
1079 | DECLARE_WAITQUEUE(wait, current); | ||
1080 | |||
1081 | while (!kthread_should_stop()) { | ||
1082 | int count = 0; | ||
1083 | |||
1084 | set_current_state(TASK_INTERRUPTIBLE); | ||
1085 | add_wait_queue(&lowcomms_recv_wait, &wait); | ||
1086 | if (!test_bit(CF_READ_PENDING, &sctp_con.flags)) | ||
1087 | schedule(); | ||
1088 | remove_wait_queue(&lowcomms_recv_wait, &wait); | ||
1089 | set_current_state(TASK_RUNNING); | ||
1090 | |||
1091 | if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) { | ||
1092 | int ret; | ||
1093 | |||
1094 | do { | ||
1095 | ret = receive_from_sock(); | ||
1096 | |||
1097 | /* Don't starve out everyone else */ | ||
1098 | if (++count >= MAX_RX_MSG_COUNT) { | ||
1099 | schedule(); | ||
1100 | count = 0; | ||
1101 | } | ||
1102 | } while (!kthread_should_stop() && ret >=0); | ||
1103 | } | ||
1104 | schedule(); | ||
1105 | } | ||
1106 | |||
1107 | return 0; | ||
1108 | } | ||
1109 | |||
1110 | static int dlm_sendd(void *data) | ||
1111 | { | ||
1112 | DECLARE_WAITQUEUE(wait, current); | ||
1113 | |||
1114 | add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); | ||
1115 | |||
1116 | while (!kthread_should_stop()) { | ||
1117 | set_current_state(TASK_INTERRUPTIBLE); | ||
1118 | if (write_list_empty()) | ||
1119 | schedule(); | ||
1120 | set_current_state(TASK_RUNNING); | ||
1121 | |||
1122 | if (sctp_con.eagain_flag) { | ||
1123 | sctp_con.eagain_flag = 0; | ||
1124 | refill_write_queue(); | ||
1125 | } | ||
1126 | process_output_queue(); | ||
1127 | } | ||
1128 | |||
1129 | remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); | ||
1130 | |||
1131 | return 0; | ||
1132 | } | ||
1133 | |||
1134 | static void daemons_stop(void) | ||
1135 | { | ||
1136 | kthread_stop(recv_task); | ||
1137 | kthread_stop(send_task); | ||
1138 | } | ||
1139 | |||
1140 | static int daemons_start(void) | ||
1141 | { | ||
1142 | struct task_struct *p; | ||
1143 | int error; | ||
1144 | |||
1145 | p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); | ||
1146 | error = IS_ERR(p); | ||
1147 | if (error) { | ||
1148 | log_print("can't start dlm_recvd %d", error); | ||
1149 | return error; | ||
1150 | } | ||
1151 | recv_task = p; | ||
1152 | |||
1153 | p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); | ||
1154 | error = IS_ERR(p); | ||
1155 | if (error) { | ||
1156 | log_print("can't start dlm_sendd %d", error); | ||
1157 | kthread_stop(recv_task); | ||
1158 | return error; | ||
1159 | } | ||
1160 | send_task = p; | ||
1161 | |||
1162 | return 0; | ||
1163 | } | ||
1164 | |||
1165 | /* | ||
1166 | * This is quite likely to sleep... | ||
1167 | */ | ||
1168 | int dlm_lowcomms_start(void) | ||
1169 | { | ||
1170 | int error; | ||
1171 | |||
1172 | spin_lock_init(&write_nodes_lock); | ||
1173 | INIT_LIST_HEAD(&write_nodes); | ||
1174 | init_rwsem(&nodeinfo_lock); | ||
1175 | |||
1176 | error = init_sock(); | ||
1177 | if (error) | ||
1178 | goto fail_sock; | ||
1179 | error = daemons_start(); | ||
1180 | if (error) | ||
1181 | goto fail_sock; | ||
1182 | atomic_set(&accepting, 1); | ||
1183 | return 0; | ||
1184 | |||
1185 | fail_sock: | ||
1186 | close_connection(); | ||
1187 | return error; | ||
1188 | } | ||
1189 | |||
1190 | /* Set all the activity flags to prevent any socket activity. */ | ||
1191 | |||
1192 | void dlm_lowcomms_stop(void) | ||
1193 | { | ||
1194 | atomic_set(&accepting, 0); | ||
1195 | sctp_con.flags = 0x7; | ||
1196 | daemons_stop(); | ||
1197 | clean_writequeues(); | ||
1198 | close_connection(); | ||
1199 | dealloc_nodeinfo(); | ||
1200 | max_nodeid = 0; | ||
1201 | } | ||
1202 | |||
1203 | int dlm_lowcomms_init(void) | ||
1204 | { | ||
1205 | init_waitqueue_head(&lowcomms_recv_wait); | ||
1206 | return 0; | ||
1207 | } | ||
1208 | |||
1209 | void dlm_lowcomms_exit(void) | ||
1210 | { | ||
1211 | int i; | ||
1212 | |||
1213 | for (i = 0; i < local_count; i++) | ||
1214 | kfree(local_addr[i]); | ||
1215 | local_count = 0; | ||
1216 | local_nodeid = 0; | ||
1217 | } | ||
1218 | |||
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h new file mode 100644 index 000000000000..3af8035ff12f --- /dev/null +++ b/fs/dlm/lowcomms.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __LOWCOMMS_DOT_H__ | ||
15 | #define __LOWCOMMS_DOT_H__ | ||
16 | |||
17 | int dlm_lowcomms_init(void); | ||
18 | void dlm_lowcomms_exit(void); | ||
19 | int dlm_lowcomms_start(void); | ||
20 | void dlm_lowcomms_stop(void); | ||
21 | void *dlm_lowcomms_get_buffer(int nodeid, int len, int allocation, char **ppc); | ||
22 | void dlm_lowcomms_commit_buffer(void *mh); | ||
23 | |||
24 | #endif /* __LOWCOMMS_DOT_H__ */ | ||
25 | |||
diff --git a/fs/dlm/lvb_table.h b/fs/dlm/lvb_table.h new file mode 100644 index 000000000000..cc3e92f3feef --- /dev/null +++ b/fs/dlm/lvb_table.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #ifndef __LVB_TABLE_DOT_H__ | ||
14 | #define __LVB_TABLE_DOT_H__ | ||
15 | |||
16 | extern const int dlm_lvb_operations[8][8]; | ||
17 | |||
18 | #endif | ||
diff --git a/fs/dlm/main.c b/fs/dlm/main.c new file mode 100644 index 000000000000..81bf4cb22033 --- /dev/null +++ b/fs/dlm/main.c | |||
@@ -0,0 +1,89 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lockspace.h" | ||
16 | #include "lock.h" | ||
17 | #include "memory.h" | ||
18 | #include "lowcomms.h" | ||
19 | #include "config.h" | ||
20 | |||
21 | #ifdef CONFIG_DLM_DEBUG | ||
22 | int dlm_register_debugfs(void); | ||
23 | void dlm_unregister_debugfs(void); | ||
24 | #else | ||
25 | static inline int dlm_register_debugfs(void) { return 0; } | ||
26 | static inline void dlm_unregister_debugfs(void) { } | ||
27 | #endif | ||
28 | |||
29 | static int __init init_dlm(void) | ||
30 | { | ||
31 | int error; | ||
32 | |||
33 | error = dlm_memory_init(); | ||
34 | if (error) | ||
35 | goto out; | ||
36 | |||
37 | error = dlm_lockspace_init(); | ||
38 | if (error) | ||
39 | goto out_mem; | ||
40 | |||
41 | error = dlm_config_init(); | ||
42 | if (error) | ||
43 | goto out_lockspace; | ||
44 | |||
45 | error = dlm_register_debugfs(); | ||
46 | if (error) | ||
47 | goto out_config; | ||
48 | |||
49 | error = dlm_lowcomms_init(); | ||
50 | if (error) | ||
51 | goto out_debug; | ||
52 | |||
53 | printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); | ||
54 | |||
55 | return 0; | ||
56 | |||
57 | out_debug: | ||
58 | dlm_unregister_debugfs(); | ||
59 | out_config: | ||
60 | dlm_config_exit(); | ||
61 | out_lockspace: | ||
62 | dlm_lockspace_exit(); | ||
63 | out_mem: | ||
64 | dlm_memory_exit(); | ||
65 | out: | ||
66 | return error; | ||
67 | } | ||
68 | |||
69 | static void __exit exit_dlm(void) | ||
70 | { | ||
71 | dlm_lowcomms_exit(); | ||
72 | dlm_config_exit(); | ||
73 | dlm_memory_exit(); | ||
74 | dlm_lockspace_exit(); | ||
75 | dlm_unregister_debugfs(); | ||
76 | } | ||
77 | |||
78 | module_init(init_dlm); | ||
79 | module_exit(exit_dlm); | ||
80 | |||
81 | MODULE_DESCRIPTION("Distributed Lock Manager"); | ||
82 | MODULE_AUTHOR("Red Hat, Inc."); | ||
83 | MODULE_LICENSE("GPL"); | ||
84 | |||
85 | EXPORT_SYMBOL_GPL(dlm_new_lockspace); | ||
86 | EXPORT_SYMBOL_GPL(dlm_release_lockspace); | ||
87 | EXPORT_SYMBOL_GPL(dlm_lock); | ||
88 | EXPORT_SYMBOL_GPL(dlm_unlock); | ||
89 | |||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c new file mode 100644 index 000000000000..439249b62a57 --- /dev/null +++ b/fs/dlm/member.c | |||
@@ -0,0 +1,314 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #include "dlm_internal.h" | ||
14 | #include "lockspace.h" | ||
15 | #include "member.h" | ||
16 | #include "recoverd.h" | ||
17 | #include "recover.h" | ||
18 | #include "lowcomms.h" | ||
19 | #include "rcom.h" | ||
20 | #include "config.h" | ||
21 | |||
22 | /* | ||
23 | * Following called by dlm_recoverd thread | ||
24 | */ | ||
25 | |||
26 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | ||
27 | { | ||
28 | struct dlm_member *memb = NULL; | ||
29 | struct list_head *tmp; | ||
30 | struct list_head *newlist = &new->list; | ||
31 | struct list_head *head = &ls->ls_nodes; | ||
32 | |||
33 | list_for_each(tmp, head) { | ||
34 | memb = list_entry(tmp, struct dlm_member, list); | ||
35 | if (new->nodeid < memb->nodeid) | ||
36 | break; | ||
37 | } | ||
38 | |||
39 | if (!memb) | ||
40 | list_add_tail(newlist, head); | ||
41 | else { | ||
42 | /* FIXME: can use list macro here */ | ||
43 | newlist->prev = tmp->prev; | ||
44 | newlist->next = tmp; | ||
45 | tmp->prev->next = newlist; | ||
46 | tmp->prev = newlist; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static int dlm_add_member(struct dlm_ls *ls, int nodeid) | ||
51 | { | ||
52 | struct dlm_member *memb; | ||
53 | int w; | ||
54 | |||
55 | memb = kmalloc(sizeof(struct dlm_member), GFP_KERNEL); | ||
56 | if (!memb) | ||
57 | return -ENOMEM; | ||
58 | |||
59 | w = dlm_node_weight(ls->ls_name, nodeid); | ||
60 | if (w < 0) | ||
61 | return w; | ||
62 | |||
63 | memb->nodeid = nodeid; | ||
64 | memb->weight = w; | ||
65 | add_ordered_member(ls, memb); | ||
66 | ls->ls_num_nodes++; | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) | ||
71 | { | ||
72 | list_move(&memb->list, &ls->ls_nodes_gone); | ||
73 | ls->ls_num_nodes--; | ||
74 | } | ||
75 | |||
76 | static int dlm_is_member(struct dlm_ls *ls, int nodeid) | ||
77 | { | ||
78 | struct dlm_member *memb; | ||
79 | |||
80 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
81 | if (memb->nodeid == nodeid) | ||
82 | return TRUE; | ||
83 | } | ||
84 | return FALSE; | ||
85 | } | ||
86 | |||
87 | int dlm_is_removed(struct dlm_ls *ls, int nodeid) | ||
88 | { | ||
89 | struct dlm_member *memb; | ||
90 | |||
91 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | ||
92 | if (memb->nodeid == nodeid) | ||
93 | return TRUE; | ||
94 | } | ||
95 | return FALSE; | ||
96 | } | ||
97 | |||
98 | static void clear_memb_list(struct list_head *head) | ||
99 | { | ||
100 | struct dlm_member *memb; | ||
101 | |||
102 | while (!list_empty(head)) { | ||
103 | memb = list_entry(head->next, struct dlm_member, list); | ||
104 | list_del(&memb->list); | ||
105 | kfree(memb); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | void dlm_clear_members(struct dlm_ls *ls) | ||
110 | { | ||
111 | clear_memb_list(&ls->ls_nodes); | ||
112 | ls->ls_num_nodes = 0; | ||
113 | } | ||
114 | |||
115 | void dlm_clear_members_gone(struct dlm_ls *ls) | ||
116 | { | ||
117 | clear_memb_list(&ls->ls_nodes_gone); | ||
118 | } | ||
119 | |||
120 | static void make_member_array(struct dlm_ls *ls) | ||
121 | { | ||
122 | struct dlm_member *memb; | ||
123 | int i, w, x = 0, total = 0, all_zero = 0, *array; | ||
124 | |||
125 | kfree(ls->ls_node_array); | ||
126 | ls->ls_node_array = NULL; | ||
127 | |||
128 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
129 | if (memb->weight) | ||
130 | total += memb->weight; | ||
131 | } | ||
132 | |||
133 | /* all nodes revert to weight of 1 if all have weight 0 */ | ||
134 | |||
135 | if (!total) { | ||
136 | total = ls->ls_num_nodes; | ||
137 | all_zero = 1; | ||
138 | } | ||
139 | |||
140 | ls->ls_total_weight = total; | ||
141 | |||
142 | array = kmalloc(sizeof(int) * total, GFP_KERNEL); | ||
143 | if (!array) | ||
144 | return; | ||
145 | |||
146 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
147 | if (!all_zero && !memb->weight) | ||
148 | continue; | ||
149 | |||
150 | if (all_zero) | ||
151 | w = 1; | ||
152 | else | ||
153 | w = memb->weight; | ||
154 | |||
155 | DLM_ASSERT(x < total, printk("total %d x %d\n", total, x);); | ||
156 | |||
157 | for (i = 0; i < w; i++) | ||
158 | array[x++] = memb->nodeid; | ||
159 | } | ||
160 | |||
161 | ls->ls_node_array = array; | ||
162 | } | ||
163 | |||
164 | /* send a status request to all members just to establish comms connections */ | ||
165 | |||
166 | static void ping_members(struct dlm_ls *ls) | ||
167 | { | ||
168 | struct dlm_member *memb; | ||
169 | list_for_each_entry(memb, &ls->ls_nodes, list) | ||
170 | dlm_rcom_status(ls, memb->nodeid); | ||
171 | } | ||
172 | |||
173 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | ||
174 | { | ||
175 | struct dlm_member *memb, *safe; | ||
176 | int i, error, found, pos = 0, neg = 0, low = -1; | ||
177 | |||
178 | /* move departed members from ls_nodes to ls_nodes_gone */ | ||
179 | |||
180 | list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { | ||
181 | found = FALSE; | ||
182 | for (i = 0; i < rv->node_count; i++) { | ||
183 | if (memb->nodeid == rv->nodeids[i]) { | ||
184 | found = TRUE; | ||
185 | break; | ||
186 | } | ||
187 | } | ||
188 | |||
189 | if (!found) { | ||
190 | neg++; | ||
191 | dlm_remove_member(ls, memb); | ||
192 | log_debug(ls, "remove member %d", memb->nodeid); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | /* add new members to ls_nodes */ | ||
197 | |||
198 | for (i = 0; i < rv->node_count; i++) { | ||
199 | if (dlm_is_member(ls, rv->nodeids[i])) | ||
200 | continue; | ||
201 | dlm_add_member(ls, rv->nodeids[i]); | ||
202 | pos++; | ||
203 | log_debug(ls, "add member %d", rv->nodeids[i]); | ||
204 | } | ||
205 | |||
206 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
207 | if (low == -1 || memb->nodeid < low) | ||
208 | low = memb->nodeid; | ||
209 | } | ||
210 | ls->ls_low_nodeid = low; | ||
211 | |||
212 | make_member_array(ls); | ||
213 | dlm_set_recover_status(ls, DLM_RS_NODES); | ||
214 | *neg_out = neg; | ||
215 | |||
216 | ping_members(ls); | ||
217 | |||
218 | error = dlm_recover_members_wait(ls); | ||
219 | log_debug(ls, "total members %d", ls->ls_num_nodes); | ||
220 | return error; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Following called from lockspace.c | ||
225 | */ | ||
226 | |||
227 | int dlm_ls_stop(struct dlm_ls *ls) | ||
228 | { | ||
229 | int new; | ||
230 | |||
231 | /* | ||
232 | * A stop cancels any recovery that's in progress (see RECOVERY_STOP, | ||
233 | * dlm_recovery_stopped()) and prevents any new locks from being | ||
234 | * processed (see RUNNING, dlm_locking_stopped()). | ||
235 | */ | ||
236 | |||
237 | spin_lock(&ls->ls_recover_lock); | ||
238 | set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | ||
239 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); | ||
240 | ls->ls_recover_seq++; | ||
241 | spin_unlock(&ls->ls_recover_lock); | ||
242 | |||
243 | /* | ||
244 | * This in_recovery lock does two things: | ||
245 | * | ||
246 | * 1) Keeps this function from returning until all threads are out | ||
247 | * of locking routines and locking is truely stopped. | ||
248 | * 2) Keeps any new requests from being processed until it's unlocked | ||
249 | * when recovery is complete. | ||
250 | */ | ||
251 | |||
252 | if (new) | ||
253 | down_write(&ls->ls_in_recovery); | ||
254 | |||
255 | /* | ||
256 | * The recoverd suspend/resume makes sure that dlm_recoverd (if | ||
257 | * running) has noticed the clearing of RUNNING above and quit | ||
258 | * processing the previous recovery. This will be true for all nodes | ||
259 | * before any nodes start the new recovery. | ||
260 | */ | ||
261 | |||
262 | dlm_recoverd_suspend(ls); | ||
263 | ls->ls_recover_status = 0; | ||
264 | dlm_recoverd_resume(ls); | ||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | int dlm_ls_start(struct dlm_ls *ls) | ||
269 | { | ||
270 | struct dlm_recover *rv = NULL, *rv_old; | ||
271 | int *ids = NULL; | ||
272 | int error, count; | ||
273 | |||
274 | rv = kmalloc(sizeof(struct dlm_recover), GFP_KERNEL); | ||
275 | if (!rv) | ||
276 | return -ENOMEM; | ||
277 | memset(rv, 0, sizeof(struct dlm_recover)); | ||
278 | |||
279 | error = count = dlm_nodeid_list(ls->ls_name, &ids); | ||
280 | if (error <= 0) | ||
281 | goto fail; | ||
282 | |||
283 | spin_lock(&ls->ls_recover_lock); | ||
284 | |||
285 | /* the lockspace needs to be stopped before it can be started */ | ||
286 | |||
287 | if (!dlm_locking_stopped(ls)) { | ||
288 | spin_unlock(&ls->ls_recover_lock); | ||
289 | log_error(ls, "start ignored: lockspace running"); | ||
290 | error = -EINVAL; | ||
291 | goto fail; | ||
292 | } | ||
293 | |||
294 | rv->nodeids = ids; | ||
295 | rv->node_count = count; | ||
296 | rv->seq = ++ls->ls_recover_seq; | ||
297 | rv_old = ls->ls_recover_args; | ||
298 | ls->ls_recover_args = rv; | ||
299 | spin_unlock(&ls->ls_recover_lock); | ||
300 | |||
301 | if (rv_old) { | ||
302 | kfree(rv_old->nodeids); | ||
303 | kfree(rv_old); | ||
304 | } | ||
305 | |||
306 | dlm_recoverd_kick(ls); | ||
307 | return 0; | ||
308 | |||
309 | fail: | ||
310 | kfree(rv); | ||
311 | kfree(ids); | ||
312 | return error; | ||
313 | } | ||
314 | |||
diff --git a/fs/dlm/member.h b/fs/dlm/member.h new file mode 100644 index 000000000000..927c08c19214 --- /dev/null +++ b/fs/dlm/member.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #ifndef __MEMBER_DOT_H__ | ||
14 | #define __MEMBER_DOT_H__ | ||
15 | |||
16 | int dlm_ls_stop(struct dlm_ls *ls); | ||
17 | int dlm_ls_start(struct dlm_ls *ls); | ||
18 | void dlm_clear_members(struct dlm_ls *ls); | ||
19 | void dlm_clear_members_gone(struct dlm_ls *ls); | ||
20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); | ||
21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); | ||
22 | |||
23 | #endif /* __MEMBER_DOT_H__ */ | ||
24 | |||
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c new file mode 100644 index 000000000000..0b9851d0bdb2 --- /dev/null +++ b/fs/dlm/memory.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "config.h" | ||
16 | #include "memory.h" | ||
17 | |||
18 | static kmem_cache_t *lkb_cache; | ||
19 | |||
20 | |||
21 | int dlm_memory_init(void) | ||
22 | { | ||
23 | int ret = 0; | ||
24 | |||
25 | lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), | ||
26 | __alignof__(struct dlm_lkb), 0, NULL, NULL); | ||
27 | if (!lkb_cache) | ||
28 | ret = -ENOMEM; | ||
29 | return ret; | ||
30 | } | ||
31 | |||
32 | void dlm_memory_exit(void) | ||
33 | { | ||
34 | if (lkb_cache) | ||
35 | kmem_cache_destroy(lkb_cache); | ||
36 | } | ||
37 | |||
38 | char *allocate_lvb(struct dlm_ls *ls) | ||
39 | { | ||
40 | char *p; | ||
41 | |||
42 | p = kmalloc(ls->ls_lvblen, GFP_KERNEL); | ||
43 | if (p) | ||
44 | memset(p, 0, ls->ls_lvblen); | ||
45 | return p; | ||
46 | } | ||
47 | |||
48 | void free_lvb(char *p) | ||
49 | { | ||
50 | kfree(p); | ||
51 | } | ||
52 | |||
53 | uint64_t *allocate_range(struct dlm_ls *ls) | ||
54 | { | ||
55 | int ralen = 4*sizeof(uint64_t); | ||
56 | uint64_t *p; | ||
57 | |||
58 | p = kmalloc(ralen, GFP_KERNEL); | ||
59 | if (p) | ||
60 | memset(p, 0, ralen); | ||
61 | return p; | ||
62 | } | ||
63 | |||
64 | void free_range(uint64_t *p) | ||
65 | { | ||
66 | kfree(p); | ||
67 | } | ||
68 | |||
69 | /* FIXME: have some minimal space built-in to rsb for the name and | ||
70 | kmalloc a separate name if needed, like dentries are done */ | ||
71 | |||
72 | struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) | ||
73 | { | ||
74 | struct dlm_rsb *r; | ||
75 | |||
76 | DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); | ||
77 | |||
78 | r = kmalloc(sizeof(*r) + namelen, GFP_KERNEL); | ||
79 | if (r) | ||
80 | memset(r, 0, sizeof(*r) + namelen); | ||
81 | return r; | ||
82 | } | ||
83 | |||
84 | void free_rsb(struct dlm_rsb *r) | ||
85 | { | ||
86 | if (r->res_lvbptr) | ||
87 | free_lvb(r->res_lvbptr); | ||
88 | kfree(r); | ||
89 | } | ||
90 | |||
91 | struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) | ||
92 | { | ||
93 | struct dlm_lkb *lkb; | ||
94 | |||
95 | lkb = kmem_cache_alloc(lkb_cache, GFP_KERNEL); | ||
96 | if (lkb) | ||
97 | memset(lkb, 0, sizeof(*lkb)); | ||
98 | return lkb; | ||
99 | } | ||
100 | |||
101 | void free_lkb(struct dlm_lkb *lkb) | ||
102 | { | ||
103 | kmem_cache_free(lkb_cache, lkb); | ||
104 | } | ||
105 | |||
106 | struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) | ||
107 | { | ||
108 | struct dlm_direntry *de; | ||
109 | |||
110 | DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); | ||
111 | |||
112 | de = kmalloc(sizeof(*de) + namelen, GFP_KERNEL); | ||
113 | if (de) | ||
114 | memset(de, 0, sizeof(*de) + namelen); | ||
115 | return de; | ||
116 | } | ||
117 | |||
118 | void free_direntry(struct dlm_direntry *de) | ||
119 | { | ||
120 | kfree(de); | ||
121 | } | ||
122 | |||
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h new file mode 100644 index 000000000000..7b235132b0b4 --- /dev/null +++ b/fs/dlm/memory.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __MEMORY_DOT_H__ | ||
15 | #define __MEMORY_DOT_H__ | ||
16 | |||
17 | int dlm_memory_init(void); | ||
18 | void dlm_memory_exit(void); | ||
19 | struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen); | ||
20 | void free_rsb(struct dlm_rsb *r); | ||
21 | struct dlm_lkb *allocate_lkb(struct dlm_ls *ls); | ||
22 | void free_lkb(struct dlm_lkb *l); | ||
23 | struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen); | ||
24 | void free_direntry(struct dlm_direntry *de); | ||
25 | char *allocate_lvb(struct dlm_ls *ls); | ||
26 | void free_lvb(char *l); | ||
27 | uint64_t *allocate_range(struct dlm_ls *ls); | ||
28 | void free_range(uint64_t *l); | ||
29 | |||
30 | #endif /* __MEMORY_DOT_H__ */ | ||
31 | |||
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c new file mode 100644 index 000000000000..d96f9bbb407c --- /dev/null +++ b/fs/dlm/midcomms.c | |||
@@ -0,0 +1,140 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | /* | ||
15 | * midcomms.c | ||
16 | * | ||
17 | * This is the appallingly named "mid-level" comms layer. | ||
18 | * | ||
19 | * Its purpose is to take packets from the "real" comms layer, | ||
20 | * split them up into packets and pass them to the interested | ||
21 | * part of the locking mechanism. | ||
22 | * | ||
23 | * It also takes messages from the locking layer, formats them | ||
24 | * into packets and sends them to the comms layer. | ||
25 | */ | ||
26 | |||
27 | #include "dlm_internal.h" | ||
28 | #include "lowcomms.h" | ||
29 | #include "config.h" | ||
30 | #include "rcom.h" | ||
31 | #include "lock.h" | ||
32 | #include "midcomms.h" | ||
33 | |||
34 | |||
35 | static void copy_from_cb(void *dst, const void *base, unsigned offset, | ||
36 | unsigned len, unsigned limit) | ||
37 | { | ||
38 | unsigned copy = len; | ||
39 | |||
40 | if ((copy + offset) > limit) | ||
41 | copy = limit - offset; | ||
42 | memcpy(dst, base + offset, copy); | ||
43 | len -= copy; | ||
44 | if (len) | ||
45 | memcpy(dst + copy, base, len); | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * Called from the low-level comms layer to process a buffer of | ||
50 | * commands. | ||
51 | * | ||
52 | * Only complete messages are processed here, any "spare" bytes from | ||
53 | * the end of a buffer are saved and tacked onto the front of the next | ||
54 | * message that comes in. I doubt this will happen very often but we | ||
55 | * need to be able to cope with it and I don't want the task to be waiting | ||
56 | * for packets to come in when there is useful work to be done. | ||
57 | */ | ||
58 | |||
59 | int dlm_process_incoming_buffer(int nodeid, const void *base, | ||
60 | unsigned offset, unsigned len, unsigned limit) | ||
61 | { | ||
62 | unsigned char __tmp[DLM_INBUF_LEN]; | ||
63 | struct dlm_header *msg = (struct dlm_header *) __tmp; | ||
64 | int ret = 0; | ||
65 | int err = 0; | ||
66 | uint16_t msglen; | ||
67 | uint32_t lockspace; | ||
68 | |||
69 | while (len > sizeof(struct dlm_header)) { | ||
70 | |||
71 | /* Copy just the header to check the total length. The | ||
72 | message may wrap around the end of the buffer back to the | ||
73 | start, so we need to use a temp buffer and copy_from_cb. */ | ||
74 | |||
75 | copy_from_cb(msg, base, offset, sizeof(struct dlm_header), | ||
76 | limit); | ||
77 | |||
78 | msglen = le16_to_cpu(msg->h_length); | ||
79 | lockspace = msg->h_lockspace; | ||
80 | |||
81 | err = -EINVAL; | ||
82 | if (msglen < sizeof(struct dlm_header)) | ||
83 | break; | ||
84 | err = -E2BIG; | ||
85 | if (msglen > dlm_config.buffer_size) { | ||
86 | log_print("message size %d from %d too big, buf len %d", | ||
87 | msglen, nodeid, len); | ||
88 | break; | ||
89 | } | ||
90 | err = 0; | ||
91 | |||
92 | /* If only part of the full message is contained in this | ||
93 | buffer, then do nothing and wait for lowcomms to call | ||
94 | us again later with more data. We return 0 meaning | ||
95 | we've consumed none of the input buffer. */ | ||
96 | |||
97 | if (msglen > len) | ||
98 | break; | ||
99 | |||
100 | /* Allocate a larger temp buffer if the full message won't fit | ||
101 | in the buffer on the stack (which should work for most | ||
102 | ordinary messages). */ | ||
103 | |||
104 | if (msglen > sizeof(__tmp) && | ||
105 | msg == (struct dlm_header *) __tmp) { | ||
106 | msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); | ||
107 | if (msg == NULL) | ||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | copy_from_cb(msg, base, offset, msglen, limit); | ||
112 | |||
113 | BUG_ON(lockspace != msg->h_lockspace); | ||
114 | |||
115 | ret += msglen; | ||
116 | offset += msglen; | ||
117 | offset &= (limit - 1); | ||
118 | len -= msglen; | ||
119 | |||
120 | switch (msg->h_cmd) { | ||
121 | case DLM_MSG: | ||
122 | dlm_receive_message(msg, nodeid, FALSE); | ||
123 | break; | ||
124 | |||
125 | case DLM_RCOM: | ||
126 | dlm_receive_rcom(msg, nodeid); | ||
127 | break; | ||
128 | |||
129 | default: | ||
130 | log_print("unknown msg type %x from %u: %u %u %u %u", | ||
131 | msg->h_cmd, nodeid, msglen, len, offset, ret); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | if (msg != (struct dlm_header *) __tmp) | ||
136 | kfree(msg); | ||
137 | |||
138 | return err ? err : ret; | ||
139 | } | ||
140 | |||
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h new file mode 100644 index 000000000000..95852a5f111d --- /dev/null +++ b/fs/dlm/midcomms.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __MIDCOMMS_DOT_H__ | ||
15 | #define __MIDCOMMS_DOT_H__ | ||
16 | |||
17 | int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset, | ||
18 | unsigned len, unsigned limit); | ||
19 | |||
20 | #endif /* __MIDCOMMS_DOT_H__ */ | ||
21 | |||
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c new file mode 100644 index 000000000000..4c5c08a8860e --- /dev/null +++ b/fs/dlm/rcom.c | |||
@@ -0,0 +1,460 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lockspace.h" | ||
16 | #include "member.h" | ||
17 | #include "lowcomms.h" | ||
18 | #include "midcomms.h" | ||
19 | #include "rcom.h" | ||
20 | #include "recover.h" | ||
21 | #include "dir.h" | ||
22 | #include "config.h" | ||
23 | #include "memory.h" | ||
24 | #include "lock.h" | ||
25 | #include "util.h" | ||
26 | |||
27 | |||
28 | static int rcom_response(struct dlm_ls *ls) | ||
29 | { | ||
30 | return test_bit(LSFL_RCOM_READY, &ls->ls_flags); | ||
31 | } | ||
32 | |||
33 | static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, | ||
34 | struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret) | ||
35 | { | ||
36 | struct dlm_rcom *rc; | ||
37 | struct dlm_mhandle *mh; | ||
38 | char *mb; | ||
39 | int mb_len = sizeof(struct dlm_rcom) + len; | ||
40 | |||
41 | mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); | ||
42 | if (!mh) { | ||
43 | log_print("create_rcom to %d type %d len %d ENOBUFS", | ||
44 | to_nodeid, type, len); | ||
45 | return -ENOBUFS; | ||
46 | } | ||
47 | memset(mb, 0, mb_len); | ||
48 | |||
49 | rc = (struct dlm_rcom *) mb; | ||
50 | |||
51 | rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); | ||
52 | rc->rc_header.h_lockspace = ls->ls_global_id; | ||
53 | rc->rc_header.h_nodeid = dlm_our_nodeid(); | ||
54 | rc->rc_header.h_length = mb_len; | ||
55 | rc->rc_header.h_cmd = DLM_RCOM; | ||
56 | |||
57 | rc->rc_type = type; | ||
58 | |||
59 | *mh_ret = mh; | ||
60 | *rc_ret = rc; | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh, | ||
65 | struct dlm_rcom *rc) | ||
66 | { | ||
67 | dlm_rcom_out(rc); | ||
68 | dlm_lowcomms_commit_buffer(mh); | ||
69 | } | ||
70 | |||
71 | /* When replying to a status request, a node also sends back its | ||
72 | configuration values. The requesting node then checks that the remote | ||
73 | node is configured the same way as itself. */ | ||
74 | |||
75 | static void make_config(struct dlm_ls *ls, struct rcom_config *rf) | ||
76 | { | ||
77 | rf->rf_lvblen = ls->ls_lvblen; | ||
78 | rf->rf_lsflags = ls->ls_exflags; | ||
79 | } | ||
80 | |||
81 | static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) | ||
82 | { | ||
83 | if (rf->rf_lvblen != ls->ls_lvblen || | ||
84 | rf->rf_lsflags != ls->ls_exflags) { | ||
85 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", | ||
86 | ls->ls_lvblen, ls->ls_exflags, | ||
87 | nodeid, rf->rf_lvblen, rf->rf_lsflags); | ||
88 | return -EINVAL; | ||
89 | } | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | ||
94 | { | ||
95 | struct dlm_rcom *rc; | ||
96 | struct dlm_mhandle *mh; | ||
97 | int error = 0; | ||
98 | |||
99 | memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); | ||
100 | |||
101 | if (nodeid == dlm_our_nodeid()) { | ||
102 | rc = (struct dlm_rcom *) ls->ls_recover_buf; | ||
103 | rc->rc_result = dlm_recover_status(ls); | ||
104 | goto out; | ||
105 | } | ||
106 | |||
107 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); | ||
108 | if (error) | ||
109 | goto out; | ||
110 | |||
111 | send_rcom(ls, mh, rc); | ||
112 | |||
113 | error = dlm_wait_function(ls, &rcom_response); | ||
114 | clear_bit(LSFL_RCOM_READY, &ls->ls_flags); | ||
115 | if (error) | ||
116 | goto out; | ||
117 | |||
118 | rc = (struct dlm_rcom *) ls->ls_recover_buf; | ||
119 | |||
120 | if (rc->rc_result == -ESRCH) { | ||
121 | /* we pretend the remote lockspace exists with 0 status */ | ||
122 | log_debug(ls, "remote node %d not ready", nodeid); | ||
123 | rc->rc_result = 0; | ||
124 | } else | ||
125 | error = check_config(ls, (struct rcom_config *) rc->rc_buf, | ||
126 | nodeid); | ||
127 | /* the caller looks at rc_result for the remote recovery status */ | ||
128 | out: | ||
129 | return error; | ||
130 | } | ||
131 | |||
132 | static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
133 | { | ||
134 | struct dlm_rcom *rc; | ||
135 | struct dlm_mhandle *mh; | ||
136 | int error, nodeid = rc_in->rc_header.h_nodeid; | ||
137 | |||
138 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, | ||
139 | sizeof(struct rcom_config), &rc, &mh); | ||
140 | if (error) | ||
141 | return; | ||
142 | rc->rc_result = dlm_recover_status(ls); | ||
143 | make_config(ls, (struct rcom_config *) rc->rc_buf); | ||
144 | |||
145 | send_rcom(ls, mh, rc); | ||
146 | } | ||
147 | |||
148 | static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
149 | { | ||
150 | memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length); | ||
151 | set_bit(LSFL_RCOM_READY, &ls->ls_flags); | ||
152 | wake_up(&ls->ls_wait_general); | ||
153 | } | ||
154 | |||
155 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | ||
156 | { | ||
157 | struct dlm_rcom *rc; | ||
158 | struct dlm_mhandle *mh; | ||
159 | int error = 0, len = sizeof(struct dlm_rcom); | ||
160 | |||
161 | memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); | ||
162 | |||
163 | if (nodeid == dlm_our_nodeid()) { | ||
164 | dlm_copy_master_names(ls, last_name, last_len, | ||
165 | ls->ls_recover_buf + len, | ||
166 | dlm_config.buffer_size - len, nodeid); | ||
167 | goto out; | ||
168 | } | ||
169 | |||
170 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); | ||
171 | if (error) | ||
172 | goto out; | ||
173 | memcpy(rc->rc_buf, last_name, last_len); | ||
174 | |||
175 | send_rcom(ls, mh, rc); | ||
176 | |||
177 | error = dlm_wait_function(ls, &rcom_response); | ||
178 | clear_bit(LSFL_RCOM_READY, &ls->ls_flags); | ||
179 | out: | ||
180 | return error; | ||
181 | } | ||
182 | |||
183 | static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
184 | { | ||
185 | struct dlm_rcom *rc; | ||
186 | struct dlm_mhandle *mh; | ||
187 | int error, inlen, outlen; | ||
188 | int nodeid = rc_in->rc_header.h_nodeid; | ||
189 | uint32_t status = dlm_recover_status(ls); | ||
190 | |||
191 | /* | ||
192 | * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while | ||
193 | * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes). | ||
194 | * It could only happen in rare cases where we get a late NAMES | ||
195 | * message from a previous instance of recovery. | ||
196 | */ | ||
197 | |||
198 | if (!(status & DLM_RS_NODES)) { | ||
199 | log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid); | ||
200 | return; | ||
201 | } | ||
202 | |||
203 | nodeid = rc_in->rc_header.h_nodeid; | ||
204 | inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); | ||
205 | outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); | ||
206 | |||
207 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); | ||
208 | if (error) | ||
209 | return; | ||
210 | |||
211 | dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, | ||
212 | nodeid); | ||
213 | send_rcom(ls, mh, rc); | ||
214 | } | ||
215 | |||
216 | static void receive_rcom_names_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
217 | { | ||
218 | memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length); | ||
219 | set_bit(LSFL_RCOM_READY, &ls->ls_flags); | ||
220 | wake_up(&ls->ls_wait_general); | ||
221 | } | ||
222 | |||
223 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) | ||
224 | { | ||
225 | struct dlm_rcom *rc; | ||
226 | struct dlm_mhandle *mh; | ||
227 | struct dlm_ls *ls = r->res_ls; | ||
228 | int error; | ||
229 | |||
230 | error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length, | ||
231 | &rc, &mh); | ||
232 | if (error) | ||
233 | goto out; | ||
234 | memcpy(rc->rc_buf, r->res_name, r->res_length); | ||
235 | rc->rc_id = (unsigned long) r; | ||
236 | |||
237 | send_rcom(ls, mh, rc); | ||
238 | out: | ||
239 | return error; | ||
240 | } | ||
241 | |||
242 | static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
243 | { | ||
244 | struct dlm_rcom *rc; | ||
245 | struct dlm_mhandle *mh; | ||
246 | int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid; | ||
247 | int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); | ||
248 | |||
249 | error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh); | ||
250 | if (error) | ||
251 | return; | ||
252 | |||
253 | error = dlm_dir_lookup(ls, nodeid, rc_in->rc_buf, len, &ret_nodeid); | ||
254 | if (error) | ||
255 | ret_nodeid = error; | ||
256 | rc->rc_result = ret_nodeid; | ||
257 | rc->rc_id = rc_in->rc_id; | ||
258 | |||
259 | send_rcom(ls, mh, rc); | ||
260 | } | ||
261 | |||
262 | static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
263 | { | ||
264 | dlm_recover_master_reply(ls, rc_in); | ||
265 | } | ||
266 | |||
267 | static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
268 | struct rcom_lock *rl) | ||
269 | { | ||
270 | memset(rl, 0, sizeof(*rl)); | ||
271 | |||
272 | rl->rl_ownpid = lkb->lkb_ownpid; | ||
273 | rl->rl_lkid = lkb->lkb_id; | ||
274 | rl->rl_exflags = lkb->lkb_exflags; | ||
275 | rl->rl_flags = lkb->lkb_flags; | ||
276 | rl->rl_lvbseq = lkb->lkb_lvbseq; | ||
277 | rl->rl_rqmode = lkb->lkb_rqmode; | ||
278 | rl->rl_grmode = lkb->lkb_grmode; | ||
279 | rl->rl_status = lkb->lkb_status; | ||
280 | rl->rl_wait_type = lkb->lkb_wait_type; | ||
281 | |||
282 | if (lkb->lkb_bastaddr) | ||
283 | rl->rl_asts |= AST_BAST; | ||
284 | if (lkb->lkb_astaddr) | ||
285 | rl->rl_asts |= AST_COMP; | ||
286 | |||
287 | if (lkb->lkb_range) | ||
288 | memcpy(rl->rl_range, lkb->lkb_range, 4*sizeof(uint64_t)); | ||
289 | |||
290 | rl->rl_namelen = r->res_length; | ||
291 | memcpy(rl->rl_name, r->res_name, r->res_length); | ||
292 | |||
293 | /* FIXME: might we have an lvb without DLM_LKF_VALBLK set ? | ||
294 | If so, receive_rcom_lock_args() won't take this copy. */ | ||
295 | |||
296 | if (lkb->lkb_lvbptr) | ||
297 | memcpy(rl->rl_lvb, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); | ||
298 | } | ||
299 | |||
300 | int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
301 | { | ||
302 | struct dlm_ls *ls = r->res_ls; | ||
303 | struct dlm_rcom *rc; | ||
304 | struct dlm_mhandle *mh; | ||
305 | struct rcom_lock *rl; | ||
306 | int error, len = sizeof(struct rcom_lock); | ||
307 | |||
308 | if (lkb->lkb_lvbptr) | ||
309 | len += ls->ls_lvblen; | ||
310 | |||
311 | error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh); | ||
312 | if (error) | ||
313 | goto out; | ||
314 | |||
315 | rl = (struct rcom_lock *) rc->rc_buf; | ||
316 | pack_rcom_lock(r, lkb, rl); | ||
317 | rc->rc_id = (unsigned long) r; | ||
318 | |||
319 | send_rcom(ls, mh, rc); | ||
320 | out: | ||
321 | return error; | ||
322 | } | ||
323 | |||
324 | static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
325 | { | ||
326 | struct dlm_rcom *rc; | ||
327 | struct dlm_mhandle *mh; | ||
328 | int error, nodeid = rc_in->rc_header.h_nodeid; | ||
329 | |||
330 | dlm_recover_master_copy(ls, rc_in); | ||
331 | |||
332 | error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY, | ||
333 | sizeof(struct rcom_lock), &rc, &mh); | ||
334 | if (error) | ||
335 | return; | ||
336 | |||
337 | /* We send back the same rcom_lock struct we received, but | ||
338 | dlm_recover_master_copy() has filled in rl_remid and rl_result */ | ||
339 | |||
340 | memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); | ||
341 | rc->rc_id = rc_in->rc_id; | ||
342 | |||
343 | send_rcom(ls, mh, rc); | ||
344 | } | ||
345 | |||
346 | static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
347 | { | ||
348 | uint32_t status = dlm_recover_status(ls); | ||
349 | |||
350 | if (!(status & DLM_RS_DIR)) { | ||
351 | log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u", | ||
352 | rc_in->rc_header.h_nodeid); | ||
353 | return; | ||
354 | } | ||
355 | |||
356 | dlm_recover_process_copy(ls, rc_in); | ||
357 | } | ||
358 | |||
359 | static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | ||
360 | { | ||
361 | struct dlm_rcom *rc; | ||
362 | struct dlm_mhandle *mh; | ||
363 | char *mb; | ||
364 | int mb_len = sizeof(struct dlm_rcom); | ||
365 | |||
366 | mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb); | ||
367 | if (!mh) | ||
368 | return -ENOBUFS; | ||
369 | memset(mb, 0, mb_len); | ||
370 | |||
371 | rc = (struct dlm_rcom *) mb; | ||
372 | |||
373 | rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); | ||
374 | rc->rc_header.h_lockspace = rc_in->rc_header.h_lockspace; | ||
375 | rc->rc_header.h_nodeid = dlm_our_nodeid(); | ||
376 | rc->rc_header.h_length = mb_len; | ||
377 | rc->rc_header.h_cmd = DLM_RCOM; | ||
378 | |||
379 | rc->rc_type = DLM_RCOM_STATUS_REPLY; | ||
380 | rc->rc_result = -ESRCH; | ||
381 | |||
382 | dlm_rcom_out(rc); | ||
383 | dlm_lowcomms_commit_buffer(mh); | ||
384 | |||
385 | return 0; | ||
386 | } | ||
387 | |||
388 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special | ||
389 | recovery-only comms are sent through here. */ | ||
390 | |||
391 | void dlm_receive_rcom(struct dlm_header *hd, int nodeid) | ||
392 | { | ||
393 | struct dlm_rcom *rc = (struct dlm_rcom *) hd; | ||
394 | struct dlm_ls *ls; | ||
395 | |||
396 | dlm_rcom_in(rc); | ||
397 | |||
398 | /* If the lockspace doesn't exist then still send a status message | ||
399 | back; it's possible that it just doesn't have its global_id yet. */ | ||
400 | |||
401 | ls = dlm_find_lockspace_global(hd->h_lockspace); | ||
402 | if (!ls) { | ||
403 | log_print("lockspace %x from %d not found", | ||
404 | hd->h_lockspace, nodeid); | ||
405 | send_ls_not_ready(nodeid, rc); | ||
406 | return; | ||
407 | } | ||
408 | |||
409 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { | ||
410 | log_error(ls, "ignoring recovery message %x from %d", | ||
411 | rc->rc_type, nodeid); | ||
412 | goto out; | ||
413 | } | ||
414 | |||
415 | if (nodeid != rc->rc_header.h_nodeid) { | ||
416 | log_error(ls, "bad rcom nodeid %d from %d", | ||
417 | rc->rc_header.h_nodeid, nodeid); | ||
418 | goto out; | ||
419 | } | ||
420 | |||
421 | switch (rc->rc_type) { | ||
422 | case DLM_RCOM_STATUS: | ||
423 | receive_rcom_status(ls, rc); | ||
424 | break; | ||
425 | |||
426 | case DLM_RCOM_NAMES: | ||
427 | receive_rcom_names(ls, rc); | ||
428 | break; | ||
429 | |||
430 | case DLM_RCOM_LOOKUP: | ||
431 | receive_rcom_lookup(ls, rc); | ||
432 | break; | ||
433 | |||
434 | case DLM_RCOM_LOCK: | ||
435 | receive_rcom_lock(ls, rc); | ||
436 | break; | ||
437 | |||
438 | case DLM_RCOM_STATUS_REPLY: | ||
439 | receive_rcom_status_reply(ls, rc); | ||
440 | break; | ||
441 | |||
442 | case DLM_RCOM_NAMES_REPLY: | ||
443 | receive_rcom_names_reply(ls, rc); | ||
444 | break; | ||
445 | |||
446 | case DLM_RCOM_LOOKUP_REPLY: | ||
447 | receive_rcom_lookup_reply(ls, rc); | ||
448 | break; | ||
449 | |||
450 | case DLM_RCOM_LOCK_REPLY: | ||
451 | receive_rcom_lock_reply(ls, rc); | ||
452 | break; | ||
453 | |||
454 | default: | ||
455 | DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); | ||
456 | } | ||
457 | out: | ||
458 | dlm_put_lockspace(ls); | ||
459 | } | ||
460 | |||
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h new file mode 100644 index 000000000000..d7984321ff41 --- /dev/null +++ b/fs/dlm/rcom.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __RCOM_DOT_H__ | ||
15 | #define __RCOM_DOT_H__ | ||
16 | |||
17 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid); | ||
18 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); | ||
19 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); | ||
20 | int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); | ||
21 | void dlm_receive_rcom(struct dlm_header *hd, int nodeid); | ||
22 | |||
23 | #endif | ||
24 | |||
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c new file mode 100644 index 000000000000..1712c97bc229 --- /dev/null +++ b/fs/dlm/recover.c | |||
@@ -0,0 +1,762 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lockspace.h" | ||
16 | #include "dir.h" | ||
17 | #include "config.h" | ||
18 | #include "ast.h" | ||
19 | #include "memory.h" | ||
20 | #include "rcom.h" | ||
21 | #include "lock.h" | ||
22 | #include "lowcomms.h" | ||
23 | #include "member.h" | ||
24 | #include "recover.h" | ||
25 | |||
26 | |||
27 | /* | ||
28 | * Recovery waiting routines: these functions wait for a particular reply from | ||
29 | * a remote node, or for the remote node to report a certain status. They need | ||
30 | * to abort if the lockspace is stopped indicating a node has failed (perhaps | ||
31 | * the one being waited for). | ||
32 | */ | ||
33 | |||
34 | /* | ||
35 | * Wait until given function returns non-zero or lockspace is stopped | ||
36 | * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another | ||
37 | * function thinks it could have completed the waited-on task, they should wake | ||
38 | * up ls_wait_general to get an immediate response rather than waiting for the | ||
39 | * timer to detect the result. A timer wakes us up periodically while waiting | ||
40 | * to see if we should abort due to a node failure. This should only be called | ||
41 | * by the dlm_recoverd thread. | ||
42 | */ | ||
43 | |||
44 | static void dlm_wait_timer_fn(unsigned long data) | ||
45 | { | ||
46 | struct dlm_ls *ls = (struct dlm_ls *) data; | ||
47 | mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); | ||
48 | wake_up(&ls->ls_wait_general); | ||
49 | } | ||
50 | |||
51 | int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) | ||
52 | { | ||
53 | int error = 0; | ||
54 | |||
55 | init_timer(&ls->ls_timer); | ||
56 | ls->ls_timer.function = dlm_wait_timer_fn; | ||
57 | ls->ls_timer.data = (long) ls; | ||
58 | ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); | ||
59 | add_timer(&ls->ls_timer); | ||
60 | |||
61 | wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); | ||
62 | del_timer_sync(&ls->ls_timer); | ||
63 | |||
64 | if (dlm_recovery_stopped(ls)) { | ||
65 | log_debug(ls, "dlm_wait_function aborted"); | ||
66 | error = -EINTR; | ||
67 | } | ||
68 | return error; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * An efficient way for all nodes to wait for all others to have a certain | ||
73 | * status. The node with the lowest nodeid polls all the others for their | ||
74 | * status (wait_status_all) and all the others poll the node with the low id | ||
75 | * for its accumulated result (wait_status_low). When all nodes have set | ||
76 | * status flag X, then status flag X_ALL will be set on the low nodeid. | ||
77 | */ | ||
78 | |||
79 | uint32_t dlm_recover_status(struct dlm_ls *ls) | ||
80 | { | ||
81 | uint32_t status; | ||
82 | spin_lock(&ls->ls_recover_lock); | ||
83 | status = ls->ls_recover_status; | ||
84 | spin_unlock(&ls->ls_recover_lock); | ||
85 | return status; | ||
86 | } | ||
87 | |||
88 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) | ||
89 | { | ||
90 | spin_lock(&ls->ls_recover_lock); | ||
91 | ls->ls_recover_status |= status; | ||
92 | spin_unlock(&ls->ls_recover_lock); | ||
93 | } | ||
94 | |||
95 | static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) | ||
96 | { | ||
97 | struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf; | ||
98 | struct dlm_member *memb; | ||
99 | int error = 0, delay; | ||
100 | |||
101 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
102 | delay = 0; | ||
103 | for (;;) { | ||
104 | if (dlm_recovery_stopped(ls)) { | ||
105 | error = -EINTR; | ||
106 | goto out; | ||
107 | } | ||
108 | |||
109 | error = dlm_rcom_status(ls, memb->nodeid); | ||
110 | if (error) | ||
111 | goto out; | ||
112 | |||
113 | if (rc->rc_result & wait_status) | ||
114 | break; | ||
115 | if (delay < 1000) | ||
116 | delay += 20; | ||
117 | msleep(delay); | ||
118 | } | ||
119 | } | ||
120 | out: | ||
121 | return error; | ||
122 | } | ||
123 | |||
124 | static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) | ||
125 | { | ||
126 | struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf; | ||
127 | int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; | ||
128 | |||
129 | for (;;) { | ||
130 | if (dlm_recovery_stopped(ls)) { | ||
131 | error = -EINTR; | ||
132 | goto out; | ||
133 | } | ||
134 | |||
135 | error = dlm_rcom_status(ls, nodeid); | ||
136 | if (error) | ||
137 | break; | ||
138 | |||
139 | if (rc->rc_result & wait_status) | ||
140 | break; | ||
141 | if (delay < 1000) | ||
142 | delay += 20; | ||
143 | msleep(delay); | ||
144 | } | ||
145 | out: | ||
146 | return error; | ||
147 | } | ||
148 | |||
149 | static int wait_status(struct dlm_ls *ls, uint32_t status) | ||
150 | { | ||
151 | uint32_t status_all = status << 1; | ||
152 | int error; | ||
153 | |||
154 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { | ||
155 | error = wait_status_all(ls, status); | ||
156 | if (!error) | ||
157 | dlm_set_recover_status(ls, status_all); | ||
158 | } else | ||
159 | error = wait_status_low(ls, status_all); | ||
160 | |||
161 | return error; | ||
162 | } | ||
163 | |||
164 | int dlm_recover_members_wait(struct dlm_ls *ls) | ||
165 | { | ||
166 | return wait_status(ls, DLM_RS_NODES); | ||
167 | } | ||
168 | |||
169 | int dlm_recover_directory_wait(struct dlm_ls *ls) | ||
170 | { | ||
171 | return wait_status(ls, DLM_RS_DIR); | ||
172 | } | ||
173 | |||
174 | int dlm_recover_locks_wait(struct dlm_ls *ls) | ||
175 | { | ||
176 | return wait_status(ls, DLM_RS_LOCKS); | ||
177 | } | ||
178 | |||
179 | int dlm_recover_done_wait(struct dlm_ls *ls) | ||
180 | { | ||
181 | return wait_status(ls, DLM_RS_DONE); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * The recover_list contains all the rsb's for which we've requested the new | ||
186 | * master nodeid. As replies are returned from the resource directories the | ||
187 | * rsb's are removed from the list. When the list is empty we're done. | ||
188 | * | ||
189 | * The recover_list is later similarly used for all rsb's for which we've sent | ||
190 | * new lkb's and need to receive new corresponding lkid's. | ||
191 | * | ||
192 | * We use the address of the rsb struct as a simple local identifier for the | ||
193 | * rsb so we can match an rcom reply with the rsb it was sent for. | ||
194 | */ | ||
195 | |||
196 | static int recover_list_empty(struct dlm_ls *ls) | ||
197 | { | ||
198 | int empty; | ||
199 | |||
200 | spin_lock(&ls->ls_recover_list_lock); | ||
201 | empty = list_empty(&ls->ls_recover_list); | ||
202 | spin_unlock(&ls->ls_recover_list_lock); | ||
203 | |||
204 | return empty; | ||
205 | } | ||
206 | |||
207 | static void recover_list_add(struct dlm_rsb *r) | ||
208 | { | ||
209 | struct dlm_ls *ls = r->res_ls; | ||
210 | |||
211 | spin_lock(&ls->ls_recover_list_lock); | ||
212 | if (list_empty(&r->res_recover_list)) { | ||
213 | list_add_tail(&r->res_recover_list, &ls->ls_recover_list); | ||
214 | ls->ls_recover_list_count++; | ||
215 | dlm_hold_rsb(r); | ||
216 | } | ||
217 | spin_unlock(&ls->ls_recover_list_lock); | ||
218 | } | ||
219 | |||
220 | static void recover_list_del(struct dlm_rsb *r) | ||
221 | { | ||
222 | struct dlm_ls *ls = r->res_ls; | ||
223 | |||
224 | spin_lock(&ls->ls_recover_list_lock); | ||
225 | list_del_init(&r->res_recover_list); | ||
226 | ls->ls_recover_list_count--; | ||
227 | spin_unlock(&ls->ls_recover_list_lock); | ||
228 | |||
229 | dlm_put_rsb(r); | ||
230 | } | ||
231 | |||
232 | static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id) | ||
233 | { | ||
234 | struct dlm_rsb *r = NULL; | ||
235 | |||
236 | spin_lock(&ls->ls_recover_list_lock); | ||
237 | |||
238 | list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) { | ||
239 | if (id == (unsigned long) r) | ||
240 | goto out; | ||
241 | } | ||
242 | r = NULL; | ||
243 | out: | ||
244 | spin_unlock(&ls->ls_recover_list_lock); | ||
245 | return r; | ||
246 | } | ||
247 | |||
248 | static void recover_list_clear(struct dlm_ls *ls) | ||
249 | { | ||
250 | struct dlm_rsb *r, *s; | ||
251 | |||
252 | spin_lock(&ls->ls_recover_list_lock); | ||
253 | list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) { | ||
254 | list_del_init(&r->res_recover_list); | ||
255 | dlm_put_rsb(r); | ||
256 | ls->ls_recover_list_count--; | ||
257 | } | ||
258 | |||
259 | if (ls->ls_recover_list_count != 0) { | ||
260 | log_error(ls, "warning: recover_list_count %d", | ||
261 | ls->ls_recover_list_count); | ||
262 | ls->ls_recover_list_count = 0; | ||
263 | } | ||
264 | spin_unlock(&ls->ls_recover_list_lock); | ||
265 | } | ||
266 | |||
267 | |||
268 | /* Master recovery: find new master node for rsb's that were | ||
269 | mastered on nodes that have been removed. | ||
270 | |||
271 | dlm_recover_masters | ||
272 | recover_master | ||
273 | dlm_send_rcom_lookup -> receive_rcom_lookup | ||
274 | dlm_dir_lookup | ||
275 | receive_rcom_lookup_reply <- | ||
276 | dlm_recover_master_reply | ||
277 | set_new_master | ||
278 | set_master_lkbs | ||
279 | set_lock_master | ||
280 | */ | ||
281 | |||
282 | /* | ||
283 | * Set the lock master for all LKBs in a lock queue | ||
284 | * If we are the new master of the rsb, we may have received new | ||
285 | * MSTCPY locks from other nodes already which we need to ignore | ||
286 | * when setting the new nodeid. | ||
287 | */ | ||
288 | |||
289 | static void set_lock_master(struct list_head *queue, int nodeid) | ||
290 | { | ||
291 | struct dlm_lkb *lkb; | ||
292 | |||
293 | list_for_each_entry(lkb, queue, lkb_statequeue) | ||
294 | if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) | ||
295 | lkb->lkb_nodeid = nodeid; | ||
296 | } | ||
297 | |||
298 | static void set_master_lkbs(struct dlm_rsb *r) | ||
299 | { | ||
300 | set_lock_master(&r->res_grantqueue, r->res_nodeid); | ||
301 | set_lock_master(&r->res_convertqueue, r->res_nodeid); | ||
302 | set_lock_master(&r->res_waitqueue, r->res_nodeid); | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * Propogate the new master nodeid to locks | ||
307 | * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. | ||
308 | * The NEW_MASTER2 flag tells recover_lvb() which rsb's to consider. | ||
309 | */ | ||
310 | |||
311 | static void set_new_master(struct dlm_rsb *r, int nodeid) | ||
312 | { | ||
313 | lock_rsb(r); | ||
314 | r->res_nodeid = nodeid; | ||
315 | set_master_lkbs(r); | ||
316 | rsb_set_flag(r, RSB_NEW_MASTER); | ||
317 | rsb_set_flag(r, RSB_NEW_MASTER2); | ||
318 | unlock_rsb(r); | ||
319 | } | ||
320 | |||
321 | /* | ||
322 | * We do async lookups on rsb's that need new masters. The rsb's | ||
323 | * waiting for a lookup reply are kept on the recover_list. | ||
324 | */ | ||
325 | |||
326 | static int recover_master(struct dlm_rsb *r) | ||
327 | { | ||
328 | struct dlm_ls *ls = r->res_ls; | ||
329 | int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); | ||
330 | |||
331 | dir_nodeid = dlm_dir_nodeid(r); | ||
332 | |||
333 | if (dir_nodeid == our_nodeid) { | ||
334 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, | ||
335 | r->res_length, &ret_nodeid); | ||
336 | if (error) | ||
337 | log_error(ls, "recover dir lookup error %d", error); | ||
338 | |||
339 | if (ret_nodeid == our_nodeid) | ||
340 | ret_nodeid = 0; | ||
341 | set_new_master(r, ret_nodeid); | ||
342 | } else { | ||
343 | recover_list_add(r); | ||
344 | error = dlm_send_rcom_lookup(r, dir_nodeid); | ||
345 | } | ||
346 | |||
347 | return error; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * When not using a directory, most resource names will hash to a new static | ||
352 | * master nodeid and the resource will need to be remastered. | ||
353 | */ | ||
354 | |||
355 | static int recover_master_static(struct dlm_rsb *r) | ||
356 | { | ||
357 | int master = dlm_dir_nodeid(r); | ||
358 | |||
359 | if (master == dlm_our_nodeid()) | ||
360 | master = 0; | ||
361 | |||
362 | if (r->res_nodeid != master) { | ||
363 | if (is_master(r)) | ||
364 | dlm_purge_mstcpy_locks(r); | ||
365 | set_new_master(r, master); | ||
366 | return 1; | ||
367 | } | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * Go through local root resources and for each rsb which has a master which | ||
373 | * has departed, get the new master nodeid from the directory. The dir will | ||
374 | * assign mastery to the first node to look up the new master. That means | ||
375 | * we'll discover in this lookup if we're the new master of any rsb's. | ||
376 | * | ||
377 | * We fire off all the dir lookup requests individually and asynchronously to | ||
378 | * the correct dir node. | ||
379 | */ | ||
380 | |||
381 | int dlm_recover_masters(struct dlm_ls *ls) | ||
382 | { | ||
383 | struct dlm_rsb *r; | ||
384 | int error = 0, count = 0; | ||
385 | |||
386 | log_debug(ls, "dlm_recover_masters"); | ||
387 | |||
388 | down_read(&ls->ls_root_sem); | ||
389 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
390 | if (dlm_recovery_stopped(ls)) { | ||
391 | up_read(&ls->ls_root_sem); | ||
392 | error = -EINTR; | ||
393 | goto out; | ||
394 | } | ||
395 | |||
396 | if (dlm_no_directory(ls)) | ||
397 | count += recover_master_static(r); | ||
398 | else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { | ||
399 | recover_master(r); | ||
400 | count++; | ||
401 | } | ||
402 | |||
403 | schedule(); | ||
404 | } | ||
405 | up_read(&ls->ls_root_sem); | ||
406 | |||
407 | log_debug(ls, "dlm_recover_masters %d resources", count); | ||
408 | |||
409 | error = dlm_wait_function(ls, &recover_list_empty); | ||
410 | out: | ||
411 | if (error) | ||
412 | recover_list_clear(ls); | ||
413 | return error; | ||
414 | } | ||
415 | |||
416 | int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
417 | { | ||
418 | struct dlm_rsb *r; | ||
419 | int nodeid; | ||
420 | |||
421 | r = recover_list_find(ls, rc->rc_id); | ||
422 | if (!r) { | ||
423 | log_error(ls, "dlm_recover_master_reply no id %"PRIx64"", | ||
424 | rc->rc_id); | ||
425 | goto out; | ||
426 | } | ||
427 | |||
428 | nodeid = rc->rc_result; | ||
429 | if (nodeid == dlm_our_nodeid()) | ||
430 | nodeid = 0; | ||
431 | |||
432 | set_new_master(r, nodeid); | ||
433 | recover_list_del(r); | ||
434 | |||
435 | if (recover_list_empty(ls)) | ||
436 | wake_up(&ls->ls_wait_general); | ||
437 | out: | ||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | |||
442 | /* Lock recovery: rebuild the process-copy locks we hold on a | ||
443 | remastered rsb on the new rsb master. | ||
444 | |||
445 | dlm_recover_locks | ||
446 | recover_locks | ||
447 | recover_locks_queue | ||
448 | dlm_send_rcom_lock -> receive_rcom_lock | ||
449 | dlm_recover_master_copy | ||
450 | receive_rcom_lock_reply <- | ||
451 | dlm_recover_process_copy | ||
452 | */ | ||
453 | |||
454 | |||
455 | /* | ||
456 | * keep a count of the number of lkb's we send to the new master; when we get | ||
457 | * an equal number of replies then recovery for the rsb is done | ||
458 | */ | ||
459 | |||
460 | static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head) | ||
461 | { | ||
462 | struct dlm_lkb *lkb; | ||
463 | int error = 0; | ||
464 | |||
465 | list_for_each_entry(lkb, head, lkb_statequeue) { | ||
466 | error = dlm_send_rcom_lock(r, lkb); | ||
467 | if (error) | ||
468 | break; | ||
469 | r->res_recover_locks_count++; | ||
470 | } | ||
471 | |||
472 | return error; | ||
473 | } | ||
474 | |||
475 | static int all_queues_empty(struct dlm_rsb *r) | ||
476 | { | ||
477 | if (!list_empty(&r->res_grantqueue) || | ||
478 | !list_empty(&r->res_convertqueue) || | ||
479 | !list_empty(&r->res_waitqueue)) | ||
480 | return FALSE; | ||
481 | return TRUE; | ||
482 | } | ||
483 | |||
484 | static int recover_locks(struct dlm_rsb *r) | ||
485 | { | ||
486 | int error = 0; | ||
487 | |||
488 | lock_rsb(r); | ||
489 | if (all_queues_empty(r)) | ||
490 | goto out; | ||
491 | |||
492 | DLM_ASSERT(!r->res_recover_locks_count, dlm_print_rsb(r);); | ||
493 | |||
494 | error = recover_locks_queue(r, &r->res_grantqueue); | ||
495 | if (error) | ||
496 | goto out; | ||
497 | error = recover_locks_queue(r, &r->res_convertqueue); | ||
498 | if (error) | ||
499 | goto out; | ||
500 | error = recover_locks_queue(r, &r->res_waitqueue); | ||
501 | if (error) | ||
502 | goto out; | ||
503 | |||
504 | if (r->res_recover_locks_count) | ||
505 | recover_list_add(r); | ||
506 | else | ||
507 | rsb_clear_flag(r, RSB_NEW_MASTER); | ||
508 | out: | ||
509 | unlock_rsb(r); | ||
510 | return error; | ||
511 | } | ||
512 | |||
513 | int dlm_recover_locks(struct dlm_ls *ls) | ||
514 | { | ||
515 | struct dlm_rsb *r; | ||
516 | int error, count = 0; | ||
517 | |||
518 | log_debug(ls, "dlm_recover_locks"); | ||
519 | |||
520 | down_read(&ls->ls_root_sem); | ||
521 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
522 | if (is_master(r)) { | ||
523 | rsb_clear_flag(r, RSB_NEW_MASTER); | ||
524 | continue; | ||
525 | } | ||
526 | |||
527 | if (!rsb_flag(r, RSB_NEW_MASTER)) | ||
528 | continue; | ||
529 | |||
530 | if (dlm_recovery_stopped(ls)) { | ||
531 | error = -EINTR; | ||
532 | up_read(&ls->ls_root_sem); | ||
533 | goto out; | ||
534 | } | ||
535 | |||
536 | error = recover_locks(r); | ||
537 | if (error) { | ||
538 | up_read(&ls->ls_root_sem); | ||
539 | goto out; | ||
540 | } | ||
541 | |||
542 | count += r->res_recover_locks_count; | ||
543 | } | ||
544 | up_read(&ls->ls_root_sem); | ||
545 | |||
546 | log_debug(ls, "dlm_recover_locks %d locks", count); | ||
547 | |||
548 | error = dlm_wait_function(ls, &recover_list_empty); | ||
549 | out: | ||
550 | if (error) | ||
551 | recover_list_clear(ls); | ||
552 | else | ||
553 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | ||
554 | return error; | ||
555 | } | ||
556 | |||
557 | void dlm_recovered_lock(struct dlm_rsb *r) | ||
558 | { | ||
559 | DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_print_rsb(r);); | ||
560 | |||
561 | r->res_recover_locks_count--; | ||
562 | if (!r->res_recover_locks_count) { | ||
563 | rsb_clear_flag(r, RSB_NEW_MASTER); | ||
564 | recover_list_del(r); | ||
565 | } | ||
566 | |||
567 | if (recover_list_empty(r->res_ls)) | ||
568 | wake_up(&r->res_ls->ls_wait_general); | ||
569 | } | ||
570 | |||
571 | /* | ||
572 | * The lvb needs to be recovered on all master rsb's. This includes setting | ||
573 | * the VALNOTVALID flag if necessary, and determining the correct lvb contents | ||
574 | * based on the lvb's of the locks held on the rsb. | ||
575 | * | ||
576 | * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it | ||
577 | * was already set prior to recovery, it's not cleared, regardless of locks. | ||
578 | * | ||
579 | * The LVB contents are only considered for changing when this is a new master | ||
580 | * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with | ||
581 | * mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken | ||
582 | * from the lkb with the largest lvb sequence number. | ||
583 | */ | ||
584 | |||
585 | static void recover_lvb(struct dlm_rsb *r) | ||
586 | { | ||
587 | struct dlm_lkb *lkb, *high_lkb = NULL; | ||
588 | uint32_t high_seq = 0; | ||
589 | int lock_lvb_exists = FALSE; | ||
590 | int big_lock_exists = FALSE; | ||
591 | int lvblen = r->res_ls->ls_lvblen; | ||
592 | |||
593 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { | ||
594 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
595 | continue; | ||
596 | |||
597 | lock_lvb_exists = TRUE; | ||
598 | |||
599 | if (lkb->lkb_grmode > DLM_LOCK_CR) { | ||
600 | big_lock_exists = TRUE; | ||
601 | goto setflag; | ||
602 | } | ||
603 | |||
604 | if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) { | ||
605 | high_lkb = lkb; | ||
606 | high_seq = lkb->lkb_lvbseq; | ||
607 | } | ||
608 | } | ||
609 | |||
610 | list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { | ||
611 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | ||
612 | continue; | ||
613 | |||
614 | lock_lvb_exists = TRUE; | ||
615 | |||
616 | if (lkb->lkb_grmode > DLM_LOCK_CR) { | ||
617 | big_lock_exists = TRUE; | ||
618 | goto setflag; | ||
619 | } | ||
620 | |||
621 | if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) { | ||
622 | high_lkb = lkb; | ||
623 | high_seq = lkb->lkb_lvbseq; | ||
624 | } | ||
625 | } | ||
626 | |||
627 | setflag: | ||
628 | if (!lock_lvb_exists) | ||
629 | goto out; | ||
630 | |||
631 | if (!big_lock_exists) | ||
632 | rsb_set_flag(r, RSB_VALNOTVALID); | ||
633 | |||
634 | /* don't mess with the lvb unless we're the new master */ | ||
635 | if (!rsb_flag(r, RSB_NEW_MASTER2)) | ||
636 | goto out; | ||
637 | |||
638 | if (!r->res_lvbptr) { | ||
639 | r->res_lvbptr = allocate_lvb(r->res_ls); | ||
640 | if (!r->res_lvbptr) | ||
641 | goto out; | ||
642 | } | ||
643 | |||
644 | if (big_lock_exists) { | ||
645 | r->res_lvbseq = lkb->lkb_lvbseq; | ||
646 | memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen); | ||
647 | } else if (high_lkb) { | ||
648 | r->res_lvbseq = high_lkb->lkb_lvbseq; | ||
649 | memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen); | ||
650 | } else { | ||
651 | r->res_lvbseq = 0; | ||
652 | memset(r->res_lvbptr, 0, lvblen); | ||
653 | } | ||
654 | out: | ||
655 | return; | ||
656 | } | ||
657 | |||
658 | /* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks | ||
659 | converting PR->CW or CW->PR need to have their lkb_grmode set. */ | ||
660 | |||
661 | static void recover_conversion(struct dlm_rsb *r) | ||
662 | { | ||
663 | struct dlm_lkb *lkb; | ||
664 | int grmode = -1; | ||
665 | |||
666 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { | ||
667 | if (lkb->lkb_grmode == DLM_LOCK_PR || | ||
668 | lkb->lkb_grmode == DLM_LOCK_CW) { | ||
669 | grmode = lkb->lkb_grmode; | ||
670 | break; | ||
671 | } | ||
672 | } | ||
673 | |||
674 | list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { | ||
675 | if (lkb->lkb_grmode != DLM_LOCK_IV) | ||
676 | continue; | ||
677 | if (grmode == -1) | ||
678 | lkb->lkb_grmode = lkb->lkb_rqmode; | ||
679 | else | ||
680 | lkb->lkb_grmode = grmode; | ||
681 | } | ||
682 | } | ||
683 | |||
684 | void dlm_recover_rsbs(struct dlm_ls *ls) | ||
685 | { | ||
686 | struct dlm_rsb *r; | ||
687 | int count = 0; | ||
688 | |||
689 | log_debug(ls, "dlm_recover_rsbs"); | ||
690 | |||
691 | down_read(&ls->ls_root_sem); | ||
692 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
693 | lock_rsb(r); | ||
694 | if (is_master(r)) { | ||
695 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) | ||
696 | recover_conversion(r); | ||
697 | recover_lvb(r); | ||
698 | count++; | ||
699 | } | ||
700 | rsb_clear_flag(r, RSB_RECOVER_CONVERT); | ||
701 | unlock_rsb(r); | ||
702 | } | ||
703 | up_read(&ls->ls_root_sem); | ||
704 | |||
705 | log_debug(ls, "dlm_recover_rsbs %d rsbs", count); | ||
706 | } | ||
707 | |||
708 | /* Create a single list of all root rsb's to be used during recovery */ | ||
709 | |||
710 | int dlm_create_root_list(struct dlm_ls *ls) | ||
711 | { | ||
712 | struct dlm_rsb *r; | ||
713 | int i, error = 0; | ||
714 | |||
715 | down_write(&ls->ls_root_sem); | ||
716 | if (!list_empty(&ls->ls_root_list)) { | ||
717 | log_error(ls, "root list not empty"); | ||
718 | error = -EINVAL; | ||
719 | goto out; | ||
720 | } | ||
721 | |||
722 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
723 | read_lock(&ls->ls_rsbtbl[i].lock); | ||
724 | list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { | ||
725 | list_add(&r->res_root_list, &ls->ls_root_list); | ||
726 | dlm_hold_rsb(r); | ||
727 | } | ||
728 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
729 | } | ||
730 | out: | ||
731 | up_write(&ls->ls_root_sem); | ||
732 | return error; | ||
733 | } | ||
734 | |||
735 | void dlm_release_root_list(struct dlm_ls *ls) | ||
736 | { | ||
737 | struct dlm_rsb *r, *safe; | ||
738 | |||
739 | down_write(&ls->ls_root_sem); | ||
740 | list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) { | ||
741 | list_del_init(&r->res_root_list); | ||
742 | dlm_put_rsb(r); | ||
743 | } | ||
744 | up_write(&ls->ls_root_sem); | ||
745 | } | ||
746 | |||
747 | void dlm_clear_toss_list(struct dlm_ls *ls) | ||
748 | { | ||
749 | struct dlm_rsb *r, *safe; | ||
750 | int i; | ||
751 | |||
752 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
753 | write_lock(&ls->ls_rsbtbl[i].lock); | ||
754 | list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, | ||
755 | res_hashchain) { | ||
756 | list_del(&r->res_hashchain); | ||
757 | free_rsb(r); | ||
758 | } | ||
759 | write_unlock(&ls->ls_rsbtbl[i].lock); | ||
760 | } | ||
761 | } | ||
762 | |||
diff --git a/fs/dlm/recover.h b/fs/dlm/recover.h new file mode 100644 index 000000000000..ebd0363f1e08 --- /dev/null +++ b/fs/dlm/recover.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __RECOVER_DOT_H__ | ||
15 | #define __RECOVER_DOT_H__ | ||
16 | |||
17 | int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)); | ||
18 | uint32_t dlm_recover_status(struct dlm_ls *ls); | ||
19 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status); | ||
20 | int dlm_recover_members_wait(struct dlm_ls *ls); | ||
21 | int dlm_recover_directory_wait(struct dlm_ls *ls); | ||
22 | int dlm_recover_locks_wait(struct dlm_ls *ls); | ||
23 | int dlm_recover_done_wait(struct dlm_ls *ls); | ||
24 | int dlm_recover_masters(struct dlm_ls *ls); | ||
25 | int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc); | ||
26 | int dlm_recover_locks(struct dlm_ls *ls); | ||
27 | void dlm_recovered_lock(struct dlm_rsb *r); | ||
28 | int dlm_create_root_list(struct dlm_ls *ls); | ||
29 | void dlm_release_root_list(struct dlm_ls *ls); | ||
30 | void dlm_clear_toss_list(struct dlm_ls *ls); | ||
31 | void dlm_recover_rsbs(struct dlm_ls *ls); | ||
32 | |||
33 | #endif /* __RECOVER_DOT_H__ */ | ||
34 | |||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c new file mode 100644 index 000000000000..06e4f7cab6e7 --- /dev/null +++ b/fs/dlm/recoverd.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #include "dlm_internal.h" | ||
15 | #include "lockspace.h" | ||
16 | #include "member.h" | ||
17 | #include "dir.h" | ||
18 | #include "ast.h" | ||
19 | #include "recover.h" | ||
20 | #include "lowcomms.h" | ||
21 | #include "lock.h" | ||
22 | #include "requestqueue.h" | ||
23 | #include "recoverd.h" | ||
24 | |||
25 | |||
26 | /* If the start for which we're re-enabling locking (seq) has been superseded | ||
27 | by a newer stop (ls_recover_seq), we need to leave locking disabled. */ | ||
28 | |||
29 | static int enable_locking(struct dlm_ls *ls, uint64_t seq) | ||
30 | { | ||
31 | int error = -EINTR; | ||
32 | |||
33 | spin_lock(&ls->ls_recover_lock); | ||
34 | if (ls->ls_recover_seq == seq) { | ||
35 | set_bit(LSFL_RUNNING, &ls->ls_flags); | ||
36 | up_write(&ls->ls_in_recovery); | ||
37 | error = 0; | ||
38 | } | ||
39 | spin_unlock(&ls->ls_recover_lock); | ||
40 | return error; | ||
41 | } | ||
42 | |||
43 | static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | ||
44 | { | ||
45 | unsigned long start; | ||
46 | int error, neg = 0; | ||
47 | |||
48 | log_debug(ls, "recover %"PRIx64"", rv->seq); | ||
49 | |||
50 | down(&ls->ls_recoverd_active); | ||
51 | |||
52 | /* | ||
53 | * Suspending and resuming dlm_astd ensures that no lkb's from this ls | ||
54 | * will be processed by dlm_astd during recovery. | ||
55 | */ | ||
56 | |||
57 | dlm_astd_suspend(); | ||
58 | dlm_astd_resume(); | ||
59 | |||
60 | /* | ||
61 | * This list of root rsb's will be the basis of most of the recovery | ||
62 | * routines. | ||
63 | */ | ||
64 | |||
65 | dlm_create_root_list(ls); | ||
66 | |||
67 | /* | ||
68 | * Free all the tossed rsb's so we don't have to recover them. | ||
69 | */ | ||
70 | |||
71 | dlm_clear_toss_list(ls); | ||
72 | |||
73 | /* | ||
74 | * Add or remove nodes from the lockspace's ls_nodes list. | ||
75 | * Also waits for all nodes to complete dlm_recover_members. | ||
76 | */ | ||
77 | |||
78 | error = dlm_recover_members(ls, rv, &neg); | ||
79 | if (error) { | ||
80 | log_error(ls, "recover_members failed %d", error); | ||
81 | goto fail; | ||
82 | } | ||
83 | start = jiffies; | ||
84 | |||
85 | /* | ||
86 | * Rebuild our own share of the directory by collecting from all other | ||
87 | * nodes their master rsb names that hash to us. | ||
88 | */ | ||
89 | |||
90 | error = dlm_recover_directory(ls); | ||
91 | if (error) { | ||
92 | log_error(ls, "recover_directory failed %d", error); | ||
93 | goto fail; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Purge directory-related requests that are saved in requestqueue. | ||
98 | * All dir requests from before recovery are invalid now due to the dir | ||
99 | * rebuild and will be resent by the requesting nodes. | ||
100 | */ | ||
101 | |||
102 | dlm_purge_requestqueue(ls); | ||
103 | |||
104 | /* | ||
105 | * Wait for all nodes to complete directory rebuild. | ||
106 | */ | ||
107 | |||
108 | error = dlm_recover_directory_wait(ls); | ||
109 | if (error) { | ||
110 | log_error(ls, "recover_directory_wait failed %d", error); | ||
111 | goto fail; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * We may have outstanding operations that are waiting for a reply from | ||
116 | * a failed node. Mark these to be resent after recovery. Unlock and | ||
117 | * cancel ops can just be completed. | ||
118 | */ | ||
119 | |||
120 | dlm_recover_waiters_pre(ls); | ||
121 | |||
122 | error = dlm_recovery_stopped(ls); | ||
123 | if (error) | ||
124 | goto fail; | ||
125 | |||
126 | if (neg || dlm_no_directory(ls)) { | ||
127 | /* | ||
128 | * Clear lkb's for departed nodes. | ||
129 | */ | ||
130 | |||
131 | dlm_purge_locks(ls); | ||
132 | |||
133 | /* | ||
134 | * Get new master nodeid's for rsb's that were mastered on | ||
135 | * departed nodes. | ||
136 | */ | ||
137 | |||
138 | error = dlm_recover_masters(ls); | ||
139 | if (error) { | ||
140 | log_error(ls, "recover_masters failed %d", error); | ||
141 | goto fail; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Send our locks on remastered rsb's to the new masters. | ||
146 | */ | ||
147 | |||
148 | error = dlm_recover_locks(ls); | ||
149 | if (error) { | ||
150 | log_error(ls, "recover_locks failed %d", error); | ||
151 | goto fail; | ||
152 | } | ||
153 | |||
154 | error = dlm_recover_locks_wait(ls); | ||
155 | if (error) { | ||
156 | log_error(ls, "recover_locks_wait failed %d", error); | ||
157 | goto fail; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Finalize state in master rsb's now that all locks can be | ||
162 | * checked. This includes conversion resolution and lvb | ||
163 | * settings. | ||
164 | */ | ||
165 | |||
166 | dlm_recover_rsbs(ls); | ||
167 | } | ||
168 | |||
169 | dlm_release_root_list(ls); | ||
170 | |||
171 | dlm_set_recover_status(ls, DLM_RS_DONE); | ||
172 | error = dlm_recover_done_wait(ls); | ||
173 | if (error) { | ||
174 | log_error(ls, "recover_done_wait failed %d", error); | ||
175 | goto fail; | ||
176 | } | ||
177 | |||
178 | dlm_clear_members_gone(ls); | ||
179 | |||
180 | error = enable_locking(ls, rv->seq); | ||
181 | if (error) { | ||
182 | log_error(ls, "enable_locking failed %d", error); | ||
183 | goto fail; | ||
184 | } | ||
185 | |||
186 | error = dlm_process_requestqueue(ls); | ||
187 | if (error) { | ||
188 | log_error(ls, "process_requestqueue failed %d", error); | ||
189 | goto fail; | ||
190 | } | ||
191 | |||
192 | error = dlm_recover_waiters_post(ls); | ||
193 | if (error) { | ||
194 | log_error(ls, "recover_waiters_post failed %d", error); | ||
195 | goto fail; | ||
196 | } | ||
197 | |||
198 | dlm_grant_after_purge(ls); | ||
199 | |||
200 | dlm_astd_wake(); | ||
201 | |||
202 | log_debug(ls, "recover %"PRIx64" done: %u ms", rv->seq, | ||
203 | jiffies_to_msecs(jiffies - start)); | ||
204 | up(&ls->ls_recoverd_active); | ||
205 | |||
206 | return 0; | ||
207 | |||
208 | fail: | ||
209 | dlm_release_root_list(ls); | ||
210 | log_debug(ls, "recover %"PRIx64" error %d", rv->seq, error); | ||
211 | up(&ls->ls_recoverd_active); | ||
212 | return error; | ||
213 | } | ||
214 | |||
215 | static void do_ls_recovery(struct dlm_ls *ls) | ||
216 | { | ||
217 | struct dlm_recover *rv = NULL; | ||
218 | |||
219 | spin_lock(&ls->ls_recover_lock); | ||
220 | rv = ls->ls_recover_args; | ||
221 | ls->ls_recover_args = NULL; | ||
222 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | ||
223 | spin_unlock(&ls->ls_recover_lock); | ||
224 | |||
225 | if (rv) { | ||
226 | ls_recover(ls, rv); | ||
227 | kfree(rv->nodeids); | ||
228 | kfree(rv); | ||
229 | } | ||
230 | } | ||
231 | |||
232 | static int dlm_recoverd(void *arg) | ||
233 | { | ||
234 | struct dlm_ls *ls; | ||
235 | |||
236 | ls = dlm_find_lockspace_local(arg); | ||
237 | |||
238 | while (!kthread_should_stop()) { | ||
239 | set_current_state(TASK_INTERRUPTIBLE); | ||
240 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) | ||
241 | schedule(); | ||
242 | set_current_state(TASK_RUNNING); | ||
243 | |||
244 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | ||
245 | do_ls_recovery(ls); | ||
246 | } | ||
247 | |||
248 | dlm_put_lockspace(ls); | ||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | void dlm_recoverd_kick(struct dlm_ls *ls) | ||
253 | { | ||
254 | set_bit(LSFL_WORK, &ls->ls_flags); | ||
255 | wake_up_process(ls->ls_recoverd_task); | ||
256 | } | ||
257 | |||
258 | int dlm_recoverd_start(struct dlm_ls *ls) | ||
259 | { | ||
260 | struct task_struct *p; | ||
261 | int error = 0; | ||
262 | |||
263 | p = kthread_run(dlm_recoverd, ls, "dlm_recoverd"); | ||
264 | if (IS_ERR(p)) | ||
265 | error = PTR_ERR(p); | ||
266 | else | ||
267 | ls->ls_recoverd_task = p; | ||
268 | return error; | ||
269 | } | ||
270 | |||
271 | void dlm_recoverd_stop(struct dlm_ls *ls) | ||
272 | { | ||
273 | kthread_stop(ls->ls_recoverd_task); | ||
274 | } | ||
275 | |||
276 | void dlm_recoverd_suspend(struct dlm_ls *ls) | ||
277 | { | ||
278 | down(&ls->ls_recoverd_active); | ||
279 | } | ||
280 | |||
281 | void dlm_recoverd_resume(struct dlm_ls *ls) | ||
282 | { | ||
283 | up(&ls->ls_recoverd_active); | ||
284 | } | ||
285 | |||
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h new file mode 100644 index 000000000000..866657c5d69d --- /dev/null +++ b/fs/dlm/recoverd.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __RECOVERD_DOT_H__ | ||
15 | #define __RECOVERD_DOT_H__ | ||
16 | |||
17 | void dlm_recoverd_kick(struct dlm_ls *ls); | ||
18 | void dlm_recoverd_stop(struct dlm_ls *ls); | ||
19 | int dlm_recoverd_start(struct dlm_ls *ls); | ||
20 | void dlm_recoverd_suspend(struct dlm_ls *ls); | ||
21 | void dlm_recoverd_resume(struct dlm_ls *ls); | ||
22 | |||
23 | #endif /* __RECOVERD_DOT_H__ */ | ||
24 | |||
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c new file mode 100644 index 000000000000..36afe99e4f93 --- /dev/null +++ b/fs/dlm/requestqueue.c | |||
@@ -0,0 +1,184 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #include "dlm_internal.h" | ||
14 | #include "member.h" | ||
15 | #include "lock.h" | ||
16 | #include "dir.h" | ||
17 | #include "config.h" | ||
18 | #include "requestqueue.h" | ||
19 | |||
20 | struct rq_entry { | ||
21 | struct list_head list; | ||
22 | int nodeid; | ||
23 | char request[1]; | ||
24 | }; | ||
25 | |||
26 | /* | ||
27 | * Requests received while the lockspace is in recovery get added to the | ||
28 | * request queue and processed when recovery is complete. This happens when | ||
29 | * the lockspace is suspended on some nodes before it is on others, or the | ||
30 | * lockspace is enabled on some while still suspended on others. | ||
31 | */ | ||
32 | |||
33 | void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) | ||
34 | { | ||
35 | struct rq_entry *e; | ||
36 | int length = hd->h_length; | ||
37 | |||
38 | if (dlm_is_removed(ls, nodeid)) | ||
39 | return; | ||
40 | |||
41 | e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); | ||
42 | if (!e) { | ||
43 | log_print("dlm_add_requestqueue: out of memory\n"); | ||
44 | return; | ||
45 | } | ||
46 | |||
47 | e->nodeid = nodeid; | ||
48 | memcpy(e->request, hd, length); | ||
49 | |||
50 | down(&ls->ls_requestqueue_lock); | ||
51 | list_add_tail(&e->list, &ls->ls_requestqueue); | ||
52 | up(&ls->ls_requestqueue_lock); | ||
53 | } | ||
54 | |||
55 | int dlm_process_requestqueue(struct dlm_ls *ls) | ||
56 | { | ||
57 | struct rq_entry *e; | ||
58 | struct dlm_header *hd; | ||
59 | int error = 0; | ||
60 | |||
61 | down(&ls->ls_requestqueue_lock); | ||
62 | |||
63 | for (;;) { | ||
64 | if (list_empty(&ls->ls_requestqueue)) { | ||
65 | up(&ls->ls_requestqueue_lock); | ||
66 | error = 0; | ||
67 | break; | ||
68 | } | ||
69 | e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); | ||
70 | up(&ls->ls_requestqueue_lock); | ||
71 | |||
72 | hd = (struct dlm_header *) e->request; | ||
73 | error = dlm_receive_message(hd, e->nodeid, TRUE); | ||
74 | |||
75 | if (error == -EINTR) { | ||
76 | /* entry is left on requestqueue */ | ||
77 | log_debug(ls, "process_requestqueue abort eintr"); | ||
78 | break; | ||
79 | } | ||
80 | |||
81 | down(&ls->ls_requestqueue_lock); | ||
82 | list_del(&e->list); | ||
83 | kfree(e); | ||
84 | |||
85 | if (dlm_locking_stopped(ls)) { | ||
86 | log_debug(ls, "process_requestqueue abort running"); | ||
87 | up(&ls->ls_requestqueue_lock); | ||
88 | error = -EINTR; | ||
89 | break; | ||
90 | } | ||
91 | schedule(); | ||
92 | } | ||
93 | |||
94 | return error; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * After recovery is done, locking is resumed and dlm_recoverd takes all the | ||
99 | * saved requests and processes them as they would have been by dlm_recvd. At | ||
100 | * the same time, dlm_recvd will start receiving new requests from remote | ||
101 | * nodes. We want to delay dlm_recvd processing new requests until | ||
102 | * dlm_recoverd has finished processing the old saved requests. | ||
103 | */ | ||
104 | |||
105 | void dlm_wait_requestqueue(struct dlm_ls *ls) | ||
106 | { | ||
107 | for (;;) { | ||
108 | down(&ls->ls_requestqueue_lock); | ||
109 | if (list_empty(&ls->ls_requestqueue)) | ||
110 | break; | ||
111 | if (dlm_locking_stopped(ls)) | ||
112 | break; | ||
113 | up(&ls->ls_requestqueue_lock); | ||
114 | schedule(); | ||
115 | } | ||
116 | up(&ls->ls_requestqueue_lock); | ||
117 | } | ||
118 | |||
119 | static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) | ||
120 | { | ||
121 | uint32_t type = ms->m_type; | ||
122 | |||
123 | if (dlm_is_removed(ls, nodeid)) | ||
124 | return 1; | ||
125 | |||
126 | /* directory operations are always purged because the directory is | ||
127 | always rebuilt during recovery and the lookups resent */ | ||
128 | |||
129 | if (type == DLM_MSG_REMOVE || | ||
130 | type == DLM_MSG_LOOKUP || | ||
131 | type == DLM_MSG_LOOKUP_REPLY) | ||
132 | return 1; | ||
133 | |||
134 | if (!dlm_no_directory(ls)) | ||
135 | return 0; | ||
136 | |||
137 | /* with no directory, the master is likely to change as a part of | ||
138 | recovery; requests to/from the defunct master need to be purged */ | ||
139 | |||
140 | switch (type) { | ||
141 | case DLM_MSG_REQUEST: | ||
142 | case DLM_MSG_CONVERT: | ||
143 | case DLM_MSG_UNLOCK: | ||
144 | case DLM_MSG_CANCEL: | ||
145 | /* we're no longer the master of this resource, the sender | ||
146 | will resend to the new master (see waiter_needs_recovery) */ | ||
147 | |||
148 | if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid()) | ||
149 | return 1; | ||
150 | break; | ||
151 | |||
152 | case DLM_MSG_REQUEST_REPLY: | ||
153 | case DLM_MSG_CONVERT_REPLY: | ||
154 | case DLM_MSG_UNLOCK_REPLY: | ||
155 | case DLM_MSG_CANCEL_REPLY: | ||
156 | case DLM_MSG_GRANT: | ||
157 | /* this reply is from the former master of the resource, | ||
158 | we'll resend to the new master if needed */ | ||
159 | |||
160 | if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid) | ||
161 | return 1; | ||
162 | break; | ||
163 | } | ||
164 | |||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | void dlm_purge_requestqueue(struct dlm_ls *ls) | ||
169 | { | ||
170 | struct dlm_message *ms; | ||
171 | struct rq_entry *e, *safe; | ||
172 | |||
173 | down(&ls->ls_requestqueue_lock); | ||
174 | list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) { | ||
175 | ms = (struct dlm_message *) e->request; | ||
176 | |||
177 | if (purge_request(ls, ms, e->nodeid)) { | ||
178 | list_del(&e->list); | ||
179 | kfree(e); | ||
180 | } | ||
181 | } | ||
182 | up(&ls->ls_requestqueue_lock); | ||
183 | } | ||
184 | |||
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h new file mode 100644 index 000000000000..349f0d292d95 --- /dev/null +++ b/fs/dlm/requestqueue.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #ifndef __REQUESTQUEUE_DOT_H__ | ||
14 | #define __REQUESTQUEUE_DOT_H__ | ||
15 | |||
16 | void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); | ||
17 | int dlm_process_requestqueue(struct dlm_ls *ls); | ||
18 | void dlm_wait_requestqueue(struct dlm_ls *ls); | ||
19 | void dlm_purge_requestqueue(struct dlm_ls *ls); | ||
20 | |||
21 | #endif | ||
22 | |||
diff --git a/fs/dlm/util.c b/fs/dlm/util.c new file mode 100644 index 000000000000..826d122edf55 --- /dev/null +++ b/fs/dlm/util.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #include "dlm_internal.h" | ||
14 | #include "rcom.h" | ||
15 | #include "util.h" | ||
16 | |||
17 | static void header_out(struct dlm_header *hd) | ||
18 | { | ||
19 | hd->h_version = cpu_to_le32(hd->h_version); | ||
20 | hd->h_lockspace = cpu_to_le32(hd->h_lockspace); | ||
21 | hd->h_nodeid = cpu_to_le32(hd->h_nodeid); | ||
22 | hd->h_length = cpu_to_le16(hd->h_length); | ||
23 | } | ||
24 | |||
25 | static void header_in(struct dlm_header *hd) | ||
26 | { | ||
27 | hd->h_version = le32_to_cpu(hd->h_version); | ||
28 | hd->h_lockspace = le32_to_cpu(hd->h_lockspace); | ||
29 | hd->h_nodeid = le32_to_cpu(hd->h_nodeid); | ||
30 | hd->h_length = le16_to_cpu(hd->h_length); | ||
31 | } | ||
32 | |||
33 | void dlm_message_out(struct dlm_message *ms) | ||
34 | { | ||
35 | struct dlm_header *hd = (struct dlm_header *) ms; | ||
36 | |||
37 | header_out(hd); | ||
38 | |||
39 | ms->m_type = cpu_to_le32(ms->m_type); | ||
40 | ms->m_nodeid = cpu_to_le32(ms->m_nodeid); | ||
41 | ms->m_pid = cpu_to_le32(ms->m_pid); | ||
42 | ms->m_lkid = cpu_to_le32(ms->m_lkid); | ||
43 | ms->m_remid = cpu_to_le32(ms->m_remid); | ||
44 | ms->m_parent_lkid = cpu_to_le32(ms->m_parent_lkid); | ||
45 | ms->m_parent_remid = cpu_to_le32(ms->m_parent_remid); | ||
46 | ms->m_exflags = cpu_to_le32(ms->m_exflags); | ||
47 | ms->m_sbflags = cpu_to_le32(ms->m_sbflags); | ||
48 | ms->m_flags = cpu_to_le32(ms->m_flags); | ||
49 | ms->m_lvbseq = cpu_to_le32(ms->m_lvbseq); | ||
50 | ms->m_hash = cpu_to_le32(ms->m_hash); | ||
51 | ms->m_status = cpu_to_le32(ms->m_status); | ||
52 | ms->m_grmode = cpu_to_le32(ms->m_grmode); | ||
53 | ms->m_rqmode = cpu_to_le32(ms->m_rqmode); | ||
54 | ms->m_bastmode = cpu_to_le32(ms->m_bastmode); | ||
55 | ms->m_asts = cpu_to_le32(ms->m_asts); | ||
56 | ms->m_result = cpu_to_le32(ms->m_result); | ||
57 | ms->m_range[0] = cpu_to_le64(ms->m_range[0]); | ||
58 | ms->m_range[1] = cpu_to_le64(ms->m_range[1]); | ||
59 | } | ||
60 | |||
61 | void dlm_message_in(struct dlm_message *ms) | ||
62 | { | ||
63 | struct dlm_header *hd = (struct dlm_header *) ms; | ||
64 | |||
65 | header_in(hd); | ||
66 | |||
67 | ms->m_type = le32_to_cpu(ms->m_type); | ||
68 | ms->m_nodeid = le32_to_cpu(ms->m_nodeid); | ||
69 | ms->m_pid = le32_to_cpu(ms->m_pid); | ||
70 | ms->m_lkid = le32_to_cpu(ms->m_lkid); | ||
71 | ms->m_remid = le32_to_cpu(ms->m_remid); | ||
72 | ms->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid); | ||
73 | ms->m_parent_remid = le32_to_cpu(ms->m_parent_remid); | ||
74 | ms->m_exflags = le32_to_cpu(ms->m_exflags); | ||
75 | ms->m_sbflags = le32_to_cpu(ms->m_sbflags); | ||
76 | ms->m_flags = le32_to_cpu(ms->m_flags); | ||
77 | ms->m_lvbseq = le32_to_cpu(ms->m_lvbseq); | ||
78 | ms->m_hash = le32_to_cpu(ms->m_hash); | ||
79 | ms->m_status = le32_to_cpu(ms->m_status); | ||
80 | ms->m_grmode = le32_to_cpu(ms->m_grmode); | ||
81 | ms->m_rqmode = le32_to_cpu(ms->m_rqmode); | ||
82 | ms->m_bastmode = le32_to_cpu(ms->m_bastmode); | ||
83 | ms->m_asts = le32_to_cpu(ms->m_asts); | ||
84 | ms->m_result = le32_to_cpu(ms->m_result); | ||
85 | ms->m_range[0] = le64_to_cpu(ms->m_range[0]); | ||
86 | ms->m_range[1] = le64_to_cpu(ms->m_range[1]); | ||
87 | } | ||
88 | |||
89 | static void rcom_lock_out(struct rcom_lock *rl) | ||
90 | { | ||
91 | rl->rl_ownpid = cpu_to_le32(rl->rl_ownpid); | ||
92 | rl->rl_lkid = cpu_to_le32(rl->rl_lkid); | ||
93 | rl->rl_remid = cpu_to_le32(rl->rl_remid); | ||
94 | rl->rl_parent_lkid = cpu_to_le32(rl->rl_parent_lkid); | ||
95 | rl->rl_parent_remid = cpu_to_le32(rl->rl_parent_remid); | ||
96 | rl->rl_exflags = cpu_to_le32(rl->rl_exflags); | ||
97 | rl->rl_flags = cpu_to_le32(rl->rl_flags); | ||
98 | rl->rl_lvbseq = cpu_to_le32(rl->rl_lvbseq); | ||
99 | rl->rl_result = cpu_to_le32(rl->rl_result); | ||
100 | rl->rl_wait_type = cpu_to_le16(rl->rl_wait_type); | ||
101 | rl->rl_namelen = cpu_to_le16(rl->rl_namelen); | ||
102 | rl->rl_range[0] = cpu_to_le64(rl->rl_range[0]); | ||
103 | rl->rl_range[1] = cpu_to_le64(rl->rl_range[1]); | ||
104 | rl->rl_range[2] = cpu_to_le64(rl->rl_range[2]); | ||
105 | rl->rl_range[3] = cpu_to_le64(rl->rl_range[3]); | ||
106 | } | ||
107 | |||
108 | static void rcom_lock_in(struct rcom_lock *rl) | ||
109 | { | ||
110 | rl->rl_ownpid = le32_to_cpu(rl->rl_ownpid); | ||
111 | rl->rl_lkid = le32_to_cpu(rl->rl_lkid); | ||
112 | rl->rl_remid = le32_to_cpu(rl->rl_remid); | ||
113 | rl->rl_parent_lkid = le32_to_cpu(rl->rl_parent_lkid); | ||
114 | rl->rl_parent_remid = le32_to_cpu(rl->rl_parent_remid); | ||
115 | rl->rl_exflags = le32_to_cpu(rl->rl_exflags); | ||
116 | rl->rl_flags = le32_to_cpu(rl->rl_flags); | ||
117 | rl->rl_lvbseq = le32_to_cpu(rl->rl_lvbseq); | ||
118 | rl->rl_result = le32_to_cpu(rl->rl_result); | ||
119 | rl->rl_wait_type = le16_to_cpu(rl->rl_wait_type); | ||
120 | rl->rl_namelen = le16_to_cpu(rl->rl_namelen); | ||
121 | rl->rl_range[0] = le64_to_cpu(rl->rl_range[0]); | ||
122 | rl->rl_range[1] = le64_to_cpu(rl->rl_range[1]); | ||
123 | rl->rl_range[2] = le64_to_cpu(rl->rl_range[2]); | ||
124 | rl->rl_range[3] = le64_to_cpu(rl->rl_range[3]); | ||
125 | } | ||
126 | |||
127 | static void rcom_config_out(struct rcom_config *rf) | ||
128 | { | ||
129 | rf->rf_lvblen = cpu_to_le32(rf->rf_lvblen); | ||
130 | rf->rf_lsflags = cpu_to_le32(rf->rf_lsflags); | ||
131 | } | ||
132 | |||
133 | static void rcom_config_in(struct rcom_config *rf) | ||
134 | { | ||
135 | rf->rf_lvblen = le32_to_cpu(rf->rf_lvblen); | ||
136 | rf->rf_lsflags = le32_to_cpu(rf->rf_lsflags); | ||
137 | } | ||
138 | |||
139 | void dlm_rcom_out(struct dlm_rcom *rc) | ||
140 | { | ||
141 | struct dlm_header *hd = (struct dlm_header *) rc; | ||
142 | int type = rc->rc_type; | ||
143 | |||
144 | header_out(hd); | ||
145 | |||
146 | rc->rc_type = cpu_to_le32(rc->rc_type); | ||
147 | rc->rc_result = cpu_to_le32(rc->rc_result); | ||
148 | rc->rc_id = cpu_to_le64(rc->rc_id); | ||
149 | |||
150 | if (type == DLM_RCOM_LOCK) | ||
151 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); | ||
152 | |||
153 | else if (type == DLM_RCOM_STATUS_REPLY) | ||
154 | rcom_config_out((struct rcom_config *) rc->rc_buf); | ||
155 | } | ||
156 | |||
157 | void dlm_rcom_in(struct dlm_rcom *rc) | ||
158 | { | ||
159 | struct dlm_header *hd = (struct dlm_header *) rc; | ||
160 | |||
161 | header_in(hd); | ||
162 | |||
163 | rc->rc_type = le32_to_cpu(rc->rc_type); | ||
164 | rc->rc_result = le32_to_cpu(rc->rc_result); | ||
165 | rc->rc_id = le64_to_cpu(rc->rc_id); | ||
166 | |||
167 | if (rc->rc_type == DLM_RCOM_LOCK) | ||
168 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); | ||
169 | |||
170 | else if (rc->rc_type == DLM_RCOM_STATUS_REPLY) | ||
171 | rcom_config_in((struct rcom_config *) rc->rc_buf); | ||
172 | } | ||
173 | |||
diff --git a/fs/dlm/util.h b/fs/dlm/util.h new file mode 100644 index 000000000000..2b25915161c0 --- /dev/null +++ b/fs/dlm/util.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | ||
5 | ** | ||
6 | ** This copyrighted material is made available to anyone wishing to use, | ||
7 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
8 | ** of the GNU General Public License v.2. | ||
9 | ** | ||
10 | ******************************************************************************* | ||
11 | ******************************************************************************/ | ||
12 | |||
13 | #ifndef __UTIL_DOT_H__ | ||
14 | #define __UTIL_DOT_H__ | ||
15 | |||
16 | void dlm_message_out(struct dlm_message *ms); | ||
17 | void dlm_message_in(struct dlm_message *ms); | ||
18 | void dlm_rcom_out(struct dlm_rcom *rc); | ||
19 | void dlm_rcom_in(struct dlm_rcom *rc); | ||
20 | |||
21 | #endif | ||
22 | |||
diff --git a/include/linux/dlm.h b/include/linux/dlm.h new file mode 100644 index 000000000000..dd324ba44d80 --- /dev/null +++ b/include/linux/dlm.h | |||
@@ -0,0 +1,312 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | #ifndef __DLM_DOT_H__ | ||
15 | #define __DLM_DOT_H__ | ||
16 | |||
17 | /* | ||
18 | * Interface to Distributed Lock Manager (DLM) | ||
19 | * routines and structures to use DLM lockspaces | ||
20 | */ | ||
21 | |||
22 | /* | ||
23 | * Lock Modes | ||
24 | */ | ||
25 | |||
26 | #define DLM_LOCK_IV -1 /* invalid */ | ||
27 | #define DLM_LOCK_NL 0 /* null */ | ||
28 | #define DLM_LOCK_CR 1 /* concurrent read */ | ||
29 | #define DLM_LOCK_CW 2 /* concurrent write */ | ||
30 | #define DLM_LOCK_PR 3 /* protected read */ | ||
31 | #define DLM_LOCK_PW 4 /* protected write */ | ||
32 | #define DLM_LOCK_EX 5 /* exclusive */ | ||
33 | |||
34 | /* | ||
35 | * Maximum size in bytes of a dlm_lock name | ||
36 | */ | ||
37 | |||
38 | #define DLM_RESNAME_MAXLEN 64 | ||
39 | |||
40 | /* | ||
41 | * Flags to dlm_lock | ||
42 | * | ||
43 | * DLM_LKF_NOQUEUE | ||
44 | * | ||
45 | * Do not queue the lock request on the wait queue if it cannot be granted | ||
46 | * immediately. If the lock cannot be granted because of this flag, DLM will | ||
47 | * either return -EAGAIN from the dlm_lock call or will return 0 from | ||
48 | * dlm_lock and -EAGAIN in the lock status block when the AST is executed. | ||
49 | * | ||
50 | * DLM_LKF_CANCEL | ||
51 | * | ||
52 | * Used to cancel a pending lock request or conversion. A converting lock is | ||
53 | * returned to its previously granted mode. | ||
54 | * | ||
55 | * DLM_LKF_CONVERT | ||
56 | * | ||
57 | * Indicates a lock conversion request. For conversions the name and namelen | ||
58 | * are ignored and the lock ID in the LKSB is used to identify the lock. | ||
59 | * | ||
60 | * DLM_LKF_VALBLK | ||
61 | * | ||
62 | * Requests DLM to return the current contents of the lock value block in the | ||
63 | * lock status block. When this flag is set in a lock conversion from PW or EX | ||
64 | * modes, DLM assigns the value specified in the lock status block to the lock | ||
65 | * value block of the lock resource. The LVB is a DLM_LVB_LEN size array | ||
66 | * containing application-specific information. | ||
67 | * | ||
68 | * DLM_LKF_QUECVT | ||
69 | * | ||
70 | * Force a conversion request to be queued, even if it is compatible with | ||
71 | * the granted modes of other locks on the same resource. | ||
72 | * | ||
73 | * DLM_LKF_IVVALBLK | ||
74 | * | ||
75 | * Invalidate the lock value block. | ||
76 | * | ||
77 | * DLM_LKF_CONVDEADLK | ||
78 | * | ||
79 | * Allows the dlm to resolve conversion deadlocks internally by demoting the | ||
80 | * granted mode of a converting lock to NL. The DLM_SBF_DEMOTED flag is | ||
81 | * returned for a conversion that's been effected by this. | ||
82 | * | ||
83 | * DLM_LKF_PERSISTENT | ||
84 | * | ||
85 | * Only relevant to locks originating in userspace. A persistent lock will not | ||
86 | * be removed if the process holding the lock exits. | ||
87 | * | ||
88 | * DLM_LKF_NODLKWT | ||
89 | * DLM_LKF_NODLCKBLK | ||
90 | * | ||
91 | * net yet implemented | ||
92 | * | ||
93 | * DLM_LKF_EXPEDITE | ||
94 | * | ||
95 | * Used only with new requests for NL mode locks. Tells the lock manager | ||
96 | * to grant the lock, ignoring other locks in convert and wait queues. | ||
97 | * | ||
98 | * DLM_LKF_NOQUEUEBAST | ||
99 | * | ||
100 | * Send blocking AST's before returning -EAGAIN to the caller. It is only | ||
101 | * used along with the NOQUEUE flag. Blocking AST's are not sent for failed | ||
102 | * NOQUEUE requests otherwise. | ||
103 | * | ||
104 | * DLM_LKF_HEADQUE | ||
105 | * | ||
106 | * Add a lock to the head of the convert or wait queue rather than the tail. | ||
107 | * | ||
108 | * DLM_LKF_NOORDER | ||
109 | * | ||
110 | * Disregard the standard grant order rules and grant a lock as soon as it | ||
111 | * is compatible with other granted locks. | ||
112 | * | ||
113 | * DLM_LKF_ORPHAN | ||
114 | * | ||
115 | * not yet implemented | ||
116 | * | ||
117 | * DLM_LKF_ALTPR | ||
118 | * | ||
119 | * If the requested mode cannot be granted immediately, try to grant the lock | ||
120 | * in PR mode instead. If this alternate mode is granted instead of the | ||
121 | * requested mode, DLM_SBF_ALTMODE is returned in the lksb. | ||
122 | * | ||
123 | * DLM_LKF_ALTCW | ||
124 | * | ||
125 | * The same as ALTPR, but the alternate mode is CW. | ||
126 | * | ||
127 | * DLM_LKF_FORCEUNLOCK | ||
128 | * | ||
129 | * Unlock the lock even if it is converting or waiting or has sublocks. | ||
130 | * Only really for use by the userland device.c code. | ||
131 | * | ||
132 | */ | ||
133 | |||
134 | #define DLM_LKF_NOQUEUE 0x00000001 | ||
135 | #define DLM_LKF_CANCEL 0x00000002 | ||
136 | #define DLM_LKF_CONVERT 0x00000004 | ||
137 | #define DLM_LKF_VALBLK 0x00000008 | ||
138 | #define DLM_LKF_QUECVT 0x00000010 | ||
139 | #define DLM_LKF_IVVALBLK 0x00000020 | ||
140 | #define DLM_LKF_CONVDEADLK 0x00000040 | ||
141 | #define DLM_LKF_PERSISTENT 0x00000080 | ||
142 | #define DLM_LKF_NODLCKWT 0x00000100 | ||
143 | #define DLM_LKF_NODLCKBLK 0x00000200 | ||
144 | #define DLM_LKF_EXPEDITE 0x00000400 | ||
145 | #define DLM_LKF_NOQUEUEBAST 0x00000800 | ||
146 | #define DLM_LKF_HEADQUE 0x00001000 | ||
147 | #define DLM_LKF_NOORDER 0x00002000 | ||
148 | #define DLM_LKF_ORPHAN 0x00004000 | ||
149 | #define DLM_LKF_ALTPR 0x00008000 | ||
150 | #define DLM_LKF_ALTCW 0x00010000 | ||
151 | #define DLM_LKF_FORCEUNLOCK 0x00020000 | ||
152 | |||
153 | /* | ||
154 | * Some return codes that are not in errno.h | ||
155 | */ | ||
156 | |||
157 | #define DLM_ECANCEL 0x10001 | ||
158 | #define DLM_EUNLOCK 0x10002 | ||
159 | |||
160 | typedef void dlm_lockspace_t; | ||
161 | |||
162 | /* | ||
163 | * Lock range structure | ||
164 | */ | ||
165 | |||
166 | struct dlm_range { | ||
167 | uint64_t ra_start; | ||
168 | uint64_t ra_end; | ||
169 | }; | ||
170 | |||
171 | /* | ||
172 | * Lock status block | ||
173 | * | ||
174 | * Use this structure to specify the contents of the lock value block. For a | ||
175 | * conversion request, this structure is used to specify the lock ID of the | ||
176 | * lock. DLM writes the status of the lock request and the lock ID assigned | ||
177 | * to the request in the lock status block. | ||
178 | * | ||
179 | * sb_lkid: the returned lock ID. It is set on new (non-conversion) requests. | ||
180 | * It is available when dlm_lock returns. | ||
181 | * | ||
182 | * sb_lvbptr: saves or returns the contents of the lock's LVB according to rules | ||
183 | * shown for the DLM_LKF_VALBLK flag. | ||
184 | * | ||
185 | * sb_flags: DLM_SBF_DEMOTED is returned if in the process of promoting a lock, | ||
186 | * it was first demoted to NL to avoid conversion deadlock. | ||
187 | * DLM_SBF_VALNOTVALID is returned if the resource's LVB is marked invalid. | ||
188 | * | ||
189 | * sb_status: the returned status of the lock request set prior to AST | ||
190 | * execution. Possible return values: | ||
191 | * | ||
192 | * 0 if lock request was successful | ||
193 | * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | ||
194 | * -ENOMEM if there is no memory to process request | ||
195 | * -EINVAL if there are invalid parameters | ||
196 | * -DLM_EUNLOCK if unlock request was successful | ||
197 | * -DLM_ECANCEL if a cancel completed successfully | ||
198 | */ | ||
199 | |||
200 | #define DLM_SBF_DEMOTED 0x01 | ||
201 | #define DLM_SBF_VALNOTVALID 0x02 | ||
202 | #define DLM_SBF_ALTMODE 0x04 | ||
203 | |||
204 | struct dlm_lksb { | ||
205 | int sb_status; | ||
206 | uint32_t sb_lkid; | ||
207 | char sb_flags; | ||
208 | char * sb_lvbptr; | ||
209 | }; | ||
210 | |||
211 | |||
212 | #ifdef __KERNEL__ | ||
213 | |||
214 | #define DLM_LSFL_NODIR 0x00000001 | ||
215 | |||
216 | /* | ||
217 | * dlm_new_lockspace | ||
218 | * | ||
219 | * Starts a lockspace with the given name. If the named lockspace exists in | ||
220 | * the cluster, the calling node joins it. | ||
221 | */ | ||
222 | |||
223 | int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace, | ||
224 | uint32_t flags, int lvblen); | ||
225 | |||
226 | /* | ||
227 | * dlm_release_lockspace | ||
228 | * | ||
229 | * Stop a lockspace. | ||
230 | */ | ||
231 | |||
232 | int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); | ||
233 | |||
234 | /* | ||
235 | * dlm_lock | ||
236 | * | ||
237 | * Make an asyncronous request to acquire or convert a lock on a named | ||
238 | * resource. | ||
239 | * | ||
240 | * lockspace: context for the request | ||
241 | * mode: the requested mode of the lock (DLM_LOCK_) | ||
242 | * lksb: lock status block for input and async return values | ||
243 | * flags: input flags (DLM_LKF_) | ||
244 | * name: name of the resource to lock, can be binary | ||
245 | * namelen: the length in bytes of the resource name (MAX_RESNAME_LEN) | ||
246 | * parent: the lock ID of a parent lock or 0 if none | ||
247 | * lockast: function DLM executes when it completes processing the request | ||
248 | * astarg: argument passed to lockast and bast functions | ||
249 | * bast: function DLM executes when this lock later blocks another request | ||
250 | * | ||
251 | * Returns: | ||
252 | * 0 if request is successfully queued for processing | ||
253 | * -EINVAL if any input parameters are invalid | ||
254 | * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | ||
255 | * -ENOMEM if there is no memory to process request | ||
256 | * -ENOTCONN if there is a communication error | ||
257 | * | ||
258 | * If the call to dlm_lock returns an error then the operation has failed and | ||
259 | * the AST routine will not be called. If dlm_lock returns 0 it is still | ||
260 | * possible that the lock operation will fail. The AST routine will be called | ||
261 | * when the locking is complete and the status is returned in the lksb. | ||
262 | * | ||
263 | * If the AST routines or parameter are passed to a conversion operation then | ||
264 | * they will overwrite those values that were passed to a previous dlm_lock | ||
265 | * call. | ||
266 | * | ||
267 | * AST routines should not block (at least not for long), but may make | ||
268 | * any locking calls they please. | ||
269 | */ | ||
270 | |||
271 | int dlm_lock(dlm_lockspace_t *lockspace, | ||
272 | int mode, | ||
273 | struct dlm_lksb *lksb, | ||
274 | uint32_t flags, | ||
275 | void *name, | ||
276 | unsigned int namelen, | ||
277 | uint32_t parent_lkid, | ||
278 | void (*lockast) (void *astarg), | ||
279 | void *astarg, | ||
280 | void (*bast) (void *astarg, int mode), | ||
281 | struct dlm_range *range); | ||
282 | |||
283 | /* | ||
284 | * dlm_unlock | ||
285 | * | ||
286 | * Asynchronously release a lock on a resource. The AST routine is called | ||
287 | * when the resource is successfully unlocked. | ||
288 | * | ||
289 | * lockspace: context for the request | ||
290 | * lkid: the lock ID as returned in the lksb | ||
291 | * flags: input flags (DLM_LKF_) | ||
292 | * lksb: if NULL the lksb parameter passed to last lock request is used | ||
293 | * astarg: the arg used with the completion ast for the unlock | ||
294 | * | ||
295 | * Returns: | ||
296 | * 0 if request is successfully queued for processing | ||
297 | * -EINVAL if any input parameters are invalid | ||
298 | * -ENOTEMPTY if the lock still has sublocks | ||
299 | * -EBUSY if the lock is waiting for a remote lock operation | ||
300 | * -ENOTCONN if there is a communication error | ||
301 | */ | ||
302 | |||
303 | int dlm_unlock(dlm_lockspace_t *lockspace, | ||
304 | uint32_t lkid, | ||
305 | uint32_t flags, | ||
306 | struct dlm_lksb *lksb, | ||
307 | void *astarg); | ||
308 | |||
309 | #endif /* __KERNEL__ */ | ||
310 | |||
311 | #endif /* __DLM_DOT_H__ */ | ||
312 | |||
diff --git a/include/linux/dlm_device.h b/include/linux/dlm_device.h new file mode 100644 index 000000000000..5e17d295544b --- /dev/null +++ b/include/linux/dlm_device.h | |||
@@ -0,0 +1,84 @@ | |||
1 | /****************************************************************************** | ||
2 | ******************************************************************************* | ||
3 | ** | ||
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | ||
6 | ** | ||
7 | ** This copyrighted material is made available to anyone wishing to use, | ||
8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
9 | ** of the GNU General Public License v.2. | ||
10 | ** | ||
11 | ******************************************************************************* | ||
12 | ******************************************************************************/ | ||
13 | |||
14 | /* This is the device interface for dlm, most users will use a library | ||
15 | * interface. | ||
16 | */ | ||
17 | |||
18 | #define DLM_USER_LVB_LEN 32 | ||
19 | |||
20 | /* Version of the device interface */ | ||
21 | #define DLM_DEVICE_VERSION_MAJOR 3 | ||
22 | #define DLM_DEVICE_VERSION_MINOR 0 | ||
23 | #define DLM_DEVICE_VERSION_PATCH 0 | ||
24 | |||
25 | /* struct passed to the lock write */ | ||
26 | struct dlm_lock_params { | ||
27 | __u8 mode; | ||
28 | __u16 flags; | ||
29 | __u32 lkid; | ||
30 | __u32 parent; | ||
31 | struct dlm_range range; | ||
32 | __u8 namelen; | ||
33 | void __user *castparam; | ||
34 | void __user *castaddr; | ||
35 | void __user *bastparam; | ||
36 | void __user *bastaddr; | ||
37 | struct dlm_lksb __user *lksb; | ||
38 | char lvb[DLM_USER_LVB_LEN]; | ||
39 | char name[1]; | ||
40 | }; | ||
41 | |||
42 | struct dlm_lspace_params { | ||
43 | __u32 flags; | ||
44 | __u32 minor; | ||
45 | char name[1]; | ||
46 | }; | ||
47 | |||
48 | struct dlm_write_request { | ||
49 | __u32 version[3]; | ||
50 | __u8 cmd; | ||
51 | |||
52 | union { | ||
53 | struct dlm_lock_params lock; | ||
54 | struct dlm_lspace_params lspace; | ||
55 | } i; | ||
56 | }; | ||
57 | |||
58 | /* struct read from the "device" fd, | ||
59 | consists mainly of userspace pointers for the library to use */ | ||
60 | struct dlm_lock_result { | ||
61 | __u32 length; | ||
62 | void __user * user_astaddr; | ||
63 | void __user * user_astparam; | ||
64 | struct dlm_lksb __user * user_lksb; | ||
65 | struct dlm_lksb lksb; | ||
66 | __u8 bast_mode; | ||
67 | /* Offsets may be zero if no data is present */ | ||
68 | __u32 lvb_offset; | ||
69 | }; | ||
70 | |||
71 | /* Commands passed to the device */ | ||
72 | #define DLM_USER_LOCK 1 | ||
73 | #define DLM_USER_UNLOCK 2 | ||
74 | #define DLM_USER_QUERY 3 | ||
75 | #define DLM_USER_CREATE_LOCKSPACE 4 | ||
76 | #define DLM_USER_REMOVE_LOCKSPACE 5 | ||
77 | |||
78 | /* Arbitrary length restriction */ | ||
79 | #define MAX_LS_NAME_LEN 64 | ||
80 | |||
81 | /* Lockspace flags */ | ||
82 | #define DLM_USER_LSFLG_AUTOFREE 1 | ||
83 | #define DLM_USER_LSFLG_FORCEFREE 2 | ||
84 | |||