aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm')
-rw-r--r--fs/dlm/Kconfig2
-rw-r--r--fs/dlm/Makefile1
-rw-r--r--fs/dlm/config.c39
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/debug_fs.c186
-rw-r--r--fs/dlm/dlm_internal.h17
-rw-r--r--fs/dlm/lock.c470
-rw-r--r--fs/dlm/lock.h13
-rw-r--r--fs/dlm/lockspace.c86
-rw-r--r--fs/dlm/lowcomms.c23
-rw-r--r--fs/dlm/main.c11
-rw-r--r--fs/dlm/member.c11
-rw-r--r--fs/dlm/netlink.c153
-rw-r--r--fs/dlm/rcom.c13
-rw-r--r--fs/dlm/recoverd.c4
-rw-r--r--fs/dlm/user.c129
16 files changed, 968 insertions, 191 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e493..54bcc00ec8df 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
3 3
4config DLM 4config DLM
5 tristate "Distributed Lock Manager (DLM)" 5 tristate "Distributed Lock Manager (DLM)"
6 depends on IPV6 || IPV6=n 6 depends on SYSFS && (IPV6 || IPV6=n)
7 select CONFIGFS_FS 7 select CONFIGFS_FS
8 select IP_SCTP 8 select IP_SCTP
9 help 9 help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f39..d248e60951ba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y := ast.o \
8 member.o \ 8 member.o \
9 memory.o \ 9 memory.o \
10 midcomms.o \ 10 midcomms.o \
11 netlink.o \
11 lowcomms.o \ 12 lowcomms.o \
12 rcom.o \ 13 rcom.o \
13 recover.o \ 14 recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd1434..2f8e3c81bc19 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
90 unsigned int cl_scan_secs; 90 unsigned int cl_scan_secs;
91 unsigned int cl_log_debug; 91 unsigned int cl_log_debug;
92 unsigned int cl_protocol; 92 unsigned int cl_protocol;
93 unsigned int cl_timewarn_cs;
93}; 94};
94 95
95enum { 96enum {
@@ -103,6 +104,7 @@ enum {
103 CLUSTER_ATTR_SCAN_SECS, 104 CLUSTER_ATTR_SCAN_SECS,
104 CLUSTER_ATTR_LOG_DEBUG, 105 CLUSTER_ATTR_LOG_DEBUG,
105 CLUSTER_ATTR_PROTOCOL, 106 CLUSTER_ATTR_PROTOCOL,
107 CLUSTER_ATTR_TIMEWARN_CS,
106}; 108};
107 109
108struct cluster_attribute { 110struct cluster_attribute {
@@ -131,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
131 return len; 133 return len;
132} 134}
133 135
134#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
135 .attr = { .ca_name = __stringify(_name), \
136 .ca_mode = _mode, \
137 .ca_owner = THIS_MODULE }, \
138 .show = _read, \
139 .store = _write, \
140}
141
142#define CLUSTER_ATTR(name, check_zero) \ 136#define CLUSTER_ATTR(name, check_zero) \
143static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ 137static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
144{ \ 138{ \
@@ -162,6 +156,7 @@ CLUSTER_ATTR(toss_secs, 1);
162CLUSTER_ATTR(scan_secs, 1); 156CLUSTER_ATTR(scan_secs, 1);
163CLUSTER_ATTR(log_debug, 0); 157CLUSTER_ATTR(log_debug, 0);
164CLUSTER_ATTR(protocol, 0); 158CLUSTER_ATTR(protocol, 0);
159CLUSTER_ATTR(timewarn_cs, 1);
165 160
166static struct configfs_attribute *cluster_attrs[] = { 161static struct configfs_attribute *cluster_attrs[] = {
167 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 162 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +169,7 @@ static struct configfs_attribute *cluster_attrs[] = {
174 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, 169 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
175 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 170 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
176 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 171 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
172 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
177 NULL, 173 NULL,
178}; 174};
179 175
@@ -429,6 +425,8 @@ static struct config_group *make_cluster(struct config_group *g,
429 cl->cl_toss_secs = dlm_config.ci_toss_secs; 425 cl->cl_toss_secs = dlm_config.ci_toss_secs;
430 cl->cl_scan_secs = dlm_config.ci_scan_secs; 426 cl->cl_scan_secs = dlm_config.ci_scan_secs;
431 cl->cl_log_debug = dlm_config.ci_log_debug; 427 cl->cl_log_debug = dlm_config.ci_log_debug;
428 cl->cl_protocol = dlm_config.ci_protocol;
429 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
432 430
433 space_list = &sps->ss_group; 431 space_list = &sps->ss_group;
434 comm_list = &cms->cs_group; 432 comm_list = &cms->cs_group;
@@ -609,7 +607,7 @@ static struct clusters clusters_root = {
609int dlm_config_init(void) 607int dlm_config_init(void)
610{ 608{
611 config_group_init(&clusters_root.subsys.su_group); 609 config_group_init(&clusters_root.subsys.su_group);
612 init_MUTEX(&clusters_root.subsys.su_sem); 610 mutex_init(&clusters_root.subsys.su_mutex);
613 return configfs_register_subsystem(&clusters_root.subsys); 611 return configfs_register_subsystem(&clusters_root.subsys);
614} 612}
615 613
@@ -748,9 +746,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
748 746
749static struct space *get_space(char *name) 747static struct space *get_space(char *name)
750{ 748{
749 struct config_item *i;
750
751 if (!space_list) 751 if (!space_list)
752 return NULL; 752 return NULL;
753 return to_space(config_group_find_obj(space_list, name)); 753
754 mutex_lock(&space_list->cg_subsys->su_mutex);
755 i = config_group_find_item(space_list, name);
756 mutex_unlock(&space_list->cg_subsys->su_mutex);
757
758 return to_space(i);
754} 759}
755 760
756static void put_space(struct space *sp) 761static void put_space(struct space *sp)
@@ -767,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
767 if (!comm_list) 772 if (!comm_list)
768 return NULL; 773 return NULL;
769 774
770 down(&clusters_root.subsys.su_sem); 775 mutex_lock(&clusters_root.subsys.su_mutex);
771 776
772 list_for_each_entry(i, &comm_list->cg_children, ci_entry) { 777 list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
773 cm = to_comm(i); 778 cm = to_comm(i);
@@ -776,20 +781,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
776 if (cm->nodeid != nodeid) 781 if (cm->nodeid != nodeid)
777 continue; 782 continue;
778 found = 1; 783 found = 1;
784 config_item_get(i);
779 break; 785 break;
780 } else { 786 } else {
781 if (!cm->addr_count || 787 if (!cm->addr_count ||
782 memcmp(cm->addr[0], addr, sizeof(*addr))) 788 memcmp(cm->addr[0], addr, sizeof(*addr)))
783 continue; 789 continue;
784 found = 1; 790 found = 1;
791 config_item_get(i);
785 break; 792 break;
786 } 793 }
787 } 794 }
788 up(&clusters_root.subsys.su_sem); 795 mutex_unlock(&clusters_root.subsys.su_mutex);
789 796
790 if (found) 797 if (!found)
791 config_item_get(i);
792 else
793 cm = NULL; 798 cm = NULL;
794 return cm; 799 return cm;
795} 800}
@@ -909,6 +914,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
909#define DEFAULT_SCAN_SECS 5 914#define DEFAULT_SCAN_SECS 5
910#define DEFAULT_LOG_DEBUG 0 915#define DEFAULT_LOG_DEBUG 0
911#define DEFAULT_PROTOCOL 0 916#define DEFAULT_PROTOCOL 0
917#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
912 918
913struct dlm_config_info dlm_config = { 919struct dlm_config_info dlm_config = {
914 .ci_tcp_port = DEFAULT_TCP_PORT, 920 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +926,7 @@ struct dlm_config_info dlm_config = {
920 .ci_toss_secs = DEFAULT_TOSS_SECS, 926 .ci_toss_secs = DEFAULT_TOSS_SECS,
921 .ci_scan_secs = DEFAULT_SCAN_SECS, 927 .ci_scan_secs = DEFAULT_SCAN_SECS,
922 .ci_log_debug = DEFAULT_LOG_DEBUG, 928 .ci_log_debug = DEFAULT_LOG_DEBUG,
923 .ci_protocol = DEFAULT_PROTOCOL 929 .ci_protocol = DEFAULT_PROTOCOL,
930 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
924}; 931};
925 932
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5e..a3170fe22090 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
27 int ci_scan_secs; 27 int ci_scan_secs;
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs;
30}; 31};
31 32
32extern struct dlm_config_info dlm_config; 33extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e02..12c3bfd5e660 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
17#include <linux/debugfs.h> 17#include <linux/debugfs.h>
18 18
19#include "dlm_internal.h" 19#include "dlm_internal.h"
20#include "lock.h"
20 21
21#define DLM_DEBUG_BUF_LEN 4096 22#define DLM_DEBUG_BUF_LEN 4096
22static char debug_buf[DLM_DEBUG_BUF_LEN]; 23static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
26 27
27struct rsb_iter { 28struct rsb_iter {
28 int entry; 29 int entry;
30 int locks;
31 int header;
29 struct dlm_ls *ls; 32 struct dlm_ls *ls;
30 struct list_head *next; 33 struct list_head *next;
31 struct dlm_rsb *rsb; 34 struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
57 } 60 }
58} 61}
59 62
60static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, 63static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
61 struct dlm_rsb *res) 64 struct dlm_rsb *res)
62{ 65{
63 seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); 66 seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
64 67
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
85 struct dlm_lkb *lkb; 88 struct dlm_lkb *lkb;
86 int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; 89 int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
87 90
91 lock_rsb(res);
92
88 seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); 93 seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
89 for (i = 0; i < res->res_length; i++) { 94 for (i = 0; i < res->res_length; i++) {
90 if (isprint(res->res_name[i])) 95 if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
129 /* Print the locks attached to this resource */ 134 /* Print the locks attached to this resource */
130 seq_printf(s, "Granted Queue\n"); 135 seq_printf(s, "Granted Queue\n");
131 list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) 136 list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
132 print_lock(s, lkb, res); 137 print_resource_lock(s, lkb, res);
133 138
134 seq_printf(s, "Conversion Queue\n"); 139 seq_printf(s, "Conversion Queue\n");
135 list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) 140 list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
136 print_lock(s, lkb, res); 141 print_resource_lock(s, lkb, res);
137 142
138 seq_printf(s, "Waiting Queue\n"); 143 seq_printf(s, "Waiting Queue\n");
139 list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) 144 list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
140 print_lock(s, lkb, res); 145 print_resource_lock(s, lkb, res);
141 146
142 if (list_empty(&res->res_lookup)) 147 if (list_empty(&res->res_lookup))
143 goto out; 148 goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
151 seq_printf(s, "\n"); 156 seq_printf(s, "\n");
152 } 157 }
153 out: 158 out:
159 unlock_rsb(res);
160 return 0;
161}
162
163static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
164{
165 struct dlm_user_args *ua;
166 unsigned int waiting = 0;
167 uint64_t xid = 0;
168
169 if (lkb->lkb_flags & DLM_IFL_USER) {
170 ua = (struct dlm_user_args *) lkb->lkb_astparam;
171 if (ua)
172 xid = ua->xid;
173 }
174
175 if (lkb->lkb_timestamp)
176 waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
177
178 /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
179 r_nodeid r_len r_name */
180
181 seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
182 lkb->lkb_id,
183 lkb->lkb_nodeid,
184 lkb->lkb_remid,
185 lkb->lkb_ownpid,
186 (unsigned long long)xid,
187 lkb->lkb_exflags,
188 lkb->lkb_flags,
189 lkb->lkb_status,
190 lkb->lkb_grmode,
191 lkb->lkb_rqmode,
192 waiting,
193 r->res_nodeid,
194 r->res_length,
195 r->res_name);
196}
197
198static int print_locks(struct dlm_rsb *r, struct seq_file *s)
199{
200 struct dlm_lkb *lkb;
201
202 lock_rsb(r);
203
204 list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
205 print_lock(s, lkb, r);
206
207 list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
208 print_lock(s, lkb, r);
209
210 list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
211 print_lock(s, lkb, r);
212
213 unlock_rsb(r);
154 return 0; 214 return 0;
155} 215}
156 216
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
166 read_lock(&ls->ls_rsbtbl[i].lock); 226 read_lock(&ls->ls_rsbtbl[i].lock);
167 if (!list_empty(&ls->ls_rsbtbl[i].list)) { 227 if (!list_empty(&ls->ls_rsbtbl[i].list)) {
168 ri->next = ls->ls_rsbtbl[i].list.next; 228 ri->next = ls->ls_rsbtbl[i].list.next;
229 ri->rsb = list_entry(ri->next, struct dlm_rsb,
230 res_hashchain);
231 dlm_hold_rsb(ri->rsb);
169 read_unlock(&ls->ls_rsbtbl[i].lock); 232 read_unlock(&ls->ls_rsbtbl[i].lock);
170 break; 233 break;
171 } 234 }
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
176 if (ri->entry >= ls->ls_rsbtbl_size) 239 if (ri->entry >= ls->ls_rsbtbl_size)
177 return 1; 240 return 1;
178 } else { 241 } else {
242 struct dlm_rsb *old = ri->rsb;
179 i = ri->entry; 243 i = ri->entry;
180 read_lock(&ls->ls_rsbtbl[i].lock); 244 read_lock(&ls->ls_rsbtbl[i].lock);
181 ri->next = ri->next->next; 245 ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
184 ri->next = NULL; 248 ri->next = NULL;
185 ri->entry++; 249 ri->entry++;
186 read_unlock(&ls->ls_rsbtbl[i].lock); 250 read_unlock(&ls->ls_rsbtbl[i].lock);
251 dlm_put_rsb(old);
187 goto top; 252 goto top;
188 } 253 }
254 ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
255 dlm_hold_rsb(ri->rsb);
189 read_unlock(&ls->ls_rsbtbl[i].lock); 256 read_unlock(&ls->ls_rsbtbl[i].lock);
257 dlm_put_rsb(old);
190 } 258 }
191 ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
192 259
193 return 0; 260 return 0;
194} 261}
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
202{ 269{
203 struct rsb_iter *ri; 270 struct rsb_iter *ri;
204 271
205 ri = kmalloc(sizeof *ri, GFP_KERNEL); 272 ri = kzalloc(sizeof *ri, GFP_KERNEL);
206 if (!ri) 273 if (!ri)
207 return NULL; 274 return NULL;
208 275
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
260{ 327{
261 struct rsb_iter *ri = iter_ptr; 328 struct rsb_iter *ri = iter_ptr;
262 329
263 print_resource(ri->rsb, file); 330 if (ri->locks) {
331 if (ri->header) {
332 seq_printf(file, "id nodeid remid pid xid exflags flags "
333 "sts grmode rqmode time_ms r_nodeid "
334 "r_len r_name\n");
335 ri->header = 0;
336 }
337 print_locks(ri->rsb, file);
338 } else {
339 print_resource(ri->rsb, file);
340 }
264 341
265 return 0; 342 return 0;
266} 343}
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
296}; 373};
297 374
298/* 375/*
376 * Dump state in compact per-lock listing
377 */
378
379static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
380{
381 struct rsb_iter *ri;
382
383 ri = kzalloc(sizeof *ri, GFP_KERNEL);
384 if (!ri)
385 return NULL;
386
387 ri->ls = ls;
388 ri->entry = 0;
389 ri->next = NULL;
390 ri->locks = 1;
391
392 if (*pos == 0)
393 ri->header = 1;
394
395 if (rsb_iter_next(ri)) {
396 rsb_iter_free(ri);
397 return NULL;
398 }
399
400 return ri;
401}
402
403static void *locks_seq_start(struct seq_file *file, loff_t *pos)
404{
405 struct rsb_iter *ri;
406 loff_t n = *pos;
407
408 ri = locks_iter_init(file->private, pos);
409 if (!ri)
410 return NULL;
411
412 while (n--) {
413 if (rsb_iter_next(ri)) {
414 rsb_iter_free(ri);
415 return NULL;
416 }
417 }
418
419 return ri;
420}
421
422static struct seq_operations locks_seq_ops = {
423 .start = locks_seq_start,
424 .next = rsb_seq_next,
425 .stop = rsb_seq_stop,
426 .show = rsb_seq_show,
427};
428
429static int locks_open(struct inode *inode, struct file *file)
430{
431 struct seq_file *seq;
432 int ret;
433
434 ret = seq_open(file, &locks_seq_ops);
435 if (ret)
436 return ret;
437
438 seq = file->private_data;
439 seq->private = inode->i_private;
440
441 return 0;
442}
443
444static const struct file_operations locks_fops = {
445 .owner = THIS_MODULE,
446 .open = locks_open,
447 .read = seq_read,
448 .llseek = seq_lseek,
449 .release = seq_release
450};
451
452/*
299 * dump lkb's on the ls_waiters list 453 * dump lkb's on the ls_waiters list
300 */ 454 */
301 455
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
362 return -ENOMEM; 516 return -ENOMEM;
363 } 517 }
364 518
519 memset(name, 0, sizeof(name));
520 snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
521
522 ls->ls_debug_locks_dentry = debugfs_create_file(name,
523 S_IFREG | S_IRUGO,
524 dlm_root,
525 ls,
526 &locks_fops);
527 if (!ls->ls_debug_locks_dentry) {
528 debugfs_remove(ls->ls_debug_waiters_dentry);
529 debugfs_remove(ls->ls_debug_rsb_dentry);
530 return -ENOMEM;
531 }
532
365 return 0; 533 return 0;
366} 534}
367 535
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
371 debugfs_remove(ls->ls_debug_rsb_dentry); 539 debugfs_remove(ls->ls_debug_rsb_dentry);
372 if (ls->ls_debug_waiters_dentry) 540 if (ls->ls_debug_waiters_dentry)
373 debugfs_remove(ls->ls_debug_waiters_dentry); 541 debugfs_remove(ls->ls_debug_waiters_dentry);
542 if (ls->ls_debug_locks_dentry)
543 debugfs_remove(ls->ls_debug_locks_dentry);
374} 544}
375 545
376int dlm_register_debugfs(void) 546int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a0..74901e981e10 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
151 void *bastaddr; 151 void *bastaddr;
152 int mode; 152 int mode;
153 struct dlm_lksb *lksb; 153 struct dlm_lksb *lksb;
154 unsigned long timeout;
154}; 155};
155 156
156 157
@@ -213,6 +214,9 @@ struct dlm_args {
213#define DLM_IFL_OVERLAP_UNLOCK 0x00080000 214#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
214#define DLM_IFL_OVERLAP_CANCEL 0x00100000 215#define DLM_IFL_OVERLAP_CANCEL 0x00100000
215#define DLM_IFL_ENDOFLIFE 0x00200000 216#define DLM_IFL_ENDOFLIFE 0x00200000
217#define DLM_IFL_WATCH_TIMEWARN 0x00400000
218#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
219#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
216#define DLM_IFL_USER 0x00000001 220#define DLM_IFL_USER 0x00000001
217#define DLM_IFL_ORPHAN 0x00000002 221#define DLM_IFL_ORPHAN 0x00000002
218 222
@@ -243,6 +247,9 @@ struct dlm_lkb {
243 struct list_head lkb_wait_reply; /* waiting for remote reply */ 247 struct list_head lkb_wait_reply; /* waiting for remote reply */
244 struct list_head lkb_astqueue; /* need ast to be sent */ 248 struct list_head lkb_astqueue; /* need ast to be sent */
245 struct list_head lkb_ownqueue; /* list of locks for a process */ 249 struct list_head lkb_ownqueue; /* list of locks for a process */
250 struct list_head lkb_time_list;
251 unsigned long lkb_timestamp;
252 unsigned long lkb_timeout_cs;
246 253
247 char *lkb_lvbptr; 254 char *lkb_lvbptr;
248 struct dlm_lksb *lkb_lksb; /* caller's status block */ 255 struct dlm_lksb *lkb_lksb; /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
447 struct mutex ls_orphans_mutex; 454 struct mutex ls_orphans_mutex;
448 struct list_head ls_orphans; 455 struct list_head ls_orphans;
449 456
457 struct mutex ls_timeout_mutex;
458 struct list_head ls_timeout;
459
450 struct list_head ls_nodes; /* current nodes in ls */ 460 struct list_head ls_nodes; /* current nodes in ls */
451 struct list_head ls_nodes_gone; /* dead node list, recovery */ 461 struct list_head ls_nodes_gone; /* dead node list, recovery */
452 int ls_num_nodes; /* number of nodes in ls */ 462 int ls_num_nodes; /* number of nodes in ls */
453 int ls_low_nodeid; 463 int ls_low_nodeid;
454 int ls_total_weight; 464 int ls_total_weight;
455 int *ls_node_array; 465 int *ls_node_array;
466 gfp_t ls_allocation;
456 467
457 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 468 struct dlm_rsb ls_stub_rsb; /* for returning errors */
458 struct dlm_lkb ls_stub_lkb; /* for returning errors */ 469 struct dlm_lkb ls_stub_lkb; /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
460 471
461 struct dentry *ls_debug_rsb_dentry; /* debugfs */ 472 struct dentry *ls_debug_rsb_dentry; /* debugfs */
462 struct dentry *ls_debug_waiters_dentry; /* debugfs */ 473 struct dentry *ls_debug_waiters_dentry; /* debugfs */
474 struct dentry *ls_debug_locks_dentry; /* debugfs */
463 475
464 wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ 476 wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
465 int ls_uevent_result; 477 int ls_uevent_result;
478 struct completion ls_members_done;
479 int ls_members_result;
466 480
467 struct miscdevice ls_device; 481 struct miscdevice ls_device;
468 482
@@ -472,6 +486,7 @@ struct dlm_ls {
472 struct task_struct *ls_recoverd_task; 486 struct task_struct *ls_recoverd_task;
473 struct mutex ls_recoverd_active; 487 struct mutex ls_recoverd_active;
474 spinlock_t ls_recover_lock; 488 spinlock_t ls_recover_lock;
489 unsigned long ls_recover_begin; /* jiffies timestamp */
475 uint32_t ls_recover_status; /* DLM_RS_ */ 490 uint32_t ls_recover_status; /* DLM_RS_ */
476 uint64_t ls_recover_seq; 491 uint64_t ls_recover_seq;
477 struct dlm_recover *ls_recover_args; 492 struct dlm_recover *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
501#define LSFL_RCOM_READY 3 516#define LSFL_RCOM_READY 3
502#define LSFL_RCOM_WAIT 4 517#define LSFL_RCOM_WAIT 4
503#define LSFL_UEVENT_WAIT 5 518#define LSFL_UEVENT_WAIT 5
519#define LSFL_TIMEWARN 6
504 520
505/* much of this is just saving user space pointers associated with the 521/* much of this is just saving user space pointers associated with the
506 lock that we pass back to the user lib with an ast */ 522 lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
518 void __user *castaddr; 534 void __user *castaddr;
519 void __user *bastparam; 535 void __user *bastparam;
520 void __user *bastaddr; 536 void __user *bastaddr;
537 uint64_t xid;
521}; 538};
522 539
523#define DLM_PROC_FLAGS_CLOSING 1 540#define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96b..b455919c1998 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
82static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); 82static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
83static int send_remove(struct dlm_rsb *r); 83static int send_remove(struct dlm_rsb *r);
84static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 84static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, 86static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
86 struct dlm_message *ms); 87 struct dlm_message *ms);
87static int receive_extralen(struct dlm_message *ms); 88static int receive_extralen(struct dlm_message *ms);
88static void do_purge(struct dlm_ls *ls, int nodeid, int pid); 89static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
90static void del_timeout(struct dlm_lkb *lkb);
91void dlm_timeout_warn(struct dlm_lkb *lkb);
89 92
90/* 93/*
91 * Lock compatibilty matrix - thanks Steve 94 * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
194 197
195/* Threads cannot use the lockspace while it's being recovered */ 198/* Threads cannot use the lockspace while it's being recovered */
196 199
197static inline void lock_recovery(struct dlm_ls *ls) 200static inline void dlm_lock_recovery(struct dlm_ls *ls)
198{ 201{
199 down_read(&ls->ls_in_recovery); 202 down_read(&ls->ls_in_recovery);
200} 203}
201 204
202static inline void unlock_recovery(struct dlm_ls *ls) 205void dlm_unlock_recovery(struct dlm_ls *ls)
203{ 206{
204 up_read(&ls->ls_in_recovery); 207 up_read(&ls->ls_in_recovery);
205} 208}
206 209
207static inline int lock_recovery_try(struct dlm_ls *ls) 210int dlm_lock_recovery_try(struct dlm_ls *ls)
208{ 211{
209 return down_read_trylock(&ls->ls_in_recovery); 212 return down_read_trylock(&ls->ls_in_recovery);
210} 213}
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
286 if (is_master_copy(lkb)) 289 if (is_master_copy(lkb))
287 return; 290 return;
288 291
292 del_timeout(lkb);
293
289 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); 294 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
290 295
296 /* if the operation was a cancel, then return -DLM_ECANCEL, if a
297 timeout caused the cancel then return -ETIMEDOUT */
298 if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
299 lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
300 rv = -ETIMEDOUT;
301 }
302
303 if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
304 lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
305 rv = -EDEADLK;
306 }
307
291 lkb->lkb_lksb->sb_status = rv; 308 lkb->lkb_lksb->sb_status = rv;
292 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; 309 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
293 310
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
581 kref_init(&lkb->lkb_ref); 598 kref_init(&lkb->lkb_ref);
582 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 599 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
583 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); 600 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
601 INIT_LIST_HEAD(&lkb->lkb_time_list);
584 602
585 get_random_bytes(&bucket, sizeof(bucket)); 603 get_random_bytes(&bucket, sizeof(bucket));
586 bucket &= (ls->ls_lkbtbl_size - 1); 604 bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
985{ 1003{
986 int i; 1004 int i;
987 1005
988 if (dlm_locking_stopped(ls))
989 return;
990
991 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 1006 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
992 shrink_bucket(ls, i); 1007 shrink_bucket(ls, i);
1008 if (dlm_locking_stopped(ls))
1009 break;
993 cond_resched(); 1010 cond_resched();
994 } 1011 }
995} 1012}
996 1013
1014static void add_timeout(struct dlm_lkb *lkb)
1015{
1016 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1017
1018 if (is_master_copy(lkb)) {
1019 lkb->lkb_timestamp = jiffies;
1020 return;
1021 }
1022
1023 if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
1024 !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1025 lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
1026 goto add_it;
1027 }
1028 if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
1029 goto add_it;
1030 return;
1031
1032 add_it:
1033 DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
1034 mutex_lock(&ls->ls_timeout_mutex);
1035 hold_lkb(lkb);
1036 lkb->lkb_timestamp = jiffies;
1037 list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
1038 mutex_unlock(&ls->ls_timeout_mutex);
1039}
1040
1041static void del_timeout(struct dlm_lkb *lkb)
1042{
1043 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1044
1045 mutex_lock(&ls->ls_timeout_mutex);
1046 if (!list_empty(&lkb->lkb_time_list)) {
1047 list_del_init(&lkb->lkb_time_list);
1048 unhold_lkb(lkb);
1049 }
1050 mutex_unlock(&ls->ls_timeout_mutex);
1051}
1052
1053/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
1054 lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex
1055 and then lock rsb because of lock ordering in add_timeout. We may need
1056 to specify some special timeout-related bits in the lkb that are just to
1057 be accessed under the timeout_mutex. */
1058
1059void dlm_scan_timeout(struct dlm_ls *ls)
1060{
1061 struct dlm_rsb *r;
1062 struct dlm_lkb *lkb;
1063 int do_cancel, do_warn;
1064
1065 for (;;) {
1066 if (dlm_locking_stopped(ls))
1067 break;
1068
1069 do_cancel = 0;
1070 do_warn = 0;
1071 mutex_lock(&ls->ls_timeout_mutex);
1072 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
1073
1074 if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
1075 time_after_eq(jiffies, lkb->lkb_timestamp +
1076 lkb->lkb_timeout_cs * HZ/100))
1077 do_cancel = 1;
1078
1079 if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
1080 time_after_eq(jiffies, lkb->lkb_timestamp +
1081 dlm_config.ci_timewarn_cs * HZ/100))
1082 do_warn = 1;
1083
1084 if (!do_cancel && !do_warn)
1085 continue;
1086 hold_lkb(lkb);
1087 break;
1088 }
1089 mutex_unlock(&ls->ls_timeout_mutex);
1090
1091 if (!do_cancel && !do_warn)
1092 break;
1093
1094 r = lkb->lkb_resource;
1095 hold_rsb(r);
1096 lock_rsb(r);
1097
1098 if (do_warn) {
1099 /* clear flag so we only warn once */
1100 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1101 if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
1102 del_timeout(lkb);
1103 dlm_timeout_warn(lkb);
1104 }
1105
1106 if (do_cancel) {
1107 log_debug(ls, "timeout cancel %x node %d %s",
1108 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1109 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1110 lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
1111 del_timeout(lkb);
1112 _cancel_lock(r, lkb);
1113 }
1114
1115 unlock_rsb(r);
1116 unhold_rsb(r);
1117 dlm_put_lkb(lkb);
1118 }
1119}
1120
1121/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
1122 dlm_recoverd before checking/setting ls_recover_begin. */
1123
1124void dlm_adjust_timeouts(struct dlm_ls *ls)
1125{
1126 struct dlm_lkb *lkb;
1127 long adj = jiffies - ls->ls_recover_begin;
1128
1129 ls->ls_recover_begin = 0;
1130 mutex_lock(&ls->ls_timeout_mutex);
1131 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1132 lkb->lkb_timestamp += adj;
1133 mutex_unlock(&ls->ls_timeout_mutex);
1134}
1135
997/* lkb is master or local copy */ 1136/* lkb is master or local copy */
998 1137
999static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) 1138static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1275 * queue for one resource. The granted mode of each lock blocks the requested 1414 * queue for one resource. The granted mode of each lock blocks the requested
1276 * mode of the other lock." 1415 * mode of the other lock."
1277 * 1416 *
1278 * Part 2: if the granted mode of lkb is preventing the first lkb in the 1417 * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
1279 * convert queue from being granted, then demote lkb (set grmode to NL). 1418 * convert queue from being granted, then deadlk/demote lkb.
1280 * This second form requires that we check for conv-deadlk even when
1281 * now == 0 in _can_be_granted().
1282 * 1419 *
1283 * Example: 1420 * Example:
1284 * Granted Queue: empty 1421 * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1287 * 1424 *
1288 * The first lock can't be granted because of the granted mode of the second 1425 * The first lock can't be granted because of the granted mode of the second
1289 * lock and the second lock can't be granted because it's not first in the 1426 * lock and the second lock can't be granted because it's not first in the
1290 * list. We demote the granted mode of the second lock (the lkb passed to this 1427 * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
1291 * function). 1428 * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
1429 * flag set and return DEMOTED in the lksb flags.
1430 *
1431 * Originally, this function detected conv-deadlk in a more limited scope:
1432 * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
1433 * - if lkb1 was the first entry in the queue (not just earlier), and was
1434 * blocked by the granted mode of lkb2, and there was nothing on the
1435 * granted queue preventing lkb1 from being granted immediately, i.e.
1436 * lkb2 was the only thing preventing lkb1 from being granted.
1437 *
1438 * That second condition meant we'd only say there was conv-deadlk if
1439 * resolving it (by demotion) would lead to the first lock on the convert
1440 * queue being granted right away. It allowed conversion deadlocks to exist
1441 * between locks on the convert queue while they couldn't be granted anyway.
1292 * 1442 *
1293 * After the resolution, the "grant pending" function needs to go back and try 1443 * Now, we detect and take action on conversion deadlocks immediately when
1294 * to grant locks on the convert queue again since the first lock can now be 1444 * they're created, even if they may not be immediately consequential. If
1295 * granted. 1445 * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
1446 * mode that would prevent lkb1's conversion from being granted, we do a
1447 * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
1448 * I think this means that the lkb_is_ahead condition below should always
1449 * be zero, i.e. there will never be conv-deadlk between two locks that are
1450 * both already on the convert queue.
1296 */ 1451 */
1297 1452
1298static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) 1453static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
1299{ 1454{
1300 struct dlm_lkb *this, *first = NULL, *self = NULL; 1455 struct dlm_lkb *lkb1;
1456 int lkb_is_ahead = 0;
1301 1457
1302 list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { 1458 list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
1303 if (!first) 1459 if (lkb1 == lkb2) {
1304 first = this; 1460 lkb_is_ahead = 1;
1305 if (this == lkb) {
1306 self = lkb;
1307 continue; 1461 continue;
1308 } 1462 }
1309 1463
1310 if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) 1464 if (!lkb_is_ahead) {
1311 return 1; 1465 if (!modes_compat(lkb2, lkb1))
1312 } 1466 return 1;
1313 1467 } else {
1314 /* if lkb is on the convert queue and is preventing the first 1468 if (!modes_compat(lkb2, lkb1) &&
1315 from being granted, then there's deadlock and we demote lkb. 1469 !modes_compat(lkb1, lkb2))
1316 multiple converting locks may need to do this before the first 1470 return 1;
1317 converting lock can be granted. */ 1471 }
1318
1319 if (self && self != first) {
1320 if (!modes_compat(lkb, first) &&
1321 !queue_conflict(&rsb->res_grantqueue, first))
1322 return 1;
1323 } 1472 }
1324
1325 return 0; 1473 return 0;
1326} 1474}
1327 1475
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1450 if (!now && !conv && list_empty(&r->res_convertqueue) && 1598 if (!now && !conv && list_empty(&r->res_convertqueue) &&
1451 first_in_list(lkb, &r->res_waitqueue)) 1599 first_in_list(lkb, &r->res_waitqueue))
1452 return 1; 1600 return 1;
1453
1454 out: 1601 out:
1455 /*
1456 * The following, enabled by CONVDEADLK, departs from VMS.
1457 */
1458
1459 if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
1460 conversion_deadlock_detect(r, lkb)) {
1461 lkb->lkb_grmode = DLM_LOCK_NL;
1462 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1463 }
1464
1465 return 0; 1602 return 0;
1466} 1603}
1467 1604
1468/* 1605static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
1469 * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a 1606 int *err)
1470 * simple way to provide a big optimization to applications that can use them.
1471 */
1472
1473static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1474{ 1607{
1475 uint32_t flags = lkb->lkb_exflags;
1476 int rv; 1608 int rv;
1477 int8_t alt = 0, rqmode = lkb->lkb_rqmode; 1609 int8_t alt = 0, rqmode = lkb->lkb_rqmode;
1610 int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
1611
1612 if (err)
1613 *err = 0;
1478 1614
1479 rv = _can_be_granted(r, lkb, now); 1615 rv = _can_be_granted(r, lkb, now);
1480 if (rv) 1616 if (rv)
1481 goto out; 1617 goto out;
1482 1618
1483 if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) 1619 /*
1620 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
1621 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
1622 * cancels one of the locks.
1623 */
1624
1625 if (is_convert && can_be_queued(lkb) &&
1626 conversion_deadlock_detect(r, lkb)) {
1627 if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
1628 lkb->lkb_grmode = DLM_LOCK_NL;
1629 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1630 } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1631 if (err)
1632 *err = -EDEADLK;
1633 else {
1634 log_print("can_be_granted deadlock %x now %d",
1635 lkb->lkb_id, now);
1636 dlm_dump_rsb(r);
1637 }
1638 }
1484 goto out; 1639 goto out;
1640 }
1485 1641
1486 if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) 1642 /*
1643 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
1644 * to grant a request in a mode other than the normal rqmode. It's a
1645 * simple way to provide a big optimization to applications that can
1646 * use them.
1647 */
1648
1649 if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
1487 alt = DLM_LOCK_PR; 1650 alt = DLM_LOCK_PR;
1488 else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) 1651 else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
1489 alt = DLM_LOCK_CW; 1652 alt = DLM_LOCK_CW;
1490 1653
1491 if (alt) { 1654 if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1500 return rv; 1663 return rv;
1501} 1664}
1502 1665
1666/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
1667 for locks pending on the convert list. Once verified (watch for these
1668 log_prints), we should be able to just call _can_be_granted() and not
1669 bother with the demote/deadlk cases here (and there's no easy way to deal
1670 with a deadlk here, we'd have to generate something like grant_lock with
1671 the deadlk error.) */
1672
1673/* returns the highest requested mode of all blocked conversions */
1674
1503static int grant_pending_convert(struct dlm_rsb *r, int high) 1675static int grant_pending_convert(struct dlm_rsb *r, int high)
1504{ 1676{
1505 struct dlm_lkb *lkb, *s; 1677 struct dlm_lkb *lkb, *s;
1506 int hi, demoted, quit, grant_restart, demote_restart; 1678 int hi, demoted, quit, grant_restart, demote_restart;
1679 int deadlk;
1507 1680
1508 quit = 0; 1681 quit = 0;
1509 restart: 1682 restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
1513 1686
1514 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { 1687 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
1515 demoted = is_demoted(lkb); 1688 demoted = is_demoted(lkb);
1516 if (can_be_granted(r, lkb, 0)) { 1689 deadlk = 0;
1690
1691 if (can_be_granted(r, lkb, 0, &deadlk)) {
1517 grant_lock_pending(r, lkb); 1692 grant_lock_pending(r, lkb);
1518 grant_restart = 1; 1693 grant_restart = 1;
1519 } else { 1694 continue;
1520 hi = max_t(int, lkb->lkb_rqmode, hi);
1521 if (!demoted && is_demoted(lkb))
1522 demote_restart = 1;
1523 } 1695 }
1696
1697 if (!demoted && is_demoted(lkb)) {
1698 log_print("WARN: pending demoted %x node %d %s",
1699 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1700 demote_restart = 1;
1701 continue;
1702 }
1703
1704 if (deadlk) {
1705 log_print("WARN: pending deadlock %x node %d %s",
1706 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1707 dlm_dump_rsb(r);
1708 continue;
1709 }
1710
1711 hi = max_t(int, lkb->lkb_rqmode, hi);
1524 } 1712 }
1525 1713
1526 if (grant_restart) 1714 if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
1538 struct dlm_lkb *lkb, *s; 1726 struct dlm_lkb *lkb, *s;
1539 1727
1540 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { 1728 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
1541 if (can_be_granted(r, lkb, 0)) 1729 if (can_be_granted(r, lkb, 0, NULL))
1542 grant_lock_pending(r, lkb); 1730 grant_lock_pending(r, lkb);
1543 else 1731 else
1544 high = max_t(int, lkb->lkb_rqmode, high); 1732 high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
1733} 1921}
1734 1922
1735static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, 1923static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1736 int namelen, uint32_t parent_lkid, void *ast, 1924 int namelen, unsigned long timeout_cs, void *ast,
1737 void *astarg, void *bast, struct dlm_args *args) 1925 void *astarg, void *bast, struct dlm_args *args)
1738{ 1926{
1739 int rv = -EINVAL; 1927 int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1776 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) 1964 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
1777 goto out; 1965 goto out;
1778 1966
1779 /* parent/child locks not yet supported */
1780 if (parent_lkid)
1781 goto out;
1782
1783 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) 1967 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
1784 goto out; 1968 goto out;
1785 1969
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1791 args->astaddr = ast; 1975 args->astaddr = ast;
1792 args->astparam = (long) astarg; 1976 args->astparam = (long) astarg;
1793 args->bastaddr = bast; 1977 args->bastaddr = bast;
1978 args->timeout = timeout_cs;
1794 args->mode = mode; 1979 args->mode = mode;
1795 args->lksb = lksb; 1980 args->lksb = lksb;
1796 rv = 0; 1981 rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1845 lkb->lkb_lksb = args->lksb; 2030 lkb->lkb_lksb = args->lksb;
1846 lkb->lkb_lvbptr = args->lksb->sb_lvbptr; 2031 lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
1847 lkb->lkb_ownpid = (int) current->pid; 2032 lkb->lkb_ownpid = (int) current->pid;
2033 lkb->lkb_timeout_cs = args->timeout;
1848 rv = 0; 2034 rv = 0;
1849 out: 2035 out:
1850 return rv; 2036 return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1903 if (is_overlap(lkb)) 2089 if (is_overlap(lkb))
1904 goto out; 2090 goto out;
1905 2091
2092 /* don't let scand try to do a cancel */
2093 del_timeout(lkb);
2094
1906 if (lkb->lkb_flags & DLM_IFL_RESEND) { 2095 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1907 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; 2096 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
1908 rv = -EBUSY; 2097 rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1934 if (is_overlap_unlock(lkb)) 2123 if (is_overlap_unlock(lkb))
1935 goto out; 2124 goto out;
1936 2125
2126 /* don't let scand try to do a cancel */
2127 del_timeout(lkb);
2128
1937 if (lkb->lkb_flags & DLM_IFL_RESEND) { 2129 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1938 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; 2130 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
1939 rv = -EBUSY; 2131 rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1984{ 2176{
1985 int error = 0; 2177 int error = 0;
1986 2178
1987 if (can_be_granted(r, lkb, 1)) { 2179 if (can_be_granted(r, lkb, 1, NULL)) {
1988 grant_lock(r, lkb); 2180 grant_lock(r, lkb);
1989 queue_cast(r, lkb, 0); 2181 queue_cast(r, lkb, 0);
1990 goto out; 2182 goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1994 error = -EINPROGRESS; 2186 error = -EINPROGRESS;
1995 add_lkb(r, lkb, DLM_LKSTS_WAITING); 2187 add_lkb(r, lkb, DLM_LKSTS_WAITING);
1996 send_blocking_asts(r, lkb); 2188 send_blocking_asts(r, lkb);
2189 add_timeout(lkb);
1997 goto out; 2190 goto out;
1998 } 2191 }
1999 2192
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
2009static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) 2202static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2010{ 2203{
2011 int error = 0; 2204 int error = 0;
2205 int deadlk = 0;
2012 2206
2013 /* changing an existing lock may allow others to be granted */ 2207 /* changing an existing lock may allow others to be granted */
2014 2208
2015 if (can_be_granted(r, lkb, 1)) { 2209 if (can_be_granted(r, lkb, 1, &deadlk)) {
2016 grant_lock(r, lkb); 2210 grant_lock(r, lkb);
2017 queue_cast(r, lkb, 0); 2211 queue_cast(r, lkb, 0);
2018 grant_pending_locks(r); 2212 grant_pending_locks(r);
2019 goto out; 2213 goto out;
2020 } 2214 }
2021 2215
2216 /* can_be_granted() detected that this lock would block in a conversion
2217 deadlock, so we leave it on the granted queue and return EDEADLK in
2218 the ast for the convert. */
2219
2220 if (deadlk) {
2221 /* it's left on the granted queue */
2222 log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
2223 lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
2224 lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
2225 revert_lock(r, lkb);
2226 queue_cast(r, lkb, -EDEADLK);
2227 error = -EDEADLK;
2228 goto out;
2229 }
2230
2022 /* is_demoted() means the can_be_granted() above set the grmode 2231 /* is_demoted() means the can_be_granted() above set the grmode
2023 to NL, and left us on the granted queue. This auto-demotion 2232 to NL, and left us on the granted queue. This auto-demotion
2024 (due to CONVDEADLK) might mean other locks, and/or this lock, are 2233 (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2041 del_lkb(r, lkb); 2250 del_lkb(r, lkb);
2042 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 2251 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2043 send_blocking_asts(r, lkb); 2252 send_blocking_asts(r, lkb);
2253 add_timeout(lkb);
2044 goto out; 2254 goto out;
2045 } 2255 }
2046 2256
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2274 if (!ls) 2484 if (!ls)
2275 return -EINVAL; 2485 return -EINVAL;
2276 2486
2277 lock_recovery(ls); 2487 dlm_lock_recovery(ls);
2278 2488
2279 if (convert) 2489 if (convert)
2280 error = find_lkb(ls, lksb->sb_lkid, &lkb); 2490 error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2284 if (error) 2494 if (error)
2285 goto out; 2495 goto out;
2286 2496
2287 error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, 2497 error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
2288 astarg, bast, &args); 2498 astarg, bast, &args);
2289 if (error) 2499 if (error)
2290 goto out_put; 2500 goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2299 out_put: 2509 out_put:
2300 if (convert || error) 2510 if (convert || error)
2301 __put_lkb(ls, lkb); 2511 __put_lkb(ls, lkb);
2302 if (error == -EAGAIN) 2512 if (error == -EAGAIN || error == -EDEADLK)
2303 error = 0; 2513 error = 0;
2304 out: 2514 out:
2305 unlock_recovery(ls); 2515 dlm_unlock_recovery(ls);
2306 dlm_put_lockspace(ls); 2516 dlm_put_lockspace(ls);
2307 return error; 2517 return error;
2308} 2518}
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2322 if (!ls) 2532 if (!ls)
2323 return -EINVAL; 2533 return -EINVAL;
2324 2534
2325 lock_recovery(ls); 2535 dlm_lock_recovery(ls);
2326 2536
2327 error = find_lkb(ls, lkid, &lkb); 2537 error = find_lkb(ls, lkid, &lkb);
2328 if (error) 2538 if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2344 out_put: 2554 out_put:
2345 dlm_put_lkb(lkb); 2555 dlm_put_lkb(lkb);
2346 out: 2556 out:
2347 unlock_recovery(ls); 2557 dlm_unlock_recovery(ls);
2348 dlm_put_lockspace(ls); 2558 dlm_put_lockspace(ls);
2349 return error; 2559 return error;
2350} 2560}
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
2384 pass into lowcomms_commit and a message buffer (mb) that we 2594 pass into lowcomms_commit and a message buffer (mb) that we
2385 write our data into */ 2595 write our data into */
2386 2596
2387 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); 2597 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
2388 if (!mh) 2598 if (!mh)
2389 return -ENOBUFS; 2599 return -ENOBUFS;
2390 2600
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
3111 lkb->lkb_remid = ms->m_lkid; 3321 lkb->lkb_remid = ms->m_lkid;
3112 if (is_altmode(lkb)) 3322 if (is_altmode(lkb))
3113 munge_altmode(lkb, ms); 3323 munge_altmode(lkb, ms);
3114 if (result) 3324 if (result) {
3115 add_lkb(r, lkb, DLM_LKSTS_WAITING); 3325 add_lkb(r, lkb, DLM_LKSTS_WAITING);
3116 else { 3326 add_timeout(lkb);
3327 } else {
3117 grant_lock_pc(r, lkb, ms); 3328 grant_lock_pc(r, lkb, ms);
3118 queue_cast(r, lkb, 0); 3329 queue_cast(r, lkb, 0);
3119 } 3330 }
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3172 queue_cast(r, lkb, -EAGAIN); 3383 queue_cast(r, lkb, -EAGAIN);
3173 break; 3384 break;
3174 3385
3386 case -EDEADLK:
3387 receive_flags_reply(lkb, ms);
3388 revert_lock_pc(r, lkb);
3389 queue_cast(r, lkb, -EDEADLK);
3390 break;
3391
3175 case -EINPROGRESS: 3392 case -EINPROGRESS:
3176 /* convert was queued on remote master */ 3393 /* convert was queued on remote master */
3177 receive_flags_reply(lkb, ms); 3394 receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3179 munge_demoted(lkb, ms); 3396 munge_demoted(lkb, ms);
3180 del_lkb(r, lkb); 3397 del_lkb(r, lkb);
3181 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3398 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3399 add_timeout(lkb);
3182 break; 3400 break;
3183 3401
3184 case 0: 3402 case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3298 case -DLM_ECANCEL: 3516 case -DLM_ECANCEL:
3299 receive_flags_reply(lkb, ms); 3517 receive_flags_reply(lkb, ms);
3300 revert_lock_pc(r, lkb); 3518 revert_lock_pc(r, lkb);
3301 if (ms->m_result) 3519 queue_cast(r, lkb, -DLM_ECANCEL);
3302 queue_cast(r, lkb, -DLM_ECANCEL);
3303 break; 3520 break;
3304 case 0: 3521 case 0:
3305 break; 3522 break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3424 } 3641 }
3425 } 3642 }
3426 3643
3427 if (lock_recovery_try(ls)) 3644 if (dlm_lock_recovery_try(ls))
3428 break; 3645 break;
3429 schedule(); 3646 schedule();
3430 } 3647 }
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3503 log_error(ls, "unknown message type %d", ms->m_type); 3720 log_error(ls, "unknown message type %d", ms->m_type);
3504 } 3721 }
3505 3722
3506 unlock_recovery(ls); 3723 dlm_unlock_recovery(ls);
3507 out: 3724 out:
3508 dlm_put_lockspace(ls); 3725 dlm_put_lockspace(ls);
3509 dlm_astd_wake(); 3726 dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
4034 4251
4035int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, 4252int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4036 int mode, uint32_t flags, void *name, unsigned int namelen, 4253 int mode, uint32_t flags, void *name, unsigned int namelen,
4037 uint32_t parent_lkid) 4254 unsigned long timeout_cs)
4038{ 4255{
4039 struct dlm_lkb *lkb; 4256 struct dlm_lkb *lkb;
4040 struct dlm_args args; 4257 struct dlm_args args;
4041 int error; 4258 int error;
4042 4259
4043 lock_recovery(ls); 4260 dlm_lock_recovery(ls);
4044 4261
4045 error = create_lkb(ls, &lkb); 4262 error = create_lkb(ls, &lkb);
4046 if (error) { 4263 if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4062 When DLM_IFL_USER is set, the dlm knows that this is a userspace 4279 When DLM_IFL_USER is set, the dlm knows that this is a userspace
4063 lock and that lkb_astparam is the dlm_user_args structure. */ 4280 lock and that lkb_astparam is the dlm_user_args structure. */
4064 4281
4065 error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid, 4282 error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
4066 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); 4283 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
4067 lkb->lkb_flags |= DLM_IFL_USER; 4284 lkb->lkb_flags |= DLM_IFL_USER;
4068 ua->old_mode = DLM_LOCK_IV; 4285 ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4094 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); 4311 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
4095 spin_unlock(&ua->proc->locks_spin); 4312 spin_unlock(&ua->proc->locks_spin);
4096 out: 4313 out:
4097 unlock_recovery(ls); 4314 dlm_unlock_recovery(ls);
4098 return error; 4315 return error;
4099} 4316}
4100 4317
4101int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 4318int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4102 int mode, uint32_t flags, uint32_t lkid, char *lvb_in) 4319 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
4320 unsigned long timeout_cs)
4103{ 4321{
4104 struct dlm_lkb *lkb; 4322 struct dlm_lkb *lkb;
4105 struct dlm_args args; 4323 struct dlm_args args;
4106 struct dlm_user_args *ua; 4324 struct dlm_user_args *ua;
4107 int error; 4325 int error;
4108 4326
4109 lock_recovery(ls); 4327 dlm_lock_recovery(ls);
4110 4328
4111 error = find_lkb(ls, lkid, &lkb); 4329 error = find_lkb(ls, lkid, &lkb);
4112 if (error) 4330 if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4127 if (lvb_in && ua->lksb.sb_lvbptr) 4345 if (lvb_in && ua->lksb.sb_lvbptr)
4128 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); 4346 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
4129 4347
4348 ua->xid = ua_tmp->xid;
4130 ua->castparam = ua_tmp->castparam; 4349 ua->castparam = ua_tmp->castparam;
4131 ua->castaddr = ua_tmp->castaddr; 4350 ua->castaddr = ua_tmp->castaddr;
4132 ua->bastparam = ua_tmp->bastparam; 4351 ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4134 ua->user_lksb = ua_tmp->user_lksb; 4353 ua->user_lksb = ua_tmp->user_lksb;
4135 ua->old_mode = lkb->lkb_grmode; 4354 ua->old_mode = lkb->lkb_grmode;
4136 4355
4137 error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST, 4356 error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
4138 ua, DLM_FAKE_USER_AST, &args); 4357 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
4139 if (error) 4358 if (error)
4140 goto out_put; 4359 goto out_put;
4141 4360
4142 error = convert_lock(ls, lkb, &args); 4361 error = convert_lock(ls, lkb, &args);
4143 4362
4144 if (error == -EINPROGRESS || error == -EAGAIN) 4363 if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
4145 error = 0; 4364 error = 0;
4146 out_put: 4365 out_put:
4147 dlm_put_lkb(lkb); 4366 dlm_put_lkb(lkb);
4148 out: 4367 out:
4149 unlock_recovery(ls); 4368 dlm_unlock_recovery(ls);
4150 kfree(ua_tmp); 4369 kfree(ua_tmp);
4151 return error; 4370 return error;
4152} 4371}
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4159 struct dlm_user_args *ua; 4378 struct dlm_user_args *ua;
4160 int error; 4379 int error;
4161 4380
4162 lock_recovery(ls); 4381 dlm_lock_recovery(ls);
4163 4382
4164 error = find_lkb(ls, lkid, &lkb); 4383 error = find_lkb(ls, lkid, &lkb);
4165 if (error) 4384 if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4194 out_put: 4413 out_put:
4195 dlm_put_lkb(lkb); 4414 dlm_put_lkb(lkb);
4196 out: 4415 out:
4197 unlock_recovery(ls); 4416 dlm_unlock_recovery(ls);
4198 kfree(ua_tmp); 4417 kfree(ua_tmp);
4199 return error; 4418 return error;
4200} 4419}
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4207 struct dlm_user_args *ua; 4426 struct dlm_user_args *ua;
4208 int error; 4427 int error;
4209 4428
4210 lock_recovery(ls); 4429 dlm_lock_recovery(ls);
4211 4430
4212 error = find_lkb(ls, lkid, &lkb); 4431 error = find_lkb(ls, lkid, &lkb);
4213 if (error) 4432 if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4231 out_put: 4450 out_put:
4232 dlm_put_lkb(lkb); 4451 dlm_put_lkb(lkb);
4233 out: 4452 out:
4234 unlock_recovery(ls); 4453 dlm_unlock_recovery(ls);
4235 kfree(ua_tmp); 4454 kfree(ua_tmp);
4236 return error; 4455 return error;
4237} 4456}
4238 4457
4458int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
4459{
4460 struct dlm_lkb *lkb;
4461 struct dlm_args args;
4462 struct dlm_user_args *ua;
4463 struct dlm_rsb *r;
4464 int error;
4465
4466 dlm_lock_recovery(ls);
4467
4468 error = find_lkb(ls, lkid, &lkb);
4469 if (error)
4470 goto out;
4471
4472 ua = (struct dlm_user_args *)lkb->lkb_astparam;
4473
4474 error = set_unlock_args(flags, ua, &args);
4475 if (error)
4476 goto out_put;
4477
4478 /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
4479
4480 r = lkb->lkb_resource;
4481 hold_rsb(r);
4482 lock_rsb(r);
4483
4484 error = validate_unlock_args(lkb, &args);
4485 if (error)
4486 goto out_r;
4487 lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
4488
4489 error = _cancel_lock(r, lkb);
4490 out_r:
4491 unlock_rsb(r);
4492 put_rsb(r);
4493
4494 if (error == -DLM_ECANCEL)
4495 error = 0;
4496 /* from validate_unlock_args() */
4497 if (error == -EBUSY)
4498 error = 0;
4499 out_put:
4500 dlm_put_lkb(lkb);
4501 out:
4502 dlm_unlock_recovery(ls);
4503 return error;
4504}
4505
4239/* lkb's that are removed from the waiters list by revert are just left on the 4506/* lkb's that are removed from the waiters list by revert are just left on the
4240 orphans list with the granted orphan locks, to be freed by purge */ 4507 orphans list with the granted orphan locks, to be freed by purge */
4241 4508
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4314{ 4581{
4315 struct dlm_lkb *lkb, *safe; 4582 struct dlm_lkb *lkb, *safe;
4316 4583
4317 lock_recovery(ls); 4584 dlm_lock_recovery(ls);
4318 4585
4319 while (1) { 4586 while (1) {
4320 lkb = del_proc_lock(ls, proc); 4587 lkb = del_proc_lock(ls, proc);
4321 if (!lkb) 4588 if (!lkb)
4322 break; 4589 break;
4590 del_timeout(lkb);
4323 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) 4591 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
4324 orphan_proc_lock(ls, lkb); 4592 orphan_proc_lock(ls, lkb);
4325 else 4593 else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4347 } 4615 }
4348 4616
4349 mutex_unlock(&ls->ls_clear_proc_locks); 4617 mutex_unlock(&ls->ls_clear_proc_locks);
4350 unlock_recovery(ls); 4618 dlm_unlock_recovery(ls);
4351} 4619}
4352 4620
4353static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) 4621static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
4429 if (nodeid != dlm_our_nodeid()) { 4697 if (nodeid != dlm_our_nodeid()) {
4430 error = send_purge(ls, nodeid, pid); 4698 error = send_purge(ls, nodeid, pid);
4431 } else { 4699 } else {
4432 lock_recovery(ls); 4700 dlm_lock_recovery(ls);
4433 if (pid == current->pid) 4701 if (pid == current->pid)
4434 purge_proc_locks(ls, proc); 4702 purge_proc_locks(ls, proc);
4435 else 4703 else
4436 do_purge(ls, nodeid, pid); 4704 do_purge(ls, nodeid, pid);
4437 unlock_recovery(ls); 4705 dlm_unlock_recovery(ls);
4438 } 4706 }
4439 return error; 4707 return error;
4440} 4708}
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec40668..1720313c22df 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
24void dlm_hold_rsb(struct dlm_rsb *r); 24void dlm_hold_rsb(struct dlm_rsb *r);
25int dlm_put_lkb(struct dlm_lkb *lkb); 25int dlm_put_lkb(struct dlm_lkb *lkb);
26void dlm_scan_rsbs(struct dlm_ls *ls); 26void dlm_scan_rsbs(struct dlm_ls *ls);
27int dlm_lock_recovery_try(struct dlm_ls *ls);
28void dlm_unlock_recovery(struct dlm_ls *ls);
29void dlm_scan_timeout(struct dlm_ls *ls);
30void dlm_adjust_timeouts(struct dlm_ls *ls);
27 31
28int dlm_purge_locks(struct dlm_ls *ls); 32int dlm_purge_locks(struct dlm_ls *ls);
29void dlm_purge_mstcpy_locks(struct dlm_rsb *r); 33void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
34int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); 38int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
35 39
36int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, 40int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
37 uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid); 41 uint32_t flags, void *name, unsigned int namelen,
42 unsigned long timeout_cs);
38int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 43int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
39 int mode, uint32_t flags, uint32_t lkid, char *lvb_in); 44 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
45 unsigned long timeout_cs);
40int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 46int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
41 uint32_t flags, uint32_t lkid, char *lvb_in); 47 uint32_t flags, uint32_t lkid, char *lvb_in);
42int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 48int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
43 uint32_t flags, uint32_t lkid); 49 uint32_t flags, uint32_t lkid);
44int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, 50int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
45 int nodeid, int pid); 51 int nodeid, int pid);
52int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
46void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); 53void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
47 54
48static inline int is_master(struct dlm_rsb *r) 55static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed4..1dc72105ab12 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
197 else 197 else
198 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); 198 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
199 199
200 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
201
202 /* dlm_controld will see the uevent, do the necessary group management
203 and then write to sysfs to wake us */
204
200 error = wait_event_interruptible(ls->ls_uevent_wait, 205 error = wait_event_interruptible(ls->ls_uevent_wait,
201 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); 206 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
207
208 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
209
202 if (error) 210 if (error)
203 goto out; 211 goto out;
204 212
205 error = ls->ls_uevent_result; 213 error = ls->ls_uevent_result;
206 out: 214 out:
215 if (error)
216 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
217 error, ls->ls_uevent_result);
207 return error; 218 return error;
208} 219}
209 220
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
234 struct dlm_ls *ls; 245 struct dlm_ls *ls;
235 246
236 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
237 list_for_each_entry(ls, &lslist, ls_list) 248 list_for_each_entry(ls, &lslist, ls_list) {
238 dlm_scan_rsbs(ls); 249 if (dlm_lock_recovery_try(ls)) {
250 dlm_scan_rsbs(ls);
251 dlm_scan_timeout(ls);
252 dlm_unlock_recovery(ls);
253 }
254 }
239 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); 255 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
240 } 256 }
241 return 0; 257 return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
395{ 411{
396 struct dlm_ls *ls; 412 struct dlm_ls *ls;
397 int i, size, error = -ENOMEM; 413 int i, size, error = -ENOMEM;
414 int do_unreg = 0;
398 415
399 if (namelen > DLM_LOCKSPACE_LEN) 416 if (namelen > DLM_LOCKSPACE_LEN)
400 return -EINVAL; 417 return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
417 goto out; 434 goto out;
418 memcpy(ls->ls_name, name, namelen); 435 memcpy(ls->ls_name, name, namelen);
419 ls->ls_namelen = namelen; 436 ls->ls_namelen = namelen;
420 ls->ls_exflags = flags;
421 ls->ls_lvblen = lvblen; 437 ls->ls_lvblen = lvblen;
422 ls->ls_count = 0; 438 ls->ls_count = 0;
423 ls->ls_flags = 0; 439 ls->ls_flags = 0;
424 440
441 if (flags & DLM_LSFL_TIMEWARN)
442 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
443
444 if (flags & DLM_LSFL_FS)
445 ls->ls_allocation = GFP_NOFS;
446 else
447 ls->ls_allocation = GFP_KERNEL;
448
449 /* ls_exflags are forced to match among nodes, and we don't
450 need to require all nodes to have TIMEWARN or FS set */
451 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
452
425 size = dlm_config.ci_rsbtbl_size; 453 size = dlm_config.ci_rsbtbl_size;
426 ls->ls_rsbtbl_size = size; 454 ls->ls_rsbtbl_size = size;
427 455
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
461 mutex_init(&ls->ls_waiters_mutex); 489 mutex_init(&ls->ls_waiters_mutex);
462 INIT_LIST_HEAD(&ls->ls_orphans); 490 INIT_LIST_HEAD(&ls->ls_orphans);
463 mutex_init(&ls->ls_orphans_mutex); 491 mutex_init(&ls->ls_orphans_mutex);
492 INIT_LIST_HEAD(&ls->ls_timeout);
493 mutex_init(&ls->ls_timeout_mutex);
464 494
465 INIT_LIST_HEAD(&ls->ls_nodes); 495 INIT_LIST_HEAD(&ls->ls_nodes);
466 INIT_LIST_HEAD(&ls->ls_nodes_gone); 496 INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
477 507
478 init_waitqueue_head(&ls->ls_uevent_wait); 508 init_waitqueue_head(&ls->ls_uevent_wait);
479 ls->ls_uevent_result = 0; 509 ls->ls_uevent_result = 0;
510 init_completion(&ls->ls_members_done);
511 ls->ls_members_result = -1;
480 512
481 ls->ls_recoverd_task = NULL; 513 ls->ls_recoverd_task = NULL;
482 mutex_init(&ls->ls_recoverd_active); 514 mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
513 error = dlm_recoverd_start(ls); 545 error = dlm_recoverd_start(ls);
514 if (error) { 546 if (error) {
515 log_error(ls, "can't start dlm_recoverd %d", error); 547 log_error(ls, "can't start dlm_recoverd %d", error);
516 goto out_rcomfree; 548 goto out_delist;
517 } 549 }
518 550
519 dlm_create_debug_file(ls);
520
521 error = kobject_setup(ls); 551 error = kobject_setup(ls);
522 if (error) 552 if (error)
523 goto out_del; 553 goto out_stop;
524 554
525 error = kobject_register(&ls->ls_kobj); 555 error = kobject_register(&ls->ls_kobj);
526 if (error) 556 if (error)
527 goto out_del; 557 goto out_stop;
558
559 /* let kobject handle freeing of ls if there's an error */
560 do_unreg = 1;
561
562 /* This uevent triggers dlm_controld in userspace to add us to the
563 group of nodes that are members of this lockspace (managed by the
564 cluster infrastructure.) Once it's done that, it tells us who the
565 current lockspace members are (via configfs) and then tells the
566 lockspace to start running (via sysfs) in dlm_ls_start(). */
528 567
529 error = do_uevent(ls, 1); 568 error = do_uevent(ls, 1);
530 if (error) 569 if (error)
531 goto out_unreg; 570 goto out_stop;
571
572 wait_for_completion(&ls->ls_members_done);
573 error = ls->ls_members_result;
574 if (error)
575 goto out_members;
576
577 dlm_create_debug_file(ls);
578
579 log_debug(ls, "join complete");
532 580
533 *lockspace = ls; 581 *lockspace = ls;
534 return 0; 582 return 0;
535 583
536 out_unreg: 584 out_members:
537 kobject_unregister(&ls->ls_kobj); 585 do_uevent(ls, 0);
538 out_del: 586 dlm_clear_members(ls);
539 dlm_delete_debug_file(ls); 587 kfree(ls->ls_node_array);
588 out_stop:
540 dlm_recoverd_stop(ls); 589 dlm_recoverd_stop(ls);
541 out_rcomfree: 590 out_delist:
542 spin_lock(&lslist_lock); 591 spin_lock(&lslist_lock);
543 list_del(&ls->ls_list); 592 list_del(&ls->ls_list);
544 spin_unlock(&lslist_lock); 593 spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
550 out_rsbfree: 599 out_rsbfree:
551 kfree(ls->ls_rsbtbl); 600 kfree(ls->ls_rsbtbl);
552 out_lsfree: 601 out_lsfree:
553 kfree(ls); 602 if (do_unreg)
603 kobject_unregister(&ls->ls_kobj);
604 else
605 kfree(ls);
554 out: 606 out:
555 module_put(THIS_MODULE); 607 module_put(THIS_MODULE);
556 return error; 608 return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
570 error = new_lockspace(name, namelen, lockspace, flags, lvblen); 622 error = new_lockspace(name, namelen, lockspace, flags, lvblen);
571 if (!error) 623 if (!error)
572 ls_count++; 624 ls_count++;
625 else if (!ls_count)
626 threads_stop();
573 out: 627 out:
574 mutex_unlock(&ls_lock); 628 mutex_unlock(&ls_lock);
575 return error; 629 return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
696 dlm_clear_members_gone(ls); 750 dlm_clear_members_gone(ls);
697 kfree(ls->ls_node_array); 751 kfree(ls->ls_node_array);
698 kobject_unregister(&ls->ls_kobj); 752 kobject_unregister(&ls->ls_kobj);
699 /* The ls structure will be freed when the kobject is done with */ 753 /* The ls structure will be freed when the kobject is done with */
700 754
701 mutex_lock(&ls_lock); 755 mutex_lock(&ls_lock);
702 ls_count--; 756 ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29b..0553a6158dcb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
260static void lowcomms_data_ready(struct sock *sk, int count_unused) 260static void lowcomms_data_ready(struct sock *sk, int count_unused)
261{ 261{
262 struct connection *con = sock2con(sk); 262 struct connection *con = sock2con(sk);
263 if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) 263 if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
264 queue_work(recv_workqueue, &con->rwork); 264 queue_work(recv_workqueue, &con->rwork);
265} 265}
266 266
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
268{ 268{
269 struct connection *con = sock2con(sk); 269 struct connection *con = sock2con(sk);
270 270
271 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 271 if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
272 queue_work(send_workqueue, &con->swork); 272 queue_work(send_workqueue, &con->swork);
273} 273}
274 274
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
720 INIT_WORK(&othercon->rwork, process_recv_sockets); 720 INIT_WORK(&othercon->rwork, process_recv_sockets);
721 set_bit(CF_IS_OTHERCON, &othercon->flags); 721 set_bit(CF_IS_OTHERCON, &othercon->flags);
722 newcon->othercon = othercon; 722 newcon->othercon = othercon;
723 othercon->sock = newsock;
724 newsock->sk->sk_user_data = othercon;
725 add_sock(newsock, othercon);
726 addcon = othercon;
727 }
728 else {
729 printk("Extra connection from node %d attempted\n", nodeid);
730 result = -EAGAIN;
731 mutex_unlock(&newcon->sock_mutex);
732 goto accept_err;
723 } 733 }
724 othercon->sock = newsock;
725 newsock->sk->sk_user_data = othercon;
726 add_sock(newsock, othercon);
727 addcon = othercon;
728 } 734 }
729 else { 735 else {
730 newsock->sk->sk_user_data = newcon; 736 newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
1400 down(&connections_lock); 1406 down(&connections_lock);
1401 for (i = 0; i <= max_nodeid; i++) { 1407 for (i = 0; i <= max_nodeid; i++) {
1402 con = __nodeid2con(i, 0); 1408 con = __nodeid2con(i, 0);
1403 if (con) 1409 if (con) {
1404 con->flags |= 0xFF; 1410 con->flags |= 0xFF;
1411 if (con->sock)
1412 con->sock->sk->sk_user_data = NULL;
1413 }
1405 } 1414 }
1406 up(&connections_lock); 1415 up(&connections_lock);
1407 1416
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe5..eca2907f2386 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
25static inline int dlm_register_debugfs(void) { return 0; } 25static inline int dlm_register_debugfs(void) { return 0; }
26static inline void dlm_unregister_debugfs(void) { } 26static inline void dlm_unregister_debugfs(void) { }
27#endif 27#endif
28int dlm_netlink_init(void);
29void dlm_netlink_exit(void);
28 30
29static int __init init_dlm(void) 31static int __init init_dlm(void)
30{ 32{
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
50 if (error) 52 if (error)
51 goto out_debug; 53 goto out_debug;
52 54
55 error = dlm_netlink_init();
56 if (error)
57 goto out_user;
58
53 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); 59 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
54 60
55 return 0; 61 return 0;
56 62
63 out_user:
64 dlm_user_exit();
57 out_debug: 65 out_debug:
58 dlm_unregister_debugfs(); 66 dlm_unregister_debugfs();
59 out_config: 67 out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
68 76
69static void __exit exit_dlm(void) 77static void __exit exit_dlm(void)
70{ 78{
79 dlm_netlink_exit();
71 dlm_user_exit(); 80 dlm_user_exit();
72 dlm_config_exit(); 81 dlm_config_exit();
73 dlm_memory_exit(); 82 dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd740..073599dced2a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
233 *neg_out = neg; 233 *neg_out = neg;
234 234
235 error = ping_members(ls); 235 error = ping_members(ls);
236 if (!error || error == -EPROTO) {
237 /* new_lockspace() may be waiting to know if the config
238 is good or bad */
239 ls->ls_members_result = error;
240 complete(&ls->ls_members_done);
241 }
236 if (error) 242 if (error)
237 goto out; 243 goto out;
238 244
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
284 dlm_recoverd_suspend(ls); 290 dlm_recoverd_suspend(ls);
285 ls->ls_recover_status = 0; 291 ls->ls_recover_status = 0;
286 dlm_recoverd_resume(ls); 292 dlm_recoverd_resume(ls);
293
294 if (!ls->ls_recover_begin)
295 ls->ls_recover_begin = jiffies;
287 return 0; 296 return 0;
288} 297}
289 298
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 000000000000..863b87d0dc71
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <net/genetlink.h>
10#include <linux/dlm.h>
11#include <linux/dlm_netlink.h>
12
13#include "dlm_internal.h"
14
15static uint32_t dlm_nl_seqnum;
16static uint32_t listener_nlpid;
17
18static struct genl_family family = {
19 .id = GENL_ID_GENERATE,
20 .name = DLM_GENL_NAME,
21 .version = DLM_GENL_VERSION,
22};
23
24static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
25{
26 struct sk_buff *skb;
27 void *data;
28
29 skb = genlmsg_new(size, GFP_KERNEL);
30 if (!skb)
31 return -ENOMEM;
32
33 /* add the message headers */
34 data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
35 if (!data) {
36 nlmsg_free(skb);
37 return -EINVAL;
38 }
39
40 *skbp = skb;
41 return 0;
42}
43
44static struct dlm_lock_data *mk_data(struct sk_buff *skb)
45{
46 struct nlattr *ret;
47
48 ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
49 if (!ret)
50 return NULL;
51 return nla_data(ret);
52}
53
54static int send_data(struct sk_buff *skb)
55{
56 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
57 void *data = genlmsg_data(genlhdr);
58 int rv;
59
60 rv = genlmsg_end(skb, data);
61 if (rv < 0) {
62 nlmsg_free(skb);
63 return rv;
64 }
65
66 return genlmsg_unicast(skb, listener_nlpid);
67}
68
69static int user_cmd(struct sk_buff *skb, struct genl_info *info)
70{
71 listener_nlpid = info->snd_pid;
72 printk("user_cmd nlpid %u\n", listener_nlpid);
73 return 0;
74}
75
76static struct genl_ops dlm_nl_ops = {
77 .cmd = DLM_CMD_HELLO,
78 .doit = user_cmd,
79};
80
81int dlm_netlink_init(void)
82{
83 int rv;
84
85 rv = genl_register_family(&family);
86 if (rv)
87 return rv;
88
89 rv = genl_register_ops(&family, &dlm_nl_ops);
90 if (rv < 0)
91 goto err;
92 return 0;
93 err:
94 genl_unregister_family(&family);
95 return rv;
96}
97
98void dlm_netlink_exit(void)
99{
100 genl_unregister_ops(&family, &dlm_nl_ops);
101 genl_unregister_family(&family);
102}
103
104static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
105{
106 struct dlm_rsb *r = lkb->lkb_resource;
107 struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
108
109 memset(data, 0, sizeof(struct dlm_lock_data));
110
111 data->version = DLM_LOCK_DATA_VERSION;
112 data->nodeid = lkb->lkb_nodeid;
113 data->ownpid = lkb->lkb_ownpid;
114 data->id = lkb->lkb_id;
115 data->remid = lkb->lkb_remid;
116 data->status = lkb->lkb_status;
117 data->grmode = lkb->lkb_grmode;
118 data->rqmode = lkb->lkb_rqmode;
119 data->timestamp = lkb->lkb_timestamp;
120 if (ua)
121 data->xid = ua->xid;
122 if (r) {
123 data->lockspace_id = r->res_ls->ls_global_id;
124 data->resource_namelen = r->res_length;
125 memcpy(data->resource_name, r->res_name, r->res_length);
126 }
127}
128
129void dlm_timeout_warn(struct dlm_lkb *lkb)
130{
131 struct dlm_lock_data *data;
132 struct sk_buff *send_skb;
133 size_t size;
134 int rv;
135
136 size = nla_total_size(sizeof(struct dlm_lock_data)) +
137 nla_total_size(0); /* why this? */
138
139 rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
140 if (rv < 0)
141 return;
142
143 data = mk_data(send_skb);
144 if (!data) {
145 nlmsg_free(send_skb);
146 return;
147 }
148
149 fill_data(data, lkb);
150
151 send_data(send_skb);
152}
153
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd6153809..e3a1527cbdbe 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
38 char *mb; 38 char *mb;
39 int mb_len = sizeof(struct dlm_rcom) + len; 39 int mb_len = sizeof(struct dlm_rcom) + len;
40 40
41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); 41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
42 if (!mh) { 42 if (!mh) {
43 log_print("create_rcom to %d type %d len %d ENOBUFS", 43 log_print("create_rcom to %d type %d len %d ENOBUFS",
44 to_nodeid, type, len); 44 to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
90 log_error(ls, "version mismatch: %x nodeid %d: %x", 90 log_error(ls, "version mismatch: %x nodeid %d: %x",
91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, 91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
92 rc->rc_header.h_version); 92 rc->rc_header.h_version);
93 return -EINVAL; 93 return -EPROTO;
94 } 94 }
95 95
96 if (rf->rf_lvblen != ls->ls_lvblen || 96 if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
99 ls->ls_lvblen, ls->ls_exflags, 99 ls->ls_lvblen, ls->ls_exflags,
100 nodeid, rf->rf_lvblen, rf->rf_lsflags); 100 nodeid, rf->rf_lvblen, rf->rf_lsflags);
101 return -EINVAL; 101 return -EPROTO;
102 } 102 }
103 return 0; 103 return 0;
104} 104}
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
386 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
387} 387}
388 388
389static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
390 struct dlm_rcom *rc_in)
390{ 391{
391 struct dlm_rcom *rc; 392 struct dlm_rcom *rc;
392 struct rcom_config *rf; 393 struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
394 char *mb; 395 char *mb;
395 int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); 396 int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
396 397
397 mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb); 398 mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
398 if (!mh) 399 if (!mh)
399 return -ENOBUFS; 400 return -ENOBUFS;
400 memset(mb, 0, mb_len); 401 memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
464 log_print("lockspace %x from %d type %x not found", 465 log_print("lockspace %x from %d type %x not found",
465 hd->h_lockspace, nodeid, rc->rc_type); 466 hd->h_lockspace, nodeid, rc->rc_type);
466 if (rc->rc_type == DLM_RCOM_STATUS) 467 if (rc->rc_type == DLM_RCOM_STATUS)
467 send_ls_not_ready(nodeid, rc); 468 send_ls_not_ready(ls, nodeid, rc);
468 return; 469 return;
469 } 470 }
470 471
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d60249..66575997861c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
190 190
191 dlm_clear_members_gone(ls); 191 dlm_clear_members_gone(ls);
192 192
193 dlm_adjust_timeouts(ls);
194
193 error = enable_locking(ls, rv->seq); 195 error = enable_locking(ls, rv->seq);
194 if (error) { 196 if (error) {
195 log_debug(ls, "enable_locking failed %d", error); 197 log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a7..6438941ab1f8 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
33struct dlm_lock_params32 { 33struct dlm_lock_params32 {
34 __u8 mode; 34 __u8 mode;
35 __u8 namelen; 35 __u8 namelen;
36 __u16 flags; 36 __u16 unused;
37 __u32 flags;
37 __u32 lkid; 38 __u32 lkid;
38 __u32 parent; 39 __u32 parent;
39 40 __u64 xid;
41 __u64 timeout;
40 __u32 castparam; 42 __u32 castparam;
41 __u32 castaddr; 43 __u32 castaddr;
42 __u32 bastparam; 44 __u32 bastparam;
43 __u32 bastaddr; 45 __u32 bastaddr;
44 __u32 lksb; 46 __u32 lksb;
45
46 char lvb[DLM_USER_LVB_LEN]; 47 char lvb[DLM_USER_LVB_LEN];
47 char name[0]; 48 char name[0];
48}; 49};
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
68}; 69};
69 70
70struct dlm_lock_result32 { 71struct dlm_lock_result32 {
72 __u32 version[3];
71 __u32 length; 73 __u32 length;
72 __u32 user_astaddr; 74 __u32 user_astaddr;
73 __u32 user_astparam; 75 __u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
102 kb->i.lock.flags = kb32->i.lock.flags; 104 kb->i.lock.flags = kb32->i.lock.flags;
103 kb->i.lock.lkid = kb32->i.lock.lkid; 105 kb->i.lock.lkid = kb32->i.lock.lkid;
104 kb->i.lock.parent = kb32->i.lock.parent; 106 kb->i.lock.parent = kb32->i.lock.parent;
107 kb->i.lock.xid = kb32->i.lock.xid;
108 kb->i.lock.timeout = kb32->i.lock.timeout;
105 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; 109 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
106 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; 110 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
107 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; 111 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
115static void compat_output(struct dlm_lock_result *res, 119static void compat_output(struct dlm_lock_result *res,
116 struct dlm_lock_result32 *res32) 120 struct dlm_lock_result32 *res32)
117{ 121{
122 res32->version[0] = res->version[0];
123 res32->version[1] = res->version[1];
124 res32->version[2] = res->version[2];
125
118 res32->user_astaddr = (__u32)(long)res->user_astaddr; 126 res32->user_astaddr = (__u32)(long)res->user_astaddr;
119 res32->user_astparam = (__u32)(long)res->user_astparam; 127 res32->user_astparam = (__u32)(long)res->user_astparam;
120 res32->user_lksb = (__u32)(long)res->user_lksb; 128 res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
130} 138}
131#endif 139#endif
132 140
141/* Figure out if this lock is at the end of its life and no longer
142 available for the application to use. The lkb still exists until
143 the final ast is read. A lock becomes EOL in three situations:
144 1. a noqueue request fails with EAGAIN
145 2. an unlock completes with EUNLOCK
146 3. a cancel of a waiting request completes with ECANCEL/EDEADLK
147 An EOL lock needs to be removed from the process's list of locks.
148 And we can't allow any new operation on an EOL lock. This is
149 not related to the lifetime of the lkb struct which is managed
150 entirely by refcount. */
151
152static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
153{
154 switch (sb_status) {
155 case -DLM_EUNLOCK:
156 return 1;
157 case -DLM_ECANCEL:
158 case -ETIMEDOUT:
159 case -EDEADLK:
160 if (lkb->lkb_grmode == DLM_LOCK_IV)
161 return 1;
162 break;
163 case -EAGAIN:
164 if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
165 return 1;
166 break;
167 }
168 return 0;
169}
170
133/* we could possibly check if the cancel of an orphan has resulted in the lkb 171/* we could possibly check if the cancel of an orphan has resulted in the lkb
134 being removed and then remove that lkb from the orphans list and free it */ 172 being removed and then remove that lkb from the orphans list and free it */
135 173
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
176 log_debug(ls, "ast overlap %x status %x %x", 214 log_debug(ls, "ast overlap %x status %x %x",
177 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); 215 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
178 216
179 /* Figure out if this lock is at the end of its life and no longer 217 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
180 available for the application to use. The lkb still exists until
181 the final ast is read. A lock becomes EOL in three situations:
182 1. a noqueue request fails with EAGAIN
183 2. an unlock completes with EUNLOCK
184 3. a cancel of a waiting request completes with ECANCEL
185 An EOL lock needs to be removed from the process's list of locks.
186 And we can't allow any new operation on an EOL lock. This is
187 not related to the lifetime of the lkb struct which is managed
188 entirely by refcount. */
189
190 if (type == AST_COMP &&
191 lkb->lkb_grmode == DLM_LOCK_IV &&
192 ua->lksb.sb_status == -EAGAIN)
193 eol = 1;
194 else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
195 (ua->lksb.sb_status == -DLM_ECANCEL &&
196 lkb->lkb_grmode == DLM_LOCK_IV))
197 eol = 1;
198 if (eol) { 218 if (eol) {
199 lkb->lkb_ast_type &= ~AST_BAST; 219 lkb->lkb_ast_type &= ~AST_BAST;
200 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; 220 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
252 ua->castaddr = params->castaddr; 272 ua->castaddr = params->castaddr;
253 ua->bastparam = params->bastparam; 273 ua->bastparam = params->bastparam;
254 ua->bastaddr = params->bastaddr; 274 ua->bastaddr = params->bastaddr;
275 ua->xid = params->xid;
255 276
256 if (params->flags & DLM_LKF_CONVERT) 277 if (params->flags & DLM_LKF_CONVERT)
257 error = dlm_user_convert(ls, ua, 278 error = dlm_user_convert(ls, ua,
258 params->mode, params->flags, 279 params->mode, params->flags,
259 params->lkid, params->lvb); 280 params->lkid, params->lvb,
281 (unsigned long) params->timeout);
260 else { 282 else {
261 error = dlm_user_request(ls, ua, 283 error = dlm_user_request(ls, ua,
262 params->mode, params->flags, 284 params->mode, params->flags,
263 params->name, params->namelen, 285 params->name, params->namelen,
264 params->parent); 286 (unsigned long) params->timeout);
265 if (!error) 287 if (!error)
266 error = ua->lksb.sb_lkid; 288 error = ua->lksb.sb_lkid;
267 } 289 }
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
299 return error; 321 return error;
300} 322}
301 323
324static int device_user_deadlock(struct dlm_user_proc *proc,
325 struct dlm_lock_params *params)
326{
327 struct dlm_ls *ls;
328 int error;
329
330 ls = dlm_find_lockspace_local(proc->lockspace);
331 if (!ls)
332 return -ENOENT;
333
334 error = dlm_user_deadlock(ls, params->flags, params->lkid);
335
336 dlm_put_lockspace(ls);
337 return error;
338}
339
302static int create_misc_device(struct dlm_ls *ls, char *name) 340static int create_misc_device(struct dlm_ls *ls, char *name)
303{ 341{
304 int error, len; 342 int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
348 return -EPERM; 386 return -EPERM;
349 387
350 error = dlm_new_lockspace(params->name, strlen(params->name), 388 error = dlm_new_lockspace(params->name, strlen(params->name),
351 &lockspace, 0, DLM_USER_LVB_LEN); 389 &lockspace, params->flags, DLM_USER_LVB_LEN);
352 if (error) 390 if (error)
353 return error; 391 return error;
354 392
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
524 error = device_user_unlock(proc, &kbuf->i.lock); 562 error = device_user_unlock(proc, &kbuf->i.lock);
525 break; 563 break;
526 564
565 case DLM_USER_DEADLOCK:
566 if (!proc) {
567 log_print("no locking on control device");
568 goto out_sig;
569 }
570 error = device_user_deadlock(proc, &kbuf->i.lock);
571 break;
572
527 case DLM_USER_CREATE_LOCKSPACE: 573 case DLM_USER_CREATE_LOCKSPACE:
528 if (proc) { 574 if (proc) {
529 log_print("create/remove only on control device"); 575 log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
641 int struct_len; 687 int struct_len;
642 688
643 memset(&result, 0, sizeof(struct dlm_lock_result)); 689 memset(&result, 0, sizeof(struct dlm_lock_result));
690 result.version[0] = DLM_DEVICE_VERSION_MAJOR;
691 result.version[1] = DLM_DEVICE_VERSION_MINOR;
692 result.version[2] = DLM_DEVICE_VERSION_PATCH;
644 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); 693 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
645 result.user_lksb = ua->user_lksb; 694 result.user_lksb = ua->user_lksb;
646 695
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
699 return error; 748 return error;
700} 749}
701 750
751static int copy_version_to_user(char __user *buf, size_t count)
752{
753 struct dlm_device_version ver;
754
755 memset(&ver, 0, sizeof(struct dlm_device_version));
756 ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
757 ver.version[1] = DLM_DEVICE_VERSION_MINOR;
758 ver.version[2] = DLM_DEVICE_VERSION_PATCH;
759
760 if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
761 return -EFAULT;
762 return sizeof(struct dlm_device_version);
763}
764
702/* a read returns a single ast described in a struct dlm_lock_result */ 765/* a read returns a single ast described in a struct dlm_lock_result */
703 766
704static ssize_t device_read(struct file *file, char __user *buf, size_t count, 767static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
710 DECLARE_WAITQUEUE(wait, current); 773 DECLARE_WAITQUEUE(wait, current);
711 int error, type=0, bmode=0, removed = 0; 774 int error, type=0, bmode=0, removed = 0;
712 775
776 if (count == sizeof(struct dlm_device_version)) {
777 error = copy_version_to_user(buf, count);
778 return error;
779 }
780
781 if (!proc) {
782 log_print("non-version read from control device %zu", count);
783 return -EINVAL;
784 }
785
713#ifdef CONFIG_COMPAT 786#ifdef CONFIG_COMPAT
714 if (count < sizeof(struct dlm_lock_result32)) 787 if (count < sizeof(struct dlm_lock_result32))
715#else 788#else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
747 } 820 }
748 } 821 }
749 822
750 if (list_empty(&proc->asts)) {
751 spin_unlock(&proc->asts_spin);
752 return -EAGAIN;
753 }
754
755 /* there may be both completion and blocking asts to return for 823 /* there may be both completion and blocking asts to return for
756 the lkb, don't remove lkb from asts list unless no asts remain */ 824 the lkb, don't remove lkb from asts list unless no asts remain */
757 825
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
823static const struct file_operations ctl_device_fops = { 891static const struct file_operations ctl_device_fops = {
824 .open = ctl_device_open, 892 .open = ctl_device_open,
825 .release = ctl_device_close, 893 .release = ctl_device_close,
894 .read = device_read,
826 .write = device_write, 895 .write = device_write,
827 .owner = THIS_MODULE, 896 .owner = THIS_MODULE,
828}; 897};