aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm')
-rw-r--r--fs/dlm/Kconfig18
-rw-r--r--fs/dlm/config.c154
-rw-r--r--fs/dlm/config.h17
-rw-r--r--fs/dlm/debug_fs.c4
-rw-r--r--fs/dlm/dlm_internal.h20
-rw-r--r--fs/dlm/lock.c87
-rw-r--r--fs/dlm/lockspace.c10
-rw-r--r--fs/dlm/lowcomms-sctp.c151
-rw-r--r--fs/dlm/lowcomms-tcp.c384
-rw-r--r--fs/dlm/memory.c4
-rw-r--r--fs/dlm/midcomms.c4
-rw-r--r--fs/dlm/rcom.c85
-rw-r--r--fs/dlm/recover.c8
-rw-r--r--fs/dlm/recoverd.c22
-rw-r--r--fs/dlm/user.c15
-rw-r--r--fs/dlm/util.c4
16 files changed, 499 insertions, 488 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fef..6fa7b0d5c043 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
3 3
4config DLM 4config DLM
5 tristate "Distributed Lock Manager (DLM)" 5 tristate "Distributed Lock Manager (DLM)"
6 depends on IPV6 || IPV6=n 6 depends on SYSFS && (IPV6 || IPV6=n)
7 select CONFIGFS_FS 7 select CONFIGFS_FS
8 select IP_SCTP if DLM_SCTP 8 select IP_SCTP if DLM_SCTP
9 help 9 help
10 A general purpose distributed lock manager for kernel or userspace 10 A general purpose distributed lock manager for kernel or userspace
11 applications. 11 applications.
12 12
13choice 13choice
14 prompt "Select DLM communications protocol" 14 prompt "Select DLM communications protocol"
15 depends on DLM 15 depends on DLM
16 default DLM_TCP 16 default DLM_TCP
17 help 17 help
18 The DLM Can use TCP or SCTP for it's network communications. 18 The DLM Can use TCP or SCTP for it's network communications.
19 SCTP supports multi-homed operations whereas TCP doesn't. 19 SCTP supports multi-homed operations whereas TCP doesn't.
20 However, SCTP seems to have stability problems at the moment. 20 However, SCTP seems to have stability problems at the moment.
21 21
22config DLM_TCP 22config DLM_TCP
23 bool "TCP/IP" 23 bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
31 bool "DLM debugging" 31 bool "DLM debugging"
32 depends on DLM 32 depends on DLM
33 help 33 help
34 Under the debugfs mount point, the name of each lockspace will 34 Under the debugfs mount point, the name of each lockspace will
35 appear as a file in the "dlm" directory. The output is the 35 appear as a file in the "dlm" directory. The output is the
36 list of resource and locks the local node knows about. 36 list of resource and locks the local node knows about.
37 37
38endmenu 38endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbfa..8665c88e5af2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
54static void drop_node(struct config_group *, struct config_item *); 54static void drop_node(struct config_group *, struct config_item *);
55static void release_node(struct config_item *); 55static void release_node(struct config_item *);
56 56
57static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
58 char *buf);
59static ssize_t store_cluster(struct config_item *i,
60 struct configfs_attribute *a,
61 const char *buf, size_t len);
57static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, 62static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
58 char *buf); 63 char *buf);
59static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, 64static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
73static ssize_t node_weight_read(struct node *nd, char *buf); 78static ssize_t node_weight_read(struct node *nd, char *buf);
74static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); 79static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
75 80
81struct cluster {
82 struct config_group group;
83 unsigned int cl_tcp_port;
84 unsigned int cl_buffer_size;
85 unsigned int cl_rsbtbl_size;
86 unsigned int cl_lkbtbl_size;
87 unsigned int cl_dirtbl_size;
88 unsigned int cl_recover_timer;
89 unsigned int cl_toss_secs;
90 unsigned int cl_scan_secs;
91 unsigned int cl_log_debug;
92};
93
94enum {
95 CLUSTER_ATTR_TCP_PORT = 0,
96 CLUSTER_ATTR_BUFFER_SIZE,
97 CLUSTER_ATTR_RSBTBL_SIZE,
98 CLUSTER_ATTR_LKBTBL_SIZE,
99 CLUSTER_ATTR_DIRTBL_SIZE,
100 CLUSTER_ATTR_RECOVER_TIMER,
101 CLUSTER_ATTR_TOSS_SECS,
102 CLUSTER_ATTR_SCAN_SECS,
103 CLUSTER_ATTR_LOG_DEBUG,
104};
105
106struct cluster_attribute {
107 struct configfs_attribute attr;
108 ssize_t (*show)(struct cluster *, char *);
109 ssize_t (*store)(struct cluster *, const char *, size_t);
110};
111
112static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
113 unsigned int *info_field, int check_zero,
114 const char *buf, size_t len)
115{
116 unsigned int x;
117
118 if (!capable(CAP_SYS_ADMIN))
119 return -EACCES;
120
121 x = simple_strtoul(buf, NULL, 0);
122
123 if (check_zero && !x)
124 return -EINVAL;
125
126 *cl_field = x;
127 *info_field = x;
128
129 return len;
130}
131
132#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
133 .attr = { .ca_name = __stringify(_name), \
134 .ca_mode = _mode, \
135 .ca_owner = THIS_MODULE }, \
136 .show = _read, \
137 .store = _write, \
138}
139
140#define CLUSTER_ATTR(name, check_zero) \
141static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
142{ \
143 return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
144 check_zero, buf, len); \
145} \
146static ssize_t name##_read(struct cluster *cl, char *buf) \
147{ \
148 return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
149} \
150static struct cluster_attribute cluster_attr_##name = \
151__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
152
153CLUSTER_ATTR(tcp_port, 1);
154CLUSTER_ATTR(buffer_size, 1);
155CLUSTER_ATTR(rsbtbl_size, 1);
156CLUSTER_ATTR(lkbtbl_size, 1);
157CLUSTER_ATTR(dirtbl_size, 1);
158CLUSTER_ATTR(recover_timer, 1);
159CLUSTER_ATTR(toss_secs, 1);
160CLUSTER_ATTR(scan_secs, 1);
161CLUSTER_ATTR(log_debug, 0);
162
163static struct configfs_attribute *cluster_attrs[] = {
164 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
165 [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
166 [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
167 [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
168 [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
169 [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
170 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
171 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
172 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
173 NULL,
174};
175
76enum { 176enum {
77 COMM_ATTR_NODEID = 0, 177 COMM_ATTR_NODEID = 0,
78 COMM_ATTR_LOCAL, 178 COMM_ATTR_LOCAL,
@@ -152,10 +252,6 @@ struct clusters {
152 struct configfs_subsystem subsys; 252 struct configfs_subsystem subsys;
153}; 253};
154 254
155struct cluster {
156 struct config_group group;
157};
158
159struct spaces { 255struct spaces {
160 struct config_group ss_group; 256 struct config_group ss_group;
161}; 257};
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
197 293
198static struct configfs_item_operations cluster_ops = { 294static struct configfs_item_operations cluster_ops = {
199 .release = release_cluster, 295 .release = release_cluster,
296 .show_attribute = show_cluster,
297 .store_attribute = store_cluster,
200}; 298};
201 299
202static struct configfs_group_operations spaces_ops = { 300static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
237 335
238static struct config_item_type cluster_type = { 336static struct config_item_type cluster_type = {
239 .ct_item_ops = &cluster_ops, 337 .ct_item_ops = &cluster_ops,
338 .ct_attrs = cluster_attrs,
240 .ct_owner = THIS_MODULE, 339 .ct_owner = THIS_MODULE,
241}; 340};
242 341
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
317 cl->group.default_groups[1] = &cms->cs_group; 416 cl->group.default_groups[1] = &cms->cs_group;
318 cl->group.default_groups[2] = NULL; 417 cl->group.default_groups[2] = NULL;
319 418
419 cl->cl_tcp_port = dlm_config.ci_tcp_port;
420 cl->cl_buffer_size = dlm_config.ci_buffer_size;
421 cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
422 cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
423 cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
424 cl->cl_recover_timer = dlm_config.ci_recover_timer;
425 cl->cl_toss_secs = dlm_config.ci_toss_secs;
426 cl->cl_scan_secs = dlm_config.ci_scan_secs;
427 cl->cl_log_debug = dlm_config.ci_log_debug;
428
320 space_list = &sps->ss_group; 429 space_list = &sps->ss_group;
321 comm_list = &cms->cs_group; 430 comm_list = &cms->cs_group;
322 return &cl->group; 431 return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
509 * Functions for user space to read/write attributes 618 * Functions for user space to read/write attributes
510 */ 619 */
511 620
621static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
622 char *buf)
623{
624 struct cluster *cl = to_cluster(i);
625 struct cluster_attribute *cla =
626 container_of(a, struct cluster_attribute, attr);
627 return cla->show ? cla->show(cl, buf) : 0;
628}
629
630static ssize_t store_cluster(struct config_item *i,
631 struct configfs_attribute *a,
632 const char *buf, size_t len)
633{
634 struct cluster *cl = to_cluster(i);
635 struct cluster_attribute *cla =
636 container_of(a, struct cluster_attribute, attr);
637 return cla->store ? cla->store(cl, buf, len) : -EINVAL;
638}
639
512static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, 640static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
513 char *buf) 641 char *buf)
514{ 642{
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
775#define DEFAULT_RECOVER_TIMER 5 903#define DEFAULT_RECOVER_TIMER 5
776#define DEFAULT_TOSS_SECS 10 904#define DEFAULT_TOSS_SECS 10
777#define DEFAULT_SCAN_SECS 5 905#define DEFAULT_SCAN_SECS 5
906#define DEFAULT_LOG_DEBUG 0
778 907
779struct dlm_config_info dlm_config = { 908struct dlm_config_info dlm_config = {
780 .tcp_port = DEFAULT_TCP_PORT, 909 .ci_tcp_port = DEFAULT_TCP_PORT,
781 .buffer_size = DEFAULT_BUFFER_SIZE, 910 .ci_buffer_size = DEFAULT_BUFFER_SIZE,
782 .rsbtbl_size = DEFAULT_RSBTBL_SIZE, 911 .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
783 .lkbtbl_size = DEFAULT_LKBTBL_SIZE, 912 .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
784 .dirtbl_size = DEFAULT_DIRTBL_SIZE, 913 .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
785 .recover_timer = DEFAULT_RECOVER_TIMER, 914 .ci_recover_timer = DEFAULT_RECOVER_TIMER,
786 .toss_secs = DEFAULT_TOSS_SECS, 915 .ci_toss_secs = DEFAULT_TOSS_SECS,
787 .scan_secs = DEFAULT_SCAN_SECS 916 .ci_scan_secs = DEFAULT_SCAN_SECS,
917 .ci_log_debug = DEFAULT_LOG_DEBUG
788}; 918};
789 919
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a9..1e978611a96e 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
17#define DLM_MAX_ADDR_COUNT 3 17#define DLM_MAX_ADDR_COUNT 3
18 18
19struct dlm_config_info { 19struct dlm_config_info {
20 int tcp_port; 20 int ci_tcp_port;
21 int buffer_size; 21 int ci_buffer_size;
22 int rsbtbl_size; 22 int ci_rsbtbl_size;
23 int lkbtbl_size; 23 int ci_lkbtbl_size;
24 int dirtbl_size; 24 int ci_dirtbl_size;
25 int recover_timer; 25 int ci_recover_timer;
26 int toss_secs; 26 int ci_toss_secs;
27 int scan_secs; 27 int ci_scan_secs;
28 int ci_log_debug;
28}; 29};
29 30
30extern struct dlm_config_info dlm_config; 31extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index ca94a837a5bb..61ba670b9e02 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -287,7 +287,7 @@ static int rsb_open(struct inode *inode, struct file *file)
287 return 0; 287 return 0;
288} 288}
289 289
290static struct file_operations rsb_fops = { 290static const struct file_operations rsb_fops = {
291 .owner = THIS_MODULE, 291 .owner = THIS_MODULE,
292 .open = rsb_open, 292 .open = rsb_open,
293 .read = seq_read, 293 .read = seq_read,
@@ -331,7 +331,7 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf,
331 return rv; 331 return rv;
332} 332}
333 333
334static struct file_operations waiters_fops = { 334static const struct file_operations waiters_fops = {
335 .owner = THIS_MODULE, 335 .owner = THIS_MODULE,
336 .open = waiters_open, 336 .open = waiters_open,
337 .read = waiters_read 337 .read = waiters_read
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc0..61d93201e1b2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42 42
43#include <linux/dlm.h> 43#include <linux/dlm.h>
44#include "config.h"
44 45
45#define DLM_LOCKSPACE_LEN 64 46#define DLM_LOCKSPACE_LEN 64
46 47
@@ -69,12 +70,12 @@ struct dlm_mhandle;
69#define log_error(ls, fmt, args...) \ 70#define log_error(ls, fmt, args...) \
70 printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) 71 printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
71 72
72#define DLM_LOG_DEBUG 73#define log_debug(ls, fmt, args...) \
73#ifdef DLM_LOG_DEBUG 74do { \
74#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) 75 if (dlm_config.ci_log_debug) \
75#else 76 printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
76#define log_debug(ls, fmt, args...) 77 (ls)->ls_name , ##args); \
77#endif 78} while (0)
78 79
79#define DLM_ASSERT(x, do) \ 80#define DLM_ASSERT(x, do) \
80{ \ 81{ \
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
309 310
310/* dlm_header is first element of all structs sent between nodes */ 311/* dlm_header is first element of all structs sent between nodes */
311 312
312#define DLM_HEADER_MAJOR 0x00020000 313#define DLM_HEADER_MAJOR 0x00030000
313#define DLM_HEADER_MINOR 0x00000001 314#define DLM_HEADER_MINOR 0x00000000
314 315
315#define DLM_MSG 1 316#define DLM_MSG 1
316#define DLM_RCOM 2 317#define DLM_RCOM 2
@@ -386,6 +387,8 @@ struct dlm_rcom {
386 uint32_t rc_type; /* DLM_RCOM_ */ 387 uint32_t rc_type; /* DLM_RCOM_ */
387 int rc_result; /* multi-purpose */ 388 int rc_result; /* multi-purpose */
388 uint64_t rc_id; /* match reply with request */ 389 uint64_t rc_id; /* match reply with request */
390 uint64_t rc_seq; /* sender's ls_recover_seq */
391 uint64_t rc_seq_reply; /* remote ls_recover_seq */
389 char rc_buf[0]; 392 char rc_buf[0];
390}; 393};
391 394
@@ -523,6 +526,7 @@ struct dlm_user_proc {
523 spinlock_t asts_spin; 526 spinlock_t asts_spin;
524 struct list_head locks; 527 struct list_head locks;
525 spinlock_t locks_spin; 528 spinlock_t locks_spin;
529 struct list_head unlocking;
526 wait_queue_head_t wait; 530 wait_queue_head_t wait;
527}; 531};
528 532
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb6..e725005fafd0 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
754 mutex_unlock(&ls->ls_waiters_mutex); 754 mutex_unlock(&ls->ls_waiters_mutex);
755} 755}
756 756
757/* We clear the RESEND flag because we might be taking an lkb off the waiters
758 list as part of process_requestqueue (e.g. a lookup that has an optimized
759 request reply on the requestqueue) between dlm_recover_waiters_pre() which
760 set RESEND and dlm_recover_waiters_post() */
761
757static int _remove_from_waiters(struct dlm_lkb *lkb) 762static int _remove_from_waiters(struct dlm_lkb *lkb)
758{ 763{
759 int error = 0; 764 int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
764 goto out; 769 goto out;
765 } 770 }
766 lkb->lkb_wait_type = 0; 771 lkb->lkb_wait_type = 0;
772 lkb->lkb_flags &= ~DLM_IFL_RESEND;
767 list_del(&lkb->lkb_wait_reply); 773 list_del(&lkb->lkb_wait_reply);
768 unhold_lkb(lkb); 774 unhold_lkb(lkb);
769 out: 775 out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
810 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, 816 list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
811 res_hashchain) { 817 res_hashchain) {
812 if (!time_after_eq(jiffies, r->res_toss_time + 818 if (!time_after_eq(jiffies, r->res_toss_time +
813 dlm_config.toss_secs * HZ)) 819 dlm_config.ci_toss_secs * HZ))
814 continue; 820 continue;
815 found = 1; 821 found = 1;
816 break; 822 break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
2144 if (lkb->lkb_astaddr) 2150 if (lkb->lkb_astaddr)
2145 ms->m_asts |= AST_COMP; 2151 ms->m_asts |= AST_COMP;
2146 2152
2147 if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) 2153 /* compare with switch in create_message; send_remove() doesn't
2148 memcpy(ms->m_extra, r->res_name, r->res_length); 2154 use send_args() */
2149 2155
2150 else if (lkb->lkb_lvbptr) 2156 switch (ms->m_type) {
2157 case DLM_MSG_REQUEST:
2158 case DLM_MSG_LOOKUP:
2159 memcpy(ms->m_extra, r->res_name, r->res_length);
2160 break;
2161 case DLM_MSG_CONVERT:
2162 case DLM_MSG_UNLOCK:
2163 case DLM_MSG_REQUEST_REPLY:
2164 case DLM_MSG_CONVERT_REPLY:
2165 case DLM_MSG_GRANT:
2166 if (!lkb->lkb_lvbptr)
2167 break;
2151 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); 2168 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
2152 2169 break;
2170 }
2153} 2171}
2154 2172
2155static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) 2173static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2418 2436
2419 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); 2437 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
2420 2438
2421 if (receive_lvb(ls, lkb, ms)) 2439 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
2422 return -ENOMEM; 2440 /* lkb was just created so there won't be an lvb yet */
2441 lkb->lkb_lvbptr = allocate_lvb(ls);
2442 if (!lkb->lkb_lvbptr)
2443 return -ENOMEM;
2444 }
2423 2445
2424 return 0; 2446 return 0;
2425} 2447}
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3002{ 3024{
3003 struct dlm_message *ms = (struct dlm_message *) hd; 3025 struct dlm_message *ms = (struct dlm_message *) hd;
3004 struct dlm_ls *ls; 3026 struct dlm_ls *ls;
3005 int error; 3027 int error = 0;
3006 3028
3007 if (!recovery) 3029 if (!recovery)
3008 dlm_message_in(ms); 3030 dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3119 out: 3141 out:
3120 dlm_put_lockspace(ls); 3142 dlm_put_lockspace(ls);
3121 dlm_astd_wake(); 3143 dlm_astd_wake();
3122 return 0; 3144 return error;
3123} 3145}
3124 3146
3125 3147
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3132 if (middle_conversion(lkb)) { 3154 if (middle_conversion(lkb)) {
3133 hold_lkb(lkb); 3155 hold_lkb(lkb);
3134 ls->ls_stub_ms.m_result = -EINPROGRESS; 3156 ls->ls_stub_ms.m_result = -EINPROGRESS;
3157 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3135 _remove_from_waiters(lkb); 3158 _remove_from_waiters(lkb);
3136 _receive_convert_reply(lkb, &ls->ls_stub_ms); 3159 _receive_convert_reply(lkb, &ls->ls_stub_ms);
3137 3160
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3205 case DLM_MSG_UNLOCK: 3228 case DLM_MSG_UNLOCK:
3206 hold_lkb(lkb); 3229 hold_lkb(lkb);
3207 ls->ls_stub_ms.m_result = -DLM_EUNLOCK; 3230 ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
3231 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3208 _remove_from_waiters(lkb); 3232 _remove_from_waiters(lkb);
3209 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 3233 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
3210 dlm_put_lkb(lkb); 3234 dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3213 case DLM_MSG_CANCEL: 3237 case DLM_MSG_CANCEL:
3214 hold_lkb(lkb); 3238 hold_lkb(lkb);
3215 ls->ls_stub_ms.m_result = -DLM_ECANCEL; 3239 ls->ls_stub_ms.m_result = -DLM_ECANCEL;
3240 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3216 _remove_from_waiters(lkb); 3241 _remove_from_waiters(lkb);
3217 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 3242 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
3218 dlm_put_lkb(lkb); 3243 dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3571 lock_rsb(r); 3596 lock_rsb(r);
3572 3597
3573 switch (error) { 3598 switch (error) {
3599 case -EBADR:
3600 /* There's a chance the new master received our lock before
3601 dlm_recover_master_reply(), this wouldn't happen if we did
3602 a barrier between recover_masters and recover_locks. */
3603 log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
3604 (unsigned long)r, r->res_name);
3605 dlm_send_rcom_lock(r, lkb);
3606 goto out;
3574 case -EEXIST: 3607 case -EEXIST:
3575 log_debug(ls, "master copy exists %x", lkb->lkb_id); 3608 log_debug(ls, "master copy exists %x", lkb->lkb_id);
3576 /* fall through */ 3609 /* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3585 /* an ack for dlm_recover_locks() which waits for replies from 3618 /* an ack for dlm_recover_locks() which waits for replies from
3586 all the locks it sends to new masters */ 3619 all the locks it sends to new masters */
3587 dlm_recovered_lock(r); 3620 dlm_recovered_lock(r);
3588 3621 out:
3589 unlock_rsb(r); 3622 unlock_rsb(r);
3590 put_rsb(r); 3623 put_rsb(r);
3591 dlm_put_lkb(lkb); 3624 dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
3610 } 3643 }
3611 3644
3612 if (flags & DLM_LKF_VALBLK) { 3645 if (flags & DLM_LKF_VALBLK) {
3613 ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 3646 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
3614 if (!ua->lksb.sb_lvbptr) { 3647 if (!ua->lksb.sb_lvbptr) {
3615 kfree(ua); 3648 kfree(ua);
3616 __put_lkb(ls, lkb); 3649 __put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3679 ua = (struct dlm_user_args *)lkb->lkb_astparam; 3712 ua = (struct dlm_user_args *)lkb->lkb_astparam;
3680 3713
3681 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { 3714 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
3682 ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 3715 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
3683 if (!ua->lksb.sb_lvbptr) { 3716 if (!ua->lksb.sb_lvbptr) {
3684 error = -ENOMEM; 3717 error = -ENOMEM;
3685 goto out_put; 3718 goto out_put;
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3745 goto out_put; 3778 goto out_put;
3746 3779
3747 spin_lock(&ua->proc->locks_spin); 3780 spin_lock(&ua->proc->locks_spin);
3748 list_del_init(&lkb->lkb_ownqueue); 3781 /* dlm_user_add_ast() may have already taken lkb off the proc list */
3782 if (!list_empty(&lkb->lkb_ownqueue))
3783 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
3749 spin_unlock(&ua->proc->locks_spin); 3784 spin_unlock(&ua->proc->locks_spin);
3750
3751 /* this removes the reference for the proc->locks list added by
3752 dlm_user_request */
3753 unhold_lkb(lkb);
3754 out_put: 3785 out_put:
3755 dlm_put_lkb(lkb); 3786 dlm_put_lkb(lkb);
3756 out: 3787 out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
3790 /* this lkb was removed from the WAITING queue */ 3821 /* this lkb was removed from the WAITING queue */
3791 if (lkb->lkb_grmode == DLM_LOCK_IV) { 3822 if (lkb->lkb_grmode == DLM_LOCK_IV) {
3792 spin_lock(&ua->proc->locks_spin); 3823 spin_lock(&ua->proc->locks_spin);
3793 list_del_init(&lkb->lkb_ownqueue); 3824 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
3794 spin_unlock(&ua->proc->locks_spin); 3825 spin_unlock(&ua->proc->locks_spin);
3795 unhold_lkb(lkb);
3796 } 3826 }
3797 out_put: 3827 out_put:
3798 dlm_put_lkb(lkb); 3828 dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3853 mutex_lock(&ls->ls_clear_proc_locks); 3883 mutex_lock(&ls->ls_clear_proc_locks);
3854 3884
3855 list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { 3885 list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
3856 if (lkb->lkb_ast_type) {
3857 list_del(&lkb->lkb_astqueue);
3858 unhold_lkb(lkb);
3859 }
3860
3861 list_del_init(&lkb->lkb_ownqueue); 3886 list_del_init(&lkb->lkb_ownqueue);
3862 3887
3863 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { 3888 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
3874 3899
3875 dlm_put_lkb(lkb); 3900 dlm_put_lkb(lkb);
3876 } 3901 }
3902
3903 /* in-progress unlocks */
3904 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
3905 list_del_init(&lkb->lkb_ownqueue);
3906 lkb->lkb_flags |= DLM_IFL_DEAD;
3907 dlm_put_lkb(lkb);
3908 }
3909
3910 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
3911 list_del(&lkb->lkb_astqueue);
3912 dlm_put_lkb(lkb);
3913 }
3914
3877 mutex_unlock(&ls->ls_clear_proc_locks); 3915 mutex_unlock(&ls->ls_clear_proc_locks);
3878 unlock_recovery(ls); 3916 unlock_recovery(ls);
3879} 3917}
3918
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8d..f40817b53c6f 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
236 while (!kthread_should_stop()) { 236 while (!kthread_should_stop()) {
237 list_for_each_entry(ls, &lslist, ls_list) 237 list_for_each_entry(ls, &lslist, ls_list)
238 dlm_scan_rsbs(ls); 238 dlm_scan_rsbs(ls);
239 schedule_timeout_interruptible(dlm_config.scan_secs * HZ); 239 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
240 } 240 }
241 return 0; 241 return 0;
242} 242}
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
422 ls->ls_count = 0; 422 ls->ls_count = 0;
423 ls->ls_flags = 0; 423 ls->ls_flags = 0;
424 424
425 size = dlm_config.rsbtbl_size; 425 size = dlm_config.ci_rsbtbl_size;
426 ls->ls_rsbtbl_size = size; 426 ls->ls_rsbtbl_size = size;
427 427
428 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); 428 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
434 rwlock_init(&ls->ls_rsbtbl[i].lock); 434 rwlock_init(&ls->ls_rsbtbl[i].lock);
435 } 435 }
436 436
437 size = dlm_config.lkbtbl_size; 437 size = dlm_config.ci_lkbtbl_size;
438 ls->ls_lkbtbl_size = size; 438 ls->ls_lkbtbl_size = size;
439 439
440 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); 440 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
446 ls->ls_lkbtbl[i].counter = 1; 446 ls->ls_lkbtbl[i].counter = 1;
447 } 447 }
448 448
449 size = dlm_config.dirtbl_size; 449 size = dlm_config.ci_dirtbl_size;
450 ls->ls_dirtbl_size = size; 450 ls->ls_dirtbl_size = size;
451 451
452 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); 452 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
489 mutex_init(&ls->ls_requestqueue_mutex); 489 mutex_init(&ls->ls_requestqueue_mutex);
490 mutex_init(&ls->ls_clear_proc_locks); 490 mutex_init(&ls->ls_clear_proc_locks);
491 491
492 ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL); 492 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
493 if (!ls->ls_recover_buf) 493 if (!ls->ls_recover_buf)
494 goto out_dirfree; 494 goto out_dirfree;
495 495
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a9285..dc83a9d979b5 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
72 struct list_head writequeue; /* outgoing writequeue_entries */ 72 struct list_head writequeue; /* outgoing writequeue_entries */
73 spinlock_t writequeue_lock; 73 spinlock_t writequeue_lock;
74 int nodeid; 74 int nodeid;
75 struct work_struct swork; /* Send workqueue */
76 struct work_struct lwork; /* Locking workqueue */
75}; 77};
76 78
77static DEFINE_IDR(nodeinfo_idr); 79static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
96 atomic_t waiting_requests; 98 atomic_t waiting_requests;
97 struct cbuf cb; 99 struct cbuf cb;
98 int eagain_flag; 100 int eagain_flag;
101 struct work_struct work; /* Send workqueue */
99}; 102};
100 103
101/* An entry waiting to be sent */ 104/* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
137static LIST_HEAD(write_nodes); 140static LIST_HEAD(write_nodes);
138static DEFINE_SPINLOCK(write_nodes_lock); 141static DEFINE_SPINLOCK(write_nodes_lock);
139 142
143
140/* Maximum number of incoming messages to process before 144/* Maximum number of incoming messages to process before
141 * doing a schedule() 145 * doing a schedule()
142 */ 146 */
143#define MAX_RX_MSG_COUNT 25 147#define MAX_RX_MSG_COUNT 25
144 148
145/* Manage daemons */ 149/* Work queues */
146static struct task_struct *recv_task; 150static struct workqueue_struct *recv_workqueue;
147static struct task_struct *send_task; 151static struct workqueue_struct *send_workqueue;
148static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait); 152static struct workqueue_struct *lock_workqueue;
149 153
150/* The SCTP connection */ 154/* The SCTP connection */
151static struct connection sctp_con; 155static struct connection sctp_con;
152 156
157static void process_send_sockets(struct work_struct *work);
158static void process_recv_sockets(struct work_struct *work);
159static void process_lock_request(struct work_struct *work);
153 160
154static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) 161static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
155{ 162{
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
222 spin_lock_init(&ni->lock); 229 spin_lock_init(&ni->lock);
223 INIT_LIST_HEAD(&ni->writequeue); 230 INIT_LIST_HEAD(&ni->writequeue);
224 spin_lock_init(&ni->writequeue_lock); 231 spin_lock_init(&ni->writequeue_lock);
232 INIT_WORK(&ni->lwork, process_lock_request);
233 INIT_WORK(&ni->swork, process_send_sockets);
225 ni->nodeid = nodeid; 234 ni->nodeid = nodeid;
226 235
227 if (nodeid > max_nodeid) 236 if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
249/* Data or notification available on socket */ 258/* Data or notification available on socket */
250static void lowcomms_data_ready(struct sock *sk, int count_unused) 259static void lowcomms_data_ready(struct sock *sk, int count_unused)
251{ 260{
252 atomic_inc(&sctp_con.waiting_requests);
253 if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) 261 if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
254 return; 262 queue_work(recv_workqueue, &sctp_con.work);
255
256 wake_up_interruptible(&lowcomms_recv_wait);
257} 263}
258 264
259 265
@@ -361,10 +367,10 @@ static void init_failed(void)
361 spin_lock_bh(&write_nodes_lock); 367 spin_lock_bh(&write_nodes_lock);
362 list_add_tail(&ni->write_list, &write_nodes); 368 list_add_tail(&ni->write_list, &write_nodes);
363 spin_unlock_bh(&write_nodes_lock); 369 spin_unlock_bh(&write_nodes_lock);
370 queue_work(send_workqueue, &ni->swork);
364 } 371 }
365 } 372 }
366 } 373 }
367 wake_up_process(send_task);
368} 374}
369 375
370/* Something happened to an association */ 376/* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
446 spin_lock_bh(&write_nodes_lock); 452 spin_lock_bh(&write_nodes_lock);
447 list_add_tail(&ni->write_list, &write_nodes); 453 list_add_tail(&ni->write_list, &write_nodes);
448 spin_unlock_bh(&write_nodes_lock); 454 spin_unlock_bh(&write_nodes_lock);
455 queue_work(send_workqueue, &ni->swork);
449 } 456 }
450 wake_up_process(send_task);
451 } 457 }
452 break; 458 break;
453 459
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
580 spin_lock_bh(&write_nodes_lock); 586 spin_lock_bh(&write_nodes_lock);
581 list_add_tail(&ni->write_list, &write_nodes); 587 list_add_tail(&ni->write_list, &write_nodes);
582 spin_unlock_bh(&write_nodes_lock); 588 spin_unlock_bh(&write_nodes_lock);
589 queue_work(send_workqueue, &ni->swork);
583 } 590 }
584 wake_up_process(send_task);
585 } 591 }
586 } 592 }
587 593
@@ -590,6 +596,7 @@ static int receive_from_sock(void)
590 return 0; 596 return 0;
591 597
592 cbuf_add(&sctp_con.cb, ret); 598 cbuf_add(&sctp_con.cb, ret);
599 // PJC: TODO: Add to node's workqueue....can we ??
593 ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), 600 ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
594 page_address(sctp_con.rx_page), 601 page_address(sctp_con.rx_page),
595 sctp_con.cb.base, sctp_con.cb.len, 602 sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
635 642
636 if (result < 0) 643 if (result < 0)
637 log_print("Can't bind to port %d addr number %d", 644 log_print("Can't bind to port %d addr number %d",
638 dlm_config.tcp_port, num); 645 dlm_config.ci_tcp_port, num);
639 646
640 return result; 647 return result;
641} 648}
@@ -711,7 +718,7 @@ static int init_sock(void)
711 /* Bind to all interfaces. */ 718 /* Bind to all interfaces. */
712 for (i = 0; i < dlm_local_count; i++) { 719 for (i = 0; i < dlm_local_count; i++) {
713 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); 720 memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
714 make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len); 721 make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
715 722
716 result = add_bind_addr(&localaddr, addr_len, num); 723 result = add_bind_addr(&localaddr, addr_len, num);
717 if (result) 724 if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
820 spin_lock_bh(&write_nodes_lock); 827 spin_lock_bh(&write_nodes_lock);
821 list_add_tail(&ni->write_list, &write_nodes); 828 list_add_tail(&ni->write_list, &write_nodes);
822 spin_unlock_bh(&write_nodes_lock); 829 spin_unlock_bh(&write_nodes_lock);
823 wake_up_process(send_task); 830
831 queue_work(send_workqueue, &ni->swork);
824 } 832 }
825 return; 833 return;
826 834
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
863 return; 871 return;
864 } 872 }
865 873
866 make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen); 874 make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
867 875
868 outmessage.msg_name = &rem_addr; 876 outmessage.msg_name = &rem_addr;
869 outmessage.msg_namelen = addrlen; 877 outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
1088 return 0; 1096 return 0;
1089} 1097}
1090 1098
1091static int write_list_empty(void) 1099// PJC: The work queue function for receiving.
1100static void process_recv_sockets(struct work_struct *work)
1092{ 1101{
1093 int status; 1102 if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
1094 1103 int ret;
1095 spin_lock_bh(&write_nodes_lock);
1096 status = list_empty(&write_nodes);
1097 spin_unlock_bh(&write_nodes_lock);
1098
1099 return status;
1100}
1101
1102static int dlm_recvd(void *data)
1103{
1104 DECLARE_WAITQUEUE(wait, current);
1105
1106 while (!kthread_should_stop()) {
1107 int count = 0; 1104 int count = 0;
1108 1105
1109 set_current_state(TASK_INTERRUPTIBLE); 1106 do {
1110 add_wait_queue(&lowcomms_recv_wait, &wait); 1107 ret = receive_from_sock();
1111 if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
1112 cond_resched();
1113 remove_wait_queue(&lowcomms_recv_wait, &wait);
1114 set_current_state(TASK_RUNNING);
1115
1116 if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
1117 int ret;
1118
1119 do {
1120 ret = receive_from_sock();
1121 1108
1122 /* Don't starve out everyone else */ 1109 /* Don't starve out everyone else */
1123 if (++count >= MAX_RX_MSG_COUNT) { 1110 if (++count >= MAX_RX_MSG_COUNT) {
1124 cond_resched(); 1111 cond_resched();
1125 count = 0; 1112 count = 0;
1126 } 1113 }
1127 } while (!kthread_should_stop() && ret >=0); 1114 } while (!kthread_should_stop() && ret >=0);
1128 }
1129 cond_resched();
1130 } 1115 }
1131 1116 cond_resched();
1132 return 0;
1133} 1117}
1134 1118
1135static int dlm_sendd(void *data) 1119// PJC: the work queue function for sending
1120static void process_send_sockets(struct work_struct *work)
1136{ 1121{
1137 DECLARE_WAITQUEUE(wait, current); 1122 if (sctp_con.eagain_flag) {
1138 1123 sctp_con.eagain_flag = 0;
1139 add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); 1124 refill_write_queue();
1140
1141 while (!kthread_should_stop()) {
1142 set_current_state(TASK_INTERRUPTIBLE);
1143 if (write_list_empty())
1144 cond_resched();
1145 set_current_state(TASK_RUNNING);
1146
1147 if (sctp_con.eagain_flag) {
1148 sctp_con.eagain_flag = 0;
1149 refill_write_queue();
1150 }
1151 process_output_queue();
1152 } 1125 }
1126 process_output_queue();
1127}
1153 1128
1154 remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); 1129// PJC: Process lock requests from a particular node.
1155 1130// TODO: can we optimise this out on UP ??
1156 return 0; 1131static void process_lock_request(struct work_struct *work)
1132{
1157} 1133}
1158 1134
1159static void daemons_stop(void) 1135static void daemons_stop(void)
1160{ 1136{
1161 kthread_stop(recv_task); 1137 destroy_workqueue(recv_workqueue);
1162 kthread_stop(send_task); 1138 destroy_workqueue(send_workqueue);
1139 destroy_workqueue(lock_workqueue);
1163} 1140}
1164 1141
1165static int daemons_start(void) 1142static int daemons_start(void)
1166{ 1143{
1167 struct task_struct *p;
1168 int error; 1144 int error;
1145 recv_workqueue = create_workqueue("dlm_recv");
1146 error = IS_ERR(recv_workqueue);
1147 if (error) {
1148 log_print("can't start dlm_recv %d", error);
1149 return error;
1150 }
1169 1151
1170 p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); 1152 send_workqueue = create_singlethread_workqueue("dlm_send");
1171 error = IS_ERR(p); 1153 error = IS_ERR(send_workqueue);
1172 if (error) { 1154 if (error) {
1173 log_print("can't start dlm_recvd %d", error); 1155 log_print("can't start dlm_send %d", error);
1156 destroy_workqueue(recv_workqueue);
1174 return error; 1157 return error;
1175 } 1158 }
1176 recv_task = p;
1177 1159
1178 p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); 1160 lock_workqueue = create_workqueue("dlm_rlock");
1179 error = IS_ERR(p); 1161 error = IS_ERR(lock_workqueue);
1180 if (error) { 1162 if (error) {
1181 log_print("can't start dlm_sendd %d", error); 1163 log_print("can't start dlm_rlock %d", error);
1182 kthread_stop(recv_task); 1164 destroy_workqueue(send_workqueue);
1165 destroy_workqueue(recv_workqueue);
1183 return error; 1166 return error;
1184 } 1167 }
1185 send_task = p;
1186 1168
1187 return 0; 1169 return 0;
1188} 1170}
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
1194{ 1176{
1195 int error; 1177 int error;
1196 1178
1179 INIT_WORK(&sctp_con.work, process_recv_sockets);
1180
1197 error = init_sock(); 1181 error = init_sock();
1198 if (error) 1182 if (error)
1199 goto fail_sock; 1183 goto fail_sock;
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
1224 for (i = 0; i < dlm_local_count; i++) 1208 for (i = 0; i < dlm_local_count; i++)
1225 kfree(dlm_local_addr[i]); 1209 kfree(dlm_local_addr[i]);
1226} 1210}
1227
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42a..07e0a122c32f 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
96struct connection { 96struct connection {
97 struct socket *sock; /* NULL if not connected */ 97 struct socket *sock; /* NULL if not connected */
98 uint32_t nodeid; /* So we know who we are in the list */ 98 uint32_t nodeid; /* So we know who we are in the list */
99 struct rw_semaphore sock_sem; /* Stop connect races */ 99 struct mutex sock_mutex;
100 struct list_head read_list; /* On this list when ready for reading */
101 struct list_head write_list; /* On this list when ready for writing */
102 struct list_head state_list; /* On this list when ready to connect */
103 unsigned long flags; /* bit 1,2 = We are on the read/write lists */ 100 unsigned long flags; /* bit 1,2 = We are on the read/write lists */
104#define CF_READ_PENDING 1 101#define CF_READ_PENDING 1
105#define CF_WRITE_PENDING 2 102#define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
112 struct page *rx_page; 109 struct page *rx_page;
113 struct cbuf cb; 110 struct cbuf cb;
114 int retries; 111 int retries;
115 atomic_t waiting_requests;
116#define MAX_CONNECT_RETRIES 3 112#define MAX_CONNECT_RETRIES 3
117 struct connection *othercon; 113 struct connection *othercon;
114 struct work_struct rwork; /* Receive workqueue */
115 struct work_struct swork; /* Send workqueue */
118}; 116};
119#define sock2con(x) ((struct connection *)(x)->sk_user_data) 117#define sock2con(x) ((struct connection *)(x)->sk_user_data)
120 118
@@ -131,14 +129,9 @@ struct writequeue_entry {
131 129
132static struct sockaddr_storage dlm_local_addr; 130static struct sockaddr_storage dlm_local_addr;
133 131
134/* Manage daemons */ 132/* Work queues */
135static struct task_struct *recv_task; 133static struct workqueue_struct *recv_workqueue;
136static struct task_struct *send_task; 134static struct workqueue_struct *send_workqueue;
137
138static wait_queue_t lowcomms_send_waitq_head;
139static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
140static wait_queue_t lowcomms_recv_waitq_head;
141static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
142 135
143/* An array of pointers to connections, indexed by NODEID */ 136/* An array of pointers to connections, indexed by NODEID */
144static struct connection **connections; 137static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
146static struct kmem_cache *con_cache; 139static struct kmem_cache *con_cache;
147static int conn_array_size; 140static int conn_array_size;
148 141
149/* List of sockets that have reads pending */ 142static void process_recv_sockets(struct work_struct *work);
150static LIST_HEAD(read_sockets); 143static void process_send_sockets(struct work_struct *work);
151static DEFINE_SPINLOCK(read_sockets_lock);
152
153/* List of sockets which have writes pending */
154static LIST_HEAD(write_sockets);
155static DEFINE_SPINLOCK(write_sockets_lock);
156
157/* List of sockets which have connects pending */
158static LIST_HEAD(state_sockets);
159static DEFINE_SPINLOCK(state_sockets_lock);
160 144
161static struct connection *nodeid2con(int nodeid, gfp_t allocation) 145static struct connection *nodeid2con(int nodeid, gfp_t allocation)
162{ 146{
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
186 goto finish; 170 goto finish;
187 171
188 con->nodeid = nodeid; 172 con->nodeid = nodeid;
189 init_rwsem(&con->sock_sem); 173 mutex_init(&con->sock_mutex);
190 INIT_LIST_HEAD(&con->writequeue); 174 INIT_LIST_HEAD(&con->writequeue);
191 spin_lock_init(&con->writequeue_lock); 175 spin_lock_init(&con->writequeue_lock);
176 INIT_WORK(&con->swork, process_send_sockets);
177 INIT_WORK(&con->rwork, process_recv_sockets);
192 178
193 connections[nodeid] = con; 179 connections[nodeid] = con;
194 } 180 }
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
203{ 189{
204 struct connection *con = sock2con(sk); 190 struct connection *con = sock2con(sk);
205 191
206 atomic_inc(&con->waiting_requests); 192 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
207 if (test_and_set_bit(CF_READ_PENDING, &con->flags)) 193 queue_work(recv_workqueue, &con->rwork);
208 return;
209
210 spin_lock_bh(&read_sockets_lock);
211 list_add_tail(&con->read_list, &read_sockets);
212 spin_unlock_bh(&read_sockets_lock);
213
214 wake_up_interruptible(&lowcomms_recv_waitq);
215} 194}
216 195
217static void lowcomms_write_space(struct sock *sk) 196static void lowcomms_write_space(struct sock *sk)
218{ 197{
219 struct connection *con = sock2con(sk); 198 struct connection *con = sock2con(sk);
220 199
221 if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 200 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
222 return; 201 queue_work(send_workqueue, &con->swork);
223
224 spin_lock_bh(&write_sockets_lock);
225 list_add_tail(&con->write_list, &write_sockets);
226 spin_unlock_bh(&write_sockets_lock);
227
228 wake_up_interruptible(&lowcomms_send_waitq);
229} 202}
230 203
231static inline void lowcomms_connect_sock(struct connection *con) 204static inline void lowcomms_connect_sock(struct connection *con)
232{ 205{
233 if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) 206 if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
234 return; 207 queue_work(send_workqueue, &con->swork);
235
236 spin_lock_bh(&state_sockets_lock);
237 list_add_tail(&con->state_list, &state_sockets);
238 spin_unlock_bh(&state_sockets_lock);
239
240 wake_up_interruptible(&lowcomms_send_waitq);
241} 208}
242 209
243static void lowcomms_state_change(struct sock *sk) 210static void lowcomms_state_change(struct sock *sk)
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
279/* Close a remote connection and tidy up */ 246/* Close a remote connection and tidy up */
280static void close_connection(struct connection *con, bool and_other) 247static void close_connection(struct connection *con, bool and_other)
281{ 248{
282 down_write(&con->sock_sem); 249 mutex_lock(&con->sock_mutex);
283 250
284 if (con->sock) { 251 if (con->sock) {
285 sock_release(con->sock); 252 sock_release(con->sock);
@@ -294,24 +261,27 @@ static void close_connection(struct connection *con, bool and_other)
294 con->rx_page = NULL; 261 con->rx_page = NULL;
295 } 262 }
296 con->retries = 0; 263 con->retries = 0;
297 up_write(&con->sock_sem); 264 mutex_unlock(&con->sock_mutex);
298} 265}
299 266
300/* Data received from remote end */ 267/* Data received from remote end */
301static int receive_from_sock(struct connection *con) 268static int receive_from_sock(struct connection *con)
302{ 269{
303 int ret = 0; 270 int ret = 0;
304 struct msghdr msg; 271 struct msghdr msg = {};
305 struct iovec iov[2]; 272 struct kvec iov[2];
306 mm_segment_t fs;
307 unsigned len; 273 unsigned len;
308 int r; 274 int r;
309 int call_again_soon = 0; 275 int call_again_soon = 0;
276 int nvec;
310 277
311 down_read(&con->sock_sem); 278 mutex_lock(&con->sock_mutex);
279
280 if (con->sock == NULL) {
281 ret = -EAGAIN;
282 goto out_close;
283 }
312 284
313 if (con->sock == NULL)
314 goto out;
315 if (con->rx_page == NULL) { 285 if (con->rx_page == NULL) {
316 /* 286 /*
317 * This doesn't need to be atomic, but I think it should 287 * This doesn't need to be atomic, but I think it should
@@ -323,21 +293,13 @@ static int receive_from_sock(struct connection *con)
323 cbuf_init(&con->cb, PAGE_CACHE_SIZE); 293 cbuf_init(&con->cb, PAGE_CACHE_SIZE);
324 } 294 }
325 295
326 msg.msg_control = NULL;
327 msg.msg_controllen = 0;
328 msg.msg_iovlen = 1;
329 msg.msg_iov = iov;
330 msg.msg_name = NULL;
331 msg.msg_namelen = 0;
332 msg.msg_flags = 0;
333
334 /* 296 /*
335 * iov[0] is the bit of the circular buffer between the current end 297 * iov[0] is the bit of the circular buffer between the current end
336 * point (cb.base + cb.len) and the end of the buffer. 298 * point (cb.base + cb.len) and the end of the buffer.
337 */ 299 */
338 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); 300 iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
339 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); 301 iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
340 iov[1].iov_len = 0; 302 nvec = 1;
341 303
342 /* 304 /*
343 * iov[1] is the bit of the circular buffer between the start of the 305 * iov[1] is the bit of the circular buffer between the start of the
@@ -347,18 +309,18 @@ static int receive_from_sock(struct connection *con)
347 iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb); 309 iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
348 iov[1].iov_len = con->cb.base; 310 iov[1].iov_len = con->cb.base;
349 iov[1].iov_base = page_address(con->rx_page); 311 iov[1].iov_base = page_address(con->rx_page);
350 msg.msg_iovlen = 2; 312 nvec = 2;
351 } 313 }
352 len = iov[0].iov_len + iov[1].iov_len; 314 len = iov[0].iov_len + iov[1].iov_len;
353 315
354 fs = get_fs(); 316 r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
355 set_fs(get_ds());
356 r = ret = sock_recvmsg(con->sock, &msg, len,
357 MSG_DONTWAIT | MSG_NOSIGNAL); 317 MSG_DONTWAIT | MSG_NOSIGNAL);
358 set_fs(fs);
359 318
360 if (ret <= 0) 319 if (ret <= 0)
361 goto out_close; 320 goto out_close;
321 if (ret == -EAGAIN)
322 goto out_resched;
323
362 if (ret == len) 324 if (ret == len)
363 call_again_soon = 1; 325 call_again_soon = 1;
364 cbuf_add(&con->cb, ret); 326 cbuf_add(&con->cb, ret);
@@ -381,24 +343,26 @@ static int receive_from_sock(struct connection *con)
381 con->rx_page = NULL; 343 con->rx_page = NULL;
382 } 344 }
383 345
384out:
385 if (call_again_soon) 346 if (call_again_soon)
386 goto out_resched; 347 goto out_resched;
387 up_read(&con->sock_sem); 348 mutex_unlock(&con->sock_mutex);
388 return 0; 349 return 0;
389 350
390out_resched: 351out_resched:
391 lowcomms_data_ready(con->sock->sk, 0); 352 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
392 up_read(&con->sock_sem); 353 queue_work(recv_workqueue, &con->rwork);
393 cond_resched(); 354 mutex_unlock(&con->sock_mutex);
394 return 0; 355 return -EAGAIN;
395 356
396out_close: 357out_close:
397 up_read(&con->sock_sem); 358 mutex_unlock(&con->sock_mutex);
398 if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { 359 if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
399 close_connection(con, false); 360 close_connection(con, false);
400 /* Reconnect when there is something to send */ 361 /* Reconnect when there is something to send */
401 } 362 }
363 /* Don't return success if we really got EOF */
364 if (ret == 0)
365 ret = -EAGAIN;
402 366
403 return ret; 367 return ret;
404} 368}
@@ -412,6 +376,7 @@ static int accept_from_sock(struct connection *con)
412 int len; 376 int len;
413 int nodeid; 377 int nodeid;
414 struct connection *newcon; 378 struct connection *newcon;
379 struct connection *addcon;
415 380
416 memset(&peeraddr, 0, sizeof(peeraddr)); 381 memset(&peeraddr, 0, sizeof(peeraddr));
417 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, 382 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
@@ -419,7 +384,7 @@ static int accept_from_sock(struct connection *con)
419 if (result < 0) 384 if (result < 0)
420 return -ENOMEM; 385 return -ENOMEM;
421 386
422 down_read(&con->sock_sem); 387 mutex_lock_nested(&con->sock_mutex, 0);
423 388
424 result = -ENOTCONN; 389 result = -ENOTCONN;
425 if (con->sock == NULL) 390 if (con->sock == NULL)
@@ -445,7 +410,7 @@ static int accept_from_sock(struct connection *con)
445 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 410 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
446 printk("dlm: connect from non cluster node\n"); 411 printk("dlm: connect from non cluster node\n");
447 sock_release(newsock); 412 sock_release(newsock);
448 up_read(&con->sock_sem); 413 mutex_unlock(&con->sock_mutex);
449 return -1; 414 return -1;
450 } 415 }
451 416
@@ -462,7 +427,7 @@ static int accept_from_sock(struct connection *con)
462 result = -ENOMEM; 427 result = -ENOMEM;
463 goto accept_err; 428 goto accept_err;
464 } 429 }
465 down_write(&newcon->sock_sem); 430 mutex_lock_nested(&newcon->sock_mutex, 1);
466 if (newcon->sock) { 431 if (newcon->sock) {
467 struct connection *othercon = newcon->othercon; 432 struct connection *othercon = newcon->othercon;
468 433
@@ -470,41 +435,45 @@ static int accept_from_sock(struct connection *con)
470 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); 435 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
471 if (!othercon) { 436 if (!othercon) {
472 printk("dlm: failed to allocate incoming socket\n"); 437 printk("dlm: failed to allocate incoming socket\n");
473 up_write(&newcon->sock_sem); 438 mutex_unlock(&newcon->sock_mutex);
474 result = -ENOMEM; 439 result = -ENOMEM;
475 goto accept_err; 440 goto accept_err;
476 } 441 }
477 othercon->nodeid = nodeid; 442 othercon->nodeid = nodeid;
478 othercon->rx_action = receive_from_sock; 443 othercon->rx_action = receive_from_sock;
479 init_rwsem(&othercon->sock_sem); 444 mutex_init(&othercon->sock_mutex);
445 INIT_WORK(&othercon->swork, process_send_sockets);
446 INIT_WORK(&othercon->rwork, process_recv_sockets);
480 set_bit(CF_IS_OTHERCON, &othercon->flags); 447 set_bit(CF_IS_OTHERCON, &othercon->flags);
481 newcon->othercon = othercon; 448 newcon->othercon = othercon;
482 } 449 }
483 othercon->sock = newsock; 450 othercon->sock = newsock;
484 newsock->sk->sk_user_data = othercon; 451 newsock->sk->sk_user_data = othercon;
485 add_sock(newsock, othercon); 452 add_sock(newsock, othercon);
453 addcon = othercon;
486 } 454 }
487 else { 455 else {
488 newsock->sk->sk_user_data = newcon; 456 newsock->sk->sk_user_data = newcon;
489 newcon->rx_action = receive_from_sock; 457 newcon->rx_action = receive_from_sock;
490 add_sock(newsock, newcon); 458 add_sock(newsock, newcon);
491 459 addcon = newcon;
492 } 460 }
493 461
494 up_write(&newcon->sock_sem); 462 mutex_unlock(&newcon->sock_mutex);
495 463
496 /* 464 /*
497 * Add it to the active queue in case we got data 465 * Add it to the active queue in case we got data
498 * beween processing the accept adding the socket 466 * beween processing the accept adding the socket
499 * to the read_sockets list 467 * to the read_sockets list
500 */ 468 */
501 lowcomms_data_ready(newsock->sk, 0); 469 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
502 up_read(&con->sock_sem); 470 queue_work(recv_workqueue, &addcon->rwork);
471 mutex_unlock(&con->sock_mutex);
503 472
504 return 0; 473 return 0;
505 474
506accept_err: 475accept_err:
507 up_read(&con->sock_sem); 476 mutex_unlock(&con->sock_mutex);
508 sock_release(newsock); 477 sock_release(newsock);
509 478
510 if (result != -EAGAIN) 479 if (result != -EAGAIN)
@@ -525,7 +494,7 @@ static void connect_to_sock(struct connection *con)
525 return; 494 return;
526 } 495 }
527 496
528 down_write(&con->sock_sem); 497 mutex_lock(&con->sock_mutex);
529 if (con->retries++ > MAX_CONNECT_RETRIES) 498 if (con->retries++ > MAX_CONNECT_RETRIES)
530 goto out; 499 goto out;
531 500
@@ -548,7 +517,7 @@ static void connect_to_sock(struct connection *con)
548 sock->sk->sk_user_data = con; 517 sock->sk->sk_user_data = con;
549 con->rx_action = receive_from_sock; 518 con->rx_action = receive_from_sock;
550 519
551 make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); 520 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
552 521
553 add_sock(sock, con); 522 add_sock(sock, con);
554 523
@@ -577,7 +546,7 @@ out_err:
577 result = 0; 546 result = 0;
578 } 547 }
579out: 548out:
580 up_write(&con->sock_sem); 549 mutex_unlock(&con->sock_mutex);
581 return; 550 return;
582} 551}
583 552
@@ -616,10 +585,10 @@ static struct socket *create_listen_sock(struct connection *con,
616 con->sock = sock; 585 con->sock = sock;
617 586
618 /* Bind to our port */ 587 /* Bind to our port */
619 make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); 588 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
620 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); 589 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
621 if (result < 0) { 590 if (result < 0) {
622 printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); 591 printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port);
623 sock_release(sock); 592 sock_release(sock);
624 sock = NULL; 593 sock = NULL;
625 con->sock = NULL; 594 con->sock = NULL;
@@ -638,7 +607,7 @@ static struct socket *create_listen_sock(struct connection *con,
638 607
639 result = sock->ops->listen(sock, 5); 608 result = sock->ops->listen(sock, 5);
640 if (result < 0) { 609 if (result < 0) {
641 printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); 610 printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port);
642 sock_release(sock); 611 sock_release(sock);
643 sock = NULL; 612 sock = NULL;
644 goto create_out; 613 goto create_out;
@@ -709,6 +678,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
709 if (!con) 678 if (!con)
710 return NULL; 679 return NULL;
711 680
681 spin_lock(&con->writequeue_lock);
712 e = list_entry(con->writequeue.prev, struct writequeue_entry, list); 682 e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
713 if ((&e->list == &con->writequeue) || 683 if ((&e->list == &con->writequeue) ||
714 (PAGE_CACHE_SIZE - e->end < len)) { 684 (PAGE_CACHE_SIZE - e->end < len)) {
@@ -747,6 +717,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
747 struct connection *con = e->con; 717 struct connection *con = e->con;
748 int users; 718 int users;
749 719
720 spin_lock(&con->writequeue_lock);
750 users = --e->users; 721 users = --e->users;
751 if (users) 722 if (users)
752 goto out; 723 goto out;
@@ -754,12 +725,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
754 kunmap(e->page); 725 kunmap(e->page);
755 spin_unlock(&con->writequeue_lock); 726 spin_unlock(&con->writequeue_lock);
756 727
757 if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { 728 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
758 spin_lock_bh(&write_sockets_lock); 729 queue_work(send_workqueue, &con->swork);
759 list_add_tail(&con->write_list, &write_sockets);
760 spin_unlock_bh(&write_sockets_lock);
761
762 wake_up_interruptible(&lowcomms_send_waitq);
763 } 730 }
764 return; 731 return;
765 732
@@ -783,7 +750,7 @@ static void send_to_sock(struct connection *con)
783 struct writequeue_entry *e; 750 struct writequeue_entry *e;
784 int len, offset; 751 int len, offset;
785 752
786 down_read(&con->sock_sem); 753 mutex_lock(&con->sock_mutex);
787 if (con->sock == NULL) 754 if (con->sock == NULL)
788 goto out_connect; 755 goto out_connect;
789 756
@@ -800,6 +767,7 @@ static void send_to_sock(struct connection *con)
800 offset = e->offset; 767 offset = e->offset;
801 BUG_ON(len == 0 && e->users == 0); 768 BUG_ON(len == 0 && e->users == 0);
802 spin_unlock(&con->writequeue_lock); 769 spin_unlock(&con->writequeue_lock);
770 kmap(e->page);
803 771
804 ret = 0; 772 ret = 0;
805 if (len) { 773 if (len) {
@@ -828,18 +796,18 @@ static void send_to_sock(struct connection *con)
828 } 796 }
829 spin_unlock(&con->writequeue_lock); 797 spin_unlock(&con->writequeue_lock);
830out: 798out:
831 up_read(&con->sock_sem); 799 mutex_unlock(&con->sock_mutex);
832 return; 800 return;
833 801
834send_error: 802send_error:
835 up_read(&con->sock_sem); 803 mutex_unlock(&con->sock_mutex);
836 close_connection(con, false); 804 close_connection(con, false);
837 lowcomms_connect_sock(con); 805 lowcomms_connect_sock(con);
838 return; 806 return;
839 807
840out_connect: 808out_connect:
841 up_read(&con->sock_sem); 809 mutex_unlock(&con->sock_mutex);
842 lowcomms_connect_sock(con); 810 connect_to_sock(con);
843 return; 811 return;
844} 812}
845 813
@@ -872,7 +840,6 @@ int dlm_lowcomms_close(int nodeid)
872 if (con) { 840 if (con) {
873 clean_one_writequeue(con); 841 clean_one_writequeue(con);
874 close_connection(con, true); 842 close_connection(con, true);
875 atomic_set(&con->waiting_requests, 0);
876 } 843 }
877 return 0; 844 return 0;
878 845
@@ -880,102 +847,29 @@ out:
880 return -1; 847 return -1;
881} 848}
882 849
883/* API send message call, may queue the request */
884/* N.B. This is the old interface - use the new one for new calls */
885int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
886{
887 struct writequeue_entry *e;
888 char *b;
889
890 e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
891 if (e) {
892 memcpy(b, buf, len);
893 dlm_lowcomms_commit_buffer(e);
894 return 0;
895 }
896 return -ENOBUFS;
897}
898
899/* Look for activity on active sockets */ 850/* Look for activity on active sockets */
900static void process_sockets(void) 851static void process_recv_sockets(struct work_struct *work)
901{ 852{
902 struct list_head *list; 853 struct connection *con = container_of(work, struct connection, rwork);
903 struct list_head *temp; 854 int err;
904 int count = 0;
905
906 spin_lock_bh(&read_sockets_lock);
907 list_for_each_safe(list, temp, &read_sockets) {
908
909 struct connection *con =
910 list_entry(list, struct connection, read_list);
911 list_del(&con->read_list);
912 clear_bit(CF_READ_PENDING, &con->flags);
913
914 spin_unlock_bh(&read_sockets_lock);
915
916 /* This can reach zero if we are processing requests
917 * as they come in.
918 */
919 if (atomic_read(&con->waiting_requests) == 0) {
920 spin_lock_bh(&read_sockets_lock);
921 continue;
922 }
923
924 do {
925 con->rx_action(con);
926
927 /* Don't starve out everyone else */
928 if (++count >= MAX_RX_MSG_COUNT) {
929 cond_resched();
930 count = 0;
931 }
932 855
933 } while (!atomic_dec_and_test(&con->waiting_requests) && 856 clear_bit(CF_READ_PENDING, &con->flags);
934 !kthread_should_stop()); 857 do {
935 858 err = con->rx_action(con);
936 spin_lock_bh(&read_sockets_lock); 859 } while (!err);
937 }
938 spin_unlock_bh(&read_sockets_lock);
939} 860}
940 861
941/* Try to send any messages that are pending
942 */
943static void process_output_queue(void)
944{
945 struct list_head *list;
946 struct list_head *temp;
947
948 spin_lock_bh(&write_sockets_lock);
949 list_for_each_safe(list, temp, &write_sockets) {
950 struct connection *con =
951 list_entry(list, struct connection, write_list);
952 clear_bit(CF_WRITE_PENDING, &con->flags);
953 list_del(&con->write_list);
954 862
955 spin_unlock_bh(&write_sockets_lock); 863static void process_send_sockets(struct work_struct *work)
956 send_to_sock(con);
957 spin_lock_bh(&write_sockets_lock);
958 }
959 spin_unlock_bh(&write_sockets_lock);
960}
961
962static void process_state_queue(void)
963{ 864{
964 struct list_head *list; 865 struct connection *con = container_of(work, struct connection, swork);
965 struct list_head *temp;
966
967 spin_lock_bh(&state_sockets_lock);
968 list_for_each_safe(list, temp, &state_sockets) {
969 struct connection *con =
970 list_entry(list, struct connection, state_list);
971 list_del(&con->state_list);
972 clear_bit(CF_CONNECT_PENDING, &con->flags);
973 spin_unlock_bh(&state_sockets_lock);
974 866
867 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
975 connect_to_sock(con); 868 connect_to_sock(con);
976 spin_lock_bh(&state_sockets_lock);
977 } 869 }
978 spin_unlock_bh(&state_sockets_lock); 870
871 clear_bit(CF_WRITE_PENDING, &con->flags);
872 send_to_sock(con);
979} 873}
980 874
981 875
@@ -992,109 +886,33 @@ static void clean_writequeues(void)
992 } 886 }
993} 887}
994 888
995static int read_list_empty(void) 889static void work_stop(void)
996{
997 int status;
998
999 spin_lock_bh(&read_sockets_lock);
1000 status = list_empty(&read_sockets);
1001 spin_unlock_bh(&read_sockets_lock);
1002
1003 return status;
1004}
1005
1006/* DLM Transport comms receive daemon */
1007static int dlm_recvd(void *data)
1008{ 890{
1009 init_waitqueue_entry(&lowcomms_recv_waitq_head, current); 891 destroy_workqueue(recv_workqueue);
1010 add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); 892 destroy_workqueue(send_workqueue);
1011
1012 while (!kthread_should_stop()) {
1013 set_current_state(TASK_INTERRUPTIBLE);
1014 if (read_list_empty())
1015 cond_resched();
1016 set_current_state(TASK_RUNNING);
1017
1018 process_sockets();
1019 }
1020
1021 return 0;
1022} 893}
1023 894
1024static int write_and_state_lists_empty(void) 895static int work_start(void)
1025{ 896{
1026 int status;
1027
1028 spin_lock_bh(&write_sockets_lock);
1029 status = list_empty(&write_sockets);
1030 spin_unlock_bh(&write_sockets_lock);
1031
1032 spin_lock_bh(&state_sockets_lock);
1033 if (list_empty(&state_sockets) == 0)
1034 status = 0;
1035 spin_unlock_bh(&state_sockets_lock);
1036
1037 return status;
1038}
1039
1040/* DLM Transport send daemon */
1041static int dlm_sendd(void *data)
1042{
1043 init_waitqueue_entry(&lowcomms_send_waitq_head, current);
1044 add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
1045
1046 while (!kthread_should_stop()) {
1047 set_current_state(TASK_INTERRUPTIBLE);
1048 if (write_and_state_lists_empty())
1049 cond_resched();
1050 set_current_state(TASK_RUNNING);
1051
1052 process_state_queue();
1053 process_output_queue();
1054 }
1055
1056 return 0;
1057}
1058
1059static void daemons_stop(void)
1060{
1061 kthread_stop(recv_task);
1062 kthread_stop(send_task);
1063}
1064
1065static int daemons_start(void)
1066{
1067 struct task_struct *p;
1068 int error; 897 int error;
1069 898 recv_workqueue = create_workqueue("dlm_recv");
1070 p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); 899 error = IS_ERR(recv_workqueue);
1071 error = IS_ERR(p);
1072 if (error) { 900 if (error) {
1073 log_print("can't start dlm_recvd %d", error); 901 log_print("can't start dlm_recv %d", error);
1074 return error; 902 return error;
1075 } 903 }
1076 recv_task = p;
1077 904
1078 p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); 905 send_workqueue = create_singlethread_workqueue("dlm_send");
1079 error = IS_ERR(p); 906 error = IS_ERR(send_workqueue);
1080 if (error) { 907 if (error) {
1081 log_print("can't start dlm_sendd %d", error); 908 log_print("can't start dlm_send %d", error);
1082 kthread_stop(recv_task); 909 destroy_workqueue(recv_workqueue);
1083 return error; 910 return error;
1084 } 911 }
1085 send_task = p;
1086 912
1087 return 0; 913 return 0;
1088} 914}
1089 915
1090/*
1091 * Return the largest buffer size we can cope with.
1092 */
1093int lowcomms_max_buffer_size(void)
1094{
1095 return PAGE_CACHE_SIZE;
1096}
1097
1098void dlm_lowcomms_stop(void) 916void dlm_lowcomms_stop(void)
1099{ 917{
1100 int i; 918 int i;
@@ -1107,7 +925,7 @@ void dlm_lowcomms_stop(void)
1107 connections[i]->flags |= 0xFF; 925 connections[i]->flags |= 0xFF;
1108 } 926 }
1109 927
1110 daemons_stop(); 928 work_stop();
1111 clean_writequeues(); 929 clean_writequeues();
1112 930
1113 for (i = 0; i < conn_array_size; i++) { 931 for (i = 0; i < conn_array_size; i++) {
@@ -1159,7 +977,7 @@ int dlm_lowcomms_start(void)
1159 if (error) 977 if (error)
1160 goto fail_unlisten; 978 goto fail_unlisten;
1161 979
1162 error = daemons_start(); 980 error = work_start();
1163 if (error) 981 if (error)
1164 goto fail_unlisten; 982 goto fail_unlisten;
1165 983
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 5352b03ff5aa..f858fef6e41c 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -76,9 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls)
76{ 76{
77 struct dlm_lkb *lkb; 77 struct dlm_lkb *lkb;
78 78
79 lkb = kmem_cache_alloc(lkb_cache, GFP_KERNEL); 79 lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
80 if (lkb)
81 memset(lkb, 0, sizeof(*lkb));
82 return lkb; 80 return lkb;
83} 81}
84 82
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index c9b1c3d535f4..a5126e0c68a6 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
82 if (msglen < sizeof(struct dlm_header)) 82 if (msglen < sizeof(struct dlm_header))
83 break; 83 break;
84 err = -E2BIG; 84 err = -E2BIG;
85 if (msglen > dlm_config.buffer_size) { 85 if (msglen > dlm_config.ci_buffer_size) {
86 log_print("message size %d from %d too big, buf len %d", 86 log_print("message size %d from %d too big, buf len %d",
87 msglen, nodeid, len); 87 msglen, nodeid, len);
88 break; 88 break;
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
103 103
104 if (msglen > sizeof(__tmp) && 104 if (msglen > sizeof(__tmp) &&
105 msg == (struct dlm_header *) __tmp) { 105 msg == (struct dlm_header *) __tmp) {
106 msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); 106 msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
107 if (msg == NULL) 107 if (msg == NULL)
108 return ret; 108 return ret;
109 } 109 }
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 4cc31be9cd9d..6bfbd6153809 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
56 56
57 rc->rc_type = type; 57 rc->rc_type = type;
58 58
59 spin_lock(&ls->ls_recover_lock);
60 rc->rc_seq = ls->ls_recover_seq;
61 spin_unlock(&ls->ls_recover_lock);
62
59 *mh_ret = mh; 63 *mh_ret = mh;
60 *rc_ret = rc; 64 *rc_ret = rc;
61 return 0; 65 return 0;
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
78 rf->rf_lsflags = ls->ls_exflags; 82 rf->rf_lsflags = ls->ls_exflags;
79} 83}
80 84
81static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) 85static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
82{ 86{
87 struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
88
89 if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
90 log_error(ls, "version mismatch: %x nodeid %d: %x",
91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
92 rc->rc_header.h_version);
93 return -EINVAL;
94 }
95
83 if (rf->rf_lvblen != ls->ls_lvblen || 96 if (rf->rf_lvblen != ls->ls_lvblen ||
84 rf->rf_lsflags != ls->ls_exflags) { 97 rf->rf_lsflags != ls->ls_exflags) {
85 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
125 goto out; 138 goto out;
126 139
127 allow_sync_reply(ls, &rc->rc_id); 140 allow_sync_reply(ls, &rc->rc_id);
128 memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); 141 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
129 142
130 send_rcom(ls, mh, rc); 143 send_rcom(ls, mh, rc);
131 144
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
141 log_debug(ls, "remote node %d not ready", nodeid); 154 log_debug(ls, "remote node %d not ready", nodeid);
142 rc->rc_result = 0; 155 rc->rc_result = 0;
143 } else 156 } else
144 error = check_config(ls, (struct rcom_config *) rc->rc_buf, 157 error = check_config(ls, rc, nodeid);
145 nodeid);
146 /* the caller looks at rc_result for the remote recovery status */ 158 /* the caller looks at rc_result for the remote recovery status */
147 out: 159 out:
148 return error; 160 return error;
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
159 if (error) 171 if (error)
160 return; 172 return;
161 rc->rc_id = rc_in->rc_id; 173 rc->rc_id = rc_in->rc_id;
174 rc->rc_seq_reply = rc_in->rc_seq;
162 rc->rc_result = dlm_recover_status(ls); 175 rc->rc_result = dlm_recover_status(ls);
163 make_config(ls, (struct rcom_config *) rc->rc_buf); 176 make_config(ls, (struct rcom_config *) rc->rc_buf);
164 177
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
200 if (nodeid == dlm_our_nodeid()) { 213 if (nodeid == dlm_our_nodeid()) {
201 dlm_copy_master_names(ls, last_name, last_len, 214 dlm_copy_master_names(ls, last_name, last_len,
202 ls->ls_recover_buf + len, 215 ls->ls_recover_buf + len,
203 dlm_config.buffer_size - len, nodeid); 216 dlm_config.ci_buffer_size - len, nodeid);
204 goto out; 217 goto out;
205 } 218 }
206 219
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
210 memcpy(rc->rc_buf, last_name, last_len); 223 memcpy(rc->rc_buf, last_name, last_len);
211 224
212 allow_sync_reply(ls, &rc->rc_id); 225 allow_sync_reply(ls, &rc->rc_id);
213 memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); 226 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
214 227
215 send_rcom(ls, mh, rc); 228 send_rcom(ls, mh, rc);
216 229
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
224{ 237{
225 struct dlm_rcom *rc; 238 struct dlm_rcom *rc;
226 struct dlm_mhandle *mh; 239 struct dlm_mhandle *mh;
227 int error, inlen, outlen; 240 int error, inlen, outlen, nodeid;
228 int nodeid = rc_in->rc_header.h_nodeid;
229 uint32_t status = dlm_recover_status(ls);
230
231 /*
232 * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
233 * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
234 * It could only happen in rare cases where we get a late NAMES
235 * message from a previous instance of recovery.
236 */
237
238 if (!(status & DLM_RS_NODES)) {
239 log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
240 return;
241 }
242 241
243 nodeid = rc_in->rc_header.h_nodeid; 242 nodeid = rc_in->rc_header.h_nodeid;
244 inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); 243 inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
245 outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); 244 outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
246 245
247 error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); 246 error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
248 if (error) 247 if (error)
249 return; 248 return;
250 rc->rc_id = rc_in->rc_id; 249 rc->rc_id = rc_in->rc_id;
250 rc->rc_seq_reply = rc_in->rc_seq;
251 251
252 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, 252 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
253 nodeid); 253 nodeid);
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
294 ret_nodeid = error; 294 ret_nodeid = error;
295 rc->rc_result = ret_nodeid; 295 rc->rc_result = ret_nodeid;
296 rc->rc_id = rc_in->rc_id; 296 rc->rc_id = rc_in->rc_id;
297 rc->rc_seq_reply = rc_in->rc_seq;
297 298
298 send_rcom(ls, mh, rc); 299 send_rcom(ls, mh, rc);
299} 300}
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
375 376
376 memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); 377 memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
377 rc->rc_id = rc_in->rc_id; 378 rc->rc_id = rc_in->rc_id;
379 rc->rc_seq_reply = rc_in->rc_seq;
378 380
379 send_rcom(ls, mh, rc); 381 send_rcom(ls, mh, rc);
380} 382}
381 383
382static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) 384static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
383{ 385{
384 uint32_t status = dlm_recover_status(ls);
385
386 if (!(status & DLM_RS_DIR)) {
387 log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
388 rc_in->rc_header.h_nodeid);
389 return;
390 }
391
392 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
393} 387}
394 388
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
415 409
416 rc->rc_type = DLM_RCOM_STATUS_REPLY; 410 rc->rc_type = DLM_RCOM_STATUS_REPLY;
417 rc->rc_id = rc_in->rc_id; 411 rc->rc_id = rc_in->rc_id;
412 rc->rc_seq_reply = rc_in->rc_seq;
418 rc->rc_result = -ESRCH; 413 rc->rc_result = -ESRCH;
419 414
420 rf = (struct rcom_config *) rc->rc_buf; 415 rf = (struct rcom_config *) rc->rc_buf;
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
426 return 0; 421 return 0;
427} 422}
428 423
424static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
425{
426 uint64_t seq;
427 int rv = 0;
428
429 switch (rc->rc_type) {
430 case DLM_RCOM_STATUS_REPLY:
431 case DLM_RCOM_NAMES_REPLY:
432 case DLM_RCOM_LOOKUP_REPLY:
433 case DLM_RCOM_LOCK_REPLY:
434 spin_lock(&ls->ls_recover_lock);
435 seq = ls->ls_recover_seq;
436 spin_unlock(&ls->ls_recover_lock);
437 if (rc->rc_seq_reply != seq) {
438 log_debug(ls, "ignoring old reply %x from %d "
439 "seq_reply %llx expect %llx",
440 rc->rc_type, rc->rc_header.h_nodeid,
441 (unsigned long long)rc->rc_seq_reply,
442 (unsigned long long)seq);
443 rv = 1;
444 }
445 }
446 return rv;
447}
448
429/* Called by dlm_recvd; corresponds to dlm_receive_message() but special 449/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
430 recovery-only comms are sent through here. */ 450 recovery-only comms are sent through here. */
431 451
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
449 } 469 }
450 470
451 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 471 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
452 log_error(ls, "ignoring recovery message %x from %d", 472 log_debug(ls, "ignoring recovery message %x from %d",
453 rc->rc_type, nodeid); 473 rc->rc_type, nodeid);
454 goto out; 474 goto out;
455 } 475 }
456 476
477 if (is_old_reply(ls, rc))
478 goto out;
479
457 if (nodeid != rc->rc_header.h_nodeid) { 480 if (nodeid != rc->rc_header.h_nodeid) {
458 log_error(ls, "bad rcom nodeid %d from %d", 481 log_error(ls, "bad rcom nodeid %d from %d",
459 rc->rc_header.h_nodeid, nodeid); 482 rc->rc_header.h_nodeid, nodeid);
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index cf9f6831bab5..c2cc7694cd16 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -44,7 +44,7 @@
44static void dlm_wait_timer_fn(unsigned long data) 44static void dlm_wait_timer_fn(unsigned long data)
45{ 45{
46 struct dlm_ls *ls = (struct dlm_ls *) data; 46 struct dlm_ls *ls = (struct dlm_ls *) data;
47 mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); 47 mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
48 wake_up(&ls->ls_wait_general); 48 wake_up(&ls->ls_wait_general);
49} 49}
50 50
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
55 init_timer(&ls->ls_timer); 55 init_timer(&ls->ls_timer);
56 ls->ls_timer.function = dlm_wait_timer_fn; 56 ls->ls_timer.function = dlm_wait_timer_fn;
57 ls->ls_timer.data = (long) ls; 57 ls->ls_timer.data = (long) ls;
58 ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); 58 ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
59 add_timer(&ls->ls_timer); 59 add_timer(&ls->ls_timer);
60 60
61 wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); 61 wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
397 397
398 if (dlm_no_directory(ls)) 398 if (dlm_no_directory(ls))
399 count += recover_master_static(r); 399 count += recover_master_static(r);
400 else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { 400 else if (!is_master(r) &&
401 (dlm_is_removed(ls, r->res_nodeid) ||
402 rsb_flag(r, RSB_NEW_MASTER))) {
401 recover_master(r); 403 recover_master(r);
402 count++; 404 count++;
403 } 405 }
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 650536aa5139..3cb636d60249 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
77 77
78 error = dlm_recover_members(ls, rv, &neg); 78 error = dlm_recover_members(ls, rv, &neg);
79 if (error) { 79 if (error) {
80 log_error(ls, "recover_members failed %d", error); 80 log_debug(ls, "recover_members failed %d", error);
81 goto fail; 81 goto fail;
82 } 82 }
83 start = jiffies; 83 start = jiffies;
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
89 89
90 error = dlm_recover_directory(ls); 90 error = dlm_recover_directory(ls);
91 if (error) { 91 if (error) {
92 log_error(ls, "recover_directory failed %d", error); 92 log_debug(ls, "recover_directory failed %d", error);
93 goto fail; 93 goto fail;
94 } 94 }
95 95
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
99 99
100 error = dlm_recover_directory_wait(ls); 100 error = dlm_recover_directory_wait(ls);
101 if (error) { 101 if (error) {
102 log_error(ls, "recover_directory_wait failed %d", error); 102 log_debug(ls, "recover_directory_wait failed %d", error);
103 goto fail; 103 goto fail;
104 } 104 }
105 105
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
129 129
130 error = dlm_recover_masters(ls); 130 error = dlm_recover_masters(ls);
131 if (error) { 131 if (error) {
132 log_error(ls, "recover_masters failed %d", error); 132 log_debug(ls, "recover_masters failed %d", error);
133 goto fail; 133 goto fail;
134 } 134 }
135 135
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
139 139
140 error = dlm_recover_locks(ls); 140 error = dlm_recover_locks(ls);
141 if (error) { 141 if (error) {
142 log_error(ls, "recover_locks failed %d", error); 142 log_debug(ls, "recover_locks failed %d", error);
143 goto fail; 143 goto fail;
144 } 144 }
145 145
146 error = dlm_recover_locks_wait(ls); 146 error = dlm_recover_locks_wait(ls);
147 if (error) { 147 if (error) {
148 log_error(ls, "recover_locks_wait failed %d", error); 148 log_debug(ls, "recover_locks_wait failed %d", error);
149 goto fail; 149 goto fail;
150 } 150 }
151 151
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
166 166
167 error = dlm_recover_locks_wait(ls); 167 error = dlm_recover_locks_wait(ls);
168 if (error) { 168 if (error) {
169 log_error(ls, "recover_locks_wait failed %d", error); 169 log_debug(ls, "recover_locks_wait failed %d", error);
170 goto fail; 170 goto fail;
171 } 171 }
172 } 172 }
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
184 dlm_set_recover_status(ls, DLM_RS_DONE); 184 dlm_set_recover_status(ls, DLM_RS_DONE);
185 error = dlm_recover_done_wait(ls); 185 error = dlm_recover_done_wait(ls);
186 if (error) { 186 if (error) {
187 log_error(ls, "recover_done_wait failed %d", error); 187 log_debug(ls, "recover_done_wait failed %d", error);
188 goto fail; 188 goto fail;
189 } 189 }
190 190
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
192 192
193 error = enable_locking(ls, rv->seq); 193 error = enable_locking(ls, rv->seq);
194 if (error) { 194 if (error) {
195 log_error(ls, "enable_locking failed %d", error); 195 log_debug(ls, "enable_locking failed %d", error);
196 goto fail; 196 goto fail;
197 } 197 }
198 198
199 error = dlm_process_requestqueue(ls); 199 error = dlm_process_requestqueue(ls);
200 if (error) { 200 if (error) {
201 log_error(ls, "process_requestqueue failed %d", error); 201 log_debug(ls, "process_requestqueue failed %d", error);
202 goto fail; 202 goto fail;
203 } 203 }
204 204
205 error = dlm_recover_waiters_post(ls); 205 error = dlm_recover_waiters_post(ls);
206 if (error) { 206 if (error) {
207 log_error(ls, "recover_waiters_post failed %d", error); 207 log_debug(ls, "recover_waiters_post failed %d", error);
208 goto fail; 208 goto fail;
209 } 209 }
210 210
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index c37e93e4f2df..40db61dc95f2 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -25,7 +25,7 @@
25 25
26static const char *name_prefix="dlm"; 26static const char *name_prefix="dlm";
27static struct miscdevice ctl_device; 27static struct miscdevice ctl_device;
28static struct file_operations device_fops; 28static const struct file_operations device_fops;
29 29
30#ifdef CONFIG_COMPAT 30#ifdef CONFIG_COMPAT
31 31
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
180 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) 180 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
181 remove_ownqueue = 1; 181 remove_ownqueue = 1;
182 182
183 /* unlocks or cancels of waiting requests need to be removed from the
184 proc's unlocking list, again there must be a better way... */
185
186 if (ua->lksb.sb_status == -DLM_EUNLOCK ||
187 (ua->lksb.sb_status == -DLM_ECANCEL &&
188 lkb->lkb_grmode == DLM_LOCK_IV))
189 remove_ownqueue = 1;
190
183 /* We want to copy the lvb to userspace when the completion 191 /* We want to copy the lvb to userspace when the completion
184 ast is read if the status is 0, the lock has an lvb and 192 ast is read if the status is 0, the lock has an lvb and
185 lvb_ops says we should. We could probably have set_lvb_lock() 193 lvb_ops says we should. We could probably have set_lvb_lock()
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
523 proc->lockspace = ls->ls_local_handle; 531 proc->lockspace = ls->ls_local_handle;
524 INIT_LIST_HEAD(&proc->asts); 532 INIT_LIST_HEAD(&proc->asts);
525 INIT_LIST_HEAD(&proc->locks); 533 INIT_LIST_HEAD(&proc->locks);
534 INIT_LIST_HEAD(&proc->unlocking);
526 spin_lock_init(&proc->asts_spin); 535 spin_lock_init(&proc->asts_spin);
527 spin_lock_init(&proc->locks_spin); 536 spin_lock_init(&proc->locks_spin);
528 init_waitqueue_head(&proc->wait); 537 init_waitqueue_head(&proc->wait);
@@ -750,7 +759,7 @@ static int ctl_device_close(struct inode *inode, struct file *file)
750 return 0; 759 return 0;
751} 760}
752 761
753static struct file_operations device_fops = { 762static const struct file_operations device_fops = {
754 .open = device_open, 763 .open = device_open,
755 .release = device_close, 764 .release = device_close,
756 .read = device_read, 765 .read = device_read,
@@ -759,7 +768,7 @@ static struct file_operations device_fops = {
759 .owner = THIS_MODULE, 768 .owner = THIS_MODULE,
760}; 769};
761 770
762static struct file_operations ctl_device_fops = { 771static const struct file_operations ctl_device_fops = {
763 .open = ctl_device_open, 772 .open = ctl_device_open,
764 .release = ctl_device_close, 773 .release = ctl_device_close,
765 .write = device_write, 774 .write = device_write,
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 767197db9944..963889cf6740 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
134 rc->rc_type = cpu_to_le32(rc->rc_type); 134 rc->rc_type = cpu_to_le32(rc->rc_type);
135 rc->rc_result = cpu_to_le32(rc->rc_result); 135 rc->rc_result = cpu_to_le32(rc->rc_result);
136 rc->rc_id = cpu_to_le64(rc->rc_id); 136 rc->rc_id = cpu_to_le64(rc->rc_id);
137 rc->rc_seq = cpu_to_le64(rc->rc_seq);
138 rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
137 139
138 if (type == DLM_RCOM_LOCK) 140 if (type == DLM_RCOM_LOCK)
139 rcom_lock_out((struct rcom_lock *) rc->rc_buf); 141 rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
151 rc->rc_type = le32_to_cpu(rc->rc_type); 153 rc->rc_type = le32_to_cpu(rc->rc_type);
152 rc->rc_result = le32_to_cpu(rc->rc_result); 154 rc->rc_result = le32_to_cpu(rc->rc_result);
153 rc->rc_id = le64_to_cpu(rc->rc_id); 155 rc->rc_id = le64_to_cpu(rc->rc_id);
156 rc->rc_seq = le64_to_cpu(rc->rc_seq);
157 rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
154 158
155 if (rc->rc_type == DLM_RCOM_LOCK) 159 if (rc->rc_type == DLM_RCOM_LOCK)
156 rcom_lock_in((struct rcom_lock *) rc->rc_buf); 160 rcom_lock_in((struct rcom_lock *) rc->rc_buf);