aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-05-24 18:04:00 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-05-24 18:04:00 -0400
commit: df3256f9ab7ae2127144de5ba2abca332278a42d (patch)
tree: c2cbdca425b745894a23f8cf7d7c91effcd7478c /fs/dlm
parent: b0ca118dbacbc6c35e15f216e25e95cca7aedf5b (diff)
parent: 901025d2f3194b4868980c8ba80df4cc0aa1282c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/dlm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/dlm:
  dlm: make plock operation killable
  dlm: remove shared message stub for recovery
  dlm: delayed reply message warning
  dlm: Remove superfluous call to recalc_sigpending()
Diffstat (limited to 'fs/dlm')
-rw-r--r-- fs/dlm/config.c 9
-rw-r--r-- fs/dlm/config.h 1
-rw-r--r-- fs/dlm/dlm_internal.h 3
-rw-r--r-- fs/dlm/lock.c 182
-rw-r--r-- fs/dlm/lock.h 1
-rw-r--r-- fs/dlm/lockspace.c 6
-rw-r--r-- fs/dlm/plock.c 65
-rw-r--r-- fs/dlm/user.c 1
8 files changed, 219 insertions, 49 deletions
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
100 unsigned int cl_log_debug; 100 unsigned int cl_log_debug;
101 unsigned int cl_protocol; 101 unsigned int cl_protocol;
102 unsigned int cl_timewarn_cs; 102 unsigned int cl_timewarn_cs;
103 unsigned int cl_waitwarn_us;
103}; 104};
104 105
105enum { 106enum {
@@ -114,6 +115,7 @@ enum {
114 CLUSTER_ATTR_LOG_DEBUG, 115 CLUSTER_ATTR_LOG_DEBUG,
115 CLUSTER_ATTR_PROTOCOL, 116 CLUSTER_ATTR_PROTOCOL,
116 CLUSTER_ATTR_TIMEWARN_CS, 117 CLUSTER_ATTR_TIMEWARN_CS,
118 CLUSTER_ATTR_WAITWARN_US,
117}; 119};
118 120
119struct cluster_attribute { 121struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
166CLUSTER_ATTR(log_debug, 0); 168CLUSTER_ATTR(log_debug, 0);
167CLUSTER_ATTR(protocol, 0); 169CLUSTER_ATTR(protocol, 0);
168CLUSTER_ATTR(timewarn_cs, 1); 170CLUSTER_ATTR(timewarn_cs, 1);
171CLUSTER_ATTR(waitwarn_us, 0);
169 172
170static struct configfs_attribute *cluster_attrs[] = { 173static struct configfs_attribute *cluster_attrs[] = {
171 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 174 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
179 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 182 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
180 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 183 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
181 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 184 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
185 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
182 NULL, 186 NULL,
183}; 187};
184 188
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
439 cl->cl_log_debug = dlm_config.ci_log_debug; 443 cl->cl_log_debug = dlm_config.ci_log_debug;
440 cl->cl_protocol = dlm_config.ci_protocol; 444 cl->cl_protocol = dlm_config.ci_protocol;
441 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 445 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
446 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
442 447
443 space_list = &sps->ss_group; 448 space_list = &sps->ss_group;
444 comm_list = &cms->cs_group; 449 comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
986#define DEFAULT_LOG_DEBUG 0 991#define DEFAULT_LOG_DEBUG 0
987#define DEFAULT_PROTOCOL 0 992#define DEFAULT_PROTOCOL 0
988#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 993#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
994#define DEFAULT_WAITWARN_US 0
989 995
990struct dlm_config_info dlm_config = { 996struct dlm_config_info dlm_config = {
991 .ci_tcp_port = DEFAULT_TCP_PORT, 997 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
998 .ci_scan_secs = DEFAULT_SCAN_SECS, 1004 .ci_scan_secs = DEFAULT_SCAN_SECS,
999 .ci_log_debug = DEFAULT_LOG_DEBUG, 1005 .ci_log_debug = DEFAULT_LOG_DEBUG,
1000 .ci_protocol = DEFAULT_PROTOCOL, 1006 .ci_protocol = DEFAULT_PROTOCOL,
1001 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS 1007 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1008 .ci_waitwarn_us = DEFAULT_WAITWARN_US
1002}; 1009};
1003 1010
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs; 30 int ci_timewarn_cs;
31 int ci_waitwarn_us;
31}; 32};
32 33
33extern struct dlm_config_info dlm_config; 34extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
209#define DLM_IFL_WATCH_TIMEWARN 0x00400000 209#define DLM_IFL_WATCH_TIMEWARN 0x00400000
210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
212#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
212#define DLM_IFL_USER 0x00000001 213#define DLM_IFL_USER 0x00000001
213#define DLM_IFL_ORPHAN 0x00000002 214#define DLM_IFL_ORPHAN 0x00000002
214 215
@@ -245,6 +246,7 @@ struct dlm_lkb {
245 246
246 int8_t lkb_wait_type; /* type of reply waiting for */ 247 int8_t lkb_wait_type; /* type of reply waiting for */
247 int8_t lkb_wait_count; 248 int8_t lkb_wait_count;
249 int lkb_wait_nodeid; /* for debugging */
248 250
249 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 251 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
250 struct list_head lkb_statequeue; /* rsb g/c/w list */ 252 struct list_head lkb_statequeue; /* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
254 struct list_head lkb_ownqueue; /* list of locks for a process */ 256 struct list_head lkb_ownqueue; /* list of locks for a process */
255 struct list_head lkb_time_list; 257 struct list_head lkb_time_list;
256 ktime_t lkb_timestamp; 258 ktime_t lkb_timestamp;
259 ktime_t lkb_wait_time;
257 unsigned long lkb_timeout_cs; 260 unsigned long lkb_timeout_cs;
258 261
259 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; 262 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
799 return -1; 799 return -1;
800} 800}
801 801
/*
 * Test-and-record lookup in a table of node ids that already triggered a
 * warning.
 *
 * @nodeid:    node id to look up
 * @num_nodes: number of entries in @warned
 * @warned:    table of recorded node ids; a zero entry marks an unused slot
 *
 * The table is filled front to back.  Walking from the start, the first
 * unused slot reached means @nodeid has not been seen yet: it is stored
 * there and 0 is returned.  A slot already holding @nodeid returns 1.
 * When every slot is in use and none matches, 0 is returned and nothing is
 * recorded.
 */
static int nodeid_warned(int nodeid, int num_nodes, int *warned)
{
	int slot = 0;

	while (slot < num_nodes) {
		int seen = warned[slot];

		if (seen == 0) {
			/* end of the used part of the table: record it */
			warned[slot] = nodeid;
			return 0;
		}
		if (seen == nodeid)
			return 1;
		slot++;
	}
	return 0;
}
816
817void dlm_scan_waiters(struct dlm_ls *ls)
818{
819 struct dlm_lkb *lkb;
820 ktime_t zero = ktime_set(0, 0);
821 s64 us;
822 s64 debug_maxus = 0;
823 u32 debug_scanned = 0;
824 u32 debug_expired = 0;
825 int num_nodes = 0;
826 int *warned = NULL;
827
828 if (!dlm_config.ci_waitwarn_us)
829 return;
830
831 mutex_lock(&ls->ls_waiters_mutex);
832
833 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
834 if (ktime_equal(lkb->lkb_wait_time, zero))
835 continue;
836
837 debug_scanned++;
838
839 us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
840
841 if (us < dlm_config.ci_waitwarn_us)
842 continue;
843
844 lkb->lkb_wait_time = zero;
845
846 debug_expired++;
847 if (us > debug_maxus)
848 debug_maxus = us;
849
850 if (!num_nodes) {
851 num_nodes = ls->ls_num_nodes;
852 warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
853 if (warned)
854 memset(warned, 0, num_nodes * sizeof(int));
855 }
856 if (!warned)
857 continue;
858 if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
859 continue;
860
861 log_error(ls, "waitwarn %x %lld %d us check connection to "
862 "node %d", lkb->lkb_id, (long long)us,
863 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
864 }
865 mutex_unlock(&ls->ls_waiters_mutex);
866
867 if (warned)
868 kfree(warned);
869
870 if (debug_expired)
871 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
872 debug_scanned, debug_expired,
873 dlm_config.ci_waitwarn_us, (long long)debug_maxus);
874}
875
802/* add/remove lkb from global waiters list of lkb's waiting for 876/* add/remove lkb from global waiters list of lkb's waiting for
803 a reply from a remote node */ 877 a reply from a remote node */
804 878
805static int add_to_waiters(struct dlm_lkb *lkb, int mstype) 879static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
806{ 880{
807 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 881 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
808 int error = 0; 882 int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
842 916
843 lkb->lkb_wait_count++; 917 lkb->lkb_wait_count++;
844 lkb->lkb_wait_type = mstype; 918 lkb->lkb_wait_type = mstype;
919 lkb->lkb_wait_time = ktime_get();
920 lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
845 hold_lkb(lkb); 921 hold_lkb(lkb);
846 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); 922 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
847 out: 923 out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
961 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 1037 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
962 int error; 1038 int error;
963 1039
964 if (ms != &ls->ls_stub_ms) 1040 if (ms->m_flags != DLM_IFL_STUB_MS)
965 mutex_lock(&ls->ls_waiters_mutex); 1041 mutex_lock(&ls->ls_waiters_mutex);
966 error = _remove_from_waiters(lkb, ms->m_type, ms); 1042 error = _remove_from_waiters(lkb, ms->m_type, ms);
967 if (ms != &ls->ls_stub_ms) 1043 if (ms->m_flags != DLM_IFL_STUB_MS)
968 mutex_unlock(&ls->ls_waiters_mutex); 1044 mutex_unlock(&ls->ls_waiters_mutex);
969 return error; 1045 return error;
970} 1046}
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
1157 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) 1233 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1158 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); 1234 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
1159 mutex_unlock(&ls->ls_timeout_mutex); 1235 mutex_unlock(&ls->ls_timeout_mutex);
1236
1237 if (!dlm_config.ci_waitwarn_us)
1238 return;
1239
1240 mutex_lock(&ls->ls_waiters_mutex);
1241 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
1242 if (ktime_to_us(lkb->lkb_wait_time))
1243 lkb->lkb_wait_time = ktime_get();
1244 }
1245 mutex_unlock(&ls->ls_waiters_mutex);
1160} 1246}
1161 1247
1162/* lkb is master or local copy */ 1248/* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
1376 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become 1462 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
1377 compatible with other granted locks */ 1463 compatible with other granted locks */
1378 1464
1379static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) 1465static void munge_demoted(struct dlm_lkb *lkb)
1380{ 1466{
1381 if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
1382 log_print("munge_demoted %x invalid reply type %d",
1383 lkb->lkb_id, ms->m_type);
1384 return;
1385 }
1386
1387 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { 1467 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
1388 log_print("munge_demoted %x invalid modes gr %d rq %d", 1468 log_print("munge_demoted %x invalid modes gr %d rq %d",
1389 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); 1469 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2844 struct dlm_mhandle *mh; 2924 struct dlm_mhandle *mh;
2845 int to_nodeid, error; 2925 int to_nodeid, error;
2846 2926
2847 error = add_to_waiters(lkb, mstype); 2927 to_nodeid = r->res_nodeid;
2928
2929 error = add_to_waiters(lkb, mstype, to_nodeid);
2848 if (error) 2930 if (error)
2849 return error; 2931 return error;
2850 2932
2851 to_nodeid = r->res_nodeid;
2852
2853 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); 2933 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2854 if (error) 2934 if (error)
2855 goto fail; 2935 goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2880 /* down conversions go without a reply from the master */ 2960 /* down conversions go without a reply from the master */
2881 if (!error && down_conversion(lkb)) { 2961 if (!error && down_conversion(lkb)) {
2882 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); 2962 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
2963 r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
2883 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 2964 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
2884 r->res_ls->ls_stub_ms.m_result = 0; 2965 r->res_ls->ls_stub_ms.m_result = 0;
2885 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
2886 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); 2966 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
2887 } 2967 }
2888 2968
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2951 struct dlm_mhandle *mh; 3031 struct dlm_mhandle *mh;
2952 int to_nodeid, error; 3032 int to_nodeid, error;
2953 3033
2954 error = add_to_waiters(lkb, DLM_MSG_LOOKUP); 3034 to_nodeid = dlm_dir_nodeid(r);
3035
3036 error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
2955 if (error) 3037 if (error)
2956 return error; 3038 return error;
2957 3039
2958 to_nodeid = dlm_dir_nodeid(r);
2959
2960 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); 3040 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
2961 if (error) 3041 if (error)
2962 goto fail; 3042 goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
3070 3150
3071static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3151static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3072{ 3152{
3153 if (ms->m_flags == DLM_IFL_STUB_MS)
3154 return;
3155
3073 lkb->lkb_sbflags = ms->m_sbflags; 3156 lkb->lkb_sbflags = ms->m_sbflags;
3074 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | 3157 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
3075 (ms->m_flags & 0x0000FFFF); 3158 (ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3612 /* convert was queued on remote master */ 3695 /* convert was queued on remote master */
3613 receive_flags_reply(lkb, ms); 3696 receive_flags_reply(lkb, ms);
3614 if (is_demoted(lkb)) 3697 if (is_demoted(lkb))
3615 munge_demoted(lkb, ms); 3698 munge_demoted(lkb);
3616 del_lkb(r, lkb); 3699 del_lkb(r, lkb);
3617 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3700 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3618 add_timeout(lkb); 3701 add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3622 /* convert was granted on remote master */ 3705 /* convert was granted on remote master */
3623 receive_flags_reply(lkb, ms); 3706 receive_flags_reply(lkb, ms);
3624 if (is_demoted(lkb)) 3707 if (is_demoted(lkb))
3625 munge_demoted(lkb, ms); 3708 munge_demoted(lkb);
3626 grant_lock_pc(r, lkb, ms); 3709 grant_lock_pc(r, lkb, ms);
3627 queue_cast(r, lkb, 0); 3710 queue_cast(r, lkb, 0);
3628 break; 3711 break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
3996 dlm_put_lockspace(ls); 4079 dlm_put_lockspace(ls);
3997} 4080}
3998 4081
3999static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 4082static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
4083 struct dlm_message *ms_stub)
4000{ 4084{
4001 if (middle_conversion(lkb)) { 4085 if (middle_conversion(lkb)) {
4002 hold_lkb(lkb); 4086 hold_lkb(lkb);
4003 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 4087 memset(ms_stub, 0, sizeof(struct dlm_message));
4004 ls->ls_stub_ms.m_result = -EINPROGRESS; 4088 ms_stub->m_flags = DLM_IFL_STUB_MS;
4005 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4089 ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
4006 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4090 ms_stub->m_result = -EINPROGRESS;
4007 _receive_convert_reply(lkb, &ls->ls_stub_ms); 4091 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4092 _receive_convert_reply(lkb, ms_stub);
4008 4093
4009 /* Same special case as in receive_rcom_lock_args() */ 4094 /* Same special case as in receive_rcom_lock_args() */
4010 lkb->lkb_grmode = DLM_LOCK_IV; 4095 lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
4045void dlm_recover_waiters_pre(struct dlm_ls *ls) 4130void dlm_recover_waiters_pre(struct dlm_ls *ls)
4046{ 4131{
4047 struct dlm_lkb *lkb, *safe; 4132 struct dlm_lkb *lkb, *safe;
4133 struct dlm_message *ms_stub;
4048 int wait_type, stub_unlock_result, stub_cancel_result; 4134 int wait_type, stub_unlock_result, stub_cancel_result;
4049 4135
4136 ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message));
4137 if (!ms_stub) {
4138 log_error(ls, "dlm_recover_waiters_pre no mem");
4139 return;
4140 }
4141
4050 mutex_lock(&ls->ls_waiters_mutex); 4142 mutex_lock(&ls->ls_waiters_mutex);
4051 4143
4052 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { 4144 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
4053 log_debug(ls, "pre recover waiter lkid %x type %d flags %x", 4145
4054 lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); 4146 /* exclude debug messages about unlocks because there can be so
4147 many and they aren't very interesting */
4148
4149 if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
4150 log_debug(ls, "recover_waiter %x nodeid %d "
4151 "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
4152 lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
4153 }
4055 4154
4056 /* all outstanding lookups, regardless of destination will be 4155 /* all outstanding lookups, regardless of destination will be
4057 resent after recovery is done */ 4156 resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4097 break; 4196 break;
4098 4197
4099 case DLM_MSG_CONVERT: 4198 case DLM_MSG_CONVERT:
4100 recover_convert_waiter(ls, lkb); 4199 recover_convert_waiter(ls, lkb, ms_stub);
4101 break; 4200 break;
4102 4201
4103 case DLM_MSG_UNLOCK: 4202 case DLM_MSG_UNLOCK:
4104 hold_lkb(lkb); 4203 hold_lkb(lkb);
4105 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; 4204 memset(ms_stub, 0, sizeof(struct dlm_message));
4106 ls->ls_stub_ms.m_result = stub_unlock_result; 4205 ms_stub->m_flags = DLM_IFL_STUB_MS;
4107 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4206 ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
4108 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4207 ms_stub->m_result = stub_unlock_result;
4109 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 4208 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4209 _receive_unlock_reply(lkb, ms_stub);
4110 dlm_put_lkb(lkb); 4210 dlm_put_lkb(lkb);
4111 break; 4211 break;
4112 4212
4113 case DLM_MSG_CANCEL: 4213 case DLM_MSG_CANCEL:
4114 hold_lkb(lkb); 4214 hold_lkb(lkb);
4115 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; 4215 memset(ms_stub, 0, sizeof(struct dlm_message));
4116 ls->ls_stub_ms.m_result = stub_cancel_result; 4216 ms_stub->m_flags = DLM_IFL_STUB_MS;
4117 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4217 ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
4118 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4218 ms_stub->m_result = stub_cancel_result;
4119 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 4219 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4220 _receive_cancel_reply(lkb, ms_stub);
4120 dlm_put_lkb(lkb); 4221 dlm_put_lkb(lkb);
4121 break; 4222 break;
4122 4223
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4127 schedule(); 4228 schedule();
4128 } 4229 }
4129 mutex_unlock(&ls->ls_waiters_mutex); 4230 mutex_unlock(&ls->ls_waiters_mutex);
4231 kfree(ms_stub);
4130} 4232}
4131 4233
4132static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) 4234static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
4191 ou = is_overlap_unlock(lkb); 4293 ou = is_overlap_unlock(lkb);
4192 err = 0; 4294 err = 0;
4193 4295
4194 log_debug(ls, "recover_waiters_post %x type %d flags %x %s", 4296 log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
4195 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); 4297 lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
4196 4298
4197 /* At this point we assume that we won't get a reply to any 4299 /* At this point we assume that we won't get a reply to any
4198 previous op or overlap op on this lock. First, do a big 4300 previous op or overlap op on this lock. First, do a big
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
24void dlm_scan_rsbs(struct dlm_ls *ls); 24void dlm_scan_rsbs(struct dlm_ls *ls);
25int dlm_lock_recovery_try(struct dlm_ls *ls); 25int dlm_lock_recovery_try(struct dlm_ls *ls);
26void dlm_unlock_recovery(struct dlm_ls *ls); 26void dlm_unlock_recovery(struct dlm_ls *ls);
27void dlm_scan_waiters(struct dlm_ls *ls);
27void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
28void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
29 30
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
243static int dlm_scand(void *data) 243static int dlm_scand(void *data)
244{ 244{
245 struct dlm_ls *ls; 245 struct dlm_ls *ls;
246 int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
247 246
248 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
249 ls = find_ls_to_scan(); 248 ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
252 ls->ls_scan_time = jiffies; 251 ls->ls_scan_time = jiffies;
253 dlm_scan_rsbs(ls); 252 dlm_scan_rsbs(ls);
254 dlm_scan_timeout(ls); 253 dlm_scan_timeout(ls);
254 dlm_scan_waiters(ls);
255 dlm_unlock_recovery(ls); 255 dlm_unlock_recovery(ls);
256 } else { 256 } else {
257 ls->ls_scan_time += HZ; 257 ls->ls_scan_time += HZ;
258 } 258 }
259 } else { 259 continue;
260 schedule_timeout_interruptible(timeout_jiffies);
261 } 260 }
261 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
262 } 262 }
263 return 0; 263 return 0;
264} 264}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
71 wake_up(&send_wq); 71 wake_up(&send_wq);
72} 72}
73 73
74/* If a process was killed while waiting for the only plock on a file,
75 locks_remove_posix will not see any lock on the file so it won't
76 send an unlock-close to us to pass on to userspace to clean up the
77 abandoned waiter. So, we have to insert the unlock-close when the
78 lock call is interrupted. */
79
80static void do_unlock_close(struct dlm_ls *ls, u64 number,
81 struct file *file, struct file_lock *fl)
82{
83 struct plock_op *op;
84
85 op = kzalloc(sizeof(*op), GFP_NOFS);
86 if (!op)
87 return;
88
89 op->info.optype = DLM_PLOCK_OP_UNLOCK;
90 op->info.pid = fl->fl_pid;
91 op->info.fsid = ls->ls_global_id;
92 op->info.number = number;
93 op->info.start = 0;
94 op->info.end = OFFSET_MAX;
95 if (fl->fl_lmops && fl->fl_lmops->fl_grant)
96 op->info.owner = (__u64) fl->fl_pid;
97 else
98 op->info.owner = (__u64)(long) fl->fl_owner;
99
100 op->info.flags |= DLM_PLOCK_FL_CLOSE;
101 send_op(op);
102}
103
74int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 104int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
75 int cmd, struct file_lock *fl) 105 int cmd, struct file_lock *fl)
76{ 106{
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
114 144
115 send_op(op); 145 send_op(op);
116 146
117 if (xop->callback == NULL) 147 if (xop->callback == NULL) {
118 wait_event(recv_wq, (op->done != 0)); 148 rv = wait_event_killable(recv_wq, (op->done != 0));
119 else { 149 if (rv == -ERESTARTSYS) {
150 log_debug(ls, "dlm_posix_lock: wait killed %llx",
151 (unsigned long long)number);
152 spin_lock(&ops_lock);
153 list_del(&op->list);
154 spin_unlock(&ops_lock);
155 kfree(xop);
156 do_unlock_close(ls, number, file, fl);
157 goto out;
158 }
159 } else {
120 rv = FILE_LOCK_DEFERRED; 160 rv = FILE_LOCK_DEFERRED;
121 goto out; 161 goto out;
122 } 162 }
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
233 else 273 else
234 op->info.owner = (__u64)(long) fl->fl_owner; 274 op->info.owner = (__u64)(long) fl->fl_owner;
235 275
276 if (fl->fl_flags & FL_CLOSE) {
277 op->info.flags |= DLM_PLOCK_FL_CLOSE;
278 send_op(op);
279 rv = 0;
280 goto out;
281 }
282
236 send_op(op); 283 send_op(op);
237 wait_event(recv_wq, (op->done != 0)); 284 wait_event(recv_wq, (op->done != 0));
238 285
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
334 spin_lock(&ops_lock); 381 spin_lock(&ops_lock);
335 if (!list_empty(&send_list)) { 382 if (!list_empty(&send_list)) {
336 op = list_entry(send_list.next, struct plock_op, list); 383 op = list_entry(send_list.next, struct plock_op, list);
337 list_move(&op->list, &recv_list); 384 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
385 list_del(&op->list);
386 else
387 list_move(&op->list, &recv_list);
338 memcpy(&info, &op->info, sizeof(info)); 388 memcpy(&info, &op->info, sizeof(info));
339 } 389 }
340 spin_unlock(&ops_lock); 390 spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
342 if (!op) 392 if (!op)
343 return -EAGAIN; 393 return -EAGAIN;
344 394
395 /* there is no need to get a reply from userspace for unlocks
396 that were generated by the vfs cleaning up for a close
397 (the process did not make an unlock call). */
398
399 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
400 kfree(op);
401
345 if (copy_to_user(u, &info, sizeof(info))) 402 if (copy_to_user(u, &info, sizeof(info)))
346 return -EFAULT; 403 return -EFAULT;
347 return sizeof(info); 404 return sizeof(info);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
611 611
612 out_sig: 612 out_sig:
613 sigprocmask(SIG_SETMASK, &tmpsig, NULL); 613 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
614 recalc_sigpending();
615 out_free: 614 out_free:
616 kfree(kbuf); 615 kfree(kbuf);
617 return error; 616 return error;