about | summary | refs | log | tree | commit | diff | stats
path: root/fs/dlm/lock.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm/lock.c')
-rw-r--r-- fs/dlm/lock.c 249
1 files changed, 177 insertions, 72 deletions
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 3915b8e14146..ff4a198fa677 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -88,7 +88,6 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
88static int receive_extralen(struct dlm_message *ms); 88static int receive_extralen(struct dlm_message *ms);
89static void do_purge(struct dlm_ls *ls, int nodeid, int pid); 89static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
90static void del_timeout(struct dlm_lkb *lkb); 90static void del_timeout(struct dlm_lkb *lkb);
91void dlm_timeout_warn(struct dlm_lkb *lkb);
92 91
93/* 92/*
94 * Lock compatibilty matrix - thanks Steve 93 * Lock compatibilty matrix - thanks Steve
@@ -335,7 +334,7 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
335{ 334{
336 struct dlm_rsb *r; 335 struct dlm_rsb *r;
337 336
338 r = allocate_rsb(ls, len); 337 r = dlm_allocate_rsb(ls, len);
339 if (!r) 338 if (!r)
340 return NULL; 339 return NULL;
341 340
@@ -478,7 +477,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
478 error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); 477 error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
479 if (!error) { 478 if (!error) {
480 write_unlock(&ls->ls_rsbtbl[bucket].lock); 479 write_unlock(&ls->ls_rsbtbl[bucket].lock);
481 free_rsb(r); 480 dlm_free_rsb(r);
482 r = tmp; 481 r = tmp;
483 goto out; 482 goto out;
484 } 483 }
@@ -490,12 +489,6 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
490 return error; 489 return error;
491} 490}
492 491
493int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
494 unsigned int flags, struct dlm_rsb **r_ret)
495{
496 return find_rsb(ls, name, namelen, flags, r_ret);
497}
498
499/* This is only called to add a reference when the code already holds 492/* This is only called to add a reference when the code already holds
500 a valid reference to the rsb, so there's no need for locking. */ 493 a valid reference to the rsb, so there's no need for locking. */
501 494
@@ -519,7 +512,7 @@ static void toss_rsb(struct kref *kref)
519 list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); 512 list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss);
520 r->res_toss_time = jiffies; 513 r->res_toss_time = jiffies;
521 if (r->res_lvbptr) { 514 if (r->res_lvbptr) {
522 free_lvb(r->res_lvbptr); 515 dlm_free_lvb(r->res_lvbptr);
523 r->res_lvbptr = NULL; 516 r->res_lvbptr = NULL;
524 } 517 }
525} 518}
@@ -589,7 +582,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
589 uint32_t lkid = 0; 582 uint32_t lkid = 0;
590 uint16_t bucket; 583 uint16_t bucket;
591 584
592 lkb = allocate_lkb(ls); 585 lkb = dlm_allocate_lkb(ls);
593 if (!lkb) 586 if (!lkb)
594 return -ENOMEM; 587 return -ENOMEM;
595 588
@@ -683,8 +676,8 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
683 676
684 /* for local/process lkbs, lvbptr points to caller's lksb */ 677 /* for local/process lkbs, lvbptr points to caller's lksb */
685 if (lkb->lkb_lvbptr && is_master_copy(lkb)) 678 if (lkb->lkb_lvbptr && is_master_copy(lkb))
686 free_lvb(lkb->lkb_lvbptr); 679 dlm_free_lvb(lkb->lkb_lvbptr);
687 free_lkb(lkb); 680 dlm_free_lkb(lkb);
688 return 1; 681 return 1;
689 } else { 682 } else {
690 write_unlock(&ls->ls_lkbtbl[bucket].lock); 683 write_unlock(&ls->ls_lkbtbl[bucket].lock);
@@ -988,7 +981,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
988 981
989 if (is_master(r)) 982 if (is_master(r))
990 dir_remove(r); 983 dir_remove(r);
991 free_rsb(r); 984 dlm_free_rsb(r);
992 count++; 985 count++;
993 } else { 986 } else {
994 write_unlock(&ls->ls_rsbtbl[b].lock); 987 write_unlock(&ls->ls_rsbtbl[b].lock);
@@ -1171,7 +1164,7 @@ static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1171 return; 1164 return;
1172 1165
1173 if (!r->res_lvbptr) 1166 if (!r->res_lvbptr)
1174 r->res_lvbptr = allocate_lvb(r->res_ls); 1167 r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
1175 1168
1176 if (!r->res_lvbptr) 1169 if (!r->res_lvbptr)
1177 return; 1170 return;
@@ -1203,7 +1196,7 @@ static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1203 return; 1196 return;
1204 1197
1205 if (!r->res_lvbptr) 1198 if (!r->res_lvbptr)
1206 r->res_lvbptr = allocate_lvb(r->res_ls); 1199 r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
1207 1200
1208 if (!r->res_lvbptr) 1201 if (!r->res_lvbptr)
1209 return; 1202 return;
@@ -1852,7 +1845,7 @@ static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb)
1852static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) 1845static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
1853{ 1846{
1854 struct dlm_ls *ls = r->res_ls; 1847 struct dlm_ls *ls = r->res_ls;
1855 int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); 1848 int i, error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
1856 1849
1857 if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { 1850 if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
1858 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); 1851 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
@@ -1886,7 +1879,7 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
1886 return 1; 1879 return 1;
1887 } 1880 }
1888 1881
1889 for (;;) { 1882 for (i = 0; i < 2; i++) {
1890 /* It's possible for dlm_scand to remove an old rsb for 1883 /* It's possible for dlm_scand to remove an old rsb for
1891 this same resource from the toss list, us to create 1884 this same resource from the toss list, us to create
1892 a new one, look up the master locally, and find it 1885 a new one, look up the master locally, and find it
@@ -1900,6 +1893,8 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
1900 log_debug(ls, "dir_lookup error %d %s", error, r->res_name); 1893 log_debug(ls, "dir_lookup error %d %s", error, r->res_name);
1901 schedule(); 1894 schedule();
1902 } 1895 }
1896 if (error && error != -EEXIST)
1897 return error;
1903 1898
1904 if (ret_nodeid == our_nodeid) { 1899 if (ret_nodeid == our_nodeid) {
1905 r->res_first_lkid = 0; 1900 r->res_first_lkid = 0;
@@ -1941,8 +1936,11 @@ static void confirm_master(struct dlm_rsb *r, int error)
1941 break; 1936 break;
1942 1937
1943 case -EAGAIN: 1938 case -EAGAIN:
1944 /* the remote master didn't queue our NOQUEUE request; 1939 case -EBADR:
1945 make a waiting lkb the first_lkid */ 1940 case -ENOTBLK:
1941 /* the remote request failed and won't be retried (it was
1942 a NOQUEUE, or has been canceled/unlocked); make a waiting
1943 lkb the first_lkid */
1946 1944
1947 r->res_first_lkid = 0; 1945 r->res_first_lkid = 0;
1948 1946
@@ -2108,17 +2106,18 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
2108 /* an lkb may be waiting for an rsb lookup to complete where the 2106 /* an lkb may be waiting for an rsb lookup to complete where the
2109 lookup was initiated by another lock */ 2107 lookup was initiated by another lock */
2110 2108
2111 if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) { 2109 if (!list_empty(&lkb->lkb_rsb_lookup)) {
2112 if (!list_empty(&lkb->lkb_rsb_lookup)) { 2110 if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
2113 log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id); 2111 log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id);
2114 list_del_init(&lkb->lkb_rsb_lookup); 2112 list_del_init(&lkb->lkb_rsb_lookup);
2115 queue_cast(lkb->lkb_resource, lkb, 2113 queue_cast(lkb->lkb_resource, lkb,
2116 args->flags & DLM_LKF_CANCEL ? 2114 args->flags & DLM_LKF_CANCEL ?
2117 -DLM_ECANCEL : -DLM_EUNLOCK); 2115 -DLM_ECANCEL : -DLM_EUNLOCK);
2118 unhold_lkb(lkb); /* undoes create_lkb() */ 2116 unhold_lkb(lkb); /* undoes create_lkb() */
2119 rv = -EBUSY;
2120 goto out;
2121 } 2117 }
2118 /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
2119 rv = -EBUSY;
2120 goto out;
2122 } 2121 }
2123 2122
2124 /* cancel not allowed with another cancel/unlock in progress */ 2123 /* cancel not allowed with another cancel/unlock in progress */
@@ -2986,7 +2985,7 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
2986 2985
2987 if (lkb->lkb_exflags & DLM_LKF_VALBLK) { 2986 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
2988 if (!lkb->lkb_lvbptr) 2987 if (!lkb->lkb_lvbptr)
2989 lkb->lkb_lvbptr = allocate_lvb(ls); 2988 lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
2990 if (!lkb->lkb_lvbptr) 2989 if (!lkb->lkb_lvbptr)
2991 return -ENOMEM; 2990 return -ENOMEM;
2992 len = receive_extralen(ms); 2991 len = receive_extralen(ms);
@@ -3006,11 +3005,9 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3006 lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); 3005 lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST);
3007 lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); 3006 lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP);
3008 3007
3009 DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
3010
3011 if (lkb->lkb_exflags & DLM_LKF_VALBLK) { 3008 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3012 /* lkb was just created so there won't be an lvb yet */ 3009 /* lkb was just created so there won't be an lvb yet */
3013 lkb->lkb_lvbptr = allocate_lvb(ls); 3010 lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
3014 if (!lkb->lkb_lvbptr) 3011 if (!lkb->lkb_lvbptr)
3015 return -ENOMEM; 3012 return -ENOMEM;
3016 } 3013 }
@@ -3021,16 +3018,6 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3021static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, 3018static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3022 struct dlm_message *ms) 3019 struct dlm_message *ms)
3023{ 3020{
3024 if (lkb->lkb_nodeid != ms->m_header.h_nodeid) {
3025 log_error(ls, "convert_args nodeid %d %d lkid %x %x",
3026 lkb->lkb_nodeid, ms->m_header.h_nodeid,
3027 lkb->lkb_id, lkb->lkb_remid);
3028 return -EINVAL;
3029 }
3030
3031 if (!is_master_copy(lkb))
3032 return -EINVAL;
3033
3034 if (lkb->lkb_status != DLM_LKSTS_GRANTED) 3021 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
3035 return -EBUSY; 3022 return -EBUSY;
3036 3023
@@ -3046,8 +3033,6 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3046static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, 3033static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3047 struct dlm_message *ms) 3034 struct dlm_message *ms)
3048{ 3035{
3049 if (!is_master_copy(lkb))
3050 return -EINVAL;
3051 if (receive_lvb(ls, lkb, ms)) 3036 if (receive_lvb(ls, lkb, ms))
3052 return -ENOMEM; 3037 return -ENOMEM;
3053 return 0; 3038 return 0;
@@ -3063,6 +3048,50 @@ static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
3063 lkb->lkb_remid = ms->m_lkid; 3048 lkb->lkb_remid = ms->m_lkid;
3064} 3049}
3065 3050
3051/* This is called after the rsb is locked so that we can safely inspect
3052 fields in the lkb. */
3053
3054static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
3055{
3056 int from = ms->m_header.h_nodeid;
3057 int error = 0;
3058
3059 switch (ms->m_type) {
3060 case DLM_MSG_CONVERT:
3061 case DLM_MSG_UNLOCK:
3062 case DLM_MSG_CANCEL:
3063 if (!is_master_copy(lkb) || lkb->lkb_nodeid != from)
3064 error = -EINVAL;
3065 break;
3066
3067 case DLM_MSG_CONVERT_REPLY:
3068 case DLM_MSG_UNLOCK_REPLY:
3069 case DLM_MSG_CANCEL_REPLY:
3070 case DLM_MSG_GRANT:
3071 case DLM_MSG_BAST:
3072 if (!is_process_copy(lkb) || lkb->lkb_nodeid != from)
3073 error = -EINVAL;
3074 break;
3075
3076 case DLM_MSG_REQUEST_REPLY:
3077 if (!is_process_copy(lkb))
3078 error = -EINVAL;
3079 else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from)
3080 error = -EINVAL;
3081 break;
3082
3083 default:
3084 error = -EINVAL;
3085 }
3086
3087 if (error)
3088 log_error(lkb->lkb_resource->res_ls,
3089 "ignore invalid message %d from %d %x %x %x %d",
3090 ms->m_type, from, lkb->lkb_id, lkb->lkb_remid,
3091 lkb->lkb_flags, lkb->lkb_nodeid);
3092 return error;
3093}
3094
3066static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) 3095static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
3067{ 3096{
3068 struct dlm_lkb *lkb; 3097 struct dlm_lkb *lkb;
@@ -3124,17 +3153,21 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
3124 hold_rsb(r); 3153 hold_rsb(r);
3125 lock_rsb(r); 3154 lock_rsb(r);
3126 3155
3156 error = validate_message(lkb, ms);
3157 if (error)
3158 goto out;
3159
3127 receive_flags(lkb, ms); 3160 receive_flags(lkb, ms);
3128 error = receive_convert_args(ls, lkb, ms); 3161 error = receive_convert_args(ls, lkb, ms);
3129 if (error) 3162 if (error)
3130 goto out; 3163 goto out_reply;
3131 reply = !down_conversion(lkb); 3164 reply = !down_conversion(lkb);
3132 3165
3133 error = do_convert(r, lkb); 3166 error = do_convert(r, lkb);
3134 out: 3167 out_reply:
3135 if (reply) 3168 if (reply)
3136 send_convert_reply(r, lkb, error); 3169 send_convert_reply(r, lkb, error);
3137 3170 out:
3138 unlock_rsb(r); 3171 unlock_rsb(r);
3139 put_rsb(r); 3172 put_rsb(r);
3140 dlm_put_lkb(lkb); 3173 dlm_put_lkb(lkb);
@@ -3160,15 +3193,19 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
3160 hold_rsb(r); 3193 hold_rsb(r);
3161 lock_rsb(r); 3194 lock_rsb(r);
3162 3195
3196 error = validate_message(lkb, ms);
3197 if (error)
3198 goto out;
3199
3163 receive_flags(lkb, ms); 3200 receive_flags(lkb, ms);
3164 error = receive_unlock_args(ls, lkb, ms); 3201 error = receive_unlock_args(ls, lkb, ms);
3165 if (error) 3202 if (error)
3166 goto out; 3203 goto out_reply;
3167 3204
3168 error = do_unlock(r, lkb); 3205 error = do_unlock(r, lkb);
3169 out: 3206 out_reply:
3170 send_unlock_reply(r, lkb, error); 3207 send_unlock_reply(r, lkb, error);
3171 3208 out:
3172 unlock_rsb(r); 3209 unlock_rsb(r);
3173 put_rsb(r); 3210 put_rsb(r);
3174 dlm_put_lkb(lkb); 3211 dlm_put_lkb(lkb);
@@ -3196,9 +3233,13 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
3196 hold_rsb(r); 3233 hold_rsb(r);
3197 lock_rsb(r); 3234 lock_rsb(r);
3198 3235
3236 error = validate_message(lkb, ms);
3237 if (error)
3238 goto out;
3239
3199 error = do_cancel(r, lkb); 3240 error = do_cancel(r, lkb);
3200 send_cancel_reply(r, lkb, error); 3241 send_cancel_reply(r, lkb, error);
3201 3242 out:
3202 unlock_rsb(r); 3243 unlock_rsb(r);
3203 put_rsb(r); 3244 put_rsb(r);
3204 dlm_put_lkb(lkb); 3245 dlm_put_lkb(lkb);
@@ -3217,22 +3258,26 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
3217 3258
3218 error = find_lkb(ls, ms->m_remid, &lkb); 3259 error = find_lkb(ls, ms->m_remid, &lkb);
3219 if (error) { 3260 if (error) {
3220 log_error(ls, "receive_grant no lkb"); 3261 log_debug(ls, "receive_grant from %d no lkb %x",
3262 ms->m_header.h_nodeid, ms->m_remid);
3221 return; 3263 return;
3222 } 3264 }
3223 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3224 3265
3225 r = lkb->lkb_resource; 3266 r = lkb->lkb_resource;
3226 3267
3227 hold_rsb(r); 3268 hold_rsb(r);
3228 lock_rsb(r); 3269 lock_rsb(r);
3229 3270
3271 error = validate_message(lkb, ms);
3272 if (error)
3273 goto out;
3274
3230 receive_flags_reply(lkb, ms); 3275 receive_flags_reply(lkb, ms);
3231 if (is_altmode(lkb)) 3276 if (is_altmode(lkb))
3232 munge_altmode(lkb, ms); 3277 munge_altmode(lkb, ms);
3233 grant_lock_pc(r, lkb, ms); 3278 grant_lock_pc(r, lkb, ms);
3234 queue_cast(r, lkb, 0); 3279 queue_cast(r, lkb, 0);
3235 3280 out:
3236 unlock_rsb(r); 3281 unlock_rsb(r);
3237 put_rsb(r); 3282 put_rsb(r);
3238 dlm_put_lkb(lkb); 3283 dlm_put_lkb(lkb);
@@ -3246,18 +3291,22 @@ static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
3246 3291
3247 error = find_lkb(ls, ms->m_remid, &lkb); 3292 error = find_lkb(ls, ms->m_remid, &lkb);
3248 if (error) { 3293 if (error) {
3249 log_error(ls, "receive_bast no lkb"); 3294 log_debug(ls, "receive_bast from %d no lkb %x",
3295 ms->m_header.h_nodeid, ms->m_remid);
3250 return; 3296 return;
3251 } 3297 }
3252 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3253 3298
3254 r = lkb->lkb_resource; 3299 r = lkb->lkb_resource;
3255 3300
3256 hold_rsb(r); 3301 hold_rsb(r);
3257 lock_rsb(r); 3302 lock_rsb(r);
3258 3303
3259 queue_bast(r, lkb, ms->m_bastmode); 3304 error = validate_message(lkb, ms);
3305 if (error)
3306 goto out;
3260 3307
3308 queue_bast(r, lkb, ms->m_bastmode);
3309 out:
3261 unlock_rsb(r); 3310 unlock_rsb(r);
3262 put_rsb(r); 3311 put_rsb(r);
3263 dlm_put_lkb(lkb); 3312 dlm_put_lkb(lkb);
@@ -3323,15 +3372,19 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
3323 3372
3324 error = find_lkb(ls, ms->m_remid, &lkb); 3373 error = find_lkb(ls, ms->m_remid, &lkb);
3325 if (error) { 3374 if (error) {
3326 log_error(ls, "receive_request_reply no lkb"); 3375 log_debug(ls, "receive_request_reply from %d no lkb %x",
3376 ms->m_header.h_nodeid, ms->m_remid);
3327 return; 3377 return;
3328 } 3378 }
3329 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3330 3379
3331 r = lkb->lkb_resource; 3380 r = lkb->lkb_resource;
3332 hold_rsb(r); 3381 hold_rsb(r);
3333 lock_rsb(r); 3382 lock_rsb(r);
3334 3383
3384 error = validate_message(lkb, ms);
3385 if (error)
3386 goto out;
3387
3335 mstype = lkb->lkb_wait_type; 3388 mstype = lkb->lkb_wait_type;
3336 error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); 3389 error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
3337 if (error) 3390 if (error)
@@ -3383,6 +3436,7 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
3383 if (is_overlap(lkb)) { 3436 if (is_overlap(lkb)) {
3384 /* we'll ignore error in cancel/unlock reply */ 3437 /* we'll ignore error in cancel/unlock reply */
3385 queue_cast_overlap(r, lkb); 3438 queue_cast_overlap(r, lkb);
3439 confirm_master(r, result);
3386 unhold_lkb(lkb); /* undoes create_lkb() */ 3440 unhold_lkb(lkb); /* undoes create_lkb() */
3387 } else 3441 } else
3388 _request_lock(r, lkb); 3442 _request_lock(r, lkb);
@@ -3463,6 +3517,10 @@ static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3463 hold_rsb(r); 3517 hold_rsb(r);
3464 lock_rsb(r); 3518 lock_rsb(r);
3465 3519
3520 error = validate_message(lkb, ms);
3521 if (error)
3522 goto out;
3523
3466 /* stub reply can happen with waiters_mutex held */ 3524 /* stub reply can happen with waiters_mutex held */
3467 error = remove_from_waiters_ms(lkb, ms); 3525 error = remove_from_waiters_ms(lkb, ms);
3468 if (error) 3526 if (error)
@@ -3481,10 +3539,10 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
3481 3539
3482 error = find_lkb(ls, ms->m_remid, &lkb); 3540 error = find_lkb(ls, ms->m_remid, &lkb);
3483 if (error) { 3541 if (error) {
3484 log_error(ls, "receive_convert_reply no lkb"); 3542 log_debug(ls, "receive_convert_reply from %d no lkb %x",
3543 ms->m_header.h_nodeid, ms->m_remid);
3485 return; 3544 return;
3486 } 3545 }
3487 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3488 3546
3489 _receive_convert_reply(lkb, ms); 3547 _receive_convert_reply(lkb, ms);
3490 dlm_put_lkb(lkb); 3548 dlm_put_lkb(lkb);
@@ -3498,6 +3556,10 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3498 hold_rsb(r); 3556 hold_rsb(r);
3499 lock_rsb(r); 3557 lock_rsb(r);
3500 3558
3559 error = validate_message(lkb, ms);
3560 if (error)
3561 goto out;
3562
3501 /* stub reply can happen with waiters_mutex held */ 3563 /* stub reply can happen with waiters_mutex held */
3502 error = remove_from_waiters_ms(lkb, ms); 3564 error = remove_from_waiters_ms(lkb, ms);
3503 if (error) 3565 if (error)
@@ -3529,10 +3591,10 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
3529 3591
3530 error = find_lkb(ls, ms->m_remid, &lkb); 3592 error = find_lkb(ls, ms->m_remid, &lkb);
3531 if (error) { 3593 if (error) {
3532 log_error(ls, "receive_unlock_reply no lkb"); 3594 log_debug(ls, "receive_unlock_reply from %d no lkb %x",
3595 ms->m_header.h_nodeid, ms->m_remid);
3533 return; 3596 return;
3534 } 3597 }
3535 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3536 3598
3537 _receive_unlock_reply(lkb, ms); 3599 _receive_unlock_reply(lkb, ms);
3538 dlm_put_lkb(lkb); 3600 dlm_put_lkb(lkb);
@@ -3546,6 +3608,10 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3546 hold_rsb(r); 3608 hold_rsb(r);
3547 lock_rsb(r); 3609 lock_rsb(r);
3548 3610
3611 error = validate_message(lkb, ms);
3612 if (error)
3613 goto out;
3614
3549 /* stub reply can happen with waiters_mutex held */ 3615 /* stub reply can happen with waiters_mutex held */
3550 error = remove_from_waiters_ms(lkb, ms); 3616 error = remove_from_waiters_ms(lkb, ms);
3551 if (error) 3617 if (error)
@@ -3577,10 +3643,10 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
3577 3643
3578 error = find_lkb(ls, ms->m_remid, &lkb); 3644 error = find_lkb(ls, ms->m_remid, &lkb);
3579 if (error) { 3645 if (error) {
3580 log_error(ls, "receive_cancel_reply no lkb"); 3646 log_debug(ls, "receive_cancel_reply from %d no lkb %x",
3647 ms->m_header.h_nodeid, ms->m_remid);
3581 return; 3648 return;
3582 } 3649 }
3583 DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb););
3584 3650
3585 _receive_cancel_reply(lkb, ms); 3651 _receive_cancel_reply(lkb, ms);
3586 dlm_put_lkb(lkb); 3652 dlm_put_lkb(lkb);
@@ -3640,6 +3706,13 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
3640 3706
3641static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) 3707static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
3642{ 3708{
3709 if (!dlm_is_member(ls, ms->m_header.h_nodeid)) {
3710 log_debug(ls, "ignore non-member message %d from %d %x %x %d",
3711 ms->m_type, ms->m_header.h_nodeid, ms->m_lkid,
3712 ms->m_remid, ms->m_result);
3713 return;
3714 }
3715
3643 switch (ms->m_type) { 3716 switch (ms->m_type) {
3644 3717
3645 /* messages sent to a master node */ 3718 /* messages sent to a master node */
@@ -3778,8 +3851,9 @@ void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
3778 3851
3779 ls = dlm_find_lockspace_global(hd->h_lockspace); 3852 ls = dlm_find_lockspace_global(hd->h_lockspace);
3780 if (!ls) { 3853 if (!ls) {
3781 log_print("invalid h_lockspace %x from %d cmd %d type %d", 3854 if (dlm_config.ci_log_debug)
3782 hd->h_lockspace, nodeid, hd->h_cmd, type); 3855 log_print("invalid lockspace %x from %d cmd %d type %d",
3856 hd->h_lockspace, nodeid, hd->h_cmd, type);
3783 3857
3784 if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) 3858 if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
3785 dlm_send_ls_not_ready(nodeid, rc); 3859 dlm_send_ls_not_ready(nodeid, rc);
@@ -3806,6 +3880,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3806 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 3880 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
3807 ls->ls_stub_ms.m_result = -EINPROGRESS; 3881 ls->ls_stub_ms.m_result = -EINPROGRESS;
3808 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3882 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3883 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
3809 _receive_convert_reply(lkb, &ls->ls_stub_ms); 3884 _receive_convert_reply(lkb, &ls->ls_stub_ms);
3810 3885
3811 /* Same special case as in receive_rcom_lock_args() */ 3886 /* Same special case as in receive_rcom_lock_args() */
@@ -3847,6 +3922,7 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
3847void dlm_recover_waiters_pre(struct dlm_ls *ls) 3922void dlm_recover_waiters_pre(struct dlm_ls *ls)
3848{ 3923{
3849 struct dlm_lkb *lkb, *safe; 3924 struct dlm_lkb *lkb, *safe;
3925 int wait_type, stub_unlock_result, stub_cancel_result;
3850 3926
3851 mutex_lock(&ls->ls_waiters_mutex); 3927 mutex_lock(&ls->ls_waiters_mutex);
3852 3928
@@ -3865,7 +3941,33 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3865 if (!waiter_needs_recovery(ls, lkb)) 3941 if (!waiter_needs_recovery(ls, lkb))
3866 continue; 3942 continue;
3867 3943
3868 switch (lkb->lkb_wait_type) { 3944 wait_type = lkb->lkb_wait_type;
3945 stub_unlock_result = -DLM_EUNLOCK;
3946 stub_cancel_result = -DLM_ECANCEL;
3947
3948 /* Main reply may have been received leaving a zero wait_type,
3949 but a reply for the overlapping op may not have been
3950 received. In that case we need to fake the appropriate
3951 reply for the overlap op. */
3952
3953 if (!wait_type) {
3954 if (is_overlap_cancel(lkb)) {
3955 wait_type = DLM_MSG_CANCEL;
3956 if (lkb->lkb_grmode == DLM_LOCK_IV)
3957 stub_cancel_result = 0;
3958 }
3959 if (is_overlap_unlock(lkb)) {
3960 wait_type = DLM_MSG_UNLOCK;
3961 if (lkb->lkb_grmode == DLM_LOCK_IV)
3962 stub_unlock_result = -ENOENT;
3963 }
3964
3965 log_debug(ls, "rwpre overlap %x %x %d %d %d",
3966 lkb->lkb_id, lkb->lkb_flags, wait_type,
3967 stub_cancel_result, stub_unlock_result);
3968 }
3969
3970 switch (wait_type) {
3869 3971
3870 case DLM_MSG_REQUEST: 3972 case DLM_MSG_REQUEST:
3871 lkb->lkb_flags |= DLM_IFL_RESEND; 3973 lkb->lkb_flags |= DLM_IFL_RESEND;
@@ -3878,8 +3980,9 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3878 case DLM_MSG_UNLOCK: 3980 case DLM_MSG_UNLOCK:
3879 hold_lkb(lkb); 3981 hold_lkb(lkb);
3880 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; 3982 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
3881 ls->ls_stub_ms.m_result = -DLM_EUNLOCK; 3983 ls->ls_stub_ms.m_result = stub_unlock_result;
3882 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3984 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3985 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
3883 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 3986 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
3884 dlm_put_lkb(lkb); 3987 dlm_put_lkb(lkb);
3885 break; 3988 break;
@@ -3887,15 +3990,16 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3887 case DLM_MSG_CANCEL: 3990 case DLM_MSG_CANCEL:
3888 hold_lkb(lkb); 3991 hold_lkb(lkb);
3889 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; 3992 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
3890 ls->ls_stub_ms.m_result = -DLM_ECANCEL; 3993 ls->ls_stub_ms.m_result = stub_cancel_result;
3891 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3994 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3995 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
3892 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 3996 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
3893 dlm_put_lkb(lkb); 3997 dlm_put_lkb(lkb);
3894 break; 3998 break;
3895 3999
3896 default: 4000 default:
3897 log_error(ls, "invalid lkb wait_type %d", 4001 log_error(ls, "invalid lkb wait_type %d %d",
3898 lkb->lkb_wait_type); 4002 lkb->lkb_wait_type, wait_type);
3899 } 4003 }
3900 schedule(); 4004 schedule();
3901 } 4005 }
@@ -4184,7 +4288,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
4184 lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); 4288 lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP);
4185 4289
4186 if (lkb->lkb_exflags & DLM_LKF_VALBLK) { 4290 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
4187 lkb->lkb_lvbptr = allocate_lvb(ls); 4291 lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
4188 if (!lkb->lkb_lvbptr) 4292 if (!lkb->lkb_lvbptr)
4189 return -ENOMEM; 4293 return -ENOMEM;
4190 lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - 4294 lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
@@ -4259,7 +4363,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
4259 put_rsb(r); 4363 put_rsb(r);
4260 out: 4364 out:
4261 if (error) 4365 if (error)
4262 log_print("recover_master_copy %d %x", error, rl->rl_lkid); 4366 log_debug(ls, "recover_master_copy %d %x", error, rl->rl_lkid);
4263 rl->rl_result = error; 4367 rl->rl_result = error;
4264 return error; 4368 return error;
4265} 4369}
@@ -4342,7 +4446,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4342 } 4446 }
4343 } 4447 }
4344 4448
4345 /* After ua is attached to lkb it will be freed by free_lkb(). 4449 /* After ua is attached to lkb it will be freed by dlm_free_lkb().
4346 When DLM_IFL_USER is set, the dlm knows that this is a userspace 4450 When DLM_IFL_USER is set, the dlm knows that this is a userspace
4347 lock and that lkb_astparam is the dlm_user_args structure. */ 4451 lock and that lkb_astparam is the dlm_user_args structure. */
4348 4452
@@ -4679,6 +4783,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4679 } 4783 }
4680 4784
4681 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 4785 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
4786 lkb->lkb_ast_type = 0;
4682 list_del(&lkb->lkb_astqueue); 4787 list_del(&lkb->lkb_astqueue);
4683 dlm_put_lkb(lkb); 4788 dlm_put_lkb(lkb);
4684 } 4789 }