diff options
| -rw-r--r-- | fs/dlm/dir.c | 18 | ||||
| -rw-r--r-- | fs/dlm/dlm_internal.h | 2 | ||||
| -rw-r--r-- | fs/dlm/lock.c | 60 | ||||
| -rw-r--r-- | fs/dlm/lockspace.c | 2 | ||||
| -rw-r--r-- | fs/dlm/lowcomms.c | 181 | ||||
| -rw-r--r-- | fs/dlm/user.c | 24 |
6 files changed, 175 insertions, 112 deletions
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 92969f879a17..858fba14aaa6 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
| @@ -156,7 +156,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen | |||
| 156 | 156 | ||
| 157 | bucket = dir_hash(ls, name, namelen); | 157 | bucket = dir_hash(ls, name, namelen); |
| 158 | 158 | ||
| 159 | write_lock(&ls->ls_dirtbl[bucket].lock); | 159 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
| 160 | 160 | ||
| 161 | de = search_bucket(ls, name, namelen, bucket); | 161 | de = search_bucket(ls, name, namelen, bucket); |
| 162 | 162 | ||
| @@ -173,7 +173,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen | |||
| 173 | list_del(&de->list); | 173 | list_del(&de->list); |
| 174 | kfree(de); | 174 | kfree(de); |
| 175 | out: | 175 | out: |
| 176 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 176 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
| 177 | } | 177 | } |
| 178 | 178 | ||
| 179 | void dlm_dir_clear(struct dlm_ls *ls) | 179 | void dlm_dir_clear(struct dlm_ls *ls) |
| @@ -185,14 +185,14 @@ void dlm_dir_clear(struct dlm_ls *ls) | |||
| 185 | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); | 185 | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); |
| 186 | 186 | ||
| 187 | for (i = 0; i < ls->ls_dirtbl_size; i++) { | 187 | for (i = 0; i < ls->ls_dirtbl_size; i++) { |
| 188 | write_lock(&ls->ls_dirtbl[i].lock); | 188 | spin_lock(&ls->ls_dirtbl[i].lock); |
| 189 | head = &ls->ls_dirtbl[i].list; | 189 | head = &ls->ls_dirtbl[i].list; |
| 190 | while (!list_empty(head)) { | 190 | while (!list_empty(head)) { |
| 191 | de = list_entry(head->next, struct dlm_direntry, list); | 191 | de = list_entry(head->next, struct dlm_direntry, list); |
| 192 | list_del(&de->list); | 192 | list_del(&de->list); |
| 193 | put_free_de(ls, de); | 193 | put_free_de(ls, de); |
| 194 | } | 194 | } |
| 195 | write_unlock(&ls->ls_dirtbl[i].lock); | 195 | spin_unlock(&ls->ls_dirtbl[i].lock); |
| 196 | } | 196 | } |
| 197 | } | 197 | } |
| 198 | 198 | ||
| @@ -307,17 +307,17 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
| 307 | 307 | ||
| 308 | bucket = dir_hash(ls, name, namelen); | 308 | bucket = dir_hash(ls, name, namelen); |
| 309 | 309 | ||
| 310 | write_lock(&ls->ls_dirtbl[bucket].lock); | 310 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
| 311 | de = search_bucket(ls, name, namelen, bucket); | 311 | de = search_bucket(ls, name, namelen, bucket); |
| 312 | if (de) { | 312 | if (de) { |
| 313 | *r_nodeid = de->master_nodeid; | 313 | *r_nodeid = de->master_nodeid; |
| 314 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 314 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
| 315 | if (*r_nodeid == nodeid) | 315 | if (*r_nodeid == nodeid) |
| 316 | return -EEXIST; | 316 | return -EEXIST; |
| 317 | return 0; | 317 | return 0; |
| 318 | } | 318 | } |
| 319 | 319 | ||
| 320 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 320 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
| 321 | 321 | ||
| 322 | if (namelen > DLM_RESNAME_MAXLEN) | 322 | if (namelen > DLM_RESNAME_MAXLEN) |
| 323 | return -EINVAL; | 323 | return -EINVAL; |
| @@ -330,7 +330,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
| 330 | de->length = namelen; | 330 | de->length = namelen; |
| 331 | memcpy(de->name, name, namelen); | 331 | memcpy(de->name, name, namelen); |
| 332 | 332 | ||
| 333 | write_lock(&ls->ls_dirtbl[bucket].lock); | 333 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
| 334 | tmp = search_bucket(ls, name, namelen, bucket); | 334 | tmp = search_bucket(ls, name, namelen, bucket); |
| 335 | if (tmp) { | 335 | if (tmp) { |
| 336 | kfree(de); | 336 | kfree(de); |
| @@ -339,7 +339,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
| 339 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | 339 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); |
| 340 | } | 340 | } |
| 341 | *r_nodeid = de->master_nodeid; | 341 | *r_nodeid = de->master_nodeid; |
| 342 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 342 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
| 343 | return 0; | 343 | return 0; |
| 344 | } | 344 | } |
| 345 | 345 | ||
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 076e86f38bc8..d01ca0a711db 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
| @@ -99,7 +99,7 @@ struct dlm_direntry { | |||
| 99 | 99 | ||
| 100 | struct dlm_dirtable { | 100 | struct dlm_dirtable { |
| 101 | struct list_head list; | 101 | struct list_head list; |
| 102 | rwlock_t lock; | 102 | spinlock_t lock; |
| 103 | }; | 103 | }; |
| 104 | 104 | ||
| 105 | struct dlm_rsbtable { | 105 | struct dlm_rsbtable { |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 01e7d39c5fba..205ec95b347e 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
| @@ -835,7 +835,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 835 | lkb->lkb_wait_count++; | 835 | lkb->lkb_wait_count++; |
| 836 | hold_lkb(lkb); | 836 | hold_lkb(lkb); |
| 837 | 837 | ||
| 838 | log_debug(ls, "add overlap %x cur %d new %d count %d flags %x", | 838 | log_debug(ls, "addwait %x cur %d overlap %d count %d f %x", |
| 839 | lkb->lkb_id, lkb->lkb_wait_type, mstype, | 839 | lkb->lkb_id, lkb->lkb_wait_type, mstype, |
| 840 | lkb->lkb_wait_count, lkb->lkb_flags); | 840 | lkb->lkb_wait_count, lkb->lkb_flags); |
| 841 | goto out; | 841 | goto out; |
| @@ -851,7 +851,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 851 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | 851 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); |
| 852 | out: | 852 | out: |
| 853 | if (error) | 853 | if (error) |
| 854 | log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s", | 854 | log_error(ls, "addwait error %x %d flags %x %d %d %s", |
| 855 | lkb->lkb_id, error, lkb->lkb_flags, mstype, | 855 | lkb->lkb_id, error, lkb->lkb_flags, mstype, |
| 856 | lkb->lkb_wait_type, lkb->lkb_resource->res_name); | 856 | lkb->lkb_wait_type, lkb->lkb_resource->res_name); |
| 857 | mutex_unlock(&ls->ls_waiters_mutex); | 857 | mutex_unlock(&ls->ls_waiters_mutex); |
| @@ -863,23 +863,55 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 863 | request reply on the requestqueue) between dlm_recover_waiters_pre() which | 863 | request reply on the requestqueue) between dlm_recover_waiters_pre() which |
| 864 | set RESEND and dlm_recover_waiters_post() */ | 864 | set RESEND and dlm_recover_waiters_post() */ |
| 865 | 865 | ||
| 866 | static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | 866 | static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, |
| 867 | struct dlm_message *ms) | ||
| 867 | { | 868 | { |
| 868 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 869 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; |
| 869 | int overlap_done = 0; | 870 | int overlap_done = 0; |
| 870 | 871 | ||
| 871 | if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { | 872 | if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { |
| 873 | log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id); | ||
| 872 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | 874 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; |
| 873 | overlap_done = 1; | 875 | overlap_done = 1; |
| 874 | goto out_del; | 876 | goto out_del; |
| 875 | } | 877 | } |
| 876 | 878 | ||
| 877 | if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { | 879 | if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { |
| 880 | log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id); | ||
| 878 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | 881 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; |
| 879 | overlap_done = 1; | 882 | overlap_done = 1; |
| 880 | goto out_del; | 883 | goto out_del; |
| 881 | } | 884 | } |
| 882 | 885 | ||
| 886 | /* Cancel state was preemptively cleared by a successful convert, | ||
| 887 | see next comment, nothing to do. */ | ||
| 888 | |||
| 889 | if ((mstype == DLM_MSG_CANCEL_REPLY) && | ||
| 890 | (lkb->lkb_wait_type != DLM_MSG_CANCEL)) { | ||
| 891 | log_debug(ls, "remwait %x cancel_reply wait_type %d", | ||
| 892 | lkb->lkb_id, lkb->lkb_wait_type); | ||
| 893 | return -1; | ||
| 894 | } | ||
| 895 | |||
| 896 | /* Remove for the convert reply, and preemptively remove for the | ||
| 897 | cancel reply. A convert has been granted while there's still | ||
| 898 | an outstanding cancel on it (the cancel is moot and the result | ||
| 899 | in the cancel reply should be 0). We preempt the cancel reply | ||
| 900 | because the app gets the convert result and then can follow up | ||
| 901 | with another op, like convert. This subsequent op would see the | ||
| 902 | lingering state of the cancel and fail with -EBUSY. */ | ||
| 903 | |||
| 904 | if ((mstype == DLM_MSG_CONVERT_REPLY) && | ||
| 905 | (lkb->lkb_wait_type == DLM_MSG_CONVERT) && | ||
| 906 | is_overlap_cancel(lkb) && ms && !ms->m_result) { | ||
| 907 | log_debug(ls, "remwait %x convert_reply zap overlap_cancel", | ||
| 908 | lkb->lkb_id); | ||
| 909 | lkb->lkb_wait_type = 0; | ||
| 910 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | ||
| 911 | lkb->lkb_wait_count--; | ||
| 912 | goto out_del; | ||
| 913 | } | ||
| 914 | |||
| 883 | /* N.B. type of reply may not always correspond to type of original | 915 | /* N.B. type of reply may not always correspond to type of original |
| 884 | msg due to lookup->request optimization, verify others? */ | 916 | msg due to lookup->request optimization, verify others? */ |
| 885 | 917 | ||
| @@ -888,8 +920,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 888 | goto out_del; | 920 | goto out_del; |
| 889 | } | 921 | } |
| 890 | 922 | ||
| 891 | log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d", | 923 | log_error(ls, "remwait error %x reply %d flags %x no wait_type", |
| 892 | lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type); | 924 | lkb->lkb_id, mstype, lkb->lkb_flags); |
| 893 | return -1; | 925 | return -1; |
| 894 | 926 | ||
| 895 | out_del: | 927 | out_del: |
| @@ -899,7 +931,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 899 | this would happen */ | 931 | this would happen */ |
| 900 | 932 | ||
| 901 | if (overlap_done && lkb->lkb_wait_type) { | 933 | if (overlap_done && lkb->lkb_wait_type) { |
| 902 | log_error(ls, "remove_from_waiters %x reply %d give up on %d", | 934 | log_error(ls, "remwait error %x reply %d wait_type %d overlap", |
| 903 | lkb->lkb_id, mstype, lkb->lkb_wait_type); | 935 | lkb->lkb_id, mstype, lkb->lkb_wait_type); |
| 904 | lkb->lkb_wait_count--; | 936 | lkb->lkb_wait_count--; |
| 905 | lkb->lkb_wait_type = 0; | 937 | lkb->lkb_wait_type = 0; |
| @@ -921,7 +953,7 @@ static int remove_from_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 921 | int error; | 953 | int error; |
| 922 | 954 | ||
| 923 | mutex_lock(&ls->ls_waiters_mutex); | 955 | mutex_lock(&ls->ls_waiters_mutex); |
| 924 | error = _remove_from_waiters(lkb, mstype); | 956 | error = _remove_from_waiters(lkb, mstype, NULL); |
| 925 | mutex_unlock(&ls->ls_waiters_mutex); | 957 | mutex_unlock(&ls->ls_waiters_mutex); |
| 926 | return error; | 958 | return error; |
| 927 | } | 959 | } |
| @@ -936,7 +968,7 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
| 936 | 968 | ||
| 937 | if (ms != &ls->ls_stub_ms) | 969 | if (ms != &ls->ls_stub_ms) |
| 938 | mutex_lock(&ls->ls_waiters_mutex); | 970 | mutex_lock(&ls->ls_waiters_mutex); |
| 939 | error = _remove_from_waiters(lkb, ms->m_type); | 971 | error = _remove_from_waiters(lkb, ms->m_type, ms); |
| 940 | if (ms != &ls->ls_stub_ms) | 972 | if (ms != &ls->ls_stub_ms) |
| 941 | mutex_unlock(&ls->ls_waiters_mutex); | 973 | mutex_unlock(&ls->ls_waiters_mutex); |
| 942 | return error; | 974 | return error; |
| @@ -2083,6 +2115,11 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
| 2083 | lkb->lkb_timeout_cs = args->timeout; | 2115 | lkb->lkb_timeout_cs = args->timeout; |
| 2084 | rv = 0; | 2116 | rv = 0; |
| 2085 | out: | 2117 | out: |
| 2118 | if (rv) | ||
| 2119 | log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s", | ||
| 2120 | rv, lkb->lkb_id, lkb->lkb_flags, args->flags, | ||
| 2121 | lkb->lkb_status, lkb->lkb_wait_type, | ||
| 2122 | lkb->lkb_resource->res_name); | ||
| 2086 | return rv; | 2123 | return rv; |
| 2087 | } | 2124 | } |
| 2088 | 2125 | ||
| @@ -2149,6 +2186,13 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) | |||
| 2149 | goto out; | 2186 | goto out; |
| 2150 | } | 2187 | } |
| 2151 | 2188 | ||
| 2189 | /* there's nothing to cancel */ | ||
| 2190 | if (lkb->lkb_status == DLM_LKSTS_GRANTED && | ||
| 2191 | !lkb->lkb_wait_type) { | ||
| 2192 | rv = -EBUSY; | ||
| 2193 | goto out; | ||
| 2194 | } | ||
| 2195 | |||
| 2152 | switch (lkb->lkb_wait_type) { | 2196 | switch (lkb->lkb_wait_type) { |
| 2153 | case DLM_MSG_LOOKUP: | 2197 | case DLM_MSG_LOOKUP: |
| 2154 | case DLM_MSG_REQUEST: | 2198 | case DLM_MSG_REQUEST: |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index aa32e5f02493..cd8e2df3c295 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
| @@ -487,7 +487,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 487 | goto out_lkbfree; | 487 | goto out_lkbfree; |
| 488 | for (i = 0; i < size; i++) { | 488 | for (i = 0; i < size; i++) { |
| 489 | INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); | 489 | INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); |
| 490 | rwlock_init(&ls->ls_dirtbl[i].lock); | 490 | spin_lock_init(&ls->ls_dirtbl[i].lock); |
| 491 | } | 491 | } |
| 492 | 492 | ||
| 493 | INIT_LIST_HEAD(&ls->ls_waiters); | 493 | INIT_LIST_HEAD(&ls->ls_waiters); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 103a5ebd1371..609108a83267 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* |
| 3 | ** | 3 | ** |
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
| 6 | ** | 6 | ** |
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
| @@ -21,7 +21,7 @@ | |||
| 21 | * | 21 | * |
| 22 | * Cluster nodes are referred to by their nodeids. nodeids are | 22 | * Cluster nodes are referred to by their nodeids. nodeids are |
| 23 | * simply 32 bit numbers to the locking module - if they need to | 23 | * simply 32 bit numbers to the locking module - if they need to |
| 24 | * be expanded for the cluster infrastructure then that is it's | 24 | * be expanded for the cluster infrastructure then that is its |
| 25 | * responsibility. It is this layer's | 25 | * responsibility. It is this layer's |
| 26 | * responsibility to resolve these into IP address or | 26 | * responsibility to resolve these into IP address or |
| 27 | * whatever it needs for inter-node communication. | 27 | * whatever it needs for inter-node communication. |
| @@ -36,9 +36,9 @@ | |||
| 36 | * of high load. Also, this way, the sending thread can collect together | 36 | * of high load. Also, this way, the sending thread can collect together |
| 37 | * messages bound for one node and send them in one block. | 37 | * messages bound for one node and send them in one block. |
| 38 | * | 38 | * |
| 39 | * lowcomms will choose to use wither TCP or SCTP as its transport layer | 39 | * lowcomms will choose to use either TCP or SCTP as its transport layer |
| 40 | * depending on the configuration variable 'protocol'. This should be set | 40 | * depending on the configuration variable 'protocol'. This should be set |
| 41 | * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a | 41 | * to 0 (default) for TCP or 1 for SCTP. It should be configured using a |
| 42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster | 42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster |
| 43 | * for the DLM to function. | 43 | * for the DLM to function. |
| 44 | * | 44 | * |
| @@ -48,11 +48,11 @@ | |||
| 48 | #include <net/sock.h> | 48 | #include <net/sock.h> |
| 49 | #include <net/tcp.h> | 49 | #include <net/tcp.h> |
| 50 | #include <linux/pagemap.h> | 50 | #include <linux/pagemap.h> |
| 51 | #include <linux/idr.h> | ||
| 52 | #include <linux/file.h> | 51 | #include <linux/file.h> |
| 53 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
| 54 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
| 55 | #include <net/sctp/user.h> | 54 | #include <net/sctp/user.h> |
| 55 | #include <net/ipv6.h> | ||
| 56 | 56 | ||
| 57 | #include "dlm_internal.h" | 57 | #include "dlm_internal.h" |
| 58 | #include "lowcomms.h" | 58 | #include "lowcomms.h" |
| @@ -60,6 +60,7 @@ | |||
| 60 | #include "config.h" | 60 | #include "config.h" |
| 61 | 61 | ||
| 62 | #define NEEDED_RMEM (4*1024*1024) | 62 | #define NEEDED_RMEM (4*1024*1024) |
| 63 | #define CONN_HASH_SIZE 32 | ||
| 63 | 64 | ||
| 64 | struct cbuf { | 65 | struct cbuf { |
| 65 | unsigned int base; | 66 | unsigned int base; |
| @@ -114,6 +115,7 @@ struct connection { | |||
| 114 | int retries; | 115 | int retries; |
| 115 | #define MAX_CONNECT_RETRIES 3 | 116 | #define MAX_CONNECT_RETRIES 3 |
| 116 | int sctp_assoc; | 117 | int sctp_assoc; |
| 118 | struct hlist_node list; | ||
| 117 | struct connection *othercon; | 119 | struct connection *othercon; |
| 118 | struct work_struct rwork; /* Receive workqueue */ | 120 | struct work_struct rwork; /* Receive workqueue */ |
| 119 | struct work_struct swork; /* Send workqueue */ | 121 | struct work_struct swork; /* Send workqueue */ |
| @@ -138,14 +140,37 @@ static int dlm_local_count; | |||
| 138 | static struct workqueue_struct *recv_workqueue; | 140 | static struct workqueue_struct *recv_workqueue; |
| 139 | static struct workqueue_struct *send_workqueue; | 141 | static struct workqueue_struct *send_workqueue; |
| 140 | 142 | ||
| 141 | static DEFINE_IDR(connections_idr); | 143 | static struct hlist_head connection_hash[CONN_HASH_SIZE]; |
| 142 | static DEFINE_MUTEX(connections_lock); | 144 | static DEFINE_MUTEX(connections_lock); |
| 143 | static int max_nodeid; | ||
| 144 | static struct kmem_cache *con_cache; | 145 | static struct kmem_cache *con_cache; |
| 145 | 146 | ||
| 146 | static void process_recv_sockets(struct work_struct *work); | 147 | static void process_recv_sockets(struct work_struct *work); |
| 147 | static void process_send_sockets(struct work_struct *work); | 148 | static void process_send_sockets(struct work_struct *work); |
| 148 | 149 | ||
| 150 | |||
| 151 | /* This is deliberately very simple because most clusters have simple | ||
| 152 | sequential nodeids, so we should be able to go straight to a connection | ||
| 153 | struct in the array */ | ||
| 154 | static inline int nodeid_hash(int nodeid) | ||
| 155 | { | ||
| 156 | return nodeid & (CONN_HASH_SIZE-1); | ||
| 157 | } | ||
| 158 | |||
| 159 | static struct connection *__find_con(int nodeid) | ||
| 160 | { | ||
| 161 | int r; | ||
| 162 | struct hlist_node *h; | ||
| 163 | struct connection *con; | ||
| 164 | |||
| 165 | r = nodeid_hash(nodeid); | ||
| 166 | |||
| 167 | hlist_for_each_entry(con, h, &connection_hash[r], list) { | ||
| 168 | if (con->nodeid == nodeid) | ||
| 169 | return con; | ||
| 170 | } | ||
| 171 | return NULL; | ||
| 172 | } | ||
| 173 | |||
| 149 | /* | 174 | /* |
| 150 | * If 'allocation' is zero then we don't attempt to create a new | 175 | * If 'allocation' is zero then we don't attempt to create a new |
| 151 | * connection structure for this node. | 176 | * connection structure for this node. |
| @@ -154,31 +179,17 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
| 154 | { | 179 | { |
| 155 | struct connection *con = NULL; | 180 | struct connection *con = NULL; |
| 156 | int r; | 181 | int r; |
| 157 | int n; | ||
| 158 | 182 | ||
| 159 | con = idr_find(&connections_idr, nodeid); | 183 | con = __find_con(nodeid); |
| 160 | if (con || !alloc) | 184 | if (con || !alloc) |
| 161 | return con; | 185 | return con; |
| 162 | 186 | ||
| 163 | r = idr_pre_get(&connections_idr, alloc); | ||
| 164 | if (!r) | ||
| 165 | return NULL; | ||
| 166 | |||
| 167 | con = kmem_cache_zalloc(con_cache, alloc); | 187 | con = kmem_cache_zalloc(con_cache, alloc); |
| 168 | if (!con) | 188 | if (!con) |
| 169 | return NULL; | 189 | return NULL; |
| 170 | 190 | ||
| 171 | r = idr_get_new_above(&connections_idr, con, nodeid, &n); | 191 | r = nodeid_hash(nodeid); |
| 172 | if (r) { | 192 | hlist_add_head(&con->list, &connection_hash[r]); |
| 173 | kmem_cache_free(con_cache, con); | ||
| 174 | return NULL; | ||
| 175 | } | ||
| 176 | |||
| 177 | if (n != nodeid) { | ||
| 178 | idr_remove(&connections_idr, n); | ||
| 179 | kmem_cache_free(con_cache, con); | ||
| 180 | return NULL; | ||
| 181 | } | ||
| 182 | 193 | ||
| 183 | con->nodeid = nodeid; | 194 | con->nodeid = nodeid; |
| 184 | mutex_init(&con->sock_mutex); | 195 | mutex_init(&con->sock_mutex); |
| @@ -189,19 +200,30 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
| 189 | 200 | ||
| 190 | /* Setup action pointers for child sockets */ | 201 | /* Setup action pointers for child sockets */ |
| 191 | if (con->nodeid) { | 202 | if (con->nodeid) { |
| 192 | struct connection *zerocon = idr_find(&connections_idr, 0); | 203 | struct connection *zerocon = __find_con(0); |
| 193 | 204 | ||
| 194 | con->connect_action = zerocon->connect_action; | 205 | con->connect_action = zerocon->connect_action; |
| 195 | if (!con->rx_action) | 206 | if (!con->rx_action) |
| 196 | con->rx_action = zerocon->rx_action; | 207 | con->rx_action = zerocon->rx_action; |
| 197 | } | 208 | } |
| 198 | 209 | ||
| 199 | if (nodeid > max_nodeid) | ||
| 200 | max_nodeid = nodeid; | ||
| 201 | |||
| 202 | return con; | 210 | return con; |
| 203 | } | 211 | } |
| 204 | 212 | ||
| 213 | /* Loop round all connections */ | ||
| 214 | static void foreach_conn(void (*conn_func)(struct connection *c)) | ||
| 215 | { | ||
| 216 | int i; | ||
| 217 | struct hlist_node *h, *n; | ||
| 218 | struct connection *con; | ||
| 219 | |||
| 220 | for (i = 0; i < CONN_HASH_SIZE; i++) { | ||
| 221 | hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ | ||
| 222 | conn_func(con); | ||
| 223 | } | ||
| 224 | } | ||
| 225 | } | ||
| 226 | |||
| 205 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | 227 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
| 206 | { | 228 | { |
| 207 | struct connection *con; | 229 | struct connection *con; |
| @@ -217,14 +239,17 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) | |||
| 217 | static struct connection *assoc2con(int assoc_id) | 239 | static struct connection *assoc2con(int assoc_id) |
| 218 | { | 240 | { |
| 219 | int i; | 241 | int i; |
| 242 | struct hlist_node *h; | ||
| 220 | struct connection *con; | 243 | struct connection *con; |
| 221 | 244 | ||
| 222 | mutex_lock(&connections_lock); | 245 | mutex_lock(&connections_lock); |
| 223 | for (i=0; i<=max_nodeid; i++) { | 246 | |
| 224 | con = __nodeid2con(i, 0); | 247 | for (i = 0 ; i < CONN_HASH_SIZE; i++) { |
| 225 | if (con && con->sctp_assoc == assoc_id) { | 248 | hlist_for_each_entry(con, h, &connection_hash[i], list) { |
| 226 | mutex_unlock(&connections_lock); | 249 | if (con && con->sctp_assoc == assoc_id) { |
| 227 | return con; | 250 | mutex_unlock(&connections_lock); |
| 251 | return con; | ||
| 252 | } | ||
| 228 | } | 253 | } |
| 229 | } | 254 | } |
| 230 | mutex_unlock(&connections_lock); | 255 | mutex_unlock(&connections_lock); |
| @@ -250,8 +275,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | |||
| 250 | } else { | 275 | } else { |
| 251 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | 276 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; |
| 252 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | 277 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; |
| 253 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | 278 | ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); |
| 254 | sizeof(in6->sin6_addr)); | ||
| 255 | } | 279 | } |
| 256 | 280 | ||
| 257 | return 0; | 281 | return 0; |
| @@ -376,25 +400,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
| 376 | log_print("send EOF to node failed: %d", ret); | 400 | log_print("send EOF to node failed: %d", ret); |
| 377 | } | 401 | } |
| 378 | 402 | ||
| 403 | static void sctp_init_failed_foreach(struct connection *con) | ||
| 404 | { | ||
| 405 | con->sctp_assoc = 0; | ||
| 406 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
| 407 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
| 408 | queue_work(send_workqueue, &con->swork); | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 379 | /* INIT failed but we don't know which node... | 412 | /* INIT failed but we don't know which node... |
| 380 | restart INIT on all pending nodes */ | 413 | restart INIT on all pending nodes */ |
| 381 | static void sctp_init_failed(void) | 414 | static void sctp_init_failed(void) |
| 382 | { | 415 | { |
| 383 | int i; | ||
| 384 | struct connection *con; | ||
| 385 | |||
| 386 | mutex_lock(&connections_lock); | 416 | mutex_lock(&connections_lock); |
| 387 | for (i=1; i<=max_nodeid; i++) { | 417 | |
| 388 | con = __nodeid2con(i, 0); | 418 | foreach_conn(sctp_init_failed_foreach); |
| 389 | if (!con) | 419 | |
| 390 | continue; | ||
| 391 | con->sctp_assoc = 0; | ||
| 392 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
| 393 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { | ||
| 394 | queue_work(send_workqueue, &con->swork); | ||
| 395 | } | ||
| 396 | } | ||
| 397 | } | ||
| 398 | mutex_unlock(&connections_lock); | 420 | mutex_unlock(&connections_lock); |
| 399 | } | 421 | } |
| 400 | 422 | ||
| @@ -1313,13 +1335,10 @@ out_connect: | |||
| 1313 | 1335 | ||
| 1314 | static void clean_one_writequeue(struct connection *con) | 1336 | static void clean_one_writequeue(struct connection *con) |
| 1315 | { | 1337 | { |
| 1316 | struct list_head *list; | 1338 | struct writequeue_entry *e, *safe; |
| 1317 | struct list_head *temp; | ||
| 1318 | 1339 | ||
| 1319 | spin_lock(&con->writequeue_lock); | 1340 | spin_lock(&con->writequeue_lock); |
| 1320 | list_for_each_safe(list, temp, &con->writequeue) { | 1341 | list_for_each_entry_safe(e, safe, &con->writequeue, list) { |
| 1321 | struct writequeue_entry *e = | ||
| 1322 | list_entry(list, struct writequeue_entry, list); | ||
| 1323 | list_del(&e->list); | 1342 | list_del(&e->list); |
| 1324 | free_entry(e); | 1343 | free_entry(e); |
| 1325 | } | 1344 | } |
| @@ -1369,14 +1388,7 @@ static void process_send_sockets(struct work_struct *work) | |||
| 1369 | /* Discard all entries on the write queues */ | 1388 | /* Discard all entries on the write queues */ |
| 1370 | static void clean_writequeues(void) | 1389 | static void clean_writequeues(void) |
| 1371 | { | 1390 | { |
| 1372 | int nodeid; | 1391 | foreach_conn(clean_one_writequeue); |
| 1373 | |||
| 1374 | for (nodeid = 1; nodeid <= max_nodeid; nodeid++) { | ||
| 1375 | struct connection *con = __nodeid2con(nodeid, 0); | ||
| 1376 | |||
| 1377 | if (con) | ||
| 1378 | clean_one_writequeue(con); | ||
| 1379 | } | ||
| 1380 | } | 1392 | } |
| 1381 | 1393 | ||
| 1382 | static void work_stop(void) | 1394 | static void work_stop(void) |
| @@ -1406,23 +1418,29 @@ static int work_start(void) | |||
| 1406 | return 0; | 1418 | return 0; |
| 1407 | } | 1419 | } |
| 1408 | 1420 | ||
| 1409 | void dlm_lowcomms_stop(void) | 1421 | static void stop_conn(struct connection *con) |
| 1410 | { | 1422 | { |
| 1411 | int i; | 1423 | con->flags |= 0x0F; |
| 1412 | struct connection *con; | 1424 | if (con->sock) |
| 1425 | con->sock->sk->sk_user_data = NULL; | ||
| 1426 | } | ||
| 1413 | 1427 | ||
| 1428 | static void free_conn(struct connection *con) | ||
| 1429 | { | ||
| 1430 | close_connection(con, true); | ||
| 1431 | if (con->othercon) | ||
| 1432 | kmem_cache_free(con_cache, con->othercon); | ||
| 1433 | hlist_del(&con->list); | ||
| 1434 | kmem_cache_free(con_cache, con); | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | void dlm_lowcomms_stop(void) | ||
| 1438 | { | ||
| 1414 | /* Set all the flags to prevent any | 1439 | /* Set all the flags to prevent any |
| 1415 | socket activity. | 1440 | socket activity. |
| 1416 | */ | 1441 | */ |
| 1417 | mutex_lock(&connections_lock); | 1442 | mutex_lock(&connections_lock); |
| 1418 | for (i = 0; i <= max_nodeid; i++) { | 1443 | foreach_conn(stop_conn); |
| 1419 | con = __nodeid2con(i, 0); | ||
| 1420 | if (con) { | ||
| 1421 | con->flags |= 0x0F; | ||
| 1422 | if (con->sock) | ||
| 1423 | con->sock->sk->sk_user_data = NULL; | ||
| 1424 | } | ||
| 1425 | } | ||
| 1426 | mutex_unlock(&connections_lock); | 1444 | mutex_unlock(&connections_lock); |
| 1427 | 1445 | ||
| 1428 | work_stop(); | 1446 | work_stop(); |
| @@ -1430,25 +1448,20 @@ void dlm_lowcomms_stop(void) | |||
| 1430 | mutex_lock(&connections_lock); | 1448 | mutex_lock(&connections_lock); |
| 1431 | clean_writequeues(); | 1449 | clean_writequeues(); |
| 1432 | 1450 | ||
| 1433 | for (i = 0; i <= max_nodeid; i++) { | 1451 | foreach_conn(free_conn); |
| 1434 | con = __nodeid2con(i, 0); | 1452 | |
| 1435 | if (con) { | ||
| 1436 | close_connection(con, true); | ||
| 1437 | if (con->othercon) | ||
| 1438 | kmem_cache_free(con_cache, con->othercon); | ||
| 1439 | kmem_cache_free(con_cache, con); | ||
| 1440 | } | ||
| 1441 | } | ||
| 1442 | max_nodeid = 0; | ||
| 1443 | mutex_unlock(&connections_lock); | 1453 | mutex_unlock(&connections_lock); |
| 1444 | kmem_cache_destroy(con_cache); | 1454 | kmem_cache_destroy(con_cache); |
| 1445 | idr_init(&connections_idr); | ||
| 1446 | } | 1455 | } |
| 1447 | 1456 | ||
| 1448 | int dlm_lowcomms_start(void) | 1457 | int dlm_lowcomms_start(void) |
| 1449 | { | 1458 | { |
| 1450 | int error = -EINVAL; | 1459 | int error = -EINVAL; |
| 1451 | struct connection *con; | 1460 | struct connection *con; |
| 1461 | int i; | ||
| 1462 | |||
| 1463 | for (i = 0; i < CONN_HASH_SIZE; i++) | ||
| 1464 | INIT_HLIST_HEAD(&connection_hash[i]); | ||
| 1452 | 1465 | ||
| 1453 | init_local(); | 1466 | init_local(); |
| 1454 | if (!dlm_local_count) { | 1467 | if (!dlm_local_count) { |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 065149e84f42..ebce994ab0b7 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. |
| 3 | * | 3 | * |
| 4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -84,7 +84,7 @@ struct dlm_lock_result32 { | |||
| 84 | 84 | ||
| 85 | static void compat_input(struct dlm_write_request *kb, | 85 | static void compat_input(struct dlm_write_request *kb, |
| 86 | struct dlm_write_request32 *kb32, | 86 | struct dlm_write_request32 *kb32, |
| 87 | size_t count) | 87 | int namelen) |
| 88 | { | 88 | { |
| 89 | kb->version[0] = kb32->version[0]; | 89 | kb->version[0] = kb32->version[0]; |
| 90 | kb->version[1] = kb32->version[1]; | 90 | kb->version[1] = kb32->version[1]; |
| @@ -96,8 +96,7 @@ static void compat_input(struct dlm_write_request *kb, | |||
| 96 | kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { | 96 | kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { |
| 97 | kb->i.lspace.flags = kb32->i.lspace.flags; | 97 | kb->i.lspace.flags = kb32->i.lspace.flags; |
| 98 | kb->i.lspace.minor = kb32->i.lspace.minor; | 98 | kb->i.lspace.minor = kb32->i.lspace.minor; |
| 99 | memcpy(kb->i.lspace.name, kb32->i.lspace.name, count - | 99 | memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen); |
| 100 | offsetof(struct dlm_write_request32, i.lspace.name)); | ||
| 101 | } else if (kb->cmd == DLM_USER_PURGE) { | 100 | } else if (kb->cmd == DLM_USER_PURGE) { |
| 102 | kb->i.purge.nodeid = kb32->i.purge.nodeid; | 101 | kb->i.purge.nodeid = kb32->i.purge.nodeid; |
| 103 | kb->i.purge.pid = kb32->i.purge.pid; | 102 | kb->i.purge.pid = kb32->i.purge.pid; |
| @@ -115,8 +114,7 @@ static void compat_input(struct dlm_write_request *kb, | |||
| 115 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; | 114 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; |
| 116 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; | 115 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; |
| 117 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); | 116 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); |
| 118 | memcpy(kb->i.lock.name, kb32->i.lock.name, count - | 117 | memcpy(kb->i.lock.name, kb32->i.lock.name, namelen); |
| 119 | offsetof(struct dlm_write_request32, i.lock.name)); | ||
| 120 | } | 118 | } |
| 121 | } | 119 | } |
| 122 | 120 | ||
| @@ -539,9 +537,16 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
| 539 | #ifdef CONFIG_COMPAT | 537 | #ifdef CONFIG_COMPAT |
| 540 | if (!kbuf->is64bit) { | 538 | if (!kbuf->is64bit) { |
| 541 | struct dlm_write_request32 *k32buf; | 539 | struct dlm_write_request32 *k32buf; |
| 540 | int namelen = 0; | ||
| 541 | |||
| 542 | if (count > sizeof(struct dlm_write_request32)) | ||
| 543 | namelen = count - sizeof(struct dlm_write_request32); | ||
| 544 | |||
| 542 | k32buf = (struct dlm_write_request32 *)kbuf; | 545 | k32buf = (struct dlm_write_request32 *)kbuf; |
| 543 | kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - | 546 | |
| 544 | sizeof(struct dlm_write_request32)), GFP_KERNEL); | 547 | /* add 1 after namelen so that the name string is terminated */ |
| 548 | kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1, | ||
| 549 | GFP_KERNEL); | ||
| 545 | if (!kbuf) { | 550 | if (!kbuf) { |
| 546 | kfree(k32buf); | 551 | kfree(k32buf); |
| 547 | return -ENOMEM; | 552 | return -ENOMEM; |
| @@ -549,7 +554,8 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
| 549 | 554 | ||
| 550 | if (proc) | 555 | if (proc) |
| 551 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); | 556 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); |
| 552 | compat_input(kbuf, k32buf, count + 1); | 557 | |
| 558 | compat_input(kbuf, k32buf, namelen); | ||
| 553 | kfree(k32buf); | 559 | kfree(k32buf); |
| 554 | } | 560 | } |
| 555 | #endif | 561 | #endif |
