diff options
| -rw-r--r-- | fs/dlm/Kconfig | 31 | ||||
| -rw-r--r-- | fs/dlm/Makefile | 6 | ||||
| -rw-r--r-- | fs/dlm/ast.c | 1 | ||||
| -rw-r--r-- | fs/dlm/config.c | 10 | ||||
| -rw-r--r-- | fs/dlm/config.h | 3 | ||||
| -rw-r--r-- | fs/dlm/dlm_internal.h | 11 | ||||
| -rw-r--r-- | fs/dlm/lock.c | 955 | ||||
| -rw-r--r-- | fs/dlm/lock.h | 2 | ||||
| -rw-r--r-- | fs/dlm/lockspace.c | 4 | ||||
| -rw-r--r-- | fs/dlm/lowcomms-sctp.c | 1210 | ||||
| -rw-r--r-- | fs/dlm/lowcomms.c (renamed from fs/dlm/lowcomms-tcp.c) | 788 | ||||
| -rw-r--r-- | fs/dlm/user.c | 163 | ||||
| -rw-r--r-- | fs/gfs2/dir.c | 38 | ||||
| -rw-r--r-- | fs/gfs2/glock.c | 619 | ||||
| -rw-r--r-- | fs/gfs2/glock.h | 8 | ||||
| -rw-r--r-- | fs/gfs2/incore.h | 14 | ||||
| -rw-r--r-- | fs/gfs2/locking/dlm/lock.c | 14 | ||||
| -rw-r--r-- | fs/gfs2/locking/dlm/lock_dlm.h | 3 | ||||
| -rw-r--r-- | fs/gfs2/lops.c | 20 | ||||
| -rw-r--r-- | fs/gfs2/main.c | 4 | ||||
| -rw-r--r-- | fs/gfs2/mount.c | 239 | ||||
| -rw-r--r-- | fs/gfs2/ops_address.c | 21 | ||||
| -rw-r--r-- | fs/gfs2/ops_fstype.c | 4 | ||||
| -rw-r--r-- | fs/gfs2/ops_super.c | 28 | ||||
| -rw-r--r-- | fs/gfs2/rgrp.c | 12 | ||||
| -rw-r--r-- | include/linux/dlm_device.h | 9 | 
26 files changed, 2158 insertions, 2059 deletions
| diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 6fa7b0d5c043..69a94690e493 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig | |||
| @@ -3,36 +3,19 @@ menu "Distributed Lock Manager" | |||
| 3 | 3 | ||
| 4 | config DLM | 4 | config DLM | 
| 5 | tristate "Distributed Lock Manager (DLM)" | 5 | tristate "Distributed Lock Manager (DLM)" | 
| 6 | depends on SYSFS && (IPV6 || IPV6=n) | 6 | depends on IPV6 || IPV6=n | 
| 7 | select CONFIGFS_FS | 7 | select CONFIGFS_FS | 
| 8 | select IP_SCTP if DLM_SCTP | 8 | select IP_SCTP | 
| 9 | help | 9 | help | 
| 10 | A general purpose distributed lock manager for kernel or userspace | 10 | A general purpose distributed lock manager for kernel or userspace | 
| 11 | applications. | 11 | applications. | 
| 12 | |||
| 13 | choice | ||
| 14 | prompt "Select DLM communications protocol" | ||
| 15 | depends on DLM | ||
| 16 | default DLM_TCP | ||
| 17 | help | ||
| 18 | The DLM Can use TCP or SCTP for it's network communications. | ||
| 19 | SCTP supports multi-homed operations whereas TCP doesn't. | ||
| 20 | However, SCTP seems to have stability problems at the moment. | ||
| 21 | |||
| 22 | config DLM_TCP | ||
| 23 | bool "TCP/IP" | ||
| 24 | |||
| 25 | config DLM_SCTP | ||
| 26 | bool "SCTP" | ||
| 27 | |||
| 28 | endchoice | ||
| 29 | 12 | ||
| 30 | config DLM_DEBUG | 13 | config DLM_DEBUG | 
| 31 | bool "DLM debugging" | 14 | bool "DLM debugging" | 
| 32 | depends on DLM | 15 | depends on DLM | 
| 33 | help | 16 | help | 
| 34 | Under the debugfs mount point, the name of each lockspace will | 17 | Under the debugfs mount point, the name of each lockspace will | 
| 35 | appear as a file in the "dlm" directory. The output is the | 18 | appear as a file in the "dlm" directory. The output is the | 
| 36 | list of resource and locks the local node knows about. | 19 | list of resource and locks the local node knows about. | 
| 37 | 20 | ||
| 38 | endmenu | 21 | endmenu | 
| diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 65388944eba0..604cf7dc5f39 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile | |||
| @@ -8,14 +8,12 @@ dlm-y := ast.o \ | |||
| 8 | member.o \ | 8 | member.o \ | 
| 9 | memory.o \ | 9 | memory.o \ | 
| 10 | midcomms.o \ | 10 | midcomms.o \ | 
| 11 | lowcomms.o \ | ||
| 11 | rcom.o \ | 12 | rcom.o \ | 
| 12 | recover.o \ | 13 | recover.o \ | 
| 13 | recoverd.o \ | 14 | recoverd.o \ | 
| 14 | requestqueue.o \ | 15 | requestqueue.o \ | 
| 15 | user.o \ | 16 | user.o \ | 
| 16 | util.o | 17 | util.o | 
| 17 | dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o | 18 | dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o | 
| 18 | 19 | ||
| 19 | dlm-$(CONFIG_DLM_TCP) += lowcomms-tcp.o | ||
| 20 | |||
| 21 | dlm-$(CONFIG_DLM_SCTP) += lowcomms-sctp.o \ No newline at end of file | ||
| diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index f91d39cb1e0b..6308122890ca 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "dlm_internal.h" | 14 | #include "dlm_internal.h" | 
| 15 | #include "lock.h" | 15 | #include "lock.h" | 
| 16 | #include "user.h" | 16 | #include "user.h" | 
| 17 | #include "ast.h" | ||
| 17 | 18 | ||
| 18 | #define WAKE_ASTS 0 | 19 | #define WAKE_ASTS 0 | 
| 19 | 20 | ||
| diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 8665c88e5af2..822abdcd1434 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* | 
| 3 | ** | 3 | ** | 
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 
| 6 | ** | 6 | ** | 
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, | 
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 
| @@ -89,6 +89,7 @@ struct cluster { | |||
| 89 | unsigned int cl_toss_secs; | 89 | unsigned int cl_toss_secs; | 
| 90 | unsigned int cl_scan_secs; | 90 | unsigned int cl_scan_secs; | 
| 91 | unsigned int cl_log_debug; | 91 | unsigned int cl_log_debug; | 
| 92 | unsigned int cl_protocol; | ||
| 92 | }; | 93 | }; | 
| 93 | 94 | ||
| 94 | enum { | 95 | enum { | 
| @@ -101,6 +102,7 @@ enum { | |||
| 101 | CLUSTER_ATTR_TOSS_SECS, | 102 | CLUSTER_ATTR_TOSS_SECS, | 
| 102 | CLUSTER_ATTR_SCAN_SECS, | 103 | CLUSTER_ATTR_SCAN_SECS, | 
| 103 | CLUSTER_ATTR_LOG_DEBUG, | 104 | CLUSTER_ATTR_LOG_DEBUG, | 
| 105 | CLUSTER_ATTR_PROTOCOL, | ||
| 104 | }; | 106 | }; | 
| 105 | 107 | ||
| 106 | struct cluster_attribute { | 108 | struct cluster_attribute { | 
| @@ -159,6 +161,7 @@ CLUSTER_ATTR(recover_timer, 1); | |||
| 159 | CLUSTER_ATTR(toss_secs, 1); | 161 | CLUSTER_ATTR(toss_secs, 1); | 
| 160 | CLUSTER_ATTR(scan_secs, 1); | 162 | CLUSTER_ATTR(scan_secs, 1); | 
| 161 | CLUSTER_ATTR(log_debug, 0); | 163 | CLUSTER_ATTR(log_debug, 0); | 
| 164 | CLUSTER_ATTR(protocol, 0); | ||
| 162 | 165 | ||
| 163 | static struct configfs_attribute *cluster_attrs[] = { | 166 | static struct configfs_attribute *cluster_attrs[] = { | 
| 164 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, | 167 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, | 
| @@ -170,6 +173,7 @@ static struct configfs_attribute *cluster_attrs[] = { | |||
| 170 | [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, | 173 | [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, | 
| 171 | [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, | 174 | [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, | 
| 172 | [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, | 175 | [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, | 
| 176 | [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, | ||
| 173 | NULL, | 177 | NULL, | 
| 174 | }; | 178 | }; | 
| 175 | 179 | ||
| @@ -904,6 +908,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) | |||
| 904 | #define DEFAULT_TOSS_SECS 10 | 908 | #define DEFAULT_TOSS_SECS 10 | 
| 905 | #define DEFAULT_SCAN_SECS 5 | 909 | #define DEFAULT_SCAN_SECS 5 | 
| 906 | #define DEFAULT_LOG_DEBUG 0 | 910 | #define DEFAULT_LOG_DEBUG 0 | 
| 911 | #define DEFAULT_PROTOCOL 0 | ||
| 907 | 912 | ||
| 908 | struct dlm_config_info dlm_config = { | 913 | struct dlm_config_info dlm_config = { | 
| 909 | .ci_tcp_port = DEFAULT_TCP_PORT, | 914 | .ci_tcp_port = DEFAULT_TCP_PORT, | 
| @@ -914,6 +919,7 @@ struct dlm_config_info dlm_config = { | |||
| 914 | .ci_recover_timer = DEFAULT_RECOVER_TIMER, | 919 | .ci_recover_timer = DEFAULT_RECOVER_TIMER, | 
| 915 | .ci_toss_secs = DEFAULT_TOSS_SECS, | 920 | .ci_toss_secs = DEFAULT_TOSS_SECS, | 
| 916 | .ci_scan_secs = DEFAULT_SCAN_SECS, | 921 | .ci_scan_secs = DEFAULT_SCAN_SECS, | 
| 917 | .ci_log_debug = DEFAULT_LOG_DEBUG | 922 | .ci_log_debug = DEFAULT_LOG_DEBUG, | 
| 923 | .ci_protocol = DEFAULT_PROTOCOL | ||
| 918 | }; | 924 | }; | 
| 919 | 925 | ||
| diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 1e978611a96e..967cc3d72e5e 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* | 
| 3 | ** | 3 | ** | 
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 
| 6 | ** | 6 | ** | 
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, | 
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 
| @@ -26,6 +26,7 @@ struct dlm_config_info { | |||
| 26 | int ci_toss_secs; | 26 | int ci_toss_secs; | 
| 27 | int ci_scan_secs; | 27 | int ci_scan_secs; | 
| 28 | int ci_log_debug; | 28 | int ci_log_debug; | 
| 29 | int ci_protocol; | ||
| 29 | }; | 30 | }; | 
| 30 | 31 | ||
| 31 | extern struct dlm_config_info dlm_config; | 32 | extern struct dlm_config_info dlm_config; | 
| diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 61d93201e1b2..30994d68f6a0 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* | 
| 3 | ** | 3 | ** | 
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 
| 6 | ** | 6 | ** | 
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, | 
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 
| @@ -210,6 +210,9 @@ struct dlm_args { | |||
| 210 | #define DLM_IFL_MSTCPY 0x00010000 | 210 | #define DLM_IFL_MSTCPY 0x00010000 | 
| 211 | #define DLM_IFL_RESEND 0x00020000 | 211 | #define DLM_IFL_RESEND 0x00020000 | 
| 212 | #define DLM_IFL_DEAD 0x00040000 | 212 | #define DLM_IFL_DEAD 0x00040000 | 
| 213 | #define DLM_IFL_OVERLAP_UNLOCK 0x00080000 | ||
| 214 | #define DLM_IFL_OVERLAP_CANCEL 0x00100000 | ||
| 215 | #define DLM_IFL_ENDOFLIFE 0x00200000 | ||
| 213 | #define DLM_IFL_USER 0x00000001 | 216 | #define DLM_IFL_USER 0x00000001 | 
| 214 | #define DLM_IFL_ORPHAN 0x00000002 | 217 | #define DLM_IFL_ORPHAN 0x00000002 | 
| 215 | 218 | ||
| @@ -230,8 +233,8 @@ struct dlm_lkb { | |||
| 230 | int8_t lkb_grmode; /* granted lock mode */ | 233 | int8_t lkb_grmode; /* granted lock mode */ | 
| 231 | int8_t lkb_bastmode; /* requested mode */ | 234 | int8_t lkb_bastmode; /* requested mode */ | 
| 232 | int8_t lkb_highbast; /* highest mode bast sent for */ | 235 | int8_t lkb_highbast; /* highest mode bast sent for */ | 
| 233 | |||
| 234 | int8_t lkb_wait_type; /* type of reply waiting for */ | 236 | int8_t lkb_wait_type; /* type of reply waiting for */ | 
| 237 | int8_t lkb_wait_count; | ||
| 235 | int8_t lkb_ast_type; /* type of ast queued for */ | 238 | int8_t lkb_ast_type; /* type of ast queued for */ | 
| 236 | 239 | ||
| 237 | struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ | 240 | struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ | 
| @@ -339,6 +342,7 @@ struct dlm_header { | |||
| 339 | #define DLM_MSG_LOOKUP 11 | 342 | #define DLM_MSG_LOOKUP 11 | 
| 340 | #define DLM_MSG_REMOVE 12 | 343 | #define DLM_MSG_REMOVE 12 | 
| 341 | #define DLM_MSG_LOOKUP_REPLY 13 | 344 | #define DLM_MSG_LOOKUP_REPLY 13 | 
| 345 | #define DLM_MSG_PURGE 14 | ||
| 342 | 346 | ||
| 343 | struct dlm_message { | 347 | struct dlm_message { | 
| 344 | struct dlm_header m_header; | 348 | struct dlm_header m_header; | 
| @@ -440,6 +444,9 @@ struct dlm_ls { | |||
| 440 | struct mutex ls_waiters_mutex; | 444 | struct mutex ls_waiters_mutex; | 
| 441 | struct list_head ls_waiters; /* lkbs needing a reply */ | 445 | struct list_head ls_waiters; /* lkbs needing a reply */ | 
| 442 | 446 | ||
| 447 | struct mutex ls_orphans_mutex; | ||
| 448 | struct list_head ls_orphans; | ||
| 449 | |||
| 443 | struct list_head ls_nodes; /* current nodes in ls */ | 450 | struct list_head ls_nodes; /* current nodes in ls */ | 
| 444 | struct list_head ls_nodes_gone; /* dead node list, recovery */ | 451 | struct list_head ls_nodes_gone; /* dead node list, recovery */ | 
| 445 | int ls_num_nodes; /* number of nodes in ls */ | 452 | int ls_num_nodes; /* number of nodes in ls */ | 
| diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index e725005fafd0..d8d6e729f96b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /****************************************************************************** | 1 | /****************************************************************************** | 
| 2 | ******************************************************************************* | 2 | ******************************************************************************* | 
| 3 | ** | 3 | ** | 
| 4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. | 
| 5 | ** | 5 | ** | 
| 6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, | 
| 7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions | 
| @@ -85,6 +85,7 @@ static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); | |||
| 85 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | 85 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | 
| 86 | struct dlm_message *ms); | 86 | struct dlm_message *ms); | 
| 87 | static int receive_extralen(struct dlm_message *ms); | 87 | static int receive_extralen(struct dlm_message *ms); | 
| 88 | static void do_purge(struct dlm_ls *ls, int nodeid, int pid); | ||
| 88 | 89 | ||
| 89 | /* | 90 | /* | 
| 90 | * Lock compatibilty matrix - thanks Steve | 91 | * Lock compatibilty matrix - thanks Steve | 
| @@ -223,6 +224,16 @@ static inline int is_demoted(struct dlm_lkb *lkb) | |||
| 223 | return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); | 224 | return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); | 
| 224 | } | 225 | } | 
| 225 | 226 | ||
| 227 | static inline int is_altmode(struct dlm_lkb *lkb) | ||
| 228 | { | ||
| 229 | return (lkb->lkb_sbflags & DLM_SBF_ALTMODE); | ||
| 230 | } | ||
| 231 | |||
| 232 | static inline int is_granted(struct dlm_lkb *lkb) | ||
| 233 | { | ||
| 234 | return (lkb->lkb_status == DLM_LKSTS_GRANTED); | ||
| 235 | } | ||
| 236 | |||
| 226 | static inline int is_remote(struct dlm_rsb *r) | 237 | static inline int is_remote(struct dlm_rsb *r) | 
| 227 | { | 238 | { | 
| 228 | DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); | 239 | DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); | 
| @@ -254,6 +265,22 @@ static inline int down_conversion(struct dlm_lkb *lkb) | |||
| 254 | return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); | 265 | return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); | 
| 255 | } | 266 | } | 
| 256 | 267 | ||
| 268 | static inline int is_overlap_unlock(struct dlm_lkb *lkb) | ||
| 269 | { | ||
| 270 | return lkb->lkb_flags & DLM_IFL_OVERLAP_UNLOCK; | ||
| 271 | } | ||
| 272 | |||
| 273 | static inline int is_overlap_cancel(struct dlm_lkb *lkb) | ||
| 274 | { | ||
| 275 | return lkb->lkb_flags & DLM_IFL_OVERLAP_CANCEL; | ||
| 276 | } | ||
| 277 | |||
| 278 | static inline int is_overlap(struct dlm_lkb *lkb) | ||
| 279 | { | ||
| 280 | return (lkb->lkb_flags & (DLM_IFL_OVERLAP_UNLOCK | | ||
| 281 | DLM_IFL_OVERLAP_CANCEL)); | ||
| 282 | } | ||
| 283 | |||
| 257 | static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | 284 | static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | 
| 258 | { | 285 | { | 
| 259 | if (is_master_copy(lkb)) | 286 | if (is_master_copy(lkb)) | 
| @@ -267,6 +294,12 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) | |||
| 267 | dlm_add_ast(lkb, AST_COMP); | 294 | dlm_add_ast(lkb, AST_COMP); | 
| 268 | } | 295 | } | 
| 269 | 296 | ||
| 297 | static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 298 | { | ||
| 299 | queue_cast(r, lkb, | ||
| 300 | is_overlap_unlock(lkb) ? -DLM_EUNLOCK : -DLM_ECANCEL); | ||
| 301 | } | ||
| 302 | |||
| 270 | static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) | 303 | static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) | 
| 271 | { | 304 | { | 
| 272 | if (is_master_copy(lkb)) | 305 | if (is_master_copy(lkb)) | 
| @@ -547,6 +580,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | |||
| 547 | lkb->lkb_grmode = DLM_LOCK_IV; | 580 | lkb->lkb_grmode = DLM_LOCK_IV; | 
| 548 | kref_init(&lkb->lkb_ref); | 581 | kref_init(&lkb->lkb_ref); | 
| 549 | INIT_LIST_HEAD(&lkb->lkb_ownqueue); | 582 | INIT_LIST_HEAD(&lkb->lkb_ownqueue); | 
| 583 | INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); | ||
| 550 | 584 | ||
| 551 | get_random_bytes(&bucket, sizeof(bucket)); | 585 | get_random_bytes(&bucket, sizeof(bucket)); | 
| 552 | bucket &= (ls->ls_lkbtbl_size - 1); | 586 | bucket &= (ls->ls_lkbtbl_size - 1); | 
| @@ -556,7 +590,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | |||
| 556 | /* counter can roll over so we must verify lkid is not in use */ | 590 | /* counter can roll over so we must verify lkid is not in use */ | 
| 557 | 591 | ||
| 558 | while (lkid == 0) { | 592 | while (lkid == 0) { | 
| 559 | lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); | 593 | lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++; | 
| 560 | 594 | ||
| 561 | list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, | 595 | list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, | 
| 562 | lkb_idtbl_list) { | 596 | lkb_idtbl_list) { | 
| @@ -577,8 +611,8 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | |||
| 577 | 611 | ||
| 578 | static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) | 612 | static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) | 
| 579 | { | 613 | { | 
| 580 | uint16_t bucket = lkid & 0xFFFF; | ||
| 581 | struct dlm_lkb *lkb; | 614 | struct dlm_lkb *lkb; | 
| 615 | uint16_t bucket = (lkid >> 16); | ||
| 582 | 616 | ||
| 583 | list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { | 617 | list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { | 
| 584 | if (lkb->lkb_id == lkid) | 618 | if (lkb->lkb_id == lkid) | 
| @@ -590,7 +624,7 @@ static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) | |||
| 590 | static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) | 624 | static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) | 
| 591 | { | 625 | { | 
| 592 | struct dlm_lkb *lkb; | 626 | struct dlm_lkb *lkb; | 
| 593 | uint16_t bucket = lkid & 0xFFFF; | 627 | uint16_t bucket = (lkid >> 16); | 
| 594 | 628 | ||
| 595 | if (bucket >= ls->ls_lkbtbl_size) | 629 | if (bucket >= ls->ls_lkbtbl_size) | 
| 596 | return -EBADSLT; | 630 | return -EBADSLT; | 
| @@ -620,7 +654,7 @@ static void kill_lkb(struct kref *kref) | |||
| 620 | 654 | ||
| 621 | static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) | 655 | static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) | 
| 622 | { | 656 | { | 
| 623 | uint16_t bucket = lkb->lkb_id & 0xFFFF; | 657 | uint16_t bucket = (lkb->lkb_id >> 16); | 
| 624 | 658 | ||
| 625 | write_lock(&ls->ls_lkbtbl[bucket].lock); | 659 | write_lock(&ls->ls_lkbtbl[bucket].lock); | 
| 626 | if (kref_put(&lkb->lkb_ref, kill_lkb)) { | 660 | if (kref_put(&lkb->lkb_ref, kill_lkb)) { | 
| @@ -735,23 +769,75 @@ static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts) | |||
| 735 | unhold_lkb(lkb); | 769 | unhold_lkb(lkb); | 
| 736 | } | 770 | } | 
| 737 | 771 | ||
| 772 | static int msg_reply_type(int mstype) | ||
| 773 | { | ||
| 774 | switch (mstype) { | ||
| 775 | case DLM_MSG_REQUEST: | ||
| 776 | return DLM_MSG_REQUEST_REPLY; | ||
| 777 | case DLM_MSG_CONVERT: | ||
| 778 | return DLM_MSG_CONVERT_REPLY; | ||
| 779 | case DLM_MSG_UNLOCK: | ||
| 780 | return DLM_MSG_UNLOCK_REPLY; | ||
| 781 | case DLM_MSG_CANCEL: | ||
| 782 | return DLM_MSG_CANCEL_REPLY; | ||
| 783 | case DLM_MSG_LOOKUP: | ||
| 784 | return DLM_MSG_LOOKUP_REPLY; | ||
| 785 | } | ||
| 786 | return -1; | ||
| 787 | } | ||
| 788 | |||
| 738 | /* add/remove lkb from global waiters list of lkb's waiting for | 789 | /* add/remove lkb from global waiters list of lkb's waiting for | 
| 739 | a reply from a remote node */ | 790 | a reply from a remote node */ | 
| 740 | 791 | ||
| 741 | static void add_to_waiters(struct dlm_lkb *lkb, int mstype) | 792 | static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | 
| 742 | { | 793 | { | 
| 743 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 794 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 
| 795 | int error = 0; | ||
| 744 | 796 | ||
| 745 | mutex_lock(&ls->ls_waiters_mutex); | 797 | mutex_lock(&ls->ls_waiters_mutex); | 
| 746 | if (lkb->lkb_wait_type) { | 798 | |
| 747 | log_print("add_to_waiters error %d", lkb->lkb_wait_type); | 799 | if (is_overlap_unlock(lkb) || | 
| 800 | (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL))) { | ||
| 801 | error = -EINVAL; | ||
| 802 | goto out; | ||
| 803 | } | ||
| 804 | |||
| 805 | if (lkb->lkb_wait_type || is_overlap_cancel(lkb)) { | ||
| 806 | switch (mstype) { | ||
| 807 | case DLM_MSG_UNLOCK: | ||
| 808 | lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; | ||
| 809 | break; | ||
| 810 | case DLM_MSG_CANCEL: | ||
| 811 | lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; | ||
| 812 | break; | ||
| 813 | default: | ||
| 814 | error = -EBUSY; | ||
| 815 | goto out; | ||
| 816 | } | ||
| 817 | lkb->lkb_wait_count++; | ||
| 818 | hold_lkb(lkb); | ||
| 819 | |||
| 820 | log_debug(ls, "add overlap %x cur %d new %d count %d flags %x", | ||
| 821 | lkb->lkb_id, lkb->lkb_wait_type, mstype, | ||
| 822 | lkb->lkb_wait_count, lkb->lkb_flags); | ||
| 748 | goto out; | 823 | goto out; | 
| 749 | } | 824 | } | 
| 825 | |||
| 826 | DLM_ASSERT(!lkb->lkb_wait_count, | ||
| 827 | dlm_print_lkb(lkb); | ||
| 828 | printk("wait_count %d\n", lkb->lkb_wait_count);); | ||
| 829 | |||
| 830 | lkb->lkb_wait_count++; | ||
| 750 | lkb->lkb_wait_type = mstype; | 831 | lkb->lkb_wait_type = mstype; | 
| 751 | kref_get(&lkb->lkb_ref); | 832 | hold_lkb(lkb); | 
| 752 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | 833 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | 
| 753 | out: | 834 | out: | 
| 835 | if (error) | ||
| 836 | log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s", | ||
| 837 | lkb->lkb_id, error, lkb->lkb_flags, mstype, | ||
| 838 | lkb->lkb_wait_type, lkb->lkb_resource->res_name); | ||
| 754 | mutex_unlock(&ls->ls_waiters_mutex); | 839 | mutex_unlock(&ls->ls_waiters_mutex); | 
| 840 | return error; | ||
| 755 | } | 841 | } | 
| 756 | 842 | ||
| 757 | /* We clear the RESEND flag because we might be taking an lkb off the waiters | 843 | /* We clear the RESEND flag because we might be taking an lkb off the waiters | 
| @@ -759,34 +845,85 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
| 759 | request reply on the requestqueue) between dlm_recover_waiters_pre() which | 845 | request reply on the requestqueue) between dlm_recover_waiters_pre() which | 
| 760 | set RESEND and dlm_recover_waiters_post() */ | 846 | set RESEND and dlm_recover_waiters_post() */ | 
| 761 | 847 | ||
| 762 | static int _remove_from_waiters(struct dlm_lkb *lkb) | 848 | static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | 
| 763 | { | 849 | { | 
| 764 | int error = 0; | 850 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 
| 851 | int overlap_done = 0; | ||
| 765 | 852 | ||
| 766 | if (!lkb->lkb_wait_type) { | 853 | if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { | 
| 767 | log_print("remove_from_waiters error"); | 854 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | 
| 768 | error = -EINVAL; | 855 | overlap_done = 1; | 
| 769 | goto out; | 856 | goto out_del; | 
| 857 | } | ||
| 858 | |||
| 859 | if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { | ||
| 860 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | ||
| 861 | overlap_done = 1; | ||
| 862 | goto out_del; | ||
| 863 | } | ||
| 864 | |||
| 865 | /* N.B. type of reply may not always correspond to type of original | ||
| 866 | msg due to lookup->request optimization, verify others? */ | ||
| 867 | |||
| 868 | if (lkb->lkb_wait_type) { | ||
| 869 | lkb->lkb_wait_type = 0; | ||
| 870 | goto out_del; | ||
| 871 | } | ||
| 872 | |||
| 873 | log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d", | ||
| 874 | lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type); | ||
| 875 | return -1; | ||
| 876 | |||
| 877 | out_del: | ||
| 878 | /* the force-unlock/cancel has completed and we haven't recvd a reply | ||
| 879 | to the op that was in progress prior to the unlock/cancel; we | ||
| 880 | give up on any reply to the earlier op. FIXME: not sure when/how | ||
| 881 | this would happen */ | ||
| 882 | |||
| 883 | if (overlap_done && lkb->lkb_wait_type) { | ||
| 884 | log_error(ls, "remove_from_waiters %x reply %d give up on %d", | ||
| 885 | lkb->lkb_id, mstype, lkb->lkb_wait_type); | ||
| 886 | lkb->lkb_wait_count--; | ||
| 887 | lkb->lkb_wait_type = 0; | ||
| 770 | } | 888 | } | 
| 771 | lkb->lkb_wait_type = 0; | 889 | |
| 890 | DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb);); | ||
| 891 | |||
| 772 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | 892 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | 
| 773 | list_del(&lkb->lkb_wait_reply); | 893 | lkb->lkb_wait_count--; | 
| 894 | if (!lkb->lkb_wait_count) | ||
| 895 | list_del_init(&lkb->lkb_wait_reply); | ||
| 774 | unhold_lkb(lkb); | 896 | unhold_lkb(lkb); | 
| 775 | out: | 897 | return 0; | 
| 776 | return error; | ||
| 777 | } | 898 | } | 
| 778 | 899 | ||
| 779 | static int remove_from_waiters(struct dlm_lkb *lkb) | 900 | static int remove_from_waiters(struct dlm_lkb *lkb, int mstype) | 
| 780 | { | 901 | { | 
| 781 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 902 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 
| 782 | int error; | 903 | int error; | 
| 783 | 904 | ||
| 784 | mutex_lock(&ls->ls_waiters_mutex); | 905 | mutex_lock(&ls->ls_waiters_mutex); | 
| 785 | error = _remove_from_waiters(lkb); | 906 | error = _remove_from_waiters(lkb, mstype); | 
| 786 | mutex_unlock(&ls->ls_waiters_mutex); | 907 | mutex_unlock(&ls->ls_waiters_mutex); | 
| 787 | return error; | 908 | return error; | 
| 788 | } | 909 | } | 
| 789 | 910 | ||
| 911 | /* Handles situations where we might be processing a "fake" or "stub" reply in | ||
| 912 | which we can't try to take waiters_mutex again. */ | ||
| 913 | |||
| 914 | static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
| 915 | { | ||
| 916 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
| 917 | int error; | ||
| 918 | |||
| 919 | if (ms != &ls->ls_stub_ms) | ||
| 920 | mutex_lock(&ls->ls_waiters_mutex); | ||
| 921 | error = _remove_from_waiters(lkb, ms->m_type); | ||
| 922 | if (ms != &ls->ls_stub_ms) | ||
| 923 | mutex_unlock(&ls->ls_waiters_mutex); | ||
| 924 | return error; | ||
| 925 | } | ||
| 926 | |||
| 790 | static void dir_remove(struct dlm_rsb *r) | 927 | static void dir_remove(struct dlm_rsb *r) | 
| 791 | { | 928 | { | 
| 792 | int to_nodeid; | 929 | int to_nodeid; | 
| @@ -988,8 +1125,14 @@ static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 988 | _remove_lock(r, lkb); | 1125 | _remove_lock(r, lkb); | 
| 989 | } | 1126 | } | 
| 990 | 1127 | ||
| 991 | static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | 1128 | /* returns: 0 did nothing | 
| 1129 | 1 moved lock to granted | ||
| 1130 | -1 removed lock */ | ||
| 1131 | |||
| 1132 | static int revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | ||
| 992 | { | 1133 | { | 
| 1134 | int rv = 0; | ||
| 1135 | |||
| 993 | lkb->lkb_rqmode = DLM_LOCK_IV; | 1136 | lkb->lkb_rqmode = DLM_LOCK_IV; | 
| 994 | 1137 | ||
| 995 | switch (lkb->lkb_status) { | 1138 | switch (lkb->lkb_status) { | 
| @@ -997,6 +1140,7 @@ static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 997 | break; | 1140 | break; | 
| 998 | case DLM_LKSTS_CONVERT: | 1141 | case DLM_LKSTS_CONVERT: | 
| 999 | move_lkb(r, lkb, DLM_LKSTS_GRANTED); | 1142 | move_lkb(r, lkb, DLM_LKSTS_GRANTED); | 
| 1143 | rv = 1; | ||
| 1000 | break; | 1144 | break; | 
| 1001 | case DLM_LKSTS_WAITING: | 1145 | case DLM_LKSTS_WAITING: | 
| 1002 | del_lkb(r, lkb); | 1146 | del_lkb(r, lkb); | 
| @@ -1004,15 +1148,17 @@ static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 1004 | /* this unhold undoes the original ref from create_lkb() | 1148 | /* this unhold undoes the original ref from create_lkb() | 
| 1005 | so this leads to the lkb being freed */ | 1149 | so this leads to the lkb being freed */ | 
| 1006 | unhold_lkb(lkb); | 1150 | unhold_lkb(lkb); | 
| 1151 | rv = -1; | ||
| 1007 | break; | 1152 | break; | 
| 1008 | default: | 1153 | default: | 
| 1009 | log_print("invalid status for revert %d", lkb->lkb_status); | 1154 | log_print("invalid status for revert %d", lkb->lkb_status); | 
| 1010 | } | 1155 | } | 
| 1156 | return rv; | ||
| 1011 | } | 1157 | } | 
| 1012 | 1158 | ||
| 1013 | static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) | 1159 | static int revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) | 
| 1014 | { | 1160 | { | 
| 1015 | revert_lock(r, lkb); | 1161 | return revert_lock(r, lkb); | 
| 1016 | } | 1162 | } | 
| 1017 | 1163 | ||
| 1018 | static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | 1164 | static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | 
| @@ -1055,6 +1201,50 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 1055 | queue_cast(r, lkb, 0); | 1201 | queue_cast(r, lkb, 0); | 
| 1056 | } | 1202 | } | 
| 1057 | 1203 | ||
| 1204 | /* The special CONVDEADLK, ALTPR and ALTCW flags allow the master to | ||
| 1205 | change the granted/requested modes. We're munging things accordingly in | ||
| 1206 | the process copy. | ||
| 1207 | CONVDEADLK: our grmode may have been forced down to NL to resolve a | ||
| 1208 | conversion deadlock | ||
| 1209 | ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become | ||
| 1210 | compatible with other granted locks */ | ||
| 1211 | |||
| 1212 | static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
| 1213 | { | ||
| 1214 | if (ms->m_type != DLM_MSG_CONVERT_REPLY) { | ||
| 1215 | log_print("munge_demoted %x invalid reply type %d", | ||
| 1216 | lkb->lkb_id, ms->m_type); | ||
| 1217 | return; | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { | ||
| 1221 | log_print("munge_demoted %x invalid modes gr %d rq %d", | ||
| 1222 | lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); | ||
| 1223 | return; | ||
| 1224 | } | ||
| 1225 | |||
| 1226 | lkb->lkb_grmode = DLM_LOCK_NL; | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
| 1230 | { | ||
| 1231 | if (ms->m_type != DLM_MSG_REQUEST_REPLY && | ||
| 1232 | ms->m_type != DLM_MSG_GRANT) { | ||
| 1233 | log_print("munge_altmode %x invalid reply type %d", | ||
| 1234 | lkb->lkb_id, ms->m_type); | ||
| 1235 | return; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | if (lkb->lkb_exflags & DLM_LKF_ALTPR) | ||
| 1239 | lkb->lkb_rqmode = DLM_LOCK_PR; | ||
| 1240 | else if (lkb->lkb_exflags & DLM_LKF_ALTCW) | ||
| 1241 | lkb->lkb_rqmode = DLM_LOCK_CW; | ||
| 1242 | else { | ||
| 1243 | log_print("munge_altmode invalid exflags %x", lkb->lkb_exflags); | ||
| 1244 | dlm_print_lkb(lkb); | ||
| 1245 | } | ||
| 1246 | } | ||
| 1247 | |||
| 1058 | static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) | 1248 | static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) | 
| 1059 | { | 1249 | { | 
| 1060 | struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, | 1250 | struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, | 
| @@ -1499,7 +1689,7 @@ static void process_lookup_list(struct dlm_rsb *r) | |||
| 1499 | struct dlm_lkb *lkb, *safe; | 1689 | struct dlm_lkb *lkb, *safe; | 
| 1500 | 1690 | ||
| 1501 | list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { | 1691 | list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { | 
| 1502 | list_del(&lkb->lkb_rsb_lookup); | 1692 | list_del_init(&lkb->lkb_rsb_lookup); | 
| 1503 | _request_lock(r, lkb); | 1693 | _request_lock(r, lkb); | 
| 1504 | schedule(); | 1694 | schedule(); | 
| 1505 | } | 1695 | } | 
| @@ -1530,7 +1720,7 @@ static void confirm_master(struct dlm_rsb *r, int error) | |||
| 1530 | if (!list_empty(&r->res_lookup)) { | 1720 | if (!list_empty(&r->res_lookup)) { | 
| 1531 | lkb = list_entry(r->res_lookup.next, struct dlm_lkb, | 1721 | lkb = list_entry(r->res_lookup.next, struct dlm_lkb, | 
| 1532 | lkb_rsb_lookup); | 1722 | lkb_rsb_lookup); | 
| 1533 | list_del(&lkb->lkb_rsb_lookup); | 1723 | list_del_init(&lkb->lkb_rsb_lookup); | 
| 1534 | r->res_first_lkid = lkb->lkb_id; | 1724 | r->res_first_lkid = lkb->lkb_id; | 
| 1535 | _request_lock(r, lkb); | 1725 | _request_lock(r, lkb); | 
| 1536 | } else | 1726 | } else | 
| @@ -1614,6 +1804,9 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) | |||
| 1614 | DLM_LKF_FORCEUNLOCK)) | 1804 | DLM_LKF_FORCEUNLOCK)) | 
| 1615 | return -EINVAL; | 1805 | return -EINVAL; | 
| 1616 | 1806 | ||
| 1807 | if (flags & DLM_LKF_CANCEL && flags & DLM_LKF_FORCEUNLOCK) | ||
| 1808 | return -EINVAL; | ||
| 1809 | |||
| 1617 | args->flags = flags; | 1810 | args->flags = flags; | 
| 1618 | args->astparam = (long) astarg; | 1811 | args->astparam = (long) astarg; | 
| 1619 | return 0; | 1812 | return 0; | 
| @@ -1638,6 +1831,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
| 1638 | 1831 | ||
| 1639 | if (lkb->lkb_wait_type) | 1832 | if (lkb->lkb_wait_type) | 
| 1640 | goto out; | 1833 | goto out; | 
| 1834 | |||
| 1835 | if (is_overlap(lkb)) | ||
| 1836 | goto out; | ||
| 1641 | } | 1837 | } | 
| 1642 | 1838 | ||
| 1643 | lkb->lkb_exflags = args->flags; | 1839 | lkb->lkb_exflags = args->flags; | 
| @@ -1654,35 +1850,126 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
| 1654 | return rv; | 1850 | return rv; | 
| 1655 | } | 1851 | } | 
| 1656 | 1852 | ||
| 1853 | /* when dlm_unlock() sees -EBUSY with CANCEL/FORCEUNLOCK it returns 0 | ||
| 1854 | for success */ | ||
| 1855 | |||
| 1856 | /* note: it's valid for lkb_nodeid/res_nodeid to be -1 when we get here | ||
| 1857 | because there may be a lookup in progress and it's valid to do | ||
| 1858 | cancel/unlockf on it */ | ||
| 1859 | |||
| 1657 | static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) | 1860 | static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) | 
| 1658 | { | 1861 | { | 
| 1862 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | ||
| 1659 | int rv = -EINVAL; | 1863 | int rv = -EINVAL; | 
| 1660 | 1864 | ||
| 1661 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) | 1865 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) { | 
| 1866 | log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id); | ||
| 1867 | dlm_print_lkb(lkb); | ||
| 1662 | goto out; | 1868 | goto out; | 
| 1869 | } | ||
| 1663 | 1870 | ||
| 1664 | if (args->flags & DLM_LKF_FORCEUNLOCK) | 1871 | /* an lkb may still exist even though the lock is EOL'ed due to a | 
| 1665 | goto out_ok; | 1872 | cancel, unlock or failed noqueue request; an app can't use these | 
| 1873 | locks; return same error as if the lkid had not been found at all */ | ||
| 1666 | 1874 | ||
| 1667 | if (args->flags & DLM_LKF_CANCEL && | 1875 | if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) { | 
| 1668 | lkb->lkb_status == DLM_LKSTS_GRANTED) | 1876 | log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id); | 
| 1877 | rv = -ENOENT; | ||
| 1669 | goto out; | 1878 | goto out; | 
| 1879 | } | ||
| 1670 | 1880 | ||
| 1671 | if (!(args->flags & DLM_LKF_CANCEL) && | 1881 | /* an lkb may be waiting for an rsb lookup to complete where the | 
| 1672 | lkb->lkb_status != DLM_LKSTS_GRANTED) | 1882 | lookup was initiated by another lock */ | 
| 1673 | goto out; | 1883 | |
| 1884 | if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) { | ||
| 1885 | if (!list_empty(&lkb->lkb_rsb_lookup)) { | ||
| 1886 | log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id); | ||
| 1887 | list_del_init(&lkb->lkb_rsb_lookup); | ||
| 1888 | queue_cast(lkb->lkb_resource, lkb, | ||
| 1889 | args->flags & DLM_LKF_CANCEL ? | ||
| 1890 | -DLM_ECANCEL : -DLM_EUNLOCK); | ||
| 1891 | unhold_lkb(lkb); /* undoes create_lkb() */ | ||
| 1892 | rv = -EBUSY; | ||
| 1893 | goto out; | ||
| 1894 | } | ||
| 1895 | } | ||
| 1896 | |||
| 1897 | /* cancel not allowed with another cancel/unlock in progress */ | ||
| 1898 | |||
| 1899 | if (args->flags & DLM_LKF_CANCEL) { | ||
| 1900 | if (lkb->lkb_exflags & DLM_LKF_CANCEL) | ||
| 1901 | goto out; | ||
| 1902 | |||
| 1903 | if (is_overlap(lkb)) | ||
| 1904 | goto out; | ||
| 1905 | |||
| 1906 | if (lkb->lkb_flags & DLM_IFL_RESEND) { | ||
| 1907 | lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; | ||
| 1908 | rv = -EBUSY; | ||
| 1909 | goto out; | ||
| 1910 | } | ||
| 1911 | |||
| 1912 | switch (lkb->lkb_wait_type) { | ||
| 1913 | case DLM_MSG_LOOKUP: | ||
| 1914 | case DLM_MSG_REQUEST: | ||
| 1915 | lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; | ||
| 1916 | rv = -EBUSY; | ||
| 1917 | goto out; | ||
| 1918 | case DLM_MSG_UNLOCK: | ||
| 1919 | case DLM_MSG_CANCEL: | ||
| 1920 | goto out; | ||
| 1921 | } | ||
| 1922 | /* add_to_waiters() will set OVERLAP_CANCEL */ | ||
| 1923 | goto out_ok; | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | /* do we need to allow a force-unlock if there's a normal unlock | ||
| 1927 | already in progress? in what conditions could the normal unlock | ||
| 1928 | fail such that we'd want to send a force-unlock to be sure? */ | ||
| 1929 | |||
| 1930 | if (args->flags & DLM_LKF_FORCEUNLOCK) { | ||
| 1931 | if (lkb->lkb_exflags & DLM_LKF_FORCEUNLOCK) | ||
| 1932 | goto out; | ||
| 1933 | |||
| 1934 | if (is_overlap_unlock(lkb)) | ||
| 1935 | goto out; | ||
| 1674 | 1936 | ||
| 1937 | if (lkb->lkb_flags & DLM_IFL_RESEND) { | ||
| 1938 | lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; | ||
| 1939 | rv = -EBUSY; | ||
| 1940 | goto out; | ||
| 1941 | } | ||
| 1942 | |||
| 1943 | switch (lkb->lkb_wait_type) { | ||
| 1944 | case DLM_MSG_LOOKUP: | ||
| 1945 | case DLM_MSG_REQUEST: | ||
| 1946 | lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; | ||
| 1947 | rv = -EBUSY; | ||
| 1948 | goto out; | ||
| 1949 | case DLM_MSG_UNLOCK: | ||
| 1950 | goto out; | ||
| 1951 | } | ||
| 1952 | /* add_to_waiters() will set OVERLAP_UNLOCK */ | ||
| 1953 | goto out_ok; | ||
| 1954 | } | ||
| 1955 | |||
| 1956 | /* normal unlock not allowed if there's any op in progress */ | ||
| 1675 | rv = -EBUSY; | 1957 | rv = -EBUSY; | 
| 1676 | if (lkb->lkb_wait_type) | 1958 | if (lkb->lkb_wait_type || lkb->lkb_wait_count) | 
| 1677 | goto out; | 1959 | goto out; | 
| 1678 | 1960 | ||
| 1679 | out_ok: | 1961 | out_ok: | 
| 1680 | lkb->lkb_exflags = args->flags; | 1962 | /* an overlapping op shouldn't blow away exflags from other op */ | 
| 1963 | lkb->lkb_exflags |= args->flags; | ||
| 1681 | lkb->lkb_sbflags = 0; | 1964 | lkb->lkb_sbflags = 0; | 
| 1682 | lkb->lkb_astparam = args->astparam; | 1965 | lkb->lkb_astparam = args->astparam; | 
| 1683 | |||
| 1684 | rv = 0; | 1966 | rv = 0; | 
| 1685 | out: | 1967 | out: | 
| 1968 | if (rv) | ||
| 1969 | log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv, | ||
| 1970 | lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags, | ||
| 1971 | args->flags, lkb->lkb_wait_type, | ||
| 1972 | lkb->lkb_resource->res_name); | ||
| 1686 | return rv; | 1973 | return rv; | 
| 1687 | } | 1974 | } | 
| 1688 | 1975 | ||
| @@ -1732,9 +2019,24 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 1732 | goto out; | 2019 | goto out; | 
| 1733 | } | 2020 | } | 
| 1734 | 2021 | ||
| 1735 | if (can_be_queued(lkb)) { | 2022 | /* is_demoted() means the can_be_granted() above set the grmode | 
| 1736 | if (is_demoted(lkb)) | 2023 | to NL, and left us on the granted queue. This auto-demotion | 
| 2024 | (due to CONVDEADLK) might mean other locks, and/or this lock, are | ||
| 2025 | now grantable. We have to try to grant other converting locks | ||
| 2026 | before we try again to grant this one. */ | ||
| 2027 | |||
| 2028 | if (is_demoted(lkb)) { | ||
| 2029 | grant_pending_convert(r, DLM_LOCK_IV); | ||
| 2030 | if (_can_be_granted(r, lkb, 1)) { | ||
| 2031 | grant_lock(r, lkb); | ||
| 2032 | queue_cast(r, lkb, 0); | ||
| 1737 | grant_pending_locks(r); | 2033 | grant_pending_locks(r); | 
| 2034 | goto out; | ||
| 2035 | } | ||
| 2036 | /* else fall through and move to convert queue */ | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | if (can_be_queued(lkb)) { | ||
| 1738 | error = -EINPROGRESS; | 2040 | error = -EINPROGRESS; | 
| 1739 | del_lkb(r, lkb); | 2041 | del_lkb(r, lkb); | 
| 1740 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | 2042 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | 
| @@ -1759,17 +2061,19 @@ static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 1759 | return -DLM_EUNLOCK; | 2061 | return -DLM_EUNLOCK; | 
| 1760 | } | 2062 | } | 
| 1761 | 2063 | ||
| 1762 | /* FIXME: if revert_lock() finds that the lkb is granted, we should | 2064 | /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */ | 
| 1763 | skip the queue_cast(ECANCEL). It indicates that the request/convert | ||
| 1764 | completed (and queued a normal ast) just before the cancel; we don't | ||
| 1765 | want to clobber the sb_result for the normal ast with ECANCEL. */ | ||
| 1766 | 2065 | ||
| 1767 | static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) | 2066 | static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) | 
| 1768 | { | 2067 | { | 
| 1769 | revert_lock(r, lkb); | 2068 | int error; | 
| 1770 | queue_cast(r, lkb, -DLM_ECANCEL); | 2069 | |
| 1771 | grant_pending_locks(r); | 2070 | error = revert_lock(r, lkb); | 
| 1772 | return -DLM_ECANCEL; | 2071 | if (error) { | 
| 2072 | queue_cast(r, lkb, -DLM_ECANCEL); | ||
| 2073 | grant_pending_locks(r); | ||
| 2074 | return -DLM_ECANCEL; | ||
| 2075 | } | ||
| 2076 | return 0; | ||
| 1773 | } | 2077 | } | 
| 1774 | 2078 | ||
| 1775 | /* | 2079 | /* | 
| @@ -2035,6 +2339,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace, | |||
| 2035 | 2339 | ||
| 2036 | if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) | 2340 | if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) | 
| 2037 | error = 0; | 2341 | error = 0; | 
| 2342 | if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK))) | ||
| 2343 | error = 0; | ||
| 2038 | out_put: | 2344 | out_put: | 
| 2039 | dlm_put_lkb(lkb); | 2345 | dlm_put_lkb(lkb); | 
| 2040 | out: | 2346 | out: | 
| @@ -2065,31 +2371,14 @@ int dlm_unlock(dlm_lockspace_t *lockspace, | |||
| 2065 | * receive_lookup_reply send_lookup_reply | 2371 | * receive_lookup_reply send_lookup_reply | 
| 2066 | */ | 2372 | */ | 
| 2067 | 2373 | ||
| 2068 | static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, | 2374 | static int _create_message(struct dlm_ls *ls, int mb_len, | 
| 2069 | int to_nodeid, int mstype, | 2375 | int to_nodeid, int mstype, | 
| 2070 | struct dlm_message **ms_ret, | 2376 | struct dlm_message **ms_ret, | 
| 2071 | struct dlm_mhandle **mh_ret) | 2377 | struct dlm_mhandle **mh_ret) | 
| 2072 | { | 2378 | { | 
| 2073 | struct dlm_message *ms; | 2379 | struct dlm_message *ms; | 
| 2074 | struct dlm_mhandle *mh; | 2380 | struct dlm_mhandle *mh; | 
| 2075 | char *mb; | 2381 | char *mb; | 
| 2076 | int mb_len = sizeof(struct dlm_message); | ||
| 2077 | |||
| 2078 | switch (mstype) { | ||
| 2079 | case DLM_MSG_REQUEST: | ||
| 2080 | case DLM_MSG_LOOKUP: | ||
| 2081 | case DLM_MSG_REMOVE: | ||
| 2082 | mb_len += r->res_length; | ||
| 2083 | break; | ||
| 2084 | case DLM_MSG_CONVERT: | ||
| 2085 | case DLM_MSG_UNLOCK: | ||
| 2086 | case DLM_MSG_REQUEST_REPLY: | ||
| 2087 | case DLM_MSG_CONVERT_REPLY: | ||
| 2088 | case DLM_MSG_GRANT: | ||
| 2089 | if (lkb && lkb->lkb_lvbptr) | ||
| 2090 | mb_len += r->res_ls->ls_lvblen; | ||
| 2091 | break; | ||
| 2092 | } | ||
| 2093 | 2382 | ||
| 2094 | /* get_buffer gives us a message handle (mh) that we need to | 2383 | /* get_buffer gives us a message handle (mh) that we need to | 
| 2095 | pass into lowcomms_commit and a message buffer (mb) that we | 2384 | pass into lowcomms_commit and a message buffer (mb) that we | 
| @@ -2104,7 +2393,7 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 2104 | ms = (struct dlm_message *) mb; | 2393 | ms = (struct dlm_message *) mb; | 
| 2105 | 2394 | ||
| 2106 | ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); | 2395 | ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); | 
| 2107 | ms->m_header.h_lockspace = r->res_ls->ls_global_id; | 2396 | ms->m_header.h_lockspace = ls->ls_global_id; | 
| 2108 | ms->m_header.h_nodeid = dlm_our_nodeid(); | 2397 | ms->m_header.h_nodeid = dlm_our_nodeid(); | 
| 2109 | ms->m_header.h_length = mb_len; | 2398 | ms->m_header.h_length = mb_len; | 
| 2110 | ms->m_header.h_cmd = DLM_MSG; | 2399 | ms->m_header.h_cmd = DLM_MSG; | 
| @@ -2116,6 +2405,33 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 2116 | return 0; | 2405 | return 0; | 
| 2117 | } | 2406 | } | 
| 2118 | 2407 | ||
| 2408 | static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, | ||
| 2409 | int to_nodeid, int mstype, | ||
| 2410 | struct dlm_message **ms_ret, | ||
| 2411 | struct dlm_mhandle **mh_ret) | ||
| 2412 | { | ||
| 2413 | int mb_len = sizeof(struct dlm_message); | ||
| 2414 | |||
| 2415 | switch (mstype) { | ||
| 2416 | case DLM_MSG_REQUEST: | ||
| 2417 | case DLM_MSG_LOOKUP: | ||
| 2418 | case DLM_MSG_REMOVE: | ||
| 2419 | mb_len += r->res_length; | ||
| 2420 | break; | ||
| 2421 | case DLM_MSG_CONVERT: | ||
| 2422 | case DLM_MSG_UNLOCK: | ||
| 2423 | case DLM_MSG_REQUEST_REPLY: | ||
| 2424 | case DLM_MSG_CONVERT_REPLY: | ||
| 2425 | case DLM_MSG_GRANT: | ||
| 2426 | if (lkb && lkb->lkb_lvbptr) | ||
| 2427 | mb_len += r->res_ls->ls_lvblen; | ||
| 2428 | break; | ||
| 2429 | } | ||
| 2430 | |||
| 2431 | return _create_message(r->res_ls, mb_len, to_nodeid, mstype, | ||
| 2432 | ms_ret, mh_ret); | ||
| 2433 | } | ||
| 2434 | |||
| 2119 | /* further lowcomms enhancements or alternate implementations may make | 2435 | /* further lowcomms enhancements or alternate implementations may make | 
| 2120 | the return value from this function useful at some point */ | 2436 | the return value from this function useful at some point */ | 
| 2121 | 2437 | ||
| @@ -2176,7 +2492,9 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) | |||
| 2176 | struct dlm_mhandle *mh; | 2492 | struct dlm_mhandle *mh; | 
| 2177 | int to_nodeid, error; | 2493 | int to_nodeid, error; | 
| 2178 | 2494 | ||
| 2179 | add_to_waiters(lkb, mstype); | 2495 | error = add_to_waiters(lkb, mstype); | 
| 2496 | if (error) | ||
| 2497 | return error; | ||
| 2180 | 2498 | ||
| 2181 | to_nodeid = r->res_nodeid; | 2499 | to_nodeid = r->res_nodeid; | 
| 2182 | 2500 | ||
| @@ -2192,7 +2510,7 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) | |||
| 2192 | return 0; | 2510 | return 0; | 
| 2193 | 2511 | ||
| 2194 | fail: | 2512 | fail: | 
| 2195 | remove_from_waiters(lkb); | 2513 | remove_from_waiters(lkb, msg_reply_type(mstype)); | 
| 2196 | return error; | 2514 | return error; | 
| 2197 | } | 2515 | } | 
| 2198 | 2516 | ||
| @@ -2209,7 +2527,8 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 2209 | 2527 | ||
| 2210 | /* down conversions go without a reply from the master */ | 2528 | /* down conversions go without a reply from the master */ | 
| 2211 | if (!error && down_conversion(lkb)) { | 2529 | if (!error && down_conversion(lkb)) { | 
| 2212 | remove_from_waiters(lkb); | 2530 | remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); | 
| 2531 | r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; | ||
| 2213 | r->res_ls->ls_stub_ms.m_result = 0; | 2532 | r->res_ls->ls_stub_ms.m_result = 0; | 
| 2214 | r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 2533 | r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 
| 2215 | __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); | 2534 | __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); | 
| @@ -2280,7 +2599,9 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 2280 | struct dlm_mhandle *mh; | 2599 | struct dlm_mhandle *mh; | 
| 2281 | int to_nodeid, error; | 2600 | int to_nodeid, error; | 
| 2282 | 2601 | ||
| 2283 | add_to_waiters(lkb, DLM_MSG_LOOKUP); | 2602 | error = add_to_waiters(lkb, DLM_MSG_LOOKUP); | 
| 2603 | if (error) | ||
| 2604 | return error; | ||
| 2284 | 2605 | ||
| 2285 | to_nodeid = dlm_dir_nodeid(r); | 2606 | to_nodeid = dlm_dir_nodeid(r); | 
| 2286 | 2607 | ||
| @@ -2296,7 +2617,7 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
| 2296 | return 0; | 2617 | return 0; | 
| 2297 | 2618 | ||
| 2298 | fail: | 2619 | fail: | 
| 2299 | remove_from_waiters(lkb); | 2620 | remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY); | 
| 2300 | return error; | 2621 | return error; | 
| 2301 | } | 2622 | } | 
| 2302 | 2623 | ||
| @@ -2656,6 +2977,8 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2656 | lock_rsb(r); | 2977 | lock_rsb(r); | 
| 2657 | 2978 | ||
| 2658 | receive_flags_reply(lkb, ms); | 2979 | receive_flags_reply(lkb, ms); | 
| 2980 | if (is_altmode(lkb)) | ||
| 2981 | munge_altmode(lkb, ms); | ||
| 2659 | grant_lock_pc(r, lkb, ms); | 2982 | grant_lock_pc(r, lkb, ms); | 
| 2660 | queue_cast(r, lkb, 0); | 2983 | queue_cast(r, lkb, 0); | 
| 2661 | 2984 | ||
| @@ -2736,11 +3059,16 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2736 | dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); | 3059 | dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); | 
| 2737 | } | 3060 | } | 
| 2738 | 3061 | ||
| 3062 | static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) | ||
| 3063 | { | ||
| 3064 | do_purge(ls, ms->m_nodeid, ms->m_pid); | ||
| 3065 | } | ||
| 3066 | |||
| 2739 | static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | 3067 | static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | 
| 2740 | { | 3068 | { | 
| 2741 | struct dlm_lkb *lkb; | 3069 | struct dlm_lkb *lkb; | 
| 2742 | struct dlm_rsb *r; | 3070 | struct dlm_rsb *r; | 
| 2743 | int error, mstype; | 3071 | int error, mstype, result; | 
| 2744 | 3072 | ||
| 2745 | error = find_lkb(ls, ms->m_remid, &lkb); | 3073 | error = find_lkb(ls, ms->m_remid, &lkb); | 
| 2746 | if (error) { | 3074 | if (error) { | 
| @@ -2749,20 +3077,15 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2749 | } | 3077 | } | 
| 2750 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 3078 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 
| 2751 | 3079 | ||
| 2752 | mstype = lkb->lkb_wait_type; | ||
| 2753 | error = remove_from_waiters(lkb); | ||
| 2754 | if (error) { | ||
| 2755 | log_error(ls, "receive_request_reply not on waiters"); | ||
| 2756 | goto out; | ||
| 2757 | } | ||
| 2758 | |||
| 2759 | /* this is the value returned from do_request() on the master */ | ||
| 2760 | error = ms->m_result; | ||
| 2761 | |||
| 2762 | r = lkb->lkb_resource; | 3080 | r = lkb->lkb_resource; | 
| 2763 | hold_rsb(r); | 3081 | hold_rsb(r); | 
| 2764 | lock_rsb(r); | 3082 | lock_rsb(r); | 
| 2765 | 3083 | ||
| 3084 | mstype = lkb->lkb_wait_type; | ||
| 3085 | error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); | ||
| 3086 | if (error) | ||
| 3087 | goto out; | ||
| 3088 | |||
| 2766 | /* Optimization: the dir node was also the master, so it took our | 3089 | /* Optimization: the dir node was also the master, so it took our | 
| 2767 | lookup as a request and sent request reply instead of lookup reply */ | 3090 | lookup as a request and sent request reply instead of lookup reply */ | 
| 2768 | if (mstype == DLM_MSG_LOOKUP) { | 3091 | if (mstype == DLM_MSG_LOOKUP) { | 
| @@ -2770,14 +3093,15 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2770 | lkb->lkb_nodeid = r->res_nodeid; | 3093 | lkb->lkb_nodeid = r->res_nodeid; | 
| 2771 | } | 3094 | } | 
| 2772 | 3095 | ||
| 2773 | switch (error) { | 3096 | /* this is the value returned from do_request() on the master */ | 
| 3097 | result = ms->m_result; | ||
| 3098 | |||
| 3099 | switch (result) { | ||
| 2774 | case -EAGAIN: | 3100 | case -EAGAIN: | 
| 2775 | /* request would block (be queued) on remote master; | 3101 | /* request would block (be queued) on remote master */ | 
| 2776 | the unhold undoes the original ref from create_lkb() | ||
| 2777 | so it leads to the lkb being freed */ | ||
| 2778 | queue_cast(r, lkb, -EAGAIN); | 3102 | queue_cast(r, lkb, -EAGAIN); | 
| 2779 | confirm_master(r, -EAGAIN); | 3103 | confirm_master(r, -EAGAIN); | 
| 2780 | unhold_lkb(lkb); | 3104 | unhold_lkb(lkb); /* undoes create_lkb() */ | 
| 2781 | break; | 3105 | break; | 
| 2782 | 3106 | ||
| 2783 | case -EINPROGRESS: | 3107 | case -EINPROGRESS: | 
| @@ -2785,41 +3109,64 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2785 | /* request was queued or granted on remote master */ | 3109 | /* request was queued or granted on remote master */ | 
| 2786 | receive_flags_reply(lkb, ms); | 3110 | receive_flags_reply(lkb, ms); | 
| 2787 | lkb->lkb_remid = ms->m_lkid; | 3111 | lkb->lkb_remid = ms->m_lkid; | 
| 2788 | if (error) | 3112 | if (is_altmode(lkb)) | 
| 3113 | munge_altmode(lkb, ms); | ||
| 3114 | if (result) | ||
| 2789 | add_lkb(r, lkb, DLM_LKSTS_WAITING); | 3115 | add_lkb(r, lkb, DLM_LKSTS_WAITING); | 
| 2790 | else { | 3116 | else { | 
| 2791 | grant_lock_pc(r, lkb, ms); | 3117 | grant_lock_pc(r, lkb, ms); | 
| 2792 | queue_cast(r, lkb, 0); | 3118 | queue_cast(r, lkb, 0); | 
| 2793 | } | 3119 | } | 
| 2794 | confirm_master(r, error); | 3120 | confirm_master(r, result); | 
| 2795 | break; | 3121 | break; | 
| 2796 | 3122 | ||
| 2797 | case -EBADR: | 3123 | case -EBADR: | 
| 2798 | case -ENOTBLK: | 3124 | case -ENOTBLK: | 
| 2799 | /* find_rsb failed to find rsb or rsb wasn't master */ | 3125 | /* find_rsb failed to find rsb or rsb wasn't master */ | 
| 3126 | log_debug(ls, "receive_request_reply %x %x master diff %d %d", | ||
| 3127 | lkb->lkb_id, lkb->lkb_flags, r->res_nodeid, result); | ||
| 2800 | r->res_nodeid = -1; | 3128 | r->res_nodeid = -1; | 
| 2801 | lkb->lkb_nodeid = -1; | 3129 | lkb->lkb_nodeid = -1; | 
| 2802 | _request_lock(r, lkb); | 3130 | |
| 3131 | if (is_overlap(lkb)) { | ||
| 3132 | /* we'll ignore error in cancel/unlock reply */ | ||
| 3133 | queue_cast_overlap(r, lkb); | ||
| 3134 | unhold_lkb(lkb); /* undoes create_lkb() */ | ||
| 3135 | } else | ||
| 3136 | _request_lock(r, lkb); | ||
| 2803 | break; | 3137 | break; | 
| 2804 | 3138 | ||
| 2805 | default: | 3139 | default: | 
| 2806 | log_error(ls, "receive_request_reply error %d", error); | 3140 | log_error(ls, "receive_request_reply %x error %d", | 
| 3141 | lkb->lkb_id, result); | ||
| 2807 | } | 3142 | } | 
| 2808 | 3143 | ||
| 3144 | if (is_overlap_unlock(lkb) && (result == 0 || result == -EINPROGRESS)) { | ||
| 3145 | log_debug(ls, "receive_request_reply %x result %d unlock", | ||
| 3146 | lkb->lkb_id, result); | ||
| 3147 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | ||
| 3148 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | ||
| 3149 | send_unlock(r, lkb); | ||
| 3150 | } else if (is_overlap_cancel(lkb) && (result == -EINPROGRESS)) { | ||
| 3151 | log_debug(ls, "receive_request_reply %x cancel", lkb->lkb_id); | ||
| 3152 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | ||
| 3153 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | ||
| 3154 | send_cancel(r, lkb); | ||
| 3155 | } else { | ||
| 3156 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | ||
| 3157 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | ||
| 3158 | } | ||
| 3159 | out: | ||
| 2809 | unlock_rsb(r); | 3160 | unlock_rsb(r); | 
| 2810 | put_rsb(r); | 3161 | put_rsb(r); | 
| 2811 | out: | ||
| 2812 | dlm_put_lkb(lkb); | 3162 | dlm_put_lkb(lkb); | 
| 2813 | } | 3163 | } | 
| 2814 | 3164 | ||
| 2815 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | 3165 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | 
| 2816 | struct dlm_message *ms) | 3166 | struct dlm_message *ms) | 
| 2817 | { | 3167 | { | 
| 2818 | int error = ms->m_result; | ||
| 2819 | |||
| 2820 | /* this is the value returned from do_convert() on the master */ | 3168 | /* this is the value returned from do_convert() on the master */ | 
| 2821 | 3169 | switch (ms->m_result) { | |
| 2822 | switch (error) { | ||
| 2823 | case -EAGAIN: | 3170 | case -EAGAIN: | 
| 2824 | /* convert would block (be queued) on remote master */ | 3171 | /* convert would block (be queued) on remote master */ | 
| 2825 | queue_cast(r, lkb, -EAGAIN); | 3172 | queue_cast(r, lkb, -EAGAIN); | 
| @@ -2827,6 +3174,9 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 2827 | 3174 | ||
| 2828 | case -EINPROGRESS: | 3175 | case -EINPROGRESS: | 
| 2829 | /* convert was queued on remote master */ | 3176 | /* convert was queued on remote master */ | 
| 3177 | receive_flags_reply(lkb, ms); | ||
| 3178 | if (is_demoted(lkb)) | ||
| 3179 | munge_demoted(lkb, ms); | ||
| 2830 | del_lkb(r, lkb); | 3180 | del_lkb(r, lkb); | 
| 2831 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | 3181 | add_lkb(r, lkb, DLM_LKSTS_CONVERT); | 
| 2832 | break; | 3182 | break; | 
| @@ -2834,24 +3184,33 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 2834 | case 0: | 3184 | case 0: | 
| 2835 | /* convert was granted on remote master */ | 3185 | /* convert was granted on remote master */ | 
| 2836 | receive_flags_reply(lkb, ms); | 3186 | receive_flags_reply(lkb, ms); | 
| 3187 | if (is_demoted(lkb)) | ||
| 3188 | munge_demoted(lkb, ms); | ||
| 2837 | grant_lock_pc(r, lkb, ms); | 3189 | grant_lock_pc(r, lkb, ms); | 
| 2838 | queue_cast(r, lkb, 0); | 3190 | queue_cast(r, lkb, 0); | 
| 2839 | break; | 3191 | break; | 
| 2840 | 3192 | ||
| 2841 | default: | 3193 | default: | 
| 2842 | log_error(r->res_ls, "receive_convert_reply error %d", error); | 3194 | log_error(r->res_ls, "receive_convert_reply %x error %d", | 
| 3195 | lkb->lkb_id, ms->m_result); | ||
| 2843 | } | 3196 | } | 
| 2844 | } | 3197 | } | 
| 2845 | 3198 | ||
| 2846 | static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 3199 | static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 
| 2847 | { | 3200 | { | 
| 2848 | struct dlm_rsb *r = lkb->lkb_resource; | 3201 | struct dlm_rsb *r = lkb->lkb_resource; | 
| 3202 | int error; | ||
| 2849 | 3203 | ||
| 2850 | hold_rsb(r); | 3204 | hold_rsb(r); | 
| 2851 | lock_rsb(r); | 3205 | lock_rsb(r); | 
| 2852 | 3206 | ||
| 2853 | __receive_convert_reply(r, lkb, ms); | 3207 | /* stub reply can happen with waiters_mutex held */ | 
| 3208 | error = remove_from_waiters_ms(lkb, ms); | ||
| 3209 | if (error) | ||
| 3210 | goto out; | ||
| 2854 | 3211 | ||
| 3212 | __receive_convert_reply(r, lkb, ms); | ||
| 3213 | out: | ||
| 2855 | unlock_rsb(r); | 3214 | unlock_rsb(r); | 
| 2856 | put_rsb(r); | 3215 | put_rsb(r); | 
| 2857 | } | 3216 | } | 
| @@ -2868,37 +3227,38 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2868 | } | 3227 | } | 
| 2869 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 3228 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 
| 2870 | 3229 | ||
| 2871 | error = remove_from_waiters(lkb); | ||
| 2872 | if (error) { | ||
| 2873 | log_error(ls, "receive_convert_reply not on waiters"); | ||
| 2874 | goto out; | ||
| 2875 | } | ||
| 2876 | |||
| 2877 | _receive_convert_reply(lkb, ms); | 3230 | _receive_convert_reply(lkb, ms); | 
| 2878 | out: | ||
| 2879 | dlm_put_lkb(lkb); | 3231 | dlm_put_lkb(lkb); | 
| 2880 | } | 3232 | } | 
| 2881 | 3233 | ||
| 2882 | static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 3234 | static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 
| 2883 | { | 3235 | { | 
| 2884 | struct dlm_rsb *r = lkb->lkb_resource; | 3236 | struct dlm_rsb *r = lkb->lkb_resource; | 
| 2885 | int error = ms->m_result; | 3237 | int error; | 
| 2886 | 3238 | ||
| 2887 | hold_rsb(r); | 3239 | hold_rsb(r); | 
| 2888 | lock_rsb(r); | 3240 | lock_rsb(r); | 
| 2889 | 3241 | ||
| 3242 | /* stub reply can happen with waiters_mutex held */ | ||
| 3243 | error = remove_from_waiters_ms(lkb, ms); | ||
| 3244 | if (error) | ||
| 3245 | goto out; | ||
| 3246 | |||
| 2890 | /* this is the value returned from do_unlock() on the master */ | 3247 | /* this is the value returned from do_unlock() on the master */ | 
| 2891 | 3248 | ||
| 2892 | switch (error) { | 3249 | switch (ms->m_result) { | 
| 2893 | case -DLM_EUNLOCK: | 3250 | case -DLM_EUNLOCK: | 
| 2894 | receive_flags_reply(lkb, ms); | 3251 | receive_flags_reply(lkb, ms); | 
| 2895 | remove_lock_pc(r, lkb); | 3252 | remove_lock_pc(r, lkb); | 
| 2896 | queue_cast(r, lkb, -DLM_EUNLOCK); | 3253 | queue_cast(r, lkb, -DLM_EUNLOCK); | 
| 2897 | break; | 3254 | break; | 
| 3255 | case -ENOENT: | ||
| 3256 | break; | ||
| 2898 | default: | 3257 | default: | 
| 2899 | log_error(r->res_ls, "receive_unlock_reply error %d", error); | 3258 | log_error(r->res_ls, "receive_unlock_reply %x error %d", | 
| 3259 | lkb->lkb_id, ms->m_result); | ||
| 2900 | } | 3260 | } | 
| 2901 | 3261 | out: | |
| 2902 | unlock_rsb(r); | 3262 | unlock_rsb(r); | 
| 2903 | put_rsb(r); | 3263 | put_rsb(r); | 
| 2904 | } | 3264 | } | 
| @@ -2915,37 +3275,39 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2915 | } | 3275 | } | 
| 2916 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 3276 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 
| 2917 | 3277 | ||
| 2918 | error = remove_from_waiters(lkb); | ||
| 2919 | if (error) { | ||
| 2920 | log_error(ls, "receive_unlock_reply not on waiters"); | ||
| 2921 | goto out; | ||
| 2922 | } | ||
| 2923 | |||
| 2924 | _receive_unlock_reply(lkb, ms); | 3278 | _receive_unlock_reply(lkb, ms); | 
| 2925 | out: | ||
| 2926 | dlm_put_lkb(lkb); | 3279 | dlm_put_lkb(lkb); | 
| 2927 | } | 3280 | } | 
| 2928 | 3281 | ||
| 2929 | static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 3282 | static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 
| 2930 | { | 3283 | { | 
| 2931 | struct dlm_rsb *r = lkb->lkb_resource; | 3284 | struct dlm_rsb *r = lkb->lkb_resource; | 
| 2932 | int error = ms->m_result; | 3285 | int error; | 
| 2933 | 3286 | ||
| 2934 | hold_rsb(r); | 3287 | hold_rsb(r); | 
| 2935 | lock_rsb(r); | 3288 | lock_rsb(r); | 
| 2936 | 3289 | ||
| 3290 | /* stub reply can happen with waiters_mutex held */ | ||
| 3291 | error = remove_from_waiters_ms(lkb, ms); | ||
| 3292 | if (error) | ||
| 3293 | goto out; | ||
| 3294 | |||
| 2937 | /* this is the value returned from do_cancel() on the master */ | 3295 | /* this is the value returned from do_cancel() on the master */ | 
| 2938 | 3296 | ||
| 2939 | switch (error) { | 3297 | switch (ms->m_result) { | 
| 2940 | case -DLM_ECANCEL: | 3298 | case -DLM_ECANCEL: | 
| 2941 | receive_flags_reply(lkb, ms); | 3299 | receive_flags_reply(lkb, ms); | 
| 2942 | revert_lock_pc(r, lkb); | 3300 | revert_lock_pc(r, lkb); | 
| 2943 | queue_cast(r, lkb, -DLM_ECANCEL); | 3301 | if (ms->m_result) | 
| 3302 | queue_cast(r, lkb, -DLM_ECANCEL); | ||
| 3303 | break; | ||
| 3304 | case 0: | ||
| 2944 | break; | 3305 | break; | 
| 2945 | default: | 3306 | default: | 
| 2946 | log_error(r->res_ls, "receive_cancel_reply error %d", error); | 3307 | log_error(r->res_ls, "receive_cancel_reply %x error %d", | 
| 3308 | lkb->lkb_id, ms->m_result); | ||
| 2947 | } | 3309 | } | 
| 2948 | 3310 | out: | |
| 2949 | unlock_rsb(r); | 3311 | unlock_rsb(r); | 
| 2950 | put_rsb(r); | 3312 | put_rsb(r); | 
| 2951 | } | 3313 | } | 
| @@ -2962,14 +3324,7 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2962 | } | 3324 | } | 
| 2963 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 3325 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | 
| 2964 | 3326 | ||
| 2965 | error = remove_from_waiters(lkb); | ||
| 2966 | if (error) { | ||
| 2967 | log_error(ls, "receive_cancel_reply not on waiters"); | ||
| 2968 | goto out; | ||
| 2969 | } | ||
| 2970 | |||
| 2971 | _receive_cancel_reply(lkb, ms); | 3327 | _receive_cancel_reply(lkb, ms); | 
| 2972 | out: | ||
| 2973 | dlm_put_lkb(lkb); | 3328 | dlm_put_lkb(lkb); | 
| 2974 | } | 3329 | } | 
| 2975 | 3330 | ||
| @@ -2985,20 +3340,17 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 2985 | return; | 3340 | return; | 
| 2986 | } | 3341 | } | 
| 2987 | 3342 | ||
| 2988 | error = remove_from_waiters(lkb); | 3343 | /* ms->m_result is the value returned by dlm_dir_lookup on dir node | 
| 2989 | if (error) { | ||
| 2990 | log_error(ls, "receive_lookup_reply not on waiters"); | ||
| 2991 | goto out; | ||
| 2992 | } | ||
| 2993 | |||
| 2994 | /* this is the value returned by dlm_dir_lookup on dir node | ||
| 2995 | FIXME: will a non-zero error ever be returned? */ | 3344 | FIXME: will a non-zero error ever be returned? */ | 
| 2996 | error = ms->m_result; | ||
| 2997 | 3345 | ||
| 2998 | r = lkb->lkb_resource; | 3346 | r = lkb->lkb_resource; | 
| 2999 | hold_rsb(r); | 3347 | hold_rsb(r); | 
| 3000 | lock_rsb(r); | 3348 | lock_rsb(r); | 
| 3001 | 3349 | ||
| 3350 | error = remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY); | ||
| 3351 | if (error) | ||
| 3352 | goto out; | ||
| 3353 | |||
| 3002 | ret_nodeid = ms->m_nodeid; | 3354 | ret_nodeid = ms->m_nodeid; | 
| 3003 | if (ret_nodeid == dlm_our_nodeid()) { | 3355 | if (ret_nodeid == dlm_our_nodeid()) { | 
| 3004 | r->res_nodeid = 0; | 3356 | r->res_nodeid = 0; | 
| @@ -3009,14 +3361,22 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
| 3009 | r->res_nodeid = ret_nodeid; | 3361 | r->res_nodeid = ret_nodeid; | 
| 3010 | } | 3362 | } | 
| 3011 | 3363 | ||
| 3364 | if (is_overlap(lkb)) { | ||
| 3365 | log_debug(ls, "receive_lookup_reply %x unlock %x", | ||
| 3366 | lkb->lkb_id, lkb->lkb_flags); | ||
| 3367 | queue_cast_overlap(r, lkb); | ||
| 3368 | unhold_lkb(lkb); /* undoes create_lkb() */ | ||
| 3369 | goto out_list; | ||
| 3370 | } | ||
| 3371 | |||
| 3012 | _request_lock(r, lkb); | 3372 | _request_lock(r, lkb); | 
| 3013 | 3373 | ||
| 3374 | out_list: | ||
| 3014 | if (!ret_nodeid) | 3375 | if (!ret_nodeid) | 
| 3015 | process_lookup_list(r); | 3376 | process_lookup_list(r); | 
| 3016 | 3377 | out: | |
| 3017 | unlock_rsb(r); | 3378 | unlock_rsb(r); | 
| 3018 | put_rsb(r); | 3379 | put_rsb(r); | 
| 3019 | out: | ||
| 3020 | dlm_put_lkb(lkb); | 3380 | dlm_put_lkb(lkb); | 
| 3021 | } | 3381 | } | 
| 3022 | 3382 | ||
| @@ -3133,6 +3493,12 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) | |||
| 3133 | receive_lookup_reply(ls, ms); | 3493 | receive_lookup_reply(ls, ms); | 
| 3134 | break; | 3494 | break; | 
| 3135 | 3495 | ||
| 3496 | /* other messages */ | ||
| 3497 | |||
| 3498 | case DLM_MSG_PURGE: | ||
| 3499 | receive_purge(ls, ms); | ||
| 3500 | break; | ||
| 3501 | |||
| 3136 | default: | 3502 | default: | 
| 3137 | log_error(ls, "unknown message type %d", ms->m_type); | 3503 | log_error(ls, "unknown message type %d", ms->m_type); | 
| 3138 | } | 3504 | } | 
| @@ -3153,9 +3519,9 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
| 3153 | { | 3519 | { | 
| 3154 | if (middle_conversion(lkb)) { | 3520 | if (middle_conversion(lkb)) { | 
| 3155 | hold_lkb(lkb); | 3521 | hold_lkb(lkb); | 
| 3522 | ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; | ||
| 3156 | ls->ls_stub_ms.m_result = -EINPROGRESS; | 3523 | ls->ls_stub_ms.m_result = -EINPROGRESS; | 
| 3157 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 3524 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 
| 3158 | _remove_from_waiters(lkb); | ||
| 3159 | _receive_convert_reply(lkb, &ls->ls_stub_ms); | 3525 | _receive_convert_reply(lkb, &ls->ls_stub_ms); | 
| 3160 | 3526 | ||
| 3161 | /* Same special case as in receive_rcom_lock_args() */ | 3527 | /* Same special case as in receive_rcom_lock_args() */ | 
| @@ -3227,18 +3593,18 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
| 3227 | 3593 | ||
| 3228 | case DLM_MSG_UNLOCK: | 3594 | case DLM_MSG_UNLOCK: | 
| 3229 | hold_lkb(lkb); | 3595 | hold_lkb(lkb); | 
| 3596 | ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; | ||
| 3230 | ls->ls_stub_ms.m_result = -DLM_EUNLOCK; | 3597 | ls->ls_stub_ms.m_result = -DLM_EUNLOCK; | 
| 3231 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 3598 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 
| 3232 | _remove_from_waiters(lkb); | ||
| 3233 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); | 3599 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); | 
| 3234 | dlm_put_lkb(lkb); | 3600 | dlm_put_lkb(lkb); | 
| 3235 | break; | 3601 | break; | 
| 3236 | 3602 | ||
| 3237 | case DLM_MSG_CANCEL: | 3603 | case DLM_MSG_CANCEL: | 
| 3238 | hold_lkb(lkb); | 3604 | hold_lkb(lkb); | 
| 3605 | ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; | ||
| 3239 | ls->ls_stub_ms.m_result = -DLM_ECANCEL; | 3606 | ls->ls_stub_ms.m_result = -DLM_ECANCEL; | 
| 3240 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 3607 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 
| 3241 | _remove_from_waiters(lkb); | ||
| 3242 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); | 3608 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); | 
| 3243 | dlm_put_lkb(lkb); | 3609 | dlm_put_lkb(lkb); | 
| 3244 | break; | 3610 | break; | 
| @@ -3252,37 +3618,47 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
| 3252 | mutex_unlock(&ls->ls_waiters_mutex); | 3618 | mutex_unlock(&ls->ls_waiters_mutex); | 
| 3253 | } | 3619 | } | 
| 3254 | 3620 | ||
| 3255 | static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | 3621 | static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) | 
| 3256 | { | 3622 | { | 
| 3257 | struct dlm_lkb *lkb; | 3623 | struct dlm_lkb *lkb; | 
| 3258 | int rv = 0; | 3624 | int found = 0; | 
| 3259 | 3625 | ||
| 3260 | mutex_lock(&ls->ls_waiters_mutex); | 3626 | mutex_lock(&ls->ls_waiters_mutex); | 
| 3261 | list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { | 3627 | list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { | 
| 3262 | if (lkb->lkb_flags & DLM_IFL_RESEND) { | 3628 | if (lkb->lkb_flags & DLM_IFL_RESEND) { | 
| 3263 | rv = lkb->lkb_wait_type; | 3629 | hold_lkb(lkb); | 
| 3264 | _remove_from_waiters(lkb); | 3630 | found = 1; | 
| 3265 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | ||
| 3266 | break; | 3631 | break; | 
| 3267 | } | 3632 | } | 
| 3268 | } | 3633 | } | 
| 3269 | mutex_unlock(&ls->ls_waiters_mutex); | 3634 | mutex_unlock(&ls->ls_waiters_mutex); | 
| 3270 | 3635 | ||
| 3271 | if (!rv) | 3636 | if (!found) | 
| 3272 | lkb = NULL; | 3637 | lkb = NULL; | 
| 3273 | *lkb_ret = lkb; | 3638 | return lkb; | 
| 3274 | return rv; | ||
| 3275 | } | 3639 | } | 
| 3276 | 3640 | ||
| 3277 | /* Deal with lookups and lkb's marked RESEND from _pre. We may now be the | 3641 | /* Deal with lookups and lkb's marked RESEND from _pre. We may now be the | 
| 3278 | master or dir-node for r. Processing the lkb may result in it being placed | 3642 | master or dir-node for r. Processing the lkb may result in it being placed | 
| 3279 | back on waiters. */ | 3643 | back on waiters. */ | 
| 3280 | 3644 | ||
| 3645 | /* We do this after normal locking has been enabled and any saved messages | ||
| 3646 | (in requestqueue) have been processed. We should be confident that at | ||
| 3647 | this point we won't get or process a reply to any of these waiting | ||
| 3648 | operations. But, new ops may be coming in on the rsbs/locks here from | ||
| 3649 | userspace or remotely. */ | ||
| 3650 | |||
| 3651 | /* there may have been an overlap unlock/cancel prior to recovery or after | ||
| 3652 | recovery. if before, the lkb may still have a pos wait_count; if after, the | ||
| 3653 | overlap flag would just have been set and nothing new sent. we can be | ||
| 3654 | confident here than any replies to either the initial op or overlap ops | ||
| 3655 | prior to recovery have been received. */ | ||
| 3656 | |||
| 3281 | int dlm_recover_waiters_post(struct dlm_ls *ls) | 3657 | int dlm_recover_waiters_post(struct dlm_ls *ls) | 
| 3282 | { | 3658 | { | 
| 3283 | struct dlm_lkb *lkb; | 3659 | struct dlm_lkb *lkb; | 
| 3284 | struct dlm_rsb *r; | 3660 | struct dlm_rsb *r; | 
| 3285 | int error = 0, mstype; | 3661 | int error = 0, mstype, err, oc, ou; | 
| 3286 | 3662 | ||
| 3287 | while (1) { | 3663 | while (1) { | 
| 3288 | if (dlm_locking_stopped(ls)) { | 3664 | if (dlm_locking_stopped(ls)) { | 
| @@ -3291,48 +3667,78 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) | |||
| 3291 | break; | 3667 | break; | 
| 3292 | } | 3668 | } | 
| 3293 | 3669 | ||
| 3294 | mstype = remove_resend_waiter(ls, &lkb); | 3670 | lkb = find_resend_waiter(ls); | 
| 3295 | if (!mstype) | 3671 | if (!lkb) | 
| 3296 | break; | 3672 | break; | 
| 3297 | 3673 | ||
| 3298 | r = lkb->lkb_resource; | 3674 | r = lkb->lkb_resource; | 
| 3675 | hold_rsb(r); | ||
| 3676 | lock_rsb(r); | ||
| 3677 | |||
| 3678 | mstype = lkb->lkb_wait_type; | ||
| 3679 | oc = is_overlap_cancel(lkb); | ||
| 3680 | ou = is_overlap_unlock(lkb); | ||
| 3681 | err = 0; | ||
| 3299 | 3682 | ||
| 3300 | log_debug(ls, "recover_waiters_post %x type %d flags %x %s", | 3683 | log_debug(ls, "recover_waiters_post %x type %d flags %x %s", | 
| 3301 | lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); | 3684 | lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); | 
| 3302 | 3685 | ||
| 3303 | switch (mstype) { | 3686 | /* At this point we assume that we won't get a reply to any | 
| 3304 | 3687 | previous op or overlap op on this lock. First, do a big | |
| 3305 | case DLM_MSG_LOOKUP: | 3688 | remove_from_waiters() for all previous ops. */ | 
| 3306 | hold_rsb(r); | 3689 | |
| 3307 | lock_rsb(r); | 3690 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | 
| 3308 | _request_lock(r, lkb); | 3691 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | 
| 3309 | if (is_master(r)) | 3692 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | 
| 3310 | confirm_master(r, 0); | 3693 | lkb->lkb_wait_type = 0; | 
| 3311 | unlock_rsb(r); | 3694 | lkb->lkb_wait_count = 0; | 
| 3312 | put_rsb(r); | 3695 | mutex_lock(&ls->ls_waiters_mutex); | 
| 3313 | break; | 3696 | list_del_init(&lkb->lkb_wait_reply); | 
| 3314 | 3697 | mutex_unlock(&ls->ls_waiters_mutex); | |
| 3315 | case DLM_MSG_REQUEST: | 3698 | unhold_lkb(lkb); /* for waiters list */ | 
| 3316 | hold_rsb(r); | 3699 | |
| 3317 | lock_rsb(r); | 3700 | if (oc || ou) { | 
| 3318 | _request_lock(r, lkb); | 3701 | /* do an unlock or cancel instead of resending */ | 
| 3319 | if (is_master(r)) | 3702 | switch (mstype) { | 
| 3320 | confirm_master(r, 0); | 3703 | case DLM_MSG_LOOKUP: | 
| 3321 | unlock_rsb(r); | 3704 | case DLM_MSG_REQUEST: | 
| 3322 | put_rsb(r); | 3705 | queue_cast(r, lkb, ou ? -DLM_EUNLOCK : | 
| 3323 | break; | 3706 | -DLM_ECANCEL); | 
| 3324 | 3707 | unhold_lkb(lkb); /* undoes create_lkb() */ | |
| 3325 | case DLM_MSG_CONVERT: | 3708 | break; | 
| 3326 | hold_rsb(r); | 3709 | case DLM_MSG_CONVERT: | 
| 3327 | lock_rsb(r); | 3710 | if (oc) { | 
| 3328 | _convert_lock(r, lkb); | 3711 | queue_cast(r, lkb, -DLM_ECANCEL); | 
| 3329 | unlock_rsb(r); | 3712 | } else { | 
| 3330 | put_rsb(r); | 3713 | lkb->lkb_exflags |= DLM_LKF_FORCEUNLOCK; | 
| 3331 | break; | 3714 | _unlock_lock(r, lkb); | 
| 3332 | 3715 | } | |
| 3333 | default: | 3716 | break; | 
| 3334 | log_error(ls, "recover_waiters_post type %d", mstype); | 3717 | default: | 
| 3718 | err = 1; | ||
| 3719 | } | ||
| 3720 | } else { | ||
| 3721 | switch (mstype) { | ||
| 3722 | case DLM_MSG_LOOKUP: | ||
| 3723 | case DLM_MSG_REQUEST: | ||
| 3724 | _request_lock(r, lkb); | ||
| 3725 | if (is_master(r)) | ||
| 3726 | confirm_master(r, 0); | ||
| 3727 | break; | ||
| 3728 | case DLM_MSG_CONVERT: | ||
| 3729 | _convert_lock(r, lkb); | ||
| 3730 | break; | ||
| 3731 | default: | ||
| 3732 | err = 1; | ||
| 3733 | } | ||
| 3335 | } | 3734 | } | 
| 3735 | |||
| 3736 | if (err) | ||
| 3737 | log_error(ls, "recover_waiters_post %x %d %x %d %d", | ||
| 3738 | lkb->lkb_id, mstype, lkb->lkb_flags, oc, ou); | ||
| 3739 | unlock_rsb(r); | ||
| 3740 | put_rsb(r); | ||
| 3741 | dlm_put_lkb(lkb); | ||
| 3336 | } | 3742 | } | 
| 3337 | 3743 | ||
| 3338 | return error; | 3744 | return error; | 
| @@ -3684,7 +4090,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, | |||
| 3684 | 4090 | ||
| 3685 | /* add this new lkb to the per-process list of locks */ | 4091 | /* add this new lkb to the per-process list of locks */ | 
| 3686 | spin_lock(&ua->proc->locks_spin); | 4092 | spin_lock(&ua->proc->locks_spin); | 
| 3687 | kref_get(&lkb->lkb_ref); | 4093 | hold_lkb(lkb); | 
| 3688 | list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); | 4094 | list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); | 
| 3689 | spin_unlock(&ua->proc->locks_spin); | 4095 | spin_unlock(&ua->proc->locks_spin); | 
| 3690 | out: | 4096 | out: | 
| @@ -3774,6 +4180,9 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
| 3774 | 4180 | ||
| 3775 | if (error == -DLM_EUNLOCK) | 4181 | if (error == -DLM_EUNLOCK) | 
| 3776 | error = 0; | 4182 | error = 0; | 
| 4183 | /* from validate_unlock_args() */ | ||
| 4184 | if (error == -EBUSY && (flags & DLM_LKF_FORCEUNLOCK)) | ||
| 4185 | error = 0; | ||
| 3777 | if (error) | 4186 | if (error) | 
| 3778 | goto out_put; | 4187 | goto out_put; | 
| 3779 | 4188 | ||
| @@ -3786,6 +4195,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
| 3786 | dlm_put_lkb(lkb); | 4195 | dlm_put_lkb(lkb); | 
| 3787 | out: | 4196 | out: | 
| 3788 | unlock_recovery(ls); | 4197 | unlock_recovery(ls); | 
| 4198 | kfree(ua_tmp); | ||
| 3789 | return error; | 4199 | return error; | 
| 3790 | } | 4200 | } | 
| 3791 | 4201 | ||
| @@ -3815,33 +4225,37 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
| 3815 | 4225 | ||
| 3816 | if (error == -DLM_ECANCEL) | 4226 | if (error == -DLM_ECANCEL) | 
| 3817 | error = 0; | 4227 | error = 0; | 
| 3818 | if (error) | 4228 | /* from validate_unlock_args() */ | 
| 3819 | goto out_put; | 4229 | if (error == -EBUSY) | 
| 3820 | 4230 | error = 0; | |
| 3821 | /* this lkb was removed from the WAITING queue */ | ||
| 3822 | if (lkb->lkb_grmode == DLM_LOCK_IV) { | ||
| 3823 | spin_lock(&ua->proc->locks_spin); | ||
| 3824 | list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); | ||
| 3825 | spin_unlock(&ua->proc->locks_spin); | ||
| 3826 | } | ||
| 3827 | out_put: | 4231 | out_put: | 
| 3828 | dlm_put_lkb(lkb); | 4232 | dlm_put_lkb(lkb); | 
| 3829 | out: | 4233 | out: | 
| 3830 | unlock_recovery(ls); | 4234 | unlock_recovery(ls); | 
| 4235 | kfree(ua_tmp); | ||
| 3831 | return error; | 4236 | return error; | 
| 3832 | } | 4237 | } | 
| 3833 | 4238 | ||
| 4239 | /* lkb's that are removed from the waiters list by revert are just left on the | ||
| 4240 | orphans list with the granted orphan locks, to be freed by purge */ | ||
| 4241 | |||
| 3834 | static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | 4242 | static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | 
| 3835 | { | 4243 | { | 
| 3836 | struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; | 4244 | struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; | 
| 4245 | struct dlm_args args; | ||
| 4246 | int error; | ||
| 3837 | 4247 | ||
| 3838 | if (ua->lksb.sb_lvbptr) | 4248 | hold_lkb(lkb); | 
| 3839 | kfree(ua->lksb.sb_lvbptr); | 4249 | mutex_lock(&ls->ls_orphans_mutex); | 
| 3840 | kfree(ua); | 4250 | list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans); | 
| 3841 | lkb->lkb_astparam = (long)NULL; | 4251 | mutex_unlock(&ls->ls_orphans_mutex); | 
| 3842 | 4252 | ||
| 3843 | /* TODO: propogate to master if needed */ | 4253 | set_unlock_args(0, ua, &args); | 
| 3844 | return 0; | 4254 | |
| 4255 | error = cancel_lock(ls, lkb, &args); | ||
| 4256 | if (error == -DLM_ECANCEL) | ||
| 4257 | error = 0; | ||
| 4258 | return error; | ||
| 3845 | } | 4259 | } | 
| 3846 | 4260 | ||
| 3847 | /* The force flag allows the unlock to go ahead even if the lkb isn't granted. | 4261 | /* The force flag allows the unlock to go ahead even if the lkb isn't granted. | 
| @@ -3853,10 +4267,6 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
| 3853 | struct dlm_args args; | 4267 | struct dlm_args args; | 
| 3854 | int error; | 4268 | int error; | 
| 3855 | 4269 | ||
| 3856 | /* FIXME: we need to handle the case where the lkb is in limbo | ||
| 3857 | while the rsb is being looked up, currently we assert in | ||
| 3858 | _unlock_lock/is_remote because rsb nodeid is -1. */ | ||
| 3859 | |||
| 3860 | set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args); | 4270 | set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args); | 
| 3861 | 4271 | ||
| 3862 | error = unlock_lock(ls, lkb, &args); | 4272 | error = unlock_lock(ls, lkb, &args); | 
| @@ -3865,6 +4275,31 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
| 3865 | return error; | 4275 | return error; | 
| 3866 | } | 4276 | } | 
| 3867 | 4277 | ||
| 4278 | /* We have to release clear_proc_locks mutex before calling unlock_proc_lock() | ||
| 4279 | (which does lock_rsb) due to deadlock with receiving a message that does | ||
| 4280 | lock_rsb followed by dlm_user_add_ast() */ | ||
| 4281 | |||
| 4282 | static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, | ||
| 4283 | struct dlm_user_proc *proc) | ||
| 4284 | { | ||
| 4285 | struct dlm_lkb *lkb = NULL; | ||
| 4286 | |||
| 4287 | mutex_lock(&ls->ls_clear_proc_locks); | ||
| 4288 | if (list_empty(&proc->locks)) | ||
| 4289 | goto out; | ||
| 4290 | |||
| 4291 | lkb = list_entry(proc->locks.next, struct dlm_lkb, lkb_ownqueue); | ||
| 4292 | list_del_init(&lkb->lkb_ownqueue); | ||
| 4293 | |||
| 4294 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) | ||
| 4295 | lkb->lkb_flags |= DLM_IFL_ORPHAN; | ||
| 4296 | else | ||
| 4297 | lkb->lkb_flags |= DLM_IFL_DEAD; | ||
| 4298 | out: | ||
| 4299 | mutex_unlock(&ls->ls_clear_proc_locks); | ||
| 4300 | return lkb; | ||
| 4301 | } | ||
| 4302 | |||
| 3868 | /* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which | 4303 | /* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which | 
| 3869 | 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, | 4304 | 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, | 
| 3870 | which we clear here. */ | 4305 | which we clear here. */ | 
| @@ -3880,18 +4315,15 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
| 3880 | struct dlm_lkb *lkb, *safe; | 4315 | struct dlm_lkb *lkb, *safe; | 
| 3881 | 4316 | ||
| 3882 | lock_recovery(ls); | 4317 | lock_recovery(ls); | 
| 3883 | mutex_lock(&ls->ls_clear_proc_locks); | ||
| 3884 | 4318 | ||
| 3885 | list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { | 4319 | while (1) { | 
| 3886 | list_del_init(&lkb->lkb_ownqueue); | 4320 | lkb = del_proc_lock(ls, proc); | 
| 3887 | 4321 | if (!lkb) | |
| 3888 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { | 4322 | break; | 
| 3889 | lkb->lkb_flags |= DLM_IFL_ORPHAN; | 4323 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) | 
| 3890 | orphan_proc_lock(ls, lkb); | 4324 | orphan_proc_lock(ls, lkb); | 
| 3891 | } else { | 4325 | else | 
| 3892 | lkb->lkb_flags |= DLM_IFL_DEAD; | ||
| 3893 | unlock_proc_lock(ls, lkb); | 4326 | unlock_proc_lock(ls, lkb); | 
| 3894 | } | ||
| 3895 | 4327 | ||
| 3896 | /* this removes the reference for the proc->locks list | 4328 | /* this removes the reference for the proc->locks list | 
| 3897 | added by dlm_user_request, it may result in the lkb | 4329 | added by dlm_user_request, it may result in the lkb | 
| @@ -3900,6 +4332,8 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
| 3900 | dlm_put_lkb(lkb); | 4332 | dlm_put_lkb(lkb); | 
| 3901 | } | 4333 | } | 
| 3902 | 4334 | ||
| 4335 | mutex_lock(&ls->ls_clear_proc_locks); | ||
| 4336 | |||
| 3903 | /* in-progress unlocks */ | 4337 | /* in-progress unlocks */ | 
| 3904 | list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { | 4338 | list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { | 
| 3905 | list_del_init(&lkb->lkb_ownqueue); | 4339 | list_del_init(&lkb->lkb_ownqueue); | 
| @@ -3916,3 +4350,92 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
| 3916 | unlock_recovery(ls); | 4350 | unlock_recovery(ls); | 
| 3917 | } | 4351 | } | 
| 3918 | 4352 | ||
| 4353 | static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | ||
| 4354 | { | ||
| 4355 | struct dlm_lkb *lkb, *safe; | ||
| 4356 | |||
| 4357 | while (1) { | ||
| 4358 | lkb = NULL; | ||
| 4359 | spin_lock(&proc->locks_spin); | ||
| 4360 | if (!list_empty(&proc->locks)) { | ||
| 4361 | lkb = list_entry(proc->locks.next, struct dlm_lkb, | ||
| 4362 | lkb_ownqueue); | ||
| 4363 | list_del_init(&lkb->lkb_ownqueue); | ||
| 4364 | } | ||
| 4365 | spin_unlock(&proc->locks_spin); | ||
| 4366 | |||
| 4367 | if (!lkb) | ||
| 4368 | break; | ||
| 4369 | |||
| 4370 | lkb->lkb_flags |= DLM_IFL_DEAD; | ||
| 4371 | unlock_proc_lock(ls, lkb); | ||
| 4372 | dlm_put_lkb(lkb); /* ref from proc->locks list */ | ||
| 4373 | } | ||
| 4374 | |||
| 4375 | spin_lock(&proc->locks_spin); | ||
| 4376 | list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { | ||
| 4377 | list_del_init(&lkb->lkb_ownqueue); | ||
| 4378 | lkb->lkb_flags |= DLM_IFL_DEAD; | ||
| 4379 | dlm_put_lkb(lkb); | ||
| 4380 | } | ||
| 4381 | spin_unlock(&proc->locks_spin); | ||
| 4382 | |||
| 4383 | spin_lock(&proc->asts_spin); | ||
| 4384 | list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { | ||
| 4385 | list_del(&lkb->lkb_astqueue); | ||
| 4386 | dlm_put_lkb(lkb); | ||
| 4387 | } | ||
| 4388 | spin_unlock(&proc->asts_spin); | ||
| 4389 | } | ||
| 4390 | |||
| 4391 | /* pid of 0 means purge all orphans */ | ||
| 4392 | |||
| 4393 | static void do_purge(struct dlm_ls *ls, int nodeid, int pid) | ||
| 4394 | { | ||
| 4395 | struct dlm_lkb *lkb, *safe; | ||
| 4396 | |||
| 4397 | mutex_lock(&ls->ls_orphans_mutex); | ||
| 4398 | list_for_each_entry_safe(lkb, safe, &ls->ls_orphans, lkb_ownqueue) { | ||
| 4399 | if (pid && lkb->lkb_ownpid != pid) | ||
| 4400 | continue; | ||
| 4401 | unlock_proc_lock(ls, lkb); | ||
| 4402 | list_del_init(&lkb->lkb_ownqueue); | ||
| 4403 | dlm_put_lkb(lkb); | ||
| 4404 | } | ||
| 4405 | mutex_unlock(&ls->ls_orphans_mutex); | ||
| 4406 | } | ||
| 4407 | |||
| 4408 | static int send_purge(struct dlm_ls *ls, int nodeid, int pid) | ||
| 4409 | { | ||
| 4410 | struct dlm_message *ms; | ||
| 4411 | struct dlm_mhandle *mh; | ||
| 4412 | int error; | ||
| 4413 | |||
| 4414 | error = _create_message(ls, sizeof(struct dlm_message), nodeid, | ||
| 4415 | DLM_MSG_PURGE, &ms, &mh); | ||
| 4416 | if (error) | ||
| 4417 | return error; | ||
| 4418 | ms->m_nodeid = nodeid; | ||
| 4419 | ms->m_pid = pid; | ||
| 4420 | |||
| 4421 | return send_message(mh, ms); | ||
| 4422 | } | ||
| 4423 | |||
| 4424 | int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, | ||
| 4425 | int nodeid, int pid) | ||
| 4426 | { | ||
| 4427 | int error = 0; | ||
| 4428 | |||
| 4429 | if (nodeid != dlm_our_nodeid()) { | ||
| 4430 | error = send_purge(ls, nodeid, pid); | ||
| 4431 | } else { | ||
| 4432 | lock_recovery(ls); | ||
| 4433 | if (pid == current->pid) | ||
| 4434 | purge_proc_locks(ls, proc); | ||
| 4435 | else | ||
| 4436 | do_purge(ls, nodeid, pid); | ||
| 4437 | unlock_recovery(ls); | ||
| 4438 | } | ||
| 4439 | return error; | ||
| 4440 | } | ||
| 4441 | |||
| diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 0843a3073ec3..64fc4ec40668 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h | |||
| @@ -41,6 +41,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
| 41 | uint32_t flags, uint32_t lkid, char *lvb_in); | 41 | uint32_t flags, uint32_t lkid, char *lvb_in); | 
| 42 | int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | 42 | int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | 
| 43 | uint32_t flags, uint32_t lkid); | 43 | uint32_t flags, uint32_t lkid); | 
| 44 | int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, | ||
| 45 | int nodeid, int pid); | ||
| 44 | void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); | 46 | void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); | 
| 45 | 47 | ||
| 46 | static inline int is_master(struct dlm_rsb *r) | 48 | static inline int is_master(struct dlm_rsb *r) | 
| diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 8e6ad7600236..a677b2a5eed4 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* | 
| 3 | ** | 3 | ** | 
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 
| 6 | ** | 6 | ** | 
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, | 
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 
| @@ -459,6 +459,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 459 | 459 | ||
| 460 | INIT_LIST_HEAD(&ls->ls_waiters); | 460 | INIT_LIST_HEAD(&ls->ls_waiters); | 
| 461 | mutex_init(&ls->ls_waiters_mutex); | 461 | mutex_init(&ls->ls_waiters_mutex); | 
| 462 | INIT_LIST_HEAD(&ls->ls_orphans); | ||
| 463 | mutex_init(&ls->ls_orphans_mutex); | ||
| 462 | 464 | ||
| 463 | INIT_LIST_HEAD(&ls->ls_nodes); | 465 | INIT_LIST_HEAD(&ls->ls_nodes); | 
| 464 | INIT_LIST_HEAD(&ls->ls_nodes_gone); | 466 | INIT_LIST_HEAD(&ls->ls_nodes_gone); | 
| diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c deleted file mode 100644 index dc83a9d979b5..000000000000 --- a/fs/dlm/lowcomms-sctp.c +++ /dev/null | |||
| @@ -1,1210 +0,0 @@ | |||
| 1 | /****************************************************************************** | ||
| 2 | ******************************************************************************* | ||
| 3 | ** | ||
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 5 | ** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 6 | ** | ||
| 7 | ** This copyrighted material is made available to anyone wishing to use, | ||
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | ||
| 9 | ** of the GNU General Public License v.2. | ||
| 10 | ** | ||
| 11 | ******************************************************************************* | ||
| 12 | ******************************************************************************/ | ||
| 13 | |||
| 14 | /* | ||
| 15 | * lowcomms.c | ||
| 16 | * | ||
| 17 | * This is the "low-level" comms layer. | ||
| 18 | * | ||
| 19 | * It is responsible for sending/receiving messages | ||
| 20 | * from other nodes in the cluster. | ||
| 21 | * | ||
| 22 | * Cluster nodes are referred to by their nodeids. nodeids are | ||
| 23 | * simply 32 bit numbers to the locking module - if they need to | ||
| 24 | * be expanded for the cluster infrastructure then that is it's | ||
| 25 | * responsibility. It is this layer's | ||
| 26 | * responsibility to resolve these into IP address or | ||
| 27 | * whatever it needs for inter-node communication. | ||
| 28 | * | ||
| 29 | * The comms level is two kernel threads that deal mainly with | ||
| 30 | * the receiving of messages from other nodes and passing them | ||
| 31 | * up to the mid-level comms layer (which understands the | ||
| 32 | * message format) for execution by the locking core, and | ||
| 33 | * a send thread which does all the setting up of connections | ||
| 34 | * to remote nodes and the sending of data. Threads are not allowed | ||
| 35 | * to send their own data because it may cause them to wait in times | ||
| 36 | * of high load. Also, this way, the sending thread can collect together | ||
| 37 | * messages bound for one node and send them in one block. | ||
| 38 | * | ||
| 39 | * I don't see any problem with the recv thread executing the locking | ||
| 40 | * code on behalf of remote processes as the locking code is | ||
| 41 | * short, efficient and never (well, hardly ever) waits. | ||
| 42 | * | ||
| 43 | */ | ||
| 44 | |||
| 45 | #include <asm/ioctls.h> | ||
| 46 | #include <net/sock.h> | ||
| 47 | #include <net/tcp.h> | ||
| 48 | #include <net/sctp/user.h> | ||
| 49 | #include <linux/pagemap.h> | ||
| 50 | #include <linux/socket.h> | ||
| 51 | #include <linux/idr.h> | ||
| 52 | |||
| 53 | #include "dlm_internal.h" | ||
| 54 | #include "lowcomms.h" | ||
| 55 | #include "config.h" | ||
| 56 | #include "midcomms.h" | ||
| 57 | |||
| 58 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; | ||
| 59 | static int dlm_local_count; | ||
| 60 | static int dlm_local_nodeid; | ||
| 61 | |||
| 62 | /* One of these per connected node */ | ||
| 63 | |||
| 64 | #define NI_INIT_PENDING 1 | ||
| 65 | #define NI_WRITE_PENDING 2 | ||
| 66 | |||
| 67 | struct nodeinfo { | ||
| 68 | spinlock_t lock; | ||
| 69 | sctp_assoc_t assoc_id; | ||
| 70 | unsigned long flags; | ||
| 71 | struct list_head write_list; /* nodes with pending writes */ | ||
| 72 | struct list_head writequeue; /* outgoing writequeue_entries */ | ||
| 73 | spinlock_t writequeue_lock; | ||
| 74 | int nodeid; | ||
| 75 | struct work_struct swork; /* Send workqueue */ | ||
| 76 | struct work_struct lwork; /* Locking workqueue */ | ||
| 77 | }; | ||
| 78 | |||
| 79 | static DEFINE_IDR(nodeinfo_idr); | ||
| 80 | static DECLARE_RWSEM(nodeinfo_lock); | ||
| 81 | static int max_nodeid; | ||
| 82 | |||
| 83 | struct cbuf { | ||
| 84 | unsigned int base; | ||
| 85 | unsigned int len; | ||
| 86 | unsigned int mask; | ||
| 87 | }; | ||
| 88 | |||
| 89 | /* Just the one of these, now. But this struct keeps | ||
| 90 | the connection-specific variables together */ | ||
| 91 | |||
| 92 | #define CF_READ_PENDING 1 | ||
| 93 | |||
| 94 | struct connection { | ||
| 95 | struct socket *sock; | ||
| 96 | unsigned long flags; | ||
| 97 | struct page *rx_page; | ||
| 98 | atomic_t waiting_requests; | ||
| 99 | struct cbuf cb; | ||
| 100 | int eagain_flag; | ||
| 101 | struct work_struct work; /* Send workqueue */ | ||
| 102 | }; | ||
| 103 | |||
| 104 | /* An entry waiting to be sent */ | ||
| 105 | |||
| 106 | struct writequeue_entry { | ||
| 107 | struct list_head list; | ||
| 108 | struct page *page; | ||
| 109 | int offset; | ||
| 110 | int len; | ||
| 111 | int end; | ||
| 112 | int users; | ||
| 113 | struct nodeinfo *ni; | ||
| 114 | }; | ||
| 115 | |||
| 116 | static void cbuf_add(struct cbuf *cb, int n) | ||
| 117 | { | ||
| 118 | cb->len += n; | ||
| 119 | } | ||
| 120 | |||
| 121 | static int cbuf_data(struct cbuf *cb) | ||
| 122 | { | ||
| 123 | return ((cb->base + cb->len) & cb->mask); | ||
| 124 | } | ||
| 125 | |||
| 126 | static void cbuf_init(struct cbuf *cb, int size) | ||
| 127 | { | ||
| 128 | cb->base = cb->len = 0; | ||
| 129 | cb->mask = size-1; | ||
| 130 | } | ||
| 131 | |||
| 132 | static void cbuf_eat(struct cbuf *cb, int n) | ||
| 133 | { | ||
| 134 | cb->len -= n; | ||
| 135 | cb->base += n; | ||
| 136 | cb->base &= cb->mask; | ||
| 137 | } | ||
| 138 | |||
| 139 | /* List of nodes which have writes pending */ | ||
| 140 | static LIST_HEAD(write_nodes); | ||
| 141 | static DEFINE_SPINLOCK(write_nodes_lock); | ||
| 142 | |||
| 143 | |||
| 144 | /* Maximum number of incoming messages to process before | ||
| 145 | * doing a schedule() | ||
| 146 | */ | ||
| 147 | #define MAX_RX_MSG_COUNT 25 | ||
| 148 | |||
| 149 | /* Work queues */ | ||
| 150 | static struct workqueue_struct *recv_workqueue; | ||
| 151 | static struct workqueue_struct *send_workqueue; | ||
| 152 | static struct workqueue_struct *lock_workqueue; | ||
| 153 | |||
| 154 | /* The SCTP connection */ | ||
| 155 | static struct connection sctp_con; | ||
| 156 | |||
| 157 | static void process_send_sockets(struct work_struct *work); | ||
| 158 | static void process_recv_sockets(struct work_struct *work); | ||
| 159 | static void process_lock_request(struct work_struct *work); | ||
| 160 | |||
| 161 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | ||
| 162 | { | ||
| 163 | struct sockaddr_storage addr; | ||
| 164 | int error; | ||
| 165 | |||
| 166 | if (!dlm_local_count) | ||
| 167 | return -1; | ||
| 168 | |||
| 169 | error = dlm_nodeid_to_addr(nodeid, &addr); | ||
| 170 | if (error) | ||
| 171 | return error; | ||
| 172 | |||
| 173 | if (dlm_local_addr[0]->ss_family == AF_INET) { | ||
| 174 | struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; | ||
| 175 | struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; | ||
| 176 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | ||
| 177 | } else { | ||
| 178 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | ||
| 179 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | ||
| 180 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | ||
| 181 | sizeof(in6->sin6_addr)); | ||
| 182 | } | ||
| 183 | |||
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
| 187 | /* If alloc is 0 here we will not attempt to allocate a new | ||
| 188 | nodeinfo struct */ | ||
| 189 | static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc) | ||
| 190 | { | ||
| 191 | struct nodeinfo *ni; | ||
| 192 | int r; | ||
| 193 | int n; | ||
| 194 | |||
| 195 | down_read(&nodeinfo_lock); | ||
| 196 | ni = idr_find(&nodeinfo_idr, nodeid); | ||
| 197 | up_read(&nodeinfo_lock); | ||
| 198 | |||
| 199 | if (ni || !alloc) | ||
| 200 | return ni; | ||
| 201 | |||
| 202 | down_write(&nodeinfo_lock); | ||
| 203 | |||
| 204 | ni = idr_find(&nodeinfo_idr, nodeid); | ||
| 205 | if (ni) | ||
| 206 | goto out_up; | ||
| 207 | |||
| 208 | r = idr_pre_get(&nodeinfo_idr, alloc); | ||
| 209 | if (!r) | ||
| 210 | goto out_up; | ||
| 211 | |||
| 212 | ni = kmalloc(sizeof(struct nodeinfo), alloc); | ||
| 213 | if (!ni) | ||
| 214 | goto out_up; | ||
| 215 | |||
| 216 | r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n); | ||
| 217 | if (r) { | ||
| 218 | kfree(ni); | ||
| 219 | ni = NULL; | ||
| 220 | goto out_up; | ||
| 221 | } | ||
| 222 | if (n != nodeid) { | ||
| 223 | idr_remove(&nodeinfo_idr, n); | ||
| 224 | kfree(ni); | ||
| 225 | ni = NULL; | ||
| 226 | goto out_up; | ||
| 227 | } | ||
| 228 | memset(ni, 0, sizeof(struct nodeinfo)); | ||
| 229 | spin_lock_init(&ni->lock); | ||
| 230 | INIT_LIST_HEAD(&ni->writequeue); | ||
| 231 | spin_lock_init(&ni->writequeue_lock); | ||
| 232 | INIT_WORK(&ni->lwork, process_lock_request); | ||
| 233 | INIT_WORK(&ni->swork, process_send_sockets); | ||
| 234 | ni->nodeid = nodeid; | ||
| 235 | |||
| 236 | if (nodeid > max_nodeid) | ||
| 237 | max_nodeid = nodeid; | ||
| 238 | out_up: | ||
| 239 | up_write(&nodeinfo_lock); | ||
| 240 | |||
| 241 | return ni; | ||
| 242 | } | ||
| 243 | |||
| 244 | /* Don't call this too often... */ | ||
| 245 | static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc) | ||
| 246 | { | ||
| 247 | int i; | ||
| 248 | struct nodeinfo *ni; | ||
| 249 | |||
| 250 | for (i=1; i<=max_nodeid; i++) { | ||
| 251 | ni = nodeid2nodeinfo(i, 0); | ||
| 252 | if (ni && ni->assoc_id == assoc) | ||
| 253 | return ni; | ||
| 254 | } | ||
| 255 | return NULL; | ||
| 256 | } | ||
| 257 | |||
| 258 | /* Data or notification available on socket */ | ||
| 259 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | ||
| 260 | { | ||
| 261 | if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) | ||
| 262 | queue_work(recv_workqueue, &sctp_con.work); | ||
| 263 | } | ||
| 264 | |||
| 265 | |||
| 266 | /* Add the port number to an IP6 or 4 sockaddr and return the address length. | ||
| 267 | Also padd out the struct with zeros to make comparisons meaningful */ | ||
| 268 | |||
| 269 | static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, | ||
| 270 | int *addr_len) | ||
| 271 | { | ||
| 272 | struct sockaddr_in *local4_addr; | ||
| 273 | struct sockaddr_in6 *local6_addr; | ||
| 274 | |||
| 275 | if (!dlm_local_count) | ||
| 276 | return; | ||
| 277 | |||
| 278 | if (!port) { | ||
| 279 | if (dlm_local_addr[0]->ss_family == AF_INET) { | ||
| 280 | local4_addr = (struct sockaddr_in *)dlm_local_addr[0]; | ||
| 281 | port = be16_to_cpu(local4_addr->sin_port); | ||
| 282 | } else { | ||
| 283 | local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0]; | ||
| 284 | port = be16_to_cpu(local6_addr->sin6_port); | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | saddr->ss_family = dlm_local_addr[0]->ss_family; | ||
| 289 | if (dlm_local_addr[0]->ss_family == AF_INET) { | ||
| 290 | struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; | ||
| 291 | in4_addr->sin_port = cpu_to_be16(port); | ||
| 292 | memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero)); | ||
| 293 | memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) - | ||
| 294 | sizeof(struct sockaddr_in)); | ||
| 295 | *addr_len = sizeof(struct sockaddr_in); | ||
| 296 | } else { | ||
| 297 | struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; | ||
| 298 | in6_addr->sin6_port = cpu_to_be16(port); | ||
| 299 | memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) - | ||
| 300 | sizeof(struct sockaddr_in6)); | ||
| 301 | *addr_len = sizeof(struct sockaddr_in6); | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | /* Close the connection and tidy up */ | ||
| 306 | static void close_connection(void) | ||
| 307 | { | ||
| 308 | if (sctp_con.sock) { | ||
| 309 | sock_release(sctp_con.sock); | ||
| 310 | sctp_con.sock = NULL; | ||
| 311 | } | ||
| 312 | |||
| 313 | if (sctp_con.rx_page) { | ||
| 314 | __free_page(sctp_con.rx_page); | ||
| 315 | sctp_con.rx_page = NULL; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | /* We only send shutdown messages to nodes that are not part of the cluster */ | ||
| 320 | static void send_shutdown(sctp_assoc_t associd) | ||
| 321 | { | ||
| 322 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 323 | struct msghdr outmessage; | ||
| 324 | struct cmsghdr *cmsg; | ||
| 325 | struct sctp_sndrcvinfo *sinfo; | ||
| 326 | int ret; | ||
| 327 | |||
| 328 | outmessage.msg_name = NULL; | ||
| 329 | outmessage.msg_namelen = 0; | ||
| 330 | outmessage.msg_control = outcmsg; | ||
| 331 | outmessage.msg_controllen = sizeof(outcmsg); | ||
| 332 | outmessage.msg_flags = MSG_EOR; | ||
| 333 | |||
| 334 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
| 335 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
| 336 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
| 337 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
| 338 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
| 339 | sinfo = CMSG_DATA(cmsg); | ||
| 340 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
| 341 | |||
| 342 | sinfo->sinfo_flags |= MSG_EOF; | ||
| 343 | sinfo->sinfo_assoc_id = associd; | ||
| 344 | |||
| 345 | ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0); | ||
| 346 | |||
| 347 | if (ret != 0) | ||
| 348 | log_print("send EOF to node failed: %d", ret); | ||
| 349 | } | ||
| 350 | |||
| 351 | |||
| 352 | /* INIT failed but we don't know which node... | ||
| 353 | restart INIT on all pending nodes */ | ||
| 354 | static void init_failed(void) | ||
| 355 | { | ||
| 356 | int i; | ||
| 357 | struct nodeinfo *ni; | ||
| 358 | |||
| 359 | for (i=1; i<=max_nodeid; i++) { | ||
| 360 | ni = nodeid2nodeinfo(i, 0); | ||
| 361 | if (!ni) | ||
| 362 | continue; | ||
| 363 | |||
| 364 | if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) { | ||
| 365 | ni->assoc_id = 0; | ||
| 366 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 367 | spin_lock_bh(&write_nodes_lock); | ||
| 368 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 369 | spin_unlock_bh(&write_nodes_lock); | ||
| 370 | queue_work(send_workqueue, &ni->swork); | ||
| 371 | } | ||
| 372 | } | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | /* Something happened to an association */ | ||
| 377 | static void process_sctp_notification(struct msghdr *msg, char *buf) | ||
| 378 | { | ||
| 379 | union sctp_notification *sn = (union sctp_notification *)buf; | ||
| 380 | |||
| 381 | if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { | ||
| 382 | switch (sn->sn_assoc_change.sac_state) { | ||
| 383 | |||
| 384 | case SCTP_COMM_UP: | ||
| 385 | case SCTP_RESTART: | ||
| 386 | { | ||
| 387 | /* Check that the new node is in the lockspace */ | ||
| 388 | struct sctp_prim prim; | ||
| 389 | mm_segment_t fs; | ||
| 390 | int nodeid; | ||
| 391 | int prim_len, ret; | ||
| 392 | int addr_len; | ||
| 393 | struct nodeinfo *ni; | ||
| 394 | |||
| 395 | /* This seems to happen when we received a connection | ||
| 396 | * too early... or something... anyway, it happens but | ||
| 397 | * we always seem to get a real message too, see | ||
| 398 | * receive_from_sock */ | ||
| 399 | |||
| 400 | if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) { | ||
| 401 | log_print("COMM_UP for invalid assoc ID %d", | ||
| 402 | (int)sn->sn_assoc_change.sac_assoc_id); | ||
| 403 | init_failed(); | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | memset(&prim, 0, sizeof(struct sctp_prim)); | ||
| 407 | prim_len = sizeof(struct sctp_prim); | ||
| 408 | prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id; | ||
| 409 | |||
| 410 | fs = get_fs(); | ||
| 411 | set_fs(get_ds()); | ||
| 412 | ret = sctp_con.sock->ops->getsockopt(sctp_con.sock, | ||
| 413 | IPPROTO_SCTP, | ||
| 414 | SCTP_PRIMARY_ADDR, | ||
| 415 | (char*)&prim, | ||
| 416 | &prim_len); | ||
| 417 | set_fs(fs); | ||
| 418 | if (ret < 0) { | ||
| 419 | struct nodeinfo *ni; | ||
| 420 | |||
| 421 | log_print("getsockopt/sctp_primary_addr on " | ||
| 422 | "new assoc %d failed : %d", | ||
| 423 | (int)sn->sn_assoc_change.sac_assoc_id, | ||
| 424 | ret); | ||
| 425 | |||
| 426 | /* Retry INIT later */ | ||
| 427 | ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id); | ||
| 428 | if (ni) | ||
| 429 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
| 430 | return; | ||
| 431 | } | ||
| 432 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); | ||
| 433 | if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { | ||
| 434 | log_print("reject connect from unknown addr"); | ||
| 435 | send_shutdown(prim.ssp_assoc_id); | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | |||
| 439 | ni = nodeid2nodeinfo(nodeid, GFP_KERNEL); | ||
| 440 | if (!ni) | ||
| 441 | return; | ||
| 442 | |||
| 443 | /* Save the assoc ID */ | ||
| 444 | ni->assoc_id = sn->sn_assoc_change.sac_assoc_id; | ||
| 445 | |||
| 446 | log_print("got new/restarted association %d nodeid %d", | ||
| 447 | (int)sn->sn_assoc_change.sac_assoc_id, nodeid); | ||
| 448 | |||
| 449 | /* Send any pending writes */ | ||
| 450 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
| 451 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 452 | spin_lock_bh(&write_nodes_lock); | ||
| 453 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 454 | spin_unlock_bh(&write_nodes_lock); | ||
| 455 | queue_work(send_workqueue, &ni->swork); | ||
| 456 | } | ||
| 457 | } | ||
| 458 | break; | ||
| 459 | |||
| 460 | case SCTP_COMM_LOST: | ||
| 461 | case SCTP_SHUTDOWN_COMP: | ||
| 462 | { | ||
| 463 | struct nodeinfo *ni; | ||
| 464 | |||
| 465 | ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id); | ||
| 466 | if (ni) { | ||
| 467 | spin_lock(&ni->lock); | ||
| 468 | ni->assoc_id = 0; | ||
| 469 | spin_unlock(&ni->lock); | ||
| 470 | } | ||
| 471 | } | ||
| 472 | break; | ||
| 473 | |||
| 474 | /* We don't know which INIT failed, so clear the PENDING flags | ||
| 475 | * on them all. if assoc_id is zero then it will then try | ||
| 476 | * again */ | ||
| 477 | |||
| 478 | case SCTP_CANT_STR_ASSOC: | ||
| 479 | { | ||
| 480 | log_print("Can't start SCTP association - retrying"); | ||
| 481 | init_failed(); | ||
| 482 | } | ||
| 483 | break; | ||
| 484 | |||
| 485 | default: | ||
| 486 | log_print("unexpected SCTP assoc change id=%d state=%d", | ||
| 487 | (int)sn->sn_assoc_change.sac_assoc_id, | ||
| 488 | sn->sn_assoc_change.sac_state); | ||
| 489 | } | ||
| 490 | } | ||
| 491 | } | ||
| 492 | |||
| 493 | /* Data received from remote end */ | ||
| 494 | static int receive_from_sock(void) | ||
| 495 | { | ||
| 496 | int ret = 0; | ||
| 497 | struct msghdr msg; | ||
| 498 | struct kvec iov[2]; | ||
| 499 | unsigned len; | ||
| 500 | int r; | ||
| 501 | struct sctp_sndrcvinfo *sinfo; | ||
| 502 | struct cmsghdr *cmsg; | ||
| 503 | struct nodeinfo *ni; | ||
| 504 | |||
| 505 | /* These two are marginally too big for stack allocation, but this | ||
| 506 | * function is (currently) only called by dlm_recvd so static should be | ||
| 507 | * OK. | ||
| 508 | */ | ||
| 509 | static struct sockaddr_storage msgname; | ||
| 510 | static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 511 | |||
| 512 | if (sctp_con.sock == NULL) | ||
| 513 | goto out; | ||
| 514 | |||
| 515 | if (sctp_con.rx_page == NULL) { | ||
| 516 | /* | ||
| 517 | * This doesn't need to be atomic, but I think it should | ||
| 518 | * improve performance if it is. | ||
| 519 | */ | ||
| 520 | sctp_con.rx_page = alloc_page(GFP_ATOMIC); | ||
| 521 | if (sctp_con.rx_page == NULL) | ||
| 522 | goto out_resched; | ||
| 523 | cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE); | ||
| 524 | } | ||
| 525 | |||
| 526 | memset(&incmsg, 0, sizeof(incmsg)); | ||
| 527 | memset(&msgname, 0, sizeof(msgname)); | ||
| 528 | |||
| 529 | msg.msg_name = &msgname; | ||
| 530 | msg.msg_namelen = sizeof(msgname); | ||
| 531 | msg.msg_flags = 0; | ||
| 532 | msg.msg_control = incmsg; | ||
| 533 | msg.msg_controllen = sizeof(incmsg); | ||
| 534 | msg.msg_iovlen = 1; | ||
| 535 | |||
| 536 | /* I don't see why this circular buffer stuff is necessary for SCTP | ||
| 537 | * which is a packet-based protocol, but the whole thing breaks under | ||
| 538 | * load without it! The overhead is minimal (and is in the TCP lowcomms | ||
| 539 | * anyway, of course) so I'll leave it in until I can figure out what's | ||
| 540 | * really happening. | ||
| 541 | */ | ||
| 542 | |||
| 543 | /* | ||
| 544 | * iov[0] is the bit of the circular buffer between the current end | ||
| 545 | * point (cb.base + cb.len) and the end of the buffer. | ||
| 546 | */ | ||
| 547 | iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb); | ||
| 548 | iov[0].iov_base = page_address(sctp_con.rx_page) + | ||
| 549 | cbuf_data(&sctp_con.cb); | ||
| 550 | iov[1].iov_len = 0; | ||
| 551 | |||
| 552 | /* | ||
| 553 | * iov[1] is the bit of the circular buffer between the start of the | ||
| 554 | * buffer and the start of the currently used section (cb.base) | ||
| 555 | */ | ||
| 556 | if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) { | ||
| 557 | iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb); | ||
| 558 | iov[1].iov_len = sctp_con.cb.base; | ||
| 559 | iov[1].iov_base = page_address(sctp_con.rx_page); | ||
| 560 | msg.msg_iovlen = 2; | ||
| 561 | } | ||
| 562 | len = iov[0].iov_len + iov[1].iov_len; | ||
| 563 | |||
| 564 | r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len, | ||
| 565 | MSG_NOSIGNAL | MSG_DONTWAIT); | ||
| 566 | if (ret <= 0) | ||
| 567 | goto out_close; | ||
| 568 | |||
| 569 | msg.msg_control = incmsg; | ||
| 570 | msg.msg_controllen = sizeof(incmsg); | ||
| 571 | cmsg = CMSG_FIRSTHDR(&msg); | ||
| 572 | sinfo = CMSG_DATA(cmsg); | ||
| 573 | |||
| 574 | if (msg.msg_flags & MSG_NOTIFICATION) { | ||
| 575 | process_sctp_notification(&msg, page_address(sctp_con.rx_page)); | ||
| 576 | return 0; | ||
| 577 | } | ||
| 578 | |||
| 579 | /* Is this a new association ? */ | ||
| 580 | ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL); | ||
| 581 | if (ni) { | ||
| 582 | ni->assoc_id = sinfo->sinfo_assoc_id; | ||
| 583 | if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) { | ||
| 584 | |||
| 585 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 586 | spin_lock_bh(&write_nodes_lock); | ||
| 587 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 588 | spin_unlock_bh(&write_nodes_lock); | ||
| 589 | queue_work(send_workqueue, &ni->swork); | ||
| 590 | } | ||
| 591 | } | ||
| 592 | } | ||
| 593 | |||
| 594 | /* INIT sends a message with length of 1 - ignore it */ | ||
| 595 | if (r == 1) | ||
| 596 | return 0; | ||
| 597 | |||
| 598 | cbuf_add(&sctp_con.cb, ret); | ||
| 599 | // PJC: TODO: Add to node's workqueue....can we ?? | ||
| 600 | ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), | ||
| 601 | page_address(sctp_con.rx_page), | ||
| 602 | sctp_con.cb.base, sctp_con.cb.len, | ||
| 603 | PAGE_CACHE_SIZE); | ||
| 604 | if (ret < 0) | ||
| 605 | goto out_close; | ||
| 606 | cbuf_eat(&sctp_con.cb, ret); | ||
| 607 | |||
| 608 | out: | ||
| 609 | ret = 0; | ||
| 610 | goto out_ret; | ||
| 611 | |||
| 612 | out_resched: | ||
| 613 | lowcomms_data_ready(sctp_con.sock->sk, 0); | ||
| 614 | ret = 0; | ||
| 615 | cond_resched(); | ||
| 616 | goto out_ret; | ||
| 617 | |||
| 618 | out_close: | ||
| 619 | if (ret != -EAGAIN) | ||
| 620 | log_print("error reading from sctp socket: %d", ret); | ||
| 621 | out_ret: | ||
| 622 | return ret; | ||
| 623 | } | ||
| 624 | |||
| 625 | /* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */ | ||
| 626 | static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num) | ||
| 627 | { | ||
| 628 | mm_segment_t fs; | ||
| 629 | int result = 0; | ||
| 630 | |||
| 631 | fs = get_fs(); | ||
| 632 | set_fs(get_ds()); | ||
| 633 | if (num == 1) | ||
| 634 | result = sctp_con.sock->ops->bind(sctp_con.sock, | ||
| 635 | (struct sockaddr *) addr, | ||
| 636 | addr_len); | ||
| 637 | else | ||
| 638 | result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP, | ||
| 639 | SCTP_SOCKOPT_BINDX_ADD, | ||
| 640 | (char *)addr, addr_len); | ||
| 641 | set_fs(fs); | ||
| 642 | |||
| 643 | if (result < 0) | ||
| 644 | log_print("Can't bind to port %d addr number %d", | ||
| 645 | dlm_config.ci_tcp_port, num); | ||
| 646 | |||
| 647 | return result; | ||
| 648 | } | ||
| 649 | |||
| 650 | static void init_local(void) | ||
| 651 | { | ||
| 652 | struct sockaddr_storage sas, *addr; | ||
| 653 | int i; | ||
| 654 | |||
| 655 | dlm_local_nodeid = dlm_our_nodeid(); | ||
| 656 | |||
| 657 | for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { | ||
| 658 | if (dlm_our_addr(&sas, i)) | ||
| 659 | break; | ||
| 660 | |||
| 661 | addr = kmalloc(sizeof(*addr), GFP_KERNEL); | ||
| 662 | if (!addr) | ||
| 663 | break; | ||
| 664 | memcpy(addr, &sas, sizeof(*addr)); | ||
| 665 | dlm_local_addr[dlm_local_count++] = addr; | ||
| 666 | } | ||
| 667 | } | ||
| 668 | |||
| 669 | /* Initialise SCTP socket and bind to all interfaces */ | ||
| 670 | static int init_sock(void) | ||
| 671 | { | ||
| 672 | mm_segment_t fs; | ||
| 673 | struct socket *sock = NULL; | ||
| 674 | struct sockaddr_storage localaddr; | ||
| 675 | struct sctp_event_subscribe subscribe; | ||
| 676 | int result = -EINVAL, num = 1, i, addr_len; | ||
| 677 | |||
| 678 | if (!dlm_local_count) { | ||
| 679 | init_local(); | ||
| 680 | if (!dlm_local_count) { | ||
| 681 | log_print("no local IP address has been set"); | ||
| 682 | goto out; | ||
| 683 | } | ||
| 684 | } | ||
| 685 | |||
| 686 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET, | ||
| 687 | IPPROTO_SCTP, &sock); | ||
| 688 | if (result < 0) { | ||
| 689 | log_print("Can't create comms socket, check SCTP is loaded"); | ||
| 690 | goto out; | ||
| 691 | } | ||
| 692 | |||
| 693 | /* Listen for events */ | ||
| 694 | memset(&subscribe, 0, sizeof(subscribe)); | ||
| 695 | subscribe.sctp_data_io_event = 1; | ||
| 696 | subscribe.sctp_association_event = 1; | ||
| 697 | subscribe.sctp_send_failure_event = 1; | ||
| 698 | subscribe.sctp_shutdown_event = 1; | ||
| 699 | subscribe.sctp_partial_delivery_event = 1; | ||
| 700 | |||
| 701 | fs = get_fs(); | ||
| 702 | set_fs(get_ds()); | ||
| 703 | result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS, | ||
| 704 | (char *)&subscribe, sizeof(subscribe)); | ||
| 705 | set_fs(fs); | ||
| 706 | |||
| 707 | if (result < 0) { | ||
| 708 | log_print("Failed to set SCTP_EVENTS on socket: result=%d", | ||
| 709 | result); | ||
| 710 | goto create_delsock; | ||
| 711 | } | ||
| 712 | |||
| 713 | /* Init con struct */ | ||
| 714 | sock->sk->sk_user_data = &sctp_con; | ||
| 715 | sctp_con.sock = sock; | ||
| 716 | sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready; | ||
| 717 | |||
| 718 | /* Bind to all interfaces. */ | ||
| 719 | for (i = 0; i < dlm_local_count; i++) { | ||
| 720 | memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); | ||
| 721 | make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len); | ||
| 722 | |||
| 723 | result = add_bind_addr(&localaddr, addr_len, num); | ||
| 724 | if (result) | ||
| 725 | goto create_delsock; | ||
| 726 | ++num; | ||
| 727 | } | ||
| 728 | |||
| 729 | result = sock->ops->listen(sock, 5); | ||
| 730 | if (result < 0) { | ||
| 731 | log_print("Can't set socket listening"); | ||
| 732 | goto create_delsock; | ||
| 733 | } | ||
| 734 | |||
| 735 | return 0; | ||
| 736 | |||
| 737 | create_delsock: | ||
| 738 | sock_release(sock); | ||
| 739 | sctp_con.sock = NULL; | ||
| 740 | out: | ||
| 741 | return result; | ||
| 742 | } | ||
| 743 | |||
| 744 | |||
| 745 | static struct writequeue_entry *new_writequeue_entry(gfp_t allocation) | ||
| 746 | { | ||
| 747 | struct writequeue_entry *entry; | ||
| 748 | |||
| 749 | entry = kmalloc(sizeof(struct writequeue_entry), allocation); | ||
| 750 | if (!entry) | ||
| 751 | return NULL; | ||
| 752 | |||
| 753 | entry->page = alloc_page(allocation); | ||
| 754 | if (!entry->page) { | ||
| 755 | kfree(entry); | ||
| 756 | return NULL; | ||
| 757 | } | ||
| 758 | |||
| 759 | entry->offset = 0; | ||
| 760 | entry->len = 0; | ||
| 761 | entry->end = 0; | ||
| 762 | entry->users = 0; | ||
| 763 | |||
| 764 | return entry; | ||
| 765 | } | ||
| 766 | |||
| 767 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | ||
| 768 | { | ||
| 769 | struct writequeue_entry *e; | ||
| 770 | int offset = 0; | ||
| 771 | int users = 0; | ||
| 772 | struct nodeinfo *ni; | ||
| 773 | |||
| 774 | ni = nodeid2nodeinfo(nodeid, allocation); | ||
| 775 | if (!ni) | ||
| 776 | return NULL; | ||
| 777 | |||
| 778 | spin_lock(&ni->writequeue_lock); | ||
| 779 | e = list_entry(ni->writequeue.prev, struct writequeue_entry, list); | ||
| 780 | if ((&e->list == &ni->writequeue) || | ||
| 781 | (PAGE_CACHE_SIZE - e->end < len)) { | ||
| 782 | e = NULL; | ||
| 783 | } else { | ||
| 784 | offset = e->end; | ||
| 785 | e->end += len; | ||
| 786 | users = e->users++; | ||
| 787 | } | ||
| 788 | spin_unlock(&ni->writequeue_lock); | ||
| 789 | |||
| 790 | if (e) { | ||
| 791 | got_one: | ||
| 792 | if (users == 0) | ||
| 793 | kmap(e->page); | ||
| 794 | *ppc = page_address(e->page) + offset; | ||
| 795 | return e; | ||
| 796 | } | ||
| 797 | |||
| 798 | e = new_writequeue_entry(allocation); | ||
| 799 | if (e) { | ||
| 800 | spin_lock(&ni->writequeue_lock); | ||
| 801 | offset = e->end; | ||
| 802 | e->end += len; | ||
| 803 | e->ni = ni; | ||
| 804 | users = e->users++; | ||
| 805 | list_add_tail(&e->list, &ni->writequeue); | ||
| 806 | spin_unlock(&ni->writequeue_lock); | ||
| 807 | goto got_one; | ||
| 808 | } | ||
| 809 | return NULL; | ||
| 810 | } | ||
| 811 | |||
| 812 | void dlm_lowcomms_commit_buffer(void *arg) | ||
| 813 | { | ||
| 814 | struct writequeue_entry *e = (struct writequeue_entry *) arg; | ||
| 815 | int users; | ||
| 816 | struct nodeinfo *ni = e->ni; | ||
| 817 | |||
| 818 | spin_lock(&ni->writequeue_lock); | ||
| 819 | users = --e->users; | ||
| 820 | if (users) | ||
| 821 | goto out; | ||
| 822 | e->len = e->end - e->offset; | ||
| 823 | kunmap(e->page); | ||
| 824 | spin_unlock(&ni->writequeue_lock); | ||
| 825 | |||
| 826 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 827 | spin_lock_bh(&write_nodes_lock); | ||
| 828 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 829 | spin_unlock_bh(&write_nodes_lock); | ||
| 830 | |||
| 831 | queue_work(send_workqueue, &ni->swork); | ||
| 832 | } | ||
| 833 | return; | ||
| 834 | |||
| 835 | out: | ||
| 836 | spin_unlock(&ni->writequeue_lock); | ||
| 837 | return; | ||
| 838 | } | ||
| 839 | |||
| 840 | static void free_entry(struct writequeue_entry *e) | ||
| 841 | { | ||
| 842 | __free_page(e->page); | ||
| 843 | kfree(e); | ||
| 844 | } | ||
| 845 | |||
| 846 | /* Initiate an SCTP association. In theory we could just use sendmsg() on | ||
| 847 | the first IP address and it should work, but this allows us to set up the | ||
| 848 | association before sending any valuable data that we can't afford to lose. | ||
| 849 | It also keeps the send path clean as it can now always use the association ID */ | ||
| 850 | static void initiate_association(int nodeid) | ||
| 851 | { | ||
| 852 | struct sockaddr_storage rem_addr; | ||
| 853 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 854 | struct msghdr outmessage; | ||
| 855 | struct cmsghdr *cmsg; | ||
| 856 | struct sctp_sndrcvinfo *sinfo; | ||
| 857 | int ret; | ||
| 858 | int addrlen; | ||
| 859 | char buf[1]; | ||
| 860 | struct kvec iov[1]; | ||
| 861 | struct nodeinfo *ni; | ||
| 862 | |||
| 863 | log_print("Initiating association with node %d", nodeid); | ||
| 864 | |||
| 865 | ni = nodeid2nodeinfo(nodeid, GFP_KERNEL); | ||
| 866 | if (!ni) | ||
| 867 | return; | ||
| 868 | |||
| 869 | if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) { | ||
| 870 | log_print("no address for nodeid %d", nodeid); | ||
| 871 | return; | ||
| 872 | } | ||
| 873 | |||
| 874 | make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen); | ||
| 875 | |||
| 876 | outmessage.msg_name = &rem_addr; | ||
| 877 | outmessage.msg_namelen = addrlen; | ||
| 878 | outmessage.msg_control = outcmsg; | ||
| 879 | outmessage.msg_controllen = sizeof(outcmsg); | ||
| 880 | outmessage.msg_flags = MSG_EOR; | ||
| 881 | |||
| 882 | iov[0].iov_base = buf; | ||
| 883 | iov[0].iov_len = 1; | ||
| 884 | |||
| 885 | /* Real INIT messages seem to cause trouble. Just send a 1 byte message | ||
| 886 | we can afford to lose */ | ||
| 887 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
| 888 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
| 889 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
| 890 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
| 891 | sinfo = CMSG_DATA(cmsg); | ||
| 892 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
| 893 | sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid); | ||
| 894 | |||
| 895 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
| 896 | ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1); | ||
| 897 | if (ret < 0) { | ||
| 898 | log_print("send INIT to node failed: %d", ret); | ||
| 899 | /* Try again later */ | ||
| 900 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
| 901 | } | ||
| 902 | } | ||
| 903 | |||
| 904 | /* Send a message */ | ||
| 905 | static void send_to_sock(struct nodeinfo *ni) | ||
| 906 | { | ||
| 907 | int ret = 0; | ||
| 908 | struct writequeue_entry *e; | ||
| 909 | int len, offset; | ||
| 910 | struct msghdr outmsg; | ||
| 911 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 912 | struct cmsghdr *cmsg; | ||
| 913 | struct sctp_sndrcvinfo *sinfo; | ||
| 914 | struct kvec iov; | ||
| 915 | |||
| 916 | /* See if we need to init an association before we start | ||
| 917 | sending precious messages */ | ||
| 918 | spin_lock(&ni->lock); | ||
| 919 | if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) { | ||
| 920 | spin_unlock(&ni->lock); | ||
| 921 | initiate_association(ni->nodeid); | ||
| 922 | return; | ||
| 923 | } | ||
| 924 | spin_unlock(&ni->lock); | ||
| 925 | |||
| 926 | outmsg.msg_name = NULL; /* We use assoc_id */ | ||
| 927 | outmsg.msg_namelen = 0; | ||
| 928 | outmsg.msg_control = outcmsg; | ||
| 929 | outmsg.msg_controllen = sizeof(outcmsg); | ||
| 930 | outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR; | ||
| 931 | |||
| 932 | cmsg = CMSG_FIRSTHDR(&outmsg); | ||
| 933 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
| 934 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
| 935 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
| 936 | sinfo = CMSG_DATA(cmsg); | ||
| 937 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
| 938 | sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid); | ||
| 939 | sinfo->sinfo_assoc_id = ni->assoc_id; | ||
| 940 | outmsg.msg_controllen = cmsg->cmsg_len; | ||
| 941 | |||
| 942 | spin_lock(&ni->writequeue_lock); | ||
| 943 | for (;;) { | ||
| 944 | if (list_empty(&ni->writequeue)) | ||
| 945 | break; | ||
| 946 | e = list_entry(ni->writequeue.next, struct writequeue_entry, | ||
| 947 | list); | ||
| 948 | len = e->len; | ||
| 949 | offset = e->offset; | ||
| 950 | BUG_ON(len == 0 && e->users == 0); | ||
| 951 | spin_unlock(&ni->writequeue_lock); | ||
| 952 | kmap(e->page); | ||
| 953 | |||
| 954 | ret = 0; | ||
| 955 | if (len) { | ||
| 956 | iov.iov_base = page_address(e->page)+offset; | ||
| 957 | iov.iov_len = len; | ||
| 958 | |||
| 959 | ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1, | ||
| 960 | len); | ||
| 961 | if (ret == -EAGAIN) { | ||
| 962 | sctp_con.eagain_flag = 1; | ||
| 963 | goto out; | ||
| 964 | } else if (ret < 0) | ||
| 965 | goto send_error; | ||
| 966 | } else { | ||
| 967 | /* Don't starve people filling buffers */ | ||
| 968 | cond_resched(); | ||
| 969 | } | ||
| 970 | |||
| 971 | spin_lock(&ni->writequeue_lock); | ||
| 972 | e->offset += ret; | ||
| 973 | e->len -= ret; | ||
| 974 | |||
| 975 | if (e->len == 0 && e->users == 0) { | ||
| 976 | list_del(&e->list); | ||
| 977 | kunmap(e->page); | ||
| 978 | free_entry(e); | ||
| 979 | continue; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | spin_unlock(&ni->writequeue_lock); | ||
| 983 | out: | ||
| 984 | return; | ||
| 985 | |||
| 986 | send_error: | ||
| 987 | log_print("Error sending to node %d %d", ni->nodeid, ret); | ||
| 988 | spin_lock(&ni->lock); | ||
| 989 | if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) { | ||
| 990 | ni->assoc_id = 0; | ||
| 991 | spin_unlock(&ni->lock); | ||
| 992 | initiate_association(ni->nodeid); | ||
| 993 | } else | ||
| 994 | spin_unlock(&ni->lock); | ||
| 995 | |||
| 996 | return; | ||
| 997 | } | ||
| 998 | |||
| 999 | /* Try to send any messages that are pending */ | ||
| 1000 | static void process_output_queue(void) | ||
| 1001 | { | ||
| 1002 | struct list_head *list; | ||
| 1003 | struct list_head *temp; | ||
| 1004 | |||
| 1005 | spin_lock_bh(&write_nodes_lock); | ||
| 1006 | list_for_each_safe(list, temp, &write_nodes) { | ||
| 1007 | struct nodeinfo *ni = | ||
| 1008 | list_entry(list, struct nodeinfo, write_list); | ||
| 1009 | clear_bit(NI_WRITE_PENDING, &ni->flags); | ||
| 1010 | list_del(&ni->write_list); | ||
| 1011 | |||
| 1012 | spin_unlock_bh(&write_nodes_lock); | ||
| 1013 | |||
| 1014 | send_to_sock(ni); | ||
| 1015 | spin_lock_bh(&write_nodes_lock); | ||
| 1016 | } | ||
| 1017 | spin_unlock_bh(&write_nodes_lock); | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | /* Called after we've had -EAGAIN and been woken up */ | ||
| 1021 | static void refill_write_queue(void) | ||
| 1022 | { | ||
| 1023 | int i; | ||
| 1024 | |||
| 1025 | for (i=1; i<=max_nodeid; i++) { | ||
| 1026 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
| 1027 | |||
| 1028 | if (ni) { | ||
| 1029 | if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) { | ||
| 1030 | spin_lock_bh(&write_nodes_lock); | ||
| 1031 | list_add_tail(&ni->write_list, &write_nodes); | ||
| 1032 | spin_unlock_bh(&write_nodes_lock); | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | } | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | static void clean_one_writequeue(struct nodeinfo *ni) | ||
| 1039 | { | ||
| 1040 | struct list_head *list; | ||
| 1041 | struct list_head *temp; | ||
| 1042 | |||
| 1043 | spin_lock(&ni->writequeue_lock); | ||
| 1044 | list_for_each_safe(list, temp, &ni->writequeue) { | ||
| 1045 | struct writequeue_entry *e = | ||
| 1046 | list_entry(list, struct writequeue_entry, list); | ||
| 1047 | list_del(&e->list); | ||
| 1048 | free_entry(e); | ||
| 1049 | } | ||
| 1050 | spin_unlock(&ni->writequeue_lock); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | static void clean_writequeues(void) | ||
| 1054 | { | ||
| 1055 | int i; | ||
| 1056 | |||
| 1057 | for (i=1; i<=max_nodeid; i++) { | ||
| 1058 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
| 1059 | if (ni) | ||
| 1060 | clean_one_writequeue(ni); | ||
| 1061 | } | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | |||
| 1065 | static void dealloc_nodeinfo(void) | ||
| 1066 | { | ||
| 1067 | int i; | ||
| 1068 | |||
| 1069 | for (i=1; i<=max_nodeid; i++) { | ||
| 1070 | struct nodeinfo *ni = nodeid2nodeinfo(i, 0); | ||
| 1071 | if (ni) { | ||
| 1072 | idr_remove(&nodeinfo_idr, i); | ||
| 1073 | kfree(ni); | ||
| 1074 | } | ||
| 1075 | } | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | int dlm_lowcomms_close(int nodeid) | ||
| 1079 | { | ||
| 1080 | struct nodeinfo *ni; | ||
| 1081 | |||
| 1082 | ni = nodeid2nodeinfo(nodeid, 0); | ||
| 1083 | if (!ni) | ||
| 1084 | return -1; | ||
| 1085 | |||
| 1086 | spin_lock(&ni->lock); | ||
| 1087 | if (ni->assoc_id) { | ||
| 1088 | ni->assoc_id = 0; | ||
| 1089 | /* Don't send shutdown here, sctp will just queue it | ||
| 1090 | till the node comes back up! */ | ||
| 1091 | } | ||
| 1092 | spin_unlock(&ni->lock); | ||
| 1093 | |||
| 1094 | clean_one_writequeue(ni); | ||
| 1095 | clear_bit(NI_INIT_PENDING, &ni->flags); | ||
| 1096 | return 0; | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | // PJC: The work queue function for receiving. | ||
| 1100 | static void process_recv_sockets(struct work_struct *work) | ||
| 1101 | { | ||
| 1102 | if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) { | ||
| 1103 | int ret; | ||
| 1104 | int count = 0; | ||
| 1105 | |||
| 1106 | do { | ||
| 1107 | ret = receive_from_sock(); | ||
| 1108 | |||
| 1109 | /* Don't starve out everyone else */ | ||
| 1110 | if (++count >= MAX_RX_MSG_COUNT) { | ||
| 1111 | cond_resched(); | ||
| 1112 | count = 0; | ||
| 1113 | } | ||
| 1114 | } while (!kthread_should_stop() && ret >=0); | ||
| 1115 | } | ||
| 1116 | cond_resched(); | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | // PJC: the work queue function for sending | ||
| 1120 | static void process_send_sockets(struct work_struct *work) | ||
| 1121 | { | ||
| 1122 | if (sctp_con.eagain_flag) { | ||
| 1123 | sctp_con.eagain_flag = 0; | ||
| 1124 | refill_write_queue(); | ||
| 1125 | } | ||
| 1126 | process_output_queue(); | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | // PJC: Process lock requests from a particular node. | ||
| 1130 | // TODO: can we optimise this out on UP ?? | ||
| 1131 | static void process_lock_request(struct work_struct *work) | ||
| 1132 | { | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | static void daemons_stop(void) | ||
| 1136 | { | ||
| 1137 | destroy_workqueue(recv_workqueue); | ||
| 1138 | destroy_workqueue(send_workqueue); | ||
| 1139 | destroy_workqueue(lock_workqueue); | ||
| 1140 | } | ||
| 1141 | |||
| 1142 | static int daemons_start(void) | ||
| 1143 | { | ||
| 1144 | int error; | ||
| 1145 | recv_workqueue = create_workqueue("dlm_recv"); | ||
| 1146 | error = IS_ERR(recv_workqueue); | ||
| 1147 | if (error) { | ||
| 1148 | log_print("can't start dlm_recv %d", error); | ||
| 1149 | return error; | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | send_workqueue = create_singlethread_workqueue("dlm_send"); | ||
| 1153 | error = IS_ERR(send_workqueue); | ||
| 1154 | if (error) { | ||
| 1155 | log_print("can't start dlm_send %d", error); | ||
| 1156 | destroy_workqueue(recv_workqueue); | ||
| 1157 | return error; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | lock_workqueue = create_workqueue("dlm_rlock"); | ||
| 1161 | error = IS_ERR(lock_workqueue); | ||
| 1162 | if (error) { | ||
| 1163 | log_print("can't start dlm_rlock %d", error); | ||
| 1164 | destroy_workqueue(send_workqueue); | ||
| 1165 | destroy_workqueue(recv_workqueue); | ||
| 1166 | return error; | ||
| 1167 | } | ||
| 1168 | |||
| 1169 | return 0; | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | /* | ||
| 1173 | * This is quite likely to sleep... | ||
| 1174 | */ | ||
| 1175 | int dlm_lowcomms_start(void) | ||
| 1176 | { | ||
| 1177 | int error; | ||
| 1178 | |||
| 1179 | INIT_WORK(&sctp_con.work, process_recv_sockets); | ||
| 1180 | |||
| 1181 | error = init_sock(); | ||
| 1182 | if (error) | ||
| 1183 | goto fail_sock; | ||
| 1184 | error = daemons_start(); | ||
| 1185 | if (error) | ||
| 1186 | goto fail_sock; | ||
| 1187 | return 0; | ||
| 1188 | |||
| 1189 | fail_sock: | ||
| 1190 | close_connection(); | ||
| 1191 | return error; | ||
| 1192 | } | ||
| 1193 | |||
| 1194 | void dlm_lowcomms_stop(void) | ||
| 1195 | { | ||
| 1196 | int i; | ||
| 1197 | |||
| 1198 | sctp_con.flags = 0x7; | ||
| 1199 | daemons_stop(); | ||
| 1200 | clean_writequeues(); | ||
| 1201 | close_connection(); | ||
| 1202 | dealloc_nodeinfo(); | ||
| 1203 | max_nodeid = 0; | ||
| 1204 | |||
| 1205 | dlm_local_count = 0; | ||
| 1206 | dlm_local_nodeid = 0; | ||
| 1207 | |||
| 1208 | for (i = 0; i < dlm_local_count; i++) | ||
| 1209 | kfree(dlm_local_addr[i]); | ||
| 1210 | } | ||
| diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms.c index 07e0a122c32f..27970a58d29b 100644 --- a/fs/dlm/lowcomms-tcp.c +++ b/fs/dlm/lowcomms.c | |||
| @@ -36,30 +36,36 @@ | |||
| 36 | * of high load. Also, this way, the sending thread can collect together | 36 | * of high load. Also, this way, the sending thread can collect together | 
| 37 | * messages bound for one node and send them in one block. | 37 | * messages bound for one node and send them in one block. | 
| 38 | * | 38 | * | 
| 39 | * I don't see any problem with the recv thread executing the locking | 39 | * lowcomms will choose to use wither TCP or SCTP as its transport layer | 
| 40 | * code on behalf of remote processes as the locking code is | 40 | * depending on the configuration variable 'protocol'. This should be set | 
| 41 | * short, efficient and never waits. | 41 | * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a | 
| 42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster | ||
| 43 | * for the DLM to function. | ||
| 42 | * | 44 | * | 
| 43 | */ | 45 | */ | 
| 44 | 46 | ||
| 45 | |||
| 46 | #include <asm/ioctls.h> | 47 | #include <asm/ioctls.h> | 
| 47 | #include <net/sock.h> | 48 | #include <net/sock.h> | 
| 48 | #include <net/tcp.h> | 49 | #include <net/tcp.h> | 
| 49 | #include <linux/pagemap.h> | 50 | #include <linux/pagemap.h> | 
| 51 | #include <linux/idr.h> | ||
| 52 | #include <linux/file.h> | ||
| 53 | #include <linux/sctp.h> | ||
| 54 | #include <net/sctp/user.h> | ||
| 50 | 55 | ||
| 51 | #include "dlm_internal.h" | 56 | #include "dlm_internal.h" | 
| 52 | #include "lowcomms.h" | 57 | #include "lowcomms.h" | 
| 53 | #include "midcomms.h" | 58 | #include "midcomms.h" | 
| 54 | #include "config.h" | 59 | #include "config.h" | 
| 55 | 60 | ||
| 61 | #define NEEDED_RMEM (4*1024*1024) | ||
| 62 | |||
| 56 | struct cbuf { | 63 | struct cbuf { | 
| 57 | unsigned int base; | 64 | unsigned int base; | 
| 58 | unsigned int len; | 65 | unsigned int len; | 
| 59 | unsigned int mask; | 66 | unsigned int mask; | 
| 60 | }; | 67 | }; | 
| 61 | 68 | ||
| 62 | #define NODE_INCREMENT 32 | ||
| 63 | static void cbuf_add(struct cbuf *cb, int n) | 69 | static void cbuf_add(struct cbuf *cb, int n) | 
| 64 | { | 70 | { | 
| 65 | cb->len += n; | 71 | cb->len += n; | 
| @@ -88,28 +94,25 @@ static bool cbuf_empty(struct cbuf *cb) | |||
| 88 | return cb->len == 0; | 94 | return cb->len == 0; | 
| 89 | } | 95 | } | 
| 90 | 96 | ||
| 91 | /* Maximum number of incoming messages to process before | ||
| 92 | doing a cond_resched() | ||
| 93 | */ | ||
| 94 | #define MAX_RX_MSG_COUNT 25 | ||
| 95 | |||
| 96 | struct connection { | 97 | struct connection { | 
| 97 | struct socket *sock; /* NULL if not connected */ | 98 | struct socket *sock; /* NULL if not connected */ | 
| 98 | uint32_t nodeid; /* So we know who we are in the list */ | 99 | uint32_t nodeid; /* So we know who we are in the list */ | 
| 99 | struct mutex sock_mutex; | 100 | struct mutex sock_mutex; | 
| 100 | unsigned long flags; /* bit 1,2 = We are on the read/write lists */ | 101 | unsigned long flags; | 
| 101 | #define CF_READ_PENDING 1 | 102 | #define CF_READ_PENDING 1 | 
| 102 | #define CF_WRITE_PENDING 2 | 103 | #define CF_WRITE_PENDING 2 | 
| 103 | #define CF_CONNECT_PENDING 3 | 104 | #define CF_CONNECT_PENDING 3 | 
| 104 | #define CF_IS_OTHERCON 4 | 105 | #define CF_INIT_PENDING 4 | 
| 106 | #define CF_IS_OTHERCON 5 | ||
| 105 | struct list_head writequeue; /* List of outgoing writequeue_entries */ | 107 | struct list_head writequeue; /* List of outgoing writequeue_entries */ | 
| 106 | struct list_head listenlist; /* List of allocated listening sockets */ | ||
| 107 | spinlock_t writequeue_lock; | 108 | spinlock_t writequeue_lock; | 
| 108 | int (*rx_action) (struct connection *); /* What to do when active */ | 109 | int (*rx_action) (struct connection *); /* What to do when active */ | 
| 110 | void (*connect_action) (struct connection *); /* What to do to connect */ | ||
| 109 | struct page *rx_page; | 111 | struct page *rx_page; | 
| 110 | struct cbuf cb; | 112 | struct cbuf cb; | 
| 111 | int retries; | 113 | int retries; | 
| 112 | #define MAX_CONNECT_RETRIES 3 | 114 | #define MAX_CONNECT_RETRIES 3 | 
| 115 | int sctp_assoc; | ||
| 113 | struct connection *othercon; | 116 | struct connection *othercon; | 
| 114 | struct work_struct rwork; /* Receive workqueue */ | 117 | struct work_struct rwork; /* Receive workqueue */ | 
| 115 | struct work_struct swork; /* Send workqueue */ | 118 | struct work_struct swork; /* Send workqueue */ | 
| @@ -127,68 +130,136 @@ struct writequeue_entry { | |||
| 127 | struct connection *con; | 130 | struct connection *con; | 
| 128 | }; | 131 | }; | 
| 129 | 132 | ||
| 130 | static struct sockaddr_storage dlm_local_addr; | 133 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; | 
| 134 | static int dlm_local_count; | ||
| 131 | 135 | ||
| 132 | /* Work queues */ | 136 | /* Work queues */ | 
| 133 | static struct workqueue_struct *recv_workqueue; | 137 | static struct workqueue_struct *recv_workqueue; | 
| 134 | static struct workqueue_struct *send_workqueue; | 138 | static struct workqueue_struct *send_workqueue; | 
| 135 | 139 | ||
| 136 | /* An array of pointers to connections, indexed by NODEID */ | 140 | static DEFINE_IDR(connections_idr); | 
| 137 | static struct connection **connections; | ||
| 138 | static DECLARE_MUTEX(connections_lock); | 141 | static DECLARE_MUTEX(connections_lock); | 
| 142 | static int max_nodeid; | ||
| 139 | static struct kmem_cache *con_cache; | 143 | static struct kmem_cache *con_cache; | 
| 140 | static int conn_array_size; | ||
| 141 | 144 | ||
| 142 | static void process_recv_sockets(struct work_struct *work); | 145 | static void process_recv_sockets(struct work_struct *work); | 
| 143 | static void process_send_sockets(struct work_struct *work); | 146 | static void process_send_sockets(struct work_struct *work); | 
| 144 | 147 | ||
| 145 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | 148 | /* | 
| 149 | * If 'allocation' is zero then we don't attempt to create a new | ||
| 150 | * connection structure for this node. | ||
| 151 | */ | ||
| 152 | static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | ||
| 146 | { | 153 | { | 
| 147 | struct connection *con = NULL; | 154 | struct connection *con = NULL; | 
| 155 | int r; | ||
| 156 | int n; | ||
| 148 | 157 | ||
| 149 | down(&connections_lock); | 158 | con = idr_find(&connections_idr, nodeid); | 
| 150 | if (nodeid >= conn_array_size) { | 159 | if (con || !alloc) | 
| 151 | int new_size = nodeid + NODE_INCREMENT; | 160 | return con; | 
| 152 | struct connection **new_conns; | ||
| 153 | 161 | ||
| 154 | new_conns = kzalloc(sizeof(struct connection *) * | 162 | r = idr_pre_get(&connections_idr, alloc); | 
| 155 | new_size, allocation); | 163 | if (!r) | 
| 156 | if (!new_conns) | 164 | return NULL; | 
| 157 | goto finish; | 165 | |
| 166 | con = kmem_cache_zalloc(con_cache, alloc); | ||
| 167 | if (!con) | ||
| 168 | return NULL; | ||
| 158 | 169 | ||
| 159 | memcpy(new_conns, connections, sizeof(struct connection *) * conn_array_size); | 170 | r = idr_get_new_above(&connections_idr, con, nodeid, &n); | 
| 160 | conn_array_size = new_size; | 171 | if (r) { | 
| 161 | kfree(connections); | 172 | kmem_cache_free(con_cache, con); | 
| 162 | connections = new_conns; | 173 | return NULL; | 
| 174 | } | ||
| 163 | 175 | ||
| 176 | if (n != nodeid) { | ||
| 177 | idr_remove(&connections_idr, n); | ||
| 178 | kmem_cache_free(con_cache, con); | ||
| 179 | return NULL; | ||
| 164 | } | 180 | } | 
| 165 | 181 | ||
| 166 | con = connections[nodeid]; | 182 | con->nodeid = nodeid; | 
| 167 | if (con == NULL && allocation) { | 183 | mutex_init(&con->sock_mutex); | 
| 168 | con = kmem_cache_zalloc(con_cache, allocation); | 184 | INIT_LIST_HEAD(&con->writequeue); | 
| 169 | if (!con) | 185 | spin_lock_init(&con->writequeue_lock); | 
| 170 | goto finish; | 186 | INIT_WORK(&con->swork, process_send_sockets); | 
| 187 | INIT_WORK(&con->rwork, process_recv_sockets); | ||
| 171 | 188 | ||
| 172 | con->nodeid = nodeid; | 189 | /* Setup action pointers for child sockets */ | 
| 173 | mutex_init(&con->sock_mutex); | 190 | if (con->nodeid) { | 
| 174 | INIT_LIST_HEAD(&con->writequeue); | 191 | struct connection *zerocon = idr_find(&connections_idr, 0); | 
| 175 | spin_lock_init(&con->writequeue_lock); | ||
| 176 | INIT_WORK(&con->swork, process_send_sockets); | ||
| 177 | INIT_WORK(&con->rwork, process_recv_sockets); | ||
| 178 | 192 | ||
| 179 | connections[nodeid] = con; | 193 | con->connect_action = zerocon->connect_action; | 
| 194 | if (!con->rx_action) | ||
| 195 | con->rx_action = zerocon->rx_action; | ||
| 180 | } | 196 | } | 
| 181 | 197 | ||
| 182 | finish: | 198 | if (nodeid > max_nodeid) | 
| 199 | max_nodeid = nodeid; | ||
| 200 | |||
| 201 | return con; | ||
| 202 | } | ||
| 203 | |||
| 204 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | ||
| 205 | { | ||
| 206 | struct connection *con; | ||
| 207 | |||
| 208 | down(&connections_lock); | ||
| 209 | con = __nodeid2con(nodeid, allocation); | ||
| 183 | up(&connections_lock); | 210 | up(&connections_lock); | 
| 211 | |||
| 184 | return con; | 212 | return con; | 
| 185 | } | 213 | } | 
| 186 | 214 | ||
| 215 | /* This is a bit drastic, but only called when things go wrong */ | ||
| 216 | static struct connection *assoc2con(int assoc_id) | ||
| 217 | { | ||
| 218 | int i; | ||
| 219 | struct connection *con; | ||
| 220 | |||
| 221 | down(&connections_lock); | ||
| 222 | for (i=0; i<=max_nodeid; i++) { | ||
| 223 | con = __nodeid2con(i, 0); | ||
| 224 | if (con && con->sctp_assoc == assoc_id) { | ||
| 225 | up(&connections_lock); | ||
| 226 | return con; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | up(&connections_lock); | ||
| 230 | return NULL; | ||
| 231 | } | ||
| 232 | |||
| 233 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | ||
| 234 | { | ||
| 235 | struct sockaddr_storage addr; | ||
| 236 | int error; | ||
| 237 | |||
| 238 | if (!dlm_local_count) | ||
| 239 | return -1; | ||
| 240 | |||
| 241 | error = dlm_nodeid_to_addr(nodeid, &addr); | ||
| 242 | if (error) | ||
| 243 | return error; | ||
| 244 | |||
| 245 | if (dlm_local_addr[0]->ss_family == AF_INET) { | ||
| 246 | struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; | ||
| 247 | struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; | ||
| 248 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | ||
| 249 | } else { | ||
| 250 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | ||
| 251 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | ||
| 252 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | ||
| 253 | sizeof(in6->sin6_addr)); | ||
| 254 | } | ||
| 255 | |||
| 256 | return 0; | ||
| 257 | } | ||
| 258 | |||
| 187 | /* Data available on socket or listen socket received a connect */ | 259 | /* Data available on socket or listen socket received a connect */ | 
| 188 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | 260 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | 
| 189 | { | 261 | { | 
| 190 | struct connection *con = sock2con(sk); | 262 | struct connection *con = sock2con(sk); | 
| 191 | |||
| 192 | if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) | 263 | if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) | 
| 193 | queue_work(recv_workqueue, &con->rwork); | 264 | queue_work(recv_workqueue, &con->rwork); | 
| 194 | } | 265 | } | 
| @@ -222,20 +293,21 @@ static int add_sock(struct socket *sock, struct connection *con) | |||
| 222 | con->sock->sk->sk_data_ready = lowcomms_data_ready; | 293 | con->sock->sk->sk_data_ready = lowcomms_data_ready; | 
| 223 | con->sock->sk->sk_write_space = lowcomms_write_space; | 294 | con->sock->sk->sk_write_space = lowcomms_write_space; | 
| 224 | con->sock->sk->sk_state_change = lowcomms_state_change; | 295 | con->sock->sk->sk_state_change = lowcomms_state_change; | 
| 225 | 296 | con->sock->sk->sk_user_data = con; | |
| 226 | return 0; | 297 | return 0; | 
| 227 | } | 298 | } | 
| 228 | 299 | ||
| 229 | /* Add the port number to an IP6 or 4 sockaddr and return the address | 300 | /* Add the port number to an IPv6 or 4 sockaddr and return the address | 
| 230 | length */ | 301 | length */ | 
| 231 | static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, | 302 | static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, | 
| 232 | int *addr_len) | 303 | int *addr_len) | 
| 233 | { | 304 | { | 
| 234 | saddr->ss_family = dlm_local_addr.ss_family; | 305 | saddr->ss_family = dlm_local_addr[0]->ss_family; | 
| 235 | if (saddr->ss_family == AF_INET) { | 306 | if (saddr->ss_family == AF_INET) { | 
| 236 | struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; | 307 | struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; | 
| 237 | in4_addr->sin_port = cpu_to_be16(port); | 308 | in4_addr->sin_port = cpu_to_be16(port); | 
| 238 | *addr_len = sizeof(struct sockaddr_in); | 309 | *addr_len = sizeof(struct sockaddr_in); | 
| 310 | memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero)); | ||
| 239 | } else { | 311 | } else { | 
| 240 | struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; | 312 | struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr; | 
| 241 | in6_addr->sin6_port = cpu_to_be16(port); | 313 | in6_addr->sin6_port = cpu_to_be16(port); | 
| @@ -264,6 +336,193 @@ static void close_connection(struct connection *con, bool and_other) | |||
| 264 | mutex_unlock(&con->sock_mutex); | 336 | mutex_unlock(&con->sock_mutex); | 
| 265 | } | 337 | } | 
| 266 | 338 | ||
| 339 | /* We only send shutdown messages to nodes that are not part of the cluster */ | ||
| 340 | static void sctp_send_shutdown(sctp_assoc_t associd) | ||
| 341 | { | ||
| 342 | static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 343 | struct msghdr outmessage; | ||
| 344 | struct cmsghdr *cmsg; | ||
| 345 | struct sctp_sndrcvinfo *sinfo; | ||
| 346 | int ret; | ||
| 347 | struct connection *con; | ||
| 348 | |||
| 349 | con = nodeid2con(0,0); | ||
| 350 | BUG_ON(con == NULL); | ||
| 351 | |||
| 352 | outmessage.msg_name = NULL; | ||
| 353 | outmessage.msg_namelen = 0; | ||
| 354 | outmessage.msg_control = outcmsg; | ||
| 355 | outmessage.msg_controllen = sizeof(outcmsg); | ||
| 356 | outmessage.msg_flags = MSG_EOR; | ||
| 357 | |||
| 358 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
| 359 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
| 360 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
| 361 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
| 362 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
| 363 | sinfo = CMSG_DATA(cmsg); | ||
| 364 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
| 365 | |||
| 366 | sinfo->sinfo_flags |= MSG_EOF; | ||
| 367 | sinfo->sinfo_assoc_id = associd; | ||
| 368 | |||
| 369 | ret = kernel_sendmsg(con->sock, &outmessage, NULL, 0, 0); | ||
| 370 | |||
| 371 | if (ret != 0) | ||
| 372 | log_print("send EOF to node failed: %d", ret); | ||
| 373 | } | ||
| 374 | |||
| 375 | /* INIT failed but we don't know which node... | ||
| 376 | restart INIT on all pending nodes */ | ||
| 377 | static void sctp_init_failed(void) | ||
| 378 | { | ||
| 379 | int i; | ||
| 380 | struct connection *con; | ||
| 381 | |||
| 382 | down(&connections_lock); | ||
| 383 | for (i=1; i<=max_nodeid; i++) { | ||
| 384 | con = __nodeid2con(i, 0); | ||
| 385 | if (!con) | ||
| 386 | continue; | ||
| 387 | con->sctp_assoc = 0; | ||
| 388 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
| 389 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { | ||
| 390 | queue_work(send_workqueue, &con->swork); | ||
| 391 | } | ||
| 392 | } | ||
| 393 | } | ||
| 394 | up(&connections_lock); | ||
| 395 | } | ||
| 396 | |||
| 397 | /* Something happened to an association */ | ||
| 398 | static void process_sctp_notification(struct connection *con, | ||
| 399 | struct msghdr *msg, char *buf) | ||
| 400 | { | ||
| 401 | union sctp_notification *sn = (union sctp_notification *)buf; | ||
| 402 | |||
| 403 | if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { | ||
| 404 | switch (sn->sn_assoc_change.sac_state) { | ||
| 405 | |||
| 406 | case SCTP_COMM_UP: | ||
| 407 | case SCTP_RESTART: | ||
| 408 | { | ||
| 409 | /* Check that the new node is in the lockspace */ | ||
| 410 | struct sctp_prim prim; | ||
| 411 | int nodeid; | ||
| 412 | int prim_len, ret; | ||
| 413 | int addr_len; | ||
| 414 | struct connection *new_con; | ||
| 415 | struct file *file; | ||
| 416 | sctp_peeloff_arg_t parg; | ||
| 417 | int parglen = sizeof(parg); | ||
| 418 | |||
| 419 | /* | ||
| 420 | * We get this before any data for an association. | ||
| 421 | * We verify that the node is in the cluster and | ||
| 422 | * then peel off a socket for it. | ||
| 423 | */ | ||
| 424 | if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) { | ||
| 425 | log_print("COMM_UP for invalid assoc ID %d", | ||
| 426 | (int)sn->sn_assoc_change.sac_assoc_id); | ||
| 427 | sctp_init_failed(); | ||
| 428 | return; | ||
| 429 | } | ||
| 430 | memset(&prim, 0, sizeof(struct sctp_prim)); | ||
| 431 | prim_len = sizeof(struct sctp_prim); | ||
| 432 | prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id; | ||
| 433 | |||
| 434 | ret = kernel_getsockopt(con->sock, | ||
| 435 | IPPROTO_SCTP, | ||
| 436 | SCTP_PRIMARY_ADDR, | ||
| 437 | (char*)&prim, | ||
| 438 | &prim_len); | ||
| 439 | if (ret < 0) { | ||
| 440 | log_print("getsockopt/sctp_primary_addr on " | ||
| 441 | "new assoc %d failed : %d", | ||
| 442 | (int)sn->sn_assoc_change.sac_assoc_id, | ||
| 443 | ret); | ||
| 444 | |||
| 445 | /* Retry INIT later */ | ||
| 446 | new_con = assoc2con(sn->sn_assoc_change.sac_assoc_id); | ||
| 447 | if (new_con) | ||
| 448 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
| 449 | return; | ||
| 450 | } | ||
| 451 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); | ||
| 452 | if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { | ||
| 453 | int i; | ||
| 454 | unsigned char *b=(unsigned char *)&prim.ssp_addr; | ||
| 455 | log_print("reject connect from unknown addr"); | ||
| 456 | for (i=0; i<sizeof(struct sockaddr_storage);i++) | ||
| 457 | printk("%02x ", b[i]); | ||
| 458 | printk("\n"); | ||
| 459 | sctp_send_shutdown(prim.ssp_assoc_id); | ||
| 460 | return; | ||
| 461 | } | ||
| 462 | |||
| 463 | new_con = nodeid2con(nodeid, GFP_KERNEL); | ||
| 464 | if (!new_con) | ||
| 465 | return; | ||
| 466 | |||
| 467 | /* Peel off a new sock */ | ||
| 468 | parg.associd = sn->sn_assoc_change.sac_assoc_id; | ||
| 469 | ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, | ||
| 470 | SCTP_SOCKOPT_PEELOFF, | ||
| 471 | (void *)&parg, &parglen); | ||
| 472 | if (ret) { | ||
| 473 | log_print("Can't peel off a socket for " | ||
| 474 | "connection %d to node %d: err=%d\n", | ||
| 475 | parg.associd, nodeid, ret); | ||
| 476 | } | ||
| 477 | file = fget(parg.sd); | ||
| 478 | new_con->sock = SOCKET_I(file->f_dentry->d_inode); | ||
| 479 | add_sock(new_con->sock, new_con); | ||
| 480 | fput(file); | ||
| 481 | put_unused_fd(parg.sd); | ||
| 482 | |||
| 483 | log_print("got new/restarted association %d nodeid %d", | ||
| 484 | (int)sn->sn_assoc_change.sac_assoc_id, nodeid); | ||
| 485 | |||
| 486 | /* Send any pending writes */ | ||
| 487 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); | ||
| 488 | clear_bit(CF_INIT_PENDING, &con->flags); | ||
| 489 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { | ||
| 490 | queue_work(send_workqueue, &new_con->swork); | ||
| 491 | } | ||
| 492 | if (!test_and_set_bit(CF_READ_PENDING, &new_con->flags)) | ||
| 493 | queue_work(recv_workqueue, &new_con->rwork); | ||
| 494 | } | ||
| 495 | break; | ||
| 496 | |||
| 497 | case SCTP_COMM_LOST: | ||
| 498 | case SCTP_SHUTDOWN_COMP: | ||
| 499 | { | ||
| 500 | con = assoc2con(sn->sn_assoc_change.sac_assoc_id); | ||
| 501 | if (con) { | ||
| 502 | con->sctp_assoc = 0; | ||
| 503 | } | ||
| 504 | } | ||
| 505 | break; | ||
| 506 | |||
| 507 | /* We don't know which INIT failed, so clear the PENDING flags | ||
| 508 | * on them all. if assoc_id is zero then it will then try | ||
| 509 | * again */ | ||
| 510 | |||
| 511 | case SCTP_CANT_STR_ASSOC: | ||
| 512 | { | ||
| 513 | log_print("Can't start SCTP association - retrying"); | ||
| 514 | sctp_init_failed(); | ||
| 515 | } | ||
| 516 | break; | ||
| 517 | |||
| 518 | default: | ||
| 519 | log_print("unexpected SCTP assoc change id=%d state=%d", | ||
| 520 | (int)sn->sn_assoc_change.sac_assoc_id, | ||
| 521 | sn->sn_assoc_change.sac_state); | ||
| 522 | } | ||
| 523 | } | ||
| 524 | } | ||
| 525 | |||
| 267 | /* Data received from remote end */ | 526 | /* Data received from remote end */ | 
| 268 | static int receive_from_sock(struct connection *con) | 527 | static int receive_from_sock(struct connection *con) | 
| 269 | { | 528 | { | 
| @@ -274,6 +533,7 @@ static int receive_from_sock(struct connection *con) | |||
| 274 | int r; | 533 | int r; | 
| 275 | int call_again_soon = 0; | 534 | int call_again_soon = 0; | 
| 276 | int nvec; | 535 | int nvec; | 
| 536 | char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 277 | 537 | ||
| 278 | mutex_lock(&con->sock_mutex); | 538 | mutex_lock(&con->sock_mutex); | 
| 279 | 539 | ||
| @@ -293,12 +553,18 @@ static int receive_from_sock(struct connection *con) | |||
| 293 | cbuf_init(&con->cb, PAGE_CACHE_SIZE); | 553 | cbuf_init(&con->cb, PAGE_CACHE_SIZE); | 
| 294 | } | 554 | } | 
| 295 | 555 | ||
| 556 | /* Only SCTP needs these really */ | ||
| 557 | memset(&incmsg, 0, sizeof(incmsg)); | ||
| 558 | msg.msg_control = incmsg; | ||
| 559 | msg.msg_controllen = sizeof(incmsg); | ||
| 560 | |||
| 296 | /* | 561 | /* | 
| 297 | * iov[0] is the bit of the circular buffer between the current end | 562 | * iov[0] is the bit of the circular buffer between the current end | 
| 298 | * point (cb.base + cb.len) and the end of the buffer. | 563 | * point (cb.base + cb.len) and the end of the buffer. | 
| 299 | */ | 564 | */ | 
| 300 | iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); | 565 | iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); | 
| 301 | iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); | 566 | iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); | 
| 567 | iov[1].iov_len = 0; | ||
| 302 | nvec = 1; | 568 | nvec = 1; | 
| 303 | 569 | ||
| 304 | /* | 570 | /* | 
| @@ -315,11 +581,20 @@ static int receive_from_sock(struct connection *con) | |||
| 315 | 581 | ||
| 316 | r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len, | 582 | r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len, | 
| 317 | MSG_DONTWAIT | MSG_NOSIGNAL); | 583 | MSG_DONTWAIT | MSG_NOSIGNAL); | 
| 318 | |||
| 319 | if (ret <= 0) | 584 | if (ret <= 0) | 
| 320 | goto out_close; | 585 | goto out_close; | 
| 321 | if (ret == -EAGAIN) | 586 | |
| 322 | goto out_resched; | 587 | /* Process SCTP notifications */ | 
| 588 | if (msg.msg_flags & MSG_NOTIFICATION) { | ||
| 589 | msg.msg_control = incmsg; | ||
| 590 | msg.msg_controllen = sizeof(incmsg); | ||
| 591 | |||
| 592 | process_sctp_notification(con, &msg, | ||
| 593 | page_address(con->rx_page) + con->cb.base); | ||
| 594 | mutex_unlock(&con->sock_mutex); | ||
| 595 | return 0; | ||
| 596 | } | ||
| 597 | BUG_ON(con->nodeid == 0); | ||
| 323 | 598 | ||
| 324 | if (ret == len) | 599 | if (ret == len) | 
| 325 | call_again_soon = 1; | 600 | call_again_soon = 1; | 
| @@ -329,10 +604,10 @@ static int receive_from_sock(struct connection *con) | |||
| 329 | con->cb.base, con->cb.len, | 604 | con->cb.base, con->cb.len, | 
| 330 | PAGE_CACHE_SIZE); | 605 | PAGE_CACHE_SIZE); | 
| 331 | if (ret == -EBADMSG) { | 606 | if (ret == -EBADMSG) { | 
| 332 | printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, " | 607 | log_print("lowcomms: addr=%p, base=%u, len=%u, " | 
| 333 | "iov_len=%u, iov_base[0]=%p, read=%d\n", | 608 | "iov_len=%u, iov_base[0]=%p, read=%d", | 
| 334 | page_address(con->rx_page), con->cb.base, con->cb.len, | 609 | page_address(con->rx_page), con->cb.base, con->cb.len, | 
| 335 | len, iov[0].iov_base, r); | 610 | len, iov[0].iov_base, r); | 
| 336 | } | 611 | } | 
| 337 | if (ret < 0) | 612 | if (ret < 0) | 
| 338 | goto out_close; | 613 | goto out_close; | 
| @@ -368,7 +643,7 @@ out_close: | |||
| 368 | } | 643 | } | 
| 369 | 644 | ||
| 370 | /* Listening socket is busy, accept a connection */ | 645 | /* Listening socket is busy, accept a connection */ | 
| 371 | static int accept_from_sock(struct connection *con) | 646 | static int tcp_accept_from_sock(struct connection *con) | 
| 372 | { | 647 | { | 
| 373 | int result; | 648 | int result; | 
| 374 | struct sockaddr_storage peeraddr; | 649 | struct sockaddr_storage peeraddr; | 
| @@ -379,7 +654,7 @@ static int accept_from_sock(struct connection *con) | |||
| 379 | struct connection *addcon; | 654 | struct connection *addcon; | 
| 380 | 655 | ||
| 381 | memset(&peeraddr, 0, sizeof(peeraddr)); | 656 | memset(&peeraddr, 0, sizeof(peeraddr)); | 
| 382 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, | 657 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, | 
| 383 | IPPROTO_TCP, &newsock); | 658 | IPPROTO_TCP, &newsock); | 
| 384 | if (result < 0) | 659 | if (result < 0) | 
| 385 | return -ENOMEM; | 660 | return -ENOMEM; | 
| @@ -408,7 +683,7 @@ static int accept_from_sock(struct connection *con) | |||
| 408 | /* Get the new node's NODEID */ | 683 | /* Get the new node's NODEID */ | 
| 409 | make_sockaddr(&peeraddr, 0, &len); | 684 | make_sockaddr(&peeraddr, 0, &len); | 
| 410 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { | 685 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { | 
| 411 | printk("dlm: connect from non cluster node\n"); | 686 | log_print("connect from non cluster node"); | 
| 412 | sock_release(newsock); | 687 | sock_release(newsock); | 
| 413 | mutex_unlock(&con->sock_mutex); | 688 | mutex_unlock(&con->sock_mutex); | 
| 414 | return -1; | 689 | return -1; | 
| @@ -419,7 +694,6 @@ static int accept_from_sock(struct connection *con) | |||
| 419 | /* Check to see if we already have a connection to this node. This | 694 | /* Check to see if we already have a connection to this node. This | 
| 420 | * could happen if the two nodes initiate a connection at roughly | 695 | * could happen if the two nodes initiate a connection at roughly | 
| 421 | * the same time and the connections cross on the wire. | 696 | * the same time and the connections cross on the wire. | 
| 422 | * TEMPORARY FIX: | ||
| 423 | * In this case we store the incoming one in "othercon" | 697 | * In this case we store the incoming one in "othercon" | 
| 424 | */ | 698 | */ | 
| 425 | newcon = nodeid2con(nodeid, GFP_KERNEL); | 699 | newcon = nodeid2con(nodeid, GFP_KERNEL); | 
| @@ -434,7 +708,7 @@ static int accept_from_sock(struct connection *con) | |||
| 434 | if (!othercon) { | 708 | if (!othercon) { | 
| 435 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); | 709 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); | 
| 436 | if (!othercon) { | 710 | if (!othercon) { | 
| 437 | printk("dlm: failed to allocate incoming socket\n"); | 711 | log_print("failed to allocate incoming socket"); | 
| 438 | mutex_unlock(&newcon->sock_mutex); | 712 | mutex_unlock(&newcon->sock_mutex); | 
| 439 | result = -ENOMEM; | 713 | result = -ENOMEM; | 
| 440 | goto accept_err; | 714 | goto accept_err; | 
| @@ -477,12 +751,107 @@ accept_err: | |||
| 477 | sock_release(newsock); | 751 | sock_release(newsock); | 
| 478 | 752 | ||
| 479 | if (result != -EAGAIN) | 753 | if (result != -EAGAIN) | 
| 480 | printk("dlm: error accepting connection from node: %d\n", result); | 754 | log_print("error accepting connection from node: %d", result); | 
| 481 | return result; | 755 | return result; | 
| 482 | } | 756 | } | 
| 483 | 757 | ||
| 758 | static void free_entry(struct writequeue_entry *e) | ||
| 759 | { | ||
| 760 | __free_page(e->page); | ||
| 761 | kfree(e); | ||
| 762 | } | ||
| 763 | |||
| 764 | /* Initiate an SCTP association. | ||
| 765 | This is a special case of send_to_sock() in that we don't yet have a | ||
| 766 | peeled-off socket for this association, so we use the listening socket | ||
| 767 | and add the primary IP address of the remote node. | ||
| 768 | */ | ||
| 769 | static void sctp_init_assoc(struct connection *con) | ||
| 770 | { | ||
| 771 | struct sockaddr_storage rem_addr; | ||
| 772 | char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; | ||
| 773 | struct msghdr outmessage; | ||
| 774 | struct cmsghdr *cmsg; | ||
| 775 | struct sctp_sndrcvinfo *sinfo; | ||
| 776 | struct connection *base_con; | ||
| 777 | struct writequeue_entry *e; | ||
| 778 | int len, offset; | ||
| 779 | int ret; | ||
| 780 | int addrlen; | ||
| 781 | struct kvec iov[1]; | ||
| 782 | |||
| 783 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) | ||
| 784 | return; | ||
| 785 | |||
| 786 | if (con->retries++ > MAX_CONNECT_RETRIES) | ||
| 787 | return; | ||
| 788 | |||
| 789 | log_print("Initiating association with node %d", con->nodeid); | ||
| 790 | |||
| 791 | if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { | ||
| 792 | log_print("no address for nodeid %d", con->nodeid); | ||
| 793 | return; | ||
| 794 | } | ||
| 795 | base_con = nodeid2con(0, 0); | ||
| 796 | BUG_ON(base_con == NULL); | ||
| 797 | |||
| 798 | make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen); | ||
| 799 | |||
| 800 | outmessage.msg_name = &rem_addr; | ||
| 801 | outmessage.msg_namelen = addrlen; | ||
| 802 | outmessage.msg_control = outcmsg; | ||
| 803 | outmessage.msg_controllen = sizeof(outcmsg); | ||
| 804 | outmessage.msg_flags = MSG_EOR; | ||
| 805 | |||
| 806 | spin_lock(&con->writequeue_lock); | ||
| 807 | e = list_entry(con->writequeue.next, struct writequeue_entry, | ||
| 808 | list); | ||
| 809 | |||
| 810 | BUG_ON((struct list_head *) e == &con->writequeue); | ||
| 811 | |||
| 812 | len = e->len; | ||
| 813 | offset = e->offset; | ||
| 814 | spin_unlock(&con->writequeue_lock); | ||
| 815 | kmap(e->page); | ||
| 816 | |||
| 817 | /* Send the first block off the write queue */ | ||
| 818 | iov[0].iov_base = page_address(e->page)+offset; | ||
| 819 | iov[0].iov_len = len; | ||
| 820 | |||
| 821 | cmsg = CMSG_FIRSTHDR(&outmessage); | ||
| 822 | cmsg->cmsg_level = IPPROTO_SCTP; | ||
| 823 | cmsg->cmsg_type = SCTP_SNDRCV; | ||
| 824 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | ||
| 825 | sinfo = CMSG_DATA(cmsg); | ||
| 826 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | ||
| 827 | sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); | ||
| 828 | outmessage.msg_controllen = cmsg->cmsg_len; | ||
| 829 | |||
| 830 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); | ||
| 831 | if (ret < 0) { | ||
| 832 | log_print("Send first packet to node %d failed: %d", | ||
| 833 | con->nodeid, ret); | ||
| 834 | |||
| 835 | /* Try again later */ | ||
| 836 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
| 837 | clear_bit(CF_INIT_PENDING, &con->flags); | ||
| 838 | } | ||
| 839 | else { | ||
| 840 | spin_lock(&con->writequeue_lock); | ||
| 841 | e->offset += ret; | ||
| 842 | e->len -= ret; | ||
| 843 | |||
| 844 | if (e->len == 0 && e->users == 0) { | ||
| 845 | list_del(&e->list); | ||
| 846 | kunmap(e->page); | ||
| 847 | free_entry(e); | ||
| 848 | } | ||
| 849 | spin_unlock(&con->writequeue_lock); | ||
| 850 | } | ||
| 851 | } | ||
| 852 | |||
| 484 | /* Connect a new socket to its peer */ | 853 | /* Connect a new socket to its peer */ | 
| 485 | static void connect_to_sock(struct connection *con) | 854 | static void tcp_connect_to_sock(struct connection *con) | 
| 486 | { | 855 | { | 
| 487 | int result = -EHOSTUNREACH; | 856 | int result = -EHOSTUNREACH; | 
| 488 | struct sockaddr_storage saddr; | 857 | struct sockaddr_storage saddr; | 
| @@ -505,7 +874,7 @@ static void connect_to_sock(struct connection *con) | |||
| 505 | } | 874 | } | 
| 506 | 875 | ||
| 507 | /* Create a socket to communicate with */ | 876 | /* Create a socket to communicate with */ | 
| 508 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, | 877 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, | 
| 509 | IPPROTO_TCP, &sock); | 878 | IPPROTO_TCP, &sock); | 
| 510 | if (result < 0) | 879 | if (result < 0) | 
| 511 | goto out_err; | 880 | goto out_err; | 
| @@ -516,11 +885,11 @@ static void connect_to_sock(struct connection *con) | |||
| 516 | 885 | ||
| 517 | sock->sk->sk_user_data = con; | 886 | sock->sk->sk_user_data = con; | 
| 518 | con->rx_action = receive_from_sock; | 887 | con->rx_action = receive_from_sock; | 
| 888 | con->connect_action = tcp_connect_to_sock; | ||
| 889 | add_sock(sock, con); | ||
| 519 | 890 | ||
| 520 | make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); | 891 | make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); | 
| 521 | 892 | ||
| 522 | add_sock(sock, con); | ||
| 523 | |||
| 524 | log_print("connecting to %d", con->nodeid); | 893 | log_print("connecting to %d", con->nodeid); | 
| 525 | result = | 894 | result = | 
| 526 | sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, | 895 | sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, | 
| @@ -550,64 +919,57 @@ out: | |||
| 550 | return; | 919 | return; | 
| 551 | } | 920 | } | 
| 552 | 921 | ||
| 553 | static struct socket *create_listen_sock(struct connection *con, | 922 | static struct socket *tcp_create_listen_sock(struct connection *con, | 
| 554 | struct sockaddr_storage *saddr) | 923 | struct sockaddr_storage *saddr) | 
| 555 | { | 924 | { | 
| 556 | struct socket *sock = NULL; | 925 | struct socket *sock = NULL; | 
| 557 | mm_segment_t fs; | ||
| 558 | int result = 0; | 926 | int result = 0; | 
| 559 | int one = 1; | 927 | int one = 1; | 
| 560 | int addr_len; | 928 | int addr_len; | 
| 561 | 929 | ||
| 562 | if (dlm_local_addr.ss_family == AF_INET) | 930 | if (dlm_local_addr[0]->ss_family == AF_INET) | 
| 563 | addr_len = sizeof(struct sockaddr_in); | 931 | addr_len = sizeof(struct sockaddr_in); | 
| 564 | else | 932 | else | 
| 565 | addr_len = sizeof(struct sockaddr_in6); | 933 | addr_len = sizeof(struct sockaddr_in6); | 
| 566 | 934 | ||
| 567 | /* Create a socket to communicate with */ | 935 | /* Create a socket to communicate with */ | 
| 568 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); | 936 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, | 
| 937 | IPPROTO_TCP, &sock); | ||
| 569 | if (result < 0) { | 938 | if (result < 0) { | 
| 570 | printk("dlm: Can't create listening comms socket\n"); | 939 | log_print("Can't create listening comms socket"); | 
| 571 | goto create_out; | 940 | goto create_out; | 
| 572 | } | 941 | } | 
| 573 | 942 | ||
| 574 | fs = get_fs(); | 943 | result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, | 
| 575 | set_fs(get_ds()); | 944 | (char *)&one, sizeof(one)); | 
| 576 | result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, | 945 | |
| 577 | (char *)&one, sizeof(one)); | ||
| 578 | set_fs(fs); | ||
| 579 | if (result < 0) { | 946 | if (result < 0) { | 
| 580 | printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n", | 947 | log_print("Failed to set SO_REUSEADDR on socket: %d", result); | 
| 581 | result); | ||
| 582 | } | 948 | } | 
| 583 | sock->sk->sk_user_data = con; | 949 | sock->sk->sk_user_data = con; | 
| 584 | con->rx_action = accept_from_sock; | 950 | con->rx_action = tcp_accept_from_sock; | 
| 951 | con->connect_action = tcp_connect_to_sock; | ||
| 585 | con->sock = sock; | 952 | con->sock = sock; | 
| 586 | 953 | ||
| 587 | /* Bind to our port */ | 954 | /* Bind to our port */ | 
| 588 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); | 955 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); | 
| 589 | result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); | 956 | result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); | 
| 590 | if (result < 0) { | 957 | if (result < 0) { | 
| 591 | printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port); | 958 | log_print("Can't bind to port %d", dlm_config.ci_tcp_port); | 
| 592 | sock_release(sock); | 959 | sock_release(sock); | 
| 593 | sock = NULL; | 960 | sock = NULL; | 
| 594 | con->sock = NULL; | 961 | con->sock = NULL; | 
| 595 | goto create_out; | 962 | goto create_out; | 
| 596 | } | 963 | } | 
| 597 | 964 | result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, | |
| 598 | fs = get_fs(); | ||
| 599 | set_fs(get_ds()); | ||
| 600 | |||
| 601 | result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, | ||
| 602 | (char *)&one, sizeof(one)); | 965 | (char *)&one, sizeof(one)); | 
| 603 | set_fs(fs); | ||
| 604 | if (result < 0) { | 966 | if (result < 0) { | 
| 605 | printk("dlm: Set keepalive failed: %d\n", result); | 967 | log_print("Set keepalive failed: %d", result); | 
| 606 | } | 968 | } | 
| 607 | 969 | ||
| 608 | result = sock->ops->listen(sock, 5); | 970 | result = sock->ops->listen(sock, 5); | 
| 609 | if (result < 0) { | 971 | if (result < 0) { | 
| 610 | printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port); | 972 | log_print("Can't listen on port %d", dlm_config.ci_tcp_port); | 
| 611 | sock_release(sock); | 973 | sock_release(sock); | 
| 612 | sock = NULL; | 974 | sock = NULL; | 
| 613 | goto create_out; | 975 | goto create_out; | 
| @@ -617,18 +979,146 @@ create_out: | |||
| 617 | return sock; | 979 | return sock; | 
| 618 | } | 980 | } | 
| 619 | 981 | ||
| 982 | /* Get local addresses */ | ||
| 983 | static void init_local(void) | ||
| 984 | { | ||
| 985 | struct sockaddr_storage sas, *addr; | ||
| 986 | int i; | ||
| 987 | |||
| 988 | dlm_local_count = 0; | ||
| 989 | for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { | ||
| 990 | if (dlm_our_addr(&sas, i)) | ||
| 991 | break; | ||
| 992 | |||
| 993 | addr = kmalloc(sizeof(*addr), GFP_KERNEL); | ||
| 994 | if (!addr) | ||
| 995 | break; | ||
| 996 | memcpy(addr, &sas, sizeof(*addr)); | ||
| 997 | dlm_local_addr[dlm_local_count++] = addr; | ||
| 998 | } | ||
| 999 | } | ||
| 1000 | |||
| 1001 | /* Bind to an IP address. SCTP allows multiple address so it can do | ||
| 1002 | multi-homing */ | ||
| 1003 | static int add_sctp_bind_addr(struct connection *sctp_con, | ||
| 1004 | struct sockaddr_storage *addr, | ||
| 1005 | int addr_len, int num) | ||
| 1006 | { | ||
| 1007 | int result = 0; | ||
| 1008 | |||
| 1009 | if (num == 1) | ||
| 1010 | result = kernel_bind(sctp_con->sock, | ||
| 1011 | (struct sockaddr *) addr, | ||
| 1012 | addr_len); | ||
| 1013 | else | ||
| 1014 | result = kernel_setsockopt(sctp_con->sock, SOL_SCTP, | ||
| 1015 | SCTP_SOCKOPT_BINDX_ADD, | ||
| 1016 | (char *)addr, addr_len); | ||
| 1017 | |||
| 1018 | if (result < 0) | ||
| 1019 | log_print("Can't bind to port %d addr number %d", | ||
| 1020 | dlm_config.ci_tcp_port, num); | ||
| 1021 | |||
| 1022 | return result; | ||
| 1023 | } | ||
| 620 | 1024 | ||
| 621 | /* Listen on all interfaces */ | 1025 | /* Initialise SCTP socket and bind to all interfaces */ | 
| 622 | static int listen_for_all(void) | 1026 | static int sctp_listen_for_all(void) | 
| 1027 | { | ||
| 1028 | struct socket *sock = NULL; | ||
| 1029 | struct sockaddr_storage localaddr; | ||
| 1030 | struct sctp_event_subscribe subscribe; | ||
| 1031 | int result = -EINVAL, num = 1, i, addr_len; | ||
| 1032 | struct connection *con = nodeid2con(0, GFP_KERNEL); | ||
| 1033 | int bufsize = NEEDED_RMEM; | ||
| 1034 | |||
| 1035 | if (!con) | ||
| 1036 | return -ENOMEM; | ||
| 1037 | |||
| 1038 | log_print("Using SCTP for communications"); | ||
| 1039 | |||
| 1040 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET, | ||
| 1041 | IPPROTO_SCTP, &sock); | ||
| 1042 | if (result < 0) { | ||
| 1043 | log_print("Can't create comms socket, check SCTP is loaded"); | ||
| 1044 | goto out; | ||
| 1045 | } | ||
| 1046 | |||
| 1047 | /* Listen for events */ | ||
| 1048 | memset(&subscribe, 0, sizeof(subscribe)); | ||
| 1049 | subscribe.sctp_data_io_event = 1; | ||
| 1050 | subscribe.sctp_association_event = 1; | ||
| 1051 | subscribe.sctp_send_failure_event = 1; | ||
| 1052 | subscribe.sctp_shutdown_event = 1; | ||
| 1053 | subscribe.sctp_partial_delivery_event = 1; | ||
| 1054 | |||
| 1055 | result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, | ||
| 1056 | (char *)&bufsize, sizeof(bufsize)); | ||
| 1057 | if (result) | ||
| 1058 | log_print("Error increasing buffer space on socket %d", result); | ||
| 1059 | |||
| 1060 | result = kernel_setsockopt(sock, SOL_SCTP, SCTP_EVENTS, | ||
| 1061 | (char *)&subscribe, sizeof(subscribe)); | ||
| 1062 | if (result < 0) { | ||
| 1063 | log_print("Failed to set SCTP_EVENTS on socket: result=%d", | ||
| 1064 | result); | ||
| 1065 | goto create_delsock; | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | /* Init con struct */ | ||
| 1069 | sock->sk->sk_user_data = con; | ||
| 1070 | con->sock = sock; | ||
| 1071 | con->sock->sk->sk_data_ready = lowcomms_data_ready; | ||
| 1072 | con->rx_action = receive_from_sock; | ||
| 1073 | con->connect_action = sctp_init_assoc; | ||
| 1074 | |||
| 1075 | /* Bind to all interfaces. */ | ||
| 1076 | for (i = 0; i < dlm_local_count; i++) { | ||
| 1077 | memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); | ||
| 1078 | make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len); | ||
| 1079 | |||
| 1080 | result = add_sctp_bind_addr(con, &localaddr, addr_len, num); | ||
| 1081 | if (result) | ||
| 1082 | goto create_delsock; | ||
| 1083 | ++num; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | result = sock->ops->listen(sock, 5); | ||
| 1087 | if (result < 0) { | ||
| 1088 | log_print("Can't set socket listening"); | ||
| 1089 | goto create_delsock; | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | return 0; | ||
| 1093 | |||
| 1094 | create_delsock: | ||
| 1095 | sock_release(sock); | ||
| 1096 | con->sock = NULL; | ||
| 1097 | out: | ||
| 1098 | return result; | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | static int tcp_listen_for_all(void) | ||
| 623 | { | 1102 | { | 
| 624 | struct socket *sock = NULL; | 1103 | struct socket *sock = NULL; | 
| 625 | struct connection *con = nodeid2con(0, GFP_KERNEL); | 1104 | struct connection *con = nodeid2con(0, GFP_KERNEL); | 
| 626 | int result = -EINVAL; | 1105 | int result = -EINVAL; | 
| 627 | 1106 | ||
| 1107 | if (!con) | ||
| 1108 | return -ENOMEM; | ||
| 1109 | |||
| 628 | /* We don't support multi-homed hosts */ | 1110 | /* We don't support multi-homed hosts */ | 
| 1111 | if (dlm_local_addr[1] != NULL) { | ||
| 1112 | log_print("TCP protocol can't handle multi-homed hosts, " | ||
| 1113 | "try SCTP"); | ||
| 1114 | return -EINVAL; | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | log_print("Using TCP for communications"); | ||
| 1118 | |||
| 629 | set_bit(CF_IS_OTHERCON, &con->flags); | 1119 | set_bit(CF_IS_OTHERCON, &con->flags); | 
| 630 | 1120 | ||
| 631 | sock = create_listen_sock(con, &dlm_local_addr); | 1121 | sock = tcp_create_listen_sock(con, dlm_local_addr[0]); | 
| 632 | if (sock) { | 1122 | if (sock) { | 
| 633 | add_sock(sock, con); | 1123 | add_sock(sock, con); | 
| 634 | result = 0; | 1124 | result = 0; | 
| @@ -666,8 +1156,7 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con, | |||
| 666 | return entry; | 1156 | return entry; | 
| 667 | } | 1157 | } | 
| 668 | 1158 | ||
| 669 | void *dlm_lowcomms_get_buffer(int nodeid, int len, | 1159 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | 
| 670 | gfp_t allocation, char **ppc) | ||
| 671 | { | 1160 | { | 
| 672 | struct connection *con; | 1161 | struct connection *con; | 
| 673 | struct writequeue_entry *e; | 1162 | struct writequeue_entry *e; | 
| @@ -735,12 +1224,6 @@ out: | |||
| 735 | return; | 1224 | return; | 
| 736 | } | 1225 | } | 
| 737 | 1226 | ||
| 738 | static void free_entry(struct writequeue_entry *e) | ||
| 739 | { | ||
| 740 | __free_page(e->page); | ||
| 741 | kfree(e); | ||
| 742 | } | ||
| 743 | |||
| 744 | /* Send a message */ | 1227 | /* Send a message */ | 
| 745 | static void send_to_sock(struct connection *con) | 1228 | static void send_to_sock(struct connection *con) | 
| 746 | { | 1229 | { | 
| @@ -777,8 +1260,7 @@ static void send_to_sock(struct connection *con) | |||
| 777 | goto out; | 1260 | goto out; | 
| 778 | if (ret <= 0) | 1261 | if (ret <= 0) | 
| 779 | goto send_error; | 1262 | goto send_error; | 
| 780 | } | 1263 | } else { | 
| 781 | else { | ||
| 782 | /* Don't starve people filling buffers */ | 1264 | /* Don't starve people filling buffers */ | 
| 783 | cond_resched(); | 1265 | cond_resched(); | 
| 784 | } | 1266 | } | 
| @@ -807,7 +1289,8 @@ send_error: | |||
| 807 | 1289 | ||
| 808 | out_connect: | 1290 | out_connect: | 
| 809 | mutex_unlock(&con->sock_mutex); | 1291 | mutex_unlock(&con->sock_mutex); | 
| 810 | connect_to_sock(con); | 1292 | if (!test_bit(CF_INIT_PENDING, &con->flags)) | 
| 1293 | lowcomms_connect_sock(con); | ||
| 811 | return; | 1294 | return; | 
| 812 | } | 1295 | } | 
| 813 | 1296 | ||
| @@ -832,9 +1315,6 @@ int dlm_lowcomms_close(int nodeid) | |||
| 832 | { | 1315 | { | 
| 833 | struct connection *con; | 1316 | struct connection *con; | 
| 834 | 1317 | ||
| 835 | if (!connections) | ||
| 836 | goto out; | ||
| 837 | |||
| 838 | log_print("closing connection to node %d", nodeid); | 1318 | log_print("closing connection to node %d", nodeid); | 
| 839 | con = nodeid2con(nodeid, 0); | 1319 | con = nodeid2con(nodeid, 0); | 
| 840 | if (con) { | 1320 | if (con) { | 
| @@ -842,12 +1322,9 @@ int dlm_lowcomms_close(int nodeid) | |||
| 842 | close_connection(con, true); | 1322 | close_connection(con, true); | 
| 843 | } | 1323 | } | 
| 844 | return 0; | 1324 | return 0; | 
| 845 | |||
| 846 | out: | ||
| 847 | return -1; | ||
| 848 | } | 1325 | } | 
| 849 | 1326 | ||
| 850 | /* Look for activity on active sockets */ | 1327 | /* Receive workqueue function */ | 
| 851 | static void process_recv_sockets(struct work_struct *work) | 1328 | static void process_recv_sockets(struct work_struct *work) | 
| 852 | { | 1329 | { | 
| 853 | struct connection *con = container_of(work, struct connection, rwork); | 1330 | struct connection *con = container_of(work, struct connection, rwork); | 
| @@ -859,15 +1336,14 @@ static void process_recv_sockets(struct work_struct *work) | |||
| 859 | } while (!err); | 1336 | } while (!err); | 
| 860 | } | 1337 | } | 
| 861 | 1338 | ||
| 862 | 1339 | /* Send workqueue function */ | |
| 863 | static void process_send_sockets(struct work_struct *work) | 1340 | static void process_send_sockets(struct work_struct *work) | 
| 864 | { | 1341 | { | 
| 865 | struct connection *con = container_of(work, struct connection, swork); | 1342 | struct connection *con = container_of(work, struct connection, swork); | 
| 866 | 1343 | ||
| 867 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 1344 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 
| 868 | connect_to_sock(con); | 1345 | con->connect_action(con); | 
| 869 | } | 1346 | } | 
| 870 | |||
| 871 | clear_bit(CF_WRITE_PENDING, &con->flags); | 1347 | clear_bit(CF_WRITE_PENDING, &con->flags); | 
| 872 | send_to_sock(con); | 1348 | send_to_sock(con); | 
| 873 | } | 1349 | } | 
| @@ -878,8 +1354,8 @@ static void clean_writequeues(void) | |||
| 878 | { | 1354 | { | 
| 879 | int nodeid; | 1355 | int nodeid; | 
| 880 | 1356 | ||
| 881 | for (nodeid = 1; nodeid < conn_array_size; nodeid++) { | 1357 | for (nodeid = 1; nodeid <= max_nodeid; nodeid++) { | 
| 882 | struct connection *con = nodeid2con(nodeid, 0); | 1358 | struct connection *con = __nodeid2con(nodeid, 0); | 
| 883 | 1359 | ||
| 884 | if (con) | 1360 | if (con) | 
| 885 | clean_one_writequeue(con); | 1361 | clean_one_writequeue(con); | 
| @@ -916,64 +1392,67 @@ static int work_start(void) | |||
| 916 | void dlm_lowcomms_stop(void) | 1392 | void dlm_lowcomms_stop(void) | 
| 917 | { | 1393 | { | 
| 918 | int i; | 1394 | int i; | 
| 1395 | struct connection *con; | ||
| 919 | 1396 | ||
| 920 | /* Set all the flags to prevent any | 1397 | /* Set all the flags to prevent any | 
| 921 | socket activity. | 1398 | socket activity. | 
| 922 | */ | 1399 | */ | 
| 923 | for (i = 0; i < conn_array_size; i++) { | 1400 | down(&connections_lock); | 
| 924 | if (connections[i]) | 1401 | for (i = 0; i <= max_nodeid; i++) { | 
| 925 | connections[i]->flags |= 0xFF; | 1402 | con = __nodeid2con(i, 0); | 
| 1403 | if (con) | ||
| 1404 | con->flags |= 0xFF; | ||
| 926 | } | 1405 | } | 
| 1406 | up(&connections_lock); | ||
| 927 | 1407 | ||
| 928 | work_stop(); | 1408 | work_stop(); | 
| 1409 | |||
| 1410 | down(&connections_lock); | ||
| 929 | clean_writequeues(); | 1411 | clean_writequeues(); | 
| 930 | 1412 | ||
| 931 | for (i = 0; i < conn_array_size; i++) { | 1413 | for (i = 0; i <= max_nodeid; i++) { | 
| 932 | if (connections[i]) { | 1414 | con = __nodeid2con(i, 0); | 
| 933 | close_connection(connections[i], true); | 1415 | if (con) { | 
| 934 | if (connections[i]->othercon) | 1416 | close_connection(con, true); | 
| 935 | kmem_cache_free(con_cache, connections[i]->othercon); | 1417 | if (con->othercon) | 
| 936 | kmem_cache_free(con_cache, connections[i]); | 1418 | kmem_cache_free(con_cache, con->othercon); | 
| 1419 | kmem_cache_free(con_cache, con); | ||
| 937 | } | 1420 | } | 
| 938 | } | 1421 | } | 
| 939 | 1422 | max_nodeid = 0; | |
| 940 | kfree(connections); | 1423 | up(&connections_lock); | 
| 941 | connections = NULL; | ||
| 942 | |||
| 943 | kmem_cache_destroy(con_cache); | 1424 | kmem_cache_destroy(con_cache); | 
| 1425 | idr_init(&connections_idr); | ||
| 944 | } | 1426 | } | 
| 945 | 1427 | ||
| 946 | /* This is quite likely to sleep... */ | ||
| 947 | int dlm_lowcomms_start(void) | 1428 | int dlm_lowcomms_start(void) | 
| 948 | { | 1429 | { | 
| 949 | int error = 0; | 1430 | int error = -EINVAL; | 
| 950 | 1431 | struct connection *con; | |
| 951 | error = -ENOMEM; | ||
| 952 | connections = kzalloc(sizeof(struct connection *) * | ||
| 953 | NODE_INCREMENT, GFP_KERNEL); | ||
| 954 | if (!connections) | ||
| 955 | goto out; | ||
| 956 | |||
| 957 | conn_array_size = NODE_INCREMENT; | ||
| 958 | 1432 | ||
| 959 | if (dlm_our_addr(&dlm_local_addr, 0)) { | 1433 | init_local(); | 
| 1434 | if (!dlm_local_count) { | ||
| 1435 | error = -ENOTCONN; | ||
| 960 | log_print("no local IP address has been set"); | 1436 | log_print("no local IP address has been set"); | 
| 961 | goto fail_free_conn; | 1437 | goto out; | 
| 962 | } | ||
| 963 | if (!dlm_our_addr(&dlm_local_addr, 1)) { | ||
| 964 | log_print("This dlm comms module does not support multi-homed clustering"); | ||
| 965 | goto fail_free_conn; | ||
| 966 | } | 1438 | } | 
| 967 | 1439 | ||
| 1440 | error = -ENOMEM; | ||
| 968 | con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), | 1441 | con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), | 
| 969 | __alignof__(struct connection), 0, | 1442 | __alignof__(struct connection), 0, | 
| 970 | NULL, NULL); | 1443 | NULL, NULL); | 
| 971 | if (!con_cache) | 1444 | if (!con_cache) | 
| 972 | goto fail_free_conn; | 1445 | goto out; | 
| 973 | 1446 | ||
| 1447 | /* Set some sysctl minima */ | ||
| 1448 | if (sysctl_rmem_max < NEEDED_RMEM) | ||
| 1449 | sysctl_rmem_max = NEEDED_RMEM; | ||
| 974 | 1450 | ||
| 975 | /* Start listening */ | 1451 | /* Start listening */ | 
| 976 | error = listen_for_all(); | 1452 | if (dlm_config.ci_protocol == 0) | 
| 1453 | error = tcp_listen_for_all(); | ||
| 1454 | else | ||
| 1455 | error = sctp_listen_for_all(); | ||
| 977 | if (error) | 1456 | if (error) | 
| 978 | goto fail_unlisten; | 1457 | goto fail_unlisten; | 
| 979 | 1458 | ||
| @@ -984,24 +1463,13 @@ int dlm_lowcomms_start(void) | |||
| 984 | return 0; | 1463 | return 0; | 
| 985 | 1464 | ||
| 986 | fail_unlisten: | 1465 | fail_unlisten: | 
| 987 | close_connection(connections[0], false); | 1466 | con = nodeid2con(0,0); | 
| 988 | kmem_cache_free(con_cache, connections[0]); | 1467 | if (con) { | 
| 1468 | close_connection(con, false); | ||
| 1469 | kmem_cache_free(con_cache, con); | ||
| 1470 | } | ||
| 989 | kmem_cache_destroy(con_cache); | 1471 | kmem_cache_destroy(con_cache); | 
| 990 | 1472 | ||
| 991 | fail_free_conn: | ||
| 992 | kfree(connections); | ||
| 993 | |||
| 994 | out: | 1473 | out: | 
| 995 | return error; | 1474 | return error; | 
| 996 | } | 1475 | } | 
| 997 | |||
| 998 | /* | ||
| 999 | * Overrides for Emacs so that we follow Linus's tabbing style. | ||
| 1000 | * Emacs will notice this stuff at the end of the file and automatically | ||
| 1001 | * adjust the settings for this buffer only. This must remain at the end | ||
| 1002 | * of the file. | ||
| 1003 | * --------------------------------------------------------------------------- | ||
| 1004 | * Local variables: | ||
| 1005 | * c-file-style: "linux" | ||
| 1006 | * End: | ||
| 1007 | */ | ||
| diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 3870150b83a4..b0201ec325a7 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* | 
| 2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved. | 
| 3 | * | 3 | * | 
| 4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, | 
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions | 
| @@ -56,6 +56,7 @@ struct dlm_write_request32 { | |||
| 56 | union { | 56 | union { | 
| 57 | struct dlm_lock_params32 lock; | 57 | struct dlm_lock_params32 lock; | 
| 58 | struct dlm_lspace_params lspace; | 58 | struct dlm_lspace_params lspace; | 
| 59 | struct dlm_purge_params purge; | ||
| 59 | } i; | 60 | } i; | 
| 60 | }; | 61 | }; | 
| 61 | 62 | ||
| @@ -92,6 +93,9 @@ static void compat_input(struct dlm_write_request *kb, | |||
| 92 | kb->i.lspace.flags = kb32->i.lspace.flags; | 93 | kb->i.lspace.flags = kb32->i.lspace.flags; | 
| 93 | kb->i.lspace.minor = kb32->i.lspace.minor; | 94 | kb->i.lspace.minor = kb32->i.lspace.minor; | 
| 94 | strcpy(kb->i.lspace.name, kb32->i.lspace.name); | 95 | strcpy(kb->i.lspace.name, kb32->i.lspace.name); | 
| 96 | } else if (kb->cmd == DLM_USER_PURGE) { | ||
| 97 | kb->i.purge.nodeid = kb32->i.purge.nodeid; | ||
| 98 | kb->i.purge.pid = kb32->i.purge.pid; | ||
| 95 | } else { | 99 | } else { | 
| 96 | kb->i.lock.mode = kb32->i.lock.mode; | 100 | kb->i.lock.mode = kb32->i.lock.mode; | 
| 97 | kb->i.lock.namelen = kb32->i.lock.namelen; | 101 | kb->i.lock.namelen = kb32->i.lock.namelen; | 
| @@ -111,8 +115,6 @@ static void compat_input(struct dlm_write_request *kb, | |||
| 111 | static void compat_output(struct dlm_lock_result *res, | 115 | static void compat_output(struct dlm_lock_result *res, | 
| 112 | struct dlm_lock_result32 *res32) | 116 | struct dlm_lock_result32 *res32) | 
| 113 | { | 117 | { | 
| 114 | res32->length = res->length - (sizeof(struct dlm_lock_result) - | ||
| 115 | sizeof(struct dlm_lock_result32)); | ||
| 116 | res32->user_astaddr = (__u32)(long)res->user_astaddr; | 118 | res32->user_astaddr = (__u32)(long)res->user_astaddr; | 
| 117 | res32->user_astparam = (__u32)(long)res->user_astparam; | 119 | res32->user_astparam = (__u32)(long)res->user_astparam; | 
| 118 | res32->user_lksb = (__u32)(long)res->user_lksb; | 120 | res32->user_lksb = (__u32)(long)res->user_lksb; | 
| @@ -128,35 +130,30 @@ static void compat_output(struct dlm_lock_result *res, | |||
| 128 | } | 130 | } | 
| 129 | #endif | 131 | #endif | 
| 130 | 132 | ||
| 133 | /* we could possibly check if the cancel of an orphan has resulted in the lkb | ||
| 134 | being removed and then remove that lkb from the orphans list and free it */ | ||
| 131 | 135 | ||
| 132 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | 136 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | 
| 133 | { | 137 | { | 
| 134 | struct dlm_ls *ls; | 138 | struct dlm_ls *ls; | 
| 135 | struct dlm_user_args *ua; | 139 | struct dlm_user_args *ua; | 
| 136 | struct dlm_user_proc *proc; | 140 | struct dlm_user_proc *proc; | 
| 137 | int remove_ownqueue = 0; | 141 | int eol = 0, ast_type; | 
| 138 | 142 | ||
| 139 | /* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each | 143 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) | 
| 140 | lkb before dealing with it. We need to check this | ||
| 141 | flag before taking ls_clear_proc_locks mutex because if | ||
| 142 | it's set, dlm_clear_proc_locks() holds the mutex. */ | ||
| 143 | |||
| 144 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { | ||
| 145 | /* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */ | ||
| 146 | return; | 144 | return; | 
| 147 | } | ||
| 148 | 145 | ||
| 149 | ls = lkb->lkb_resource->res_ls; | 146 | ls = lkb->lkb_resource->res_ls; | 
| 150 | mutex_lock(&ls->ls_clear_proc_locks); | 147 | mutex_lock(&ls->ls_clear_proc_locks); | 
| 151 | 148 | ||
| 152 | /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast | 149 | /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast | 
| 153 | can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed | 150 | can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed | 
| 154 | lkb->ua so we can't try to use it. */ | 151 | lkb->ua so we can't try to use it. This second check is necessary | 
| 152 | for cases where a completion ast is received for an operation that | ||
| 153 | began before clear_proc_locks did its cancel/unlock. */ | ||
| 155 | 154 | ||
| 156 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { | 155 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) | 
| 157 | /* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */ | ||
| 158 | goto out; | 156 | goto out; | 
| 159 | } | ||
| 160 | 157 | ||
| 161 | DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); | 158 | DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); | 
| 162 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | 159 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | 
| @@ -166,28 +163,42 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
| 166 | goto out; | 163 | goto out; | 
| 167 | 164 | ||
| 168 | spin_lock(&proc->asts_spin); | 165 | spin_lock(&proc->asts_spin); | 
| 169 | if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { | 166 | |
| 167 | ast_type = lkb->lkb_ast_type; | ||
| 168 | lkb->lkb_ast_type |= type; | ||
| 169 | |||
| 170 | if (!ast_type) { | ||
| 170 | kref_get(&lkb->lkb_ref); | 171 | kref_get(&lkb->lkb_ref); | 
| 171 | list_add_tail(&lkb->lkb_astqueue, &proc->asts); | 172 | list_add_tail(&lkb->lkb_astqueue, &proc->asts); | 
| 172 | lkb->lkb_ast_type |= type; | ||
| 173 | wake_up_interruptible(&proc->wait); | 173 | wake_up_interruptible(&proc->wait); | 
| 174 | } | 174 | } | 
| 175 | 175 | if (type == AST_COMP && (ast_type & AST_COMP)) | |
| 176 | /* noqueue requests that fail may need to be removed from the | 176 | log_debug(ls, "ast overlap %x status %x %x", | 
| 177 | proc's locks list, there should be a better way of detecting | 177 | lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); | 
| 178 | this situation than checking all these things... */ | 178 | |
| 179 | 179 | /* Figure out if this lock is at the end of its life and no longer | |
| 180 | if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV && | 180 | available for the application to use. The lkb still exists until | 
| 181 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) | 181 | the final ast is read. A lock becomes EOL in three situations: | 
| 182 | remove_ownqueue = 1; | 182 | 1. a noqueue request fails with EAGAIN | 
| 183 | 183 | 2. an unlock completes with EUNLOCK | |
| 184 | /* unlocks or cancels of waiting requests need to be removed from the | 184 | 3. a cancel of a waiting request completes with ECANCEL | 
| 185 | proc's unlocking list, again there must be a better way... */ | 185 | An EOL lock needs to be removed from the process's list of locks. | 
| 186 | 186 | And we can't allow any new operation on an EOL lock. This is | |
| 187 | if (ua->lksb.sb_status == -DLM_EUNLOCK || | 187 | not related to the lifetime of the lkb struct which is managed | 
| 188 | entirely by refcount. */ | ||
| 189 | |||
| 190 | if (type == AST_COMP && | ||
| 191 | lkb->lkb_grmode == DLM_LOCK_IV && | ||
| 192 | ua->lksb.sb_status == -EAGAIN) | ||
| 193 | eol = 1; | ||
| 194 | else if (ua->lksb.sb_status == -DLM_EUNLOCK || | ||
| 188 | (ua->lksb.sb_status == -DLM_ECANCEL && | 195 | (ua->lksb.sb_status == -DLM_ECANCEL && | 
| 189 | lkb->lkb_grmode == DLM_LOCK_IV)) | 196 | lkb->lkb_grmode == DLM_LOCK_IV)) | 
| 190 | remove_ownqueue = 1; | 197 | eol = 1; | 
| 198 | if (eol) { | ||
| 199 | lkb->lkb_ast_type &= ~AST_BAST; | ||
| 200 | lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; | ||
| 201 | } | ||
| 191 | 202 | ||
| 192 | /* We want to copy the lvb to userspace when the completion | 203 | /* We want to copy the lvb to userspace when the completion | 
| 193 | ast is read if the status is 0, the lock has an lvb and | 204 | ast is read if the status is 0, the lock has an lvb and | 
| @@ -204,11 +215,13 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
| 204 | 215 | ||
| 205 | spin_unlock(&proc->asts_spin); | 216 | spin_unlock(&proc->asts_spin); | 
| 206 | 217 | ||
| 207 | if (remove_ownqueue) { | 218 | if (eol) { | 
| 208 | spin_lock(&ua->proc->locks_spin); | 219 | spin_lock(&ua->proc->locks_spin); | 
| 209 | list_del_init(&lkb->lkb_ownqueue); | 220 | if (!list_empty(&lkb->lkb_ownqueue)) { | 
| 221 | list_del_init(&lkb->lkb_ownqueue); | ||
| 222 | dlm_put_lkb(lkb); | ||
| 223 | } | ||
| 210 | spin_unlock(&ua->proc->locks_spin); | 224 | spin_unlock(&ua->proc->locks_spin); | 
| 211 | dlm_put_lkb(lkb); | ||
| 212 | } | 225 | } | 
| 213 | out: | 226 | out: | 
| 214 | mutex_unlock(&ls->ls_clear_proc_locks); | 227 | mutex_unlock(&ls->ls_clear_proc_locks); | 
| @@ -286,47 +299,71 @@ static int device_user_unlock(struct dlm_user_proc *proc, | |||
| 286 | return error; | 299 | return error; | 
| 287 | } | 300 | } | 
| 288 | 301 | ||
| 289 | static int device_create_lockspace(struct dlm_lspace_params *params) | 302 | static int create_misc_device(struct dlm_ls *ls, char *name) | 
| 290 | { | 303 | { | 
| 291 | dlm_lockspace_t *lockspace; | ||
| 292 | struct dlm_ls *ls; | ||
| 293 | int error, len; | 304 | int error, len; | 
| 294 | 305 | ||
| 295 | if (!capable(CAP_SYS_ADMIN)) | ||
| 296 | return -EPERM; | ||
| 297 | |||
| 298 | error = dlm_new_lockspace(params->name, strlen(params->name), | ||
| 299 | &lockspace, 0, DLM_USER_LVB_LEN); | ||
| 300 | if (error) | ||
| 301 | return error; | ||
| 302 | |||
| 303 | ls = dlm_find_lockspace_local(lockspace); | ||
| 304 | if (!ls) | ||
| 305 | return -ENOENT; | ||
| 306 | |||
| 307 | error = -ENOMEM; | 306 | error = -ENOMEM; | 
| 308 | len = strlen(params->name) + strlen(name_prefix) + 2; | 307 | len = strlen(name) + strlen(name_prefix) + 2; | 
| 309 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | 308 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | 
| 310 | if (!ls->ls_device.name) | 309 | if (!ls->ls_device.name) | 
| 311 | goto fail; | 310 | goto fail; | 
| 311 | |||
| 312 | snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix, | 312 | snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix, | 
| 313 | params->name); | 313 | name); | 
| 314 | ls->ls_device.fops = &device_fops; | 314 | ls->ls_device.fops = &device_fops; | 
| 315 | ls->ls_device.minor = MISC_DYNAMIC_MINOR; | 315 | ls->ls_device.minor = MISC_DYNAMIC_MINOR; | 
| 316 | 316 | ||
| 317 | error = misc_register(&ls->ls_device); | 317 | error = misc_register(&ls->ls_device); | 
| 318 | if (error) { | 318 | if (error) { | 
| 319 | kfree(ls->ls_device.name); | 319 | kfree(ls->ls_device.name); | 
| 320 | goto fail; | ||
| 321 | } | 320 | } | 
| 321 | fail: | ||
| 322 | return error; | ||
| 323 | } | ||
| 324 | |||
| 325 | static int device_user_purge(struct dlm_user_proc *proc, | ||
| 326 | struct dlm_purge_params *params) | ||
| 327 | { | ||
| 328 | struct dlm_ls *ls; | ||
| 329 | int error; | ||
| 330 | |||
| 331 | ls = dlm_find_lockspace_local(proc->lockspace); | ||
| 332 | if (!ls) | ||
| 333 | return -ENOENT; | ||
| 334 | |||
| 335 | error = dlm_user_purge(ls, proc, params->nodeid, params->pid); | ||
| 322 | 336 | ||
| 323 | error = ls->ls_device.minor; | ||
| 324 | dlm_put_lockspace(ls); | 337 | dlm_put_lockspace(ls); | 
| 325 | return error; | 338 | return error; | 
| 339 | } | ||
| 340 | |||
| 341 | static int device_create_lockspace(struct dlm_lspace_params *params) | ||
| 342 | { | ||
| 343 | dlm_lockspace_t *lockspace; | ||
| 344 | struct dlm_ls *ls; | ||
| 345 | int error; | ||
| 326 | 346 | ||
| 327 | fail: | 347 | if (!capable(CAP_SYS_ADMIN)) | 
| 348 | return -EPERM; | ||
| 349 | |||
| 350 | error = dlm_new_lockspace(params->name, strlen(params->name), | ||
| 351 | &lockspace, 0, DLM_USER_LVB_LEN); | ||
| 352 | if (error) | ||
| 353 | return error; | ||
| 354 | |||
| 355 | ls = dlm_find_lockspace_local(lockspace); | ||
| 356 | if (!ls) | ||
| 357 | return -ENOENT; | ||
| 358 | |||
| 359 | error = create_misc_device(ls, params->name); | ||
| 328 | dlm_put_lockspace(ls); | 360 | dlm_put_lockspace(ls); | 
| 329 | dlm_release_lockspace(lockspace, 0); | 361 | |
| 362 | if (error) | ||
| 363 | dlm_release_lockspace(lockspace, 0); | ||
| 364 | else | ||
| 365 | error = ls->ls_device.minor; | ||
| 366 | |||
| 330 | return error; | 367 | return error; | 
| 331 | } | 368 | } | 
| 332 | 369 | ||
| @@ -343,6 +380,10 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) | |||
| 343 | if (!ls) | 380 | if (!ls) | 
| 344 | return -ENOENT; | 381 | return -ENOENT; | 
| 345 | 382 | ||
| 383 | /* Deregister the misc device first, so we don't have | ||
| 384 | * a device that's not attached to a lockspace. If | ||
| 385 | * dlm_release_lockspace fails then we can recreate it | ||
| 386 | */ | ||
| 346 | error = misc_deregister(&ls->ls_device); | 387 | error = misc_deregister(&ls->ls_device); | 
| 347 | if (error) { | 388 | if (error) { | 
| 348 | dlm_put_lockspace(ls); | 389 | dlm_put_lockspace(ls); | 
| @@ -361,6 +402,8 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) | |||
| 361 | 402 | ||
| 362 | dlm_put_lockspace(ls); | 403 | dlm_put_lockspace(ls); | 
| 363 | error = dlm_release_lockspace(lockspace, force); | 404 | error = dlm_release_lockspace(lockspace, force); | 
| 405 | if (error) | ||
| 406 | create_misc_device(ls, ls->ls_name); | ||
| 364 | out: | 407 | out: | 
| 365 | return error; | 408 | return error; | 
| 366 | } | 409 | } | 
| @@ -497,6 +540,14 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
| 497 | error = device_remove_lockspace(&kbuf->i.lspace); | 540 | error = device_remove_lockspace(&kbuf->i.lspace); | 
| 498 | break; | 541 | break; | 
| 499 | 542 | ||
| 543 | case DLM_USER_PURGE: | ||
| 544 | if (!proc) { | ||
| 545 | log_print("no locking on control device"); | ||
| 546 | goto out_sig; | ||
| 547 | } | ||
| 548 | error = device_user_purge(proc, &kbuf->i.purge); | ||
| 549 | break; | ||
| 550 | |||
| 500 | default: | 551 | default: | 
| 501 | log_print("Unknown command passed to DLM device : %d\n", | 552 | log_print("Unknown command passed to DLM device : %d\n", | 
| 502 | kbuf->cmd); | 553 | kbuf->cmd); | 
| diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 82a1ac7895a2..a96fa07b3f3b 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
| @@ -1262,9 +1262,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | |||
| 1262 | u64 leaf_no) | 1262 | u64 leaf_no) | 
| 1263 | { | 1263 | { | 
| 1264 | struct gfs2_inode *ip = GFS2_I(inode); | 1264 | struct gfs2_inode *ip = GFS2_I(inode); | 
| 1265 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1265 | struct buffer_head *bh; | 1266 | struct buffer_head *bh; | 
| 1266 | struct gfs2_leaf *lf; | 1267 | struct gfs2_leaf *lf; | 
| 1267 | unsigned entries = 0; | 1268 | unsigned entries = 0, entries2 = 0; | 
| 1268 | unsigned leaves = 0; | 1269 | unsigned leaves = 0; | 
| 1269 | const struct gfs2_dirent **darr, *dent; | 1270 | const struct gfs2_dirent **darr, *dent; | 
| 1270 | struct dirent_gather g; | 1271 | struct dirent_gather g; | 
| @@ -1290,7 +1291,13 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | |||
| 1290 | return 0; | 1291 | return 0; | 
| 1291 | 1292 | ||
| 1292 | error = -ENOMEM; | 1293 | error = -ENOMEM; | 
| 1293 | larr = vmalloc((leaves + entries) * sizeof(void *)); | 1294 | /* | 
| 1295 | * The extra 99 entries are not normally used, but are a buffer | ||
| 1296 | * zone in case the number of entries in the leaf is corrupt. | ||
| 1297 | * 99 is the maximum number of entries that can fit in a single | ||
| 1298 | * leaf block. | ||
| 1299 | */ | ||
| 1300 | larr = vmalloc((leaves + entries + 99) * sizeof(void *)); | ||
| 1294 | if (!larr) | 1301 | if (!larr) | 
| 1295 | goto out; | 1302 | goto out; | 
| 1296 | darr = (const struct gfs2_dirent **)(larr + leaves); | 1303 | darr = (const struct gfs2_dirent **)(larr + leaves); | 
| @@ -1305,10 +1312,20 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | |||
| 1305 | lf = (struct gfs2_leaf *)bh->b_data; | 1312 | lf = (struct gfs2_leaf *)bh->b_data; | 
| 1306 | lfn = be64_to_cpu(lf->lf_next); | 1313 | lfn = be64_to_cpu(lf->lf_next); | 
| 1307 | if (lf->lf_entries) { | 1314 | if (lf->lf_entries) { | 
| 1315 | entries2 += be16_to_cpu(lf->lf_entries); | ||
| 1308 | dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, | 1316 | dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, | 
| 1309 | gfs2_dirent_gather, NULL, &g); | 1317 | gfs2_dirent_gather, NULL, &g); | 
| 1310 | error = PTR_ERR(dent); | 1318 | error = PTR_ERR(dent); | 
| 1311 | if (IS_ERR(dent)) { | 1319 | if (IS_ERR(dent)) | 
| 1320 | goto out_kfree; | ||
| 1321 | if (entries2 != g.offset) { | ||
| 1322 | fs_warn(sdp, "Number of entries corrupt in dir " | ||
| 1323 | "leaf %llu, entries2 (%u) != " | ||
| 1324 | "g.offset (%u)\n", | ||
| 1325 | (unsigned long long)bh->b_blocknr, | ||
| 1326 | entries2, g.offset); | ||
| 1327 | |||
| 1328 | error = -EIO; | ||
| 1312 | goto out_kfree; | 1329 | goto out_kfree; | 
| 1313 | } | 1330 | } | 
| 1314 | error = 0; | 1331 | error = 0; | 
| @@ -1318,6 +1335,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | |||
| 1318 | } | 1335 | } | 
| 1319 | } while(lfn); | 1336 | } while(lfn); | 
| 1320 | 1337 | ||
| 1338 | BUG_ON(entries2 != entries); | ||
| 1321 | error = do_filldir_main(ip, offset, opaque, filldir, darr, | 1339 | error = do_filldir_main(ip, offset, opaque, filldir, darr, | 
| 1322 | entries, copied); | 1340 | entries, copied); | 
| 1323 | out_kfree: | 1341 | out_kfree: | 
| @@ -1401,6 +1419,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1401 | filldir_t filldir) | 1419 | filldir_t filldir) | 
| 1402 | { | 1420 | { | 
| 1403 | struct gfs2_inode *dip = GFS2_I(inode); | 1421 | struct gfs2_inode *dip = GFS2_I(inode); | 
| 1422 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1404 | struct dirent_gather g; | 1423 | struct dirent_gather g; | 
| 1405 | const struct gfs2_dirent **darr, *dent; | 1424 | const struct gfs2_dirent **darr, *dent; | 
| 1406 | struct buffer_head *dibh; | 1425 | struct buffer_head *dibh; | 
| @@ -1423,8 +1442,8 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1423 | return error; | 1442 | return error; | 
| 1424 | 1443 | ||
| 1425 | error = -ENOMEM; | 1444 | error = -ENOMEM; | 
| 1426 | darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *), | 1445 | /* 96 is max number of dirents which can be stuffed into an inode */ | 
| 1427 | GFP_KERNEL); | 1446 | darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL); | 
| 1428 | if (darr) { | 1447 | if (darr) { | 
| 1429 | g.pdent = darr; | 1448 | g.pdent = darr; | 
| 1430 | g.offset = 0; | 1449 | g.offset = 0; | 
| @@ -1434,6 +1453,15 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1434 | error = PTR_ERR(dent); | 1453 | error = PTR_ERR(dent); | 
| 1435 | goto out; | 1454 | goto out; | 
| 1436 | } | 1455 | } | 
| 1456 | if (dip->i_di.di_entries != g.offset) { | ||
| 1457 | fs_warn(sdp, "Number of entries corrupt in dir %llu, " | ||
| 1458 | "ip->i_di.di_entries (%u) != g.offset (%u)\n", | ||
| 1459 | (unsigned long long)dip->i_num.no_addr, | ||
| 1460 | dip->i_di.di_entries, | ||
| 1461 | g.offset); | ||
| 1462 | error = -EIO; | ||
| 1463 | goto out; | ||
| 1464 | } | ||
| 1437 | error = do_filldir_main(dip, offset, opaque, filldir, darr, | 1465 | error = do_filldir_main(dip, offset, opaque, filldir, darr, | 
| 1438 | dip->i_di.di_entries, &copied); | 1466 | dip->i_di.di_entries, &copied); | 
| 1439 | out: | 1467 | out: | 
| diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 12accb08fe02..1815429a2978 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -23,6 +23,10 @@ | |||
| 23 | #include <linux/module.h> | 23 | #include <linux/module.h> | 
| 24 | #include <linux/rwsem.h> | 24 | #include <linux/rwsem.h> | 
| 25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> | 
| 26 | #include <linux/seq_file.h> | ||
| 27 | #include <linux/debugfs.h> | ||
| 28 | #include <linux/module.h> | ||
| 29 | #include <linux/kallsyms.h> | ||
| 26 | 30 | ||
| 27 | #include "gfs2.h" | 31 | #include "gfs2.h" | 
| 28 | #include "incore.h" | 32 | #include "incore.h" | 
| @@ -40,20 +44,30 @@ struct gfs2_gl_hash_bucket { | |||
| 40 | struct hlist_head hb_list; | 44 | struct hlist_head hb_list; | 
| 41 | }; | 45 | }; | 
| 42 | 46 | ||
| 47 | struct glock_iter { | ||
| 48 | int hash; /* hash bucket index */ | ||
| 49 | struct gfs2_sbd *sdp; /* incore superblock */ | ||
| 50 | struct gfs2_glock *gl; /* current glock struct */ | ||
| 51 | struct hlist_head *hb_list; /* current hash bucket ptr */ | ||
| 52 | struct seq_file *seq; /* sequence file for debugfs */ | ||
| 53 | char string[512]; /* scratch space */ | ||
| 54 | }; | ||
| 55 | |||
| 43 | typedef void (*glock_examiner) (struct gfs2_glock * gl); | 56 | typedef void (*glock_examiner) (struct gfs2_glock * gl); | 
| 44 | 57 | ||
| 45 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | 58 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | 
| 46 | static int dump_glock(struct gfs2_glock *gl); | 59 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); | 
| 47 | static int dump_inode(struct gfs2_inode *ip); | 60 | static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); | 
| 48 | static void gfs2_glock_xmote_th(struct gfs2_holder *gh); | ||
| 49 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | 61 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | 
| 50 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | 62 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | 
| 63 | static struct dentry *gfs2_root; | ||
| 51 | 64 | ||
| 52 | #define GFS2_GL_HASH_SHIFT 15 | 65 | #define GFS2_GL_HASH_SHIFT 15 | 
| 53 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) | 66 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) | 
| 54 | #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) | 67 | #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) | 
| 55 | 68 | ||
| 56 | static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; | 69 | static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; | 
| 70 | static struct dentry *gfs2_root; | ||
| 57 | 71 | ||
| 58 | /* | 72 | /* | 
| 59 | * Despite what you might think, the numbers below are not arbitrary :-) | 73 | * Despite what you might think, the numbers below are not arbitrary :-) | 
| @@ -202,7 +216,6 @@ int gfs2_glock_put(struct gfs2_glock *gl) | |||
| 202 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | 216 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | 
| 203 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | 217 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | 
| 204 | gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); | 218 | gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); | 
| 205 | gfs2_assert(sdp, list_empty(&gl->gl_waiters2)); | ||
| 206 | gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); | 219 | gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); | 
| 207 | glock_free(gl); | 220 | glock_free(gl); | 
| 208 | rv = 1; | 221 | rv = 1; | 
| @@ -303,7 +316,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
| 303 | atomic_set(&gl->gl_ref, 1); | 316 | atomic_set(&gl->gl_ref, 1); | 
| 304 | gl->gl_state = LM_ST_UNLOCKED; | 317 | gl->gl_state = LM_ST_UNLOCKED; | 
| 305 | gl->gl_hash = hash; | 318 | gl->gl_hash = hash; | 
| 306 | gl->gl_owner = NULL; | 319 | gl->gl_owner_pid = 0; | 
| 307 | gl->gl_ip = 0; | 320 | gl->gl_ip = 0; | 
| 308 | gl->gl_ops = glops; | 321 | gl->gl_ops = glops; | 
| 309 | gl->gl_req_gh = NULL; | 322 | gl->gl_req_gh = NULL; | 
| @@ -367,7 +380,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | |||
| 367 | INIT_LIST_HEAD(&gh->gh_list); | 380 | INIT_LIST_HEAD(&gh->gh_list); | 
| 368 | gh->gh_gl = gl; | 381 | gh->gh_gl = gl; | 
| 369 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 382 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 
| 370 | gh->gh_owner = current; | 383 | gh->gh_owner_pid = current->pid; | 
| 371 | gh->gh_state = state; | 384 | gh->gh_state = state; | 
| 372 | gh->gh_flags = flags; | 385 | gh->gh_flags = flags; | 
| 373 | gh->gh_error = 0; | 386 | gh->gh_error = 0; | 
| @@ -389,7 +402,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * | |||
| 389 | { | 402 | { | 
| 390 | gh->gh_state = state; | 403 | gh->gh_state = state; | 
| 391 | gh->gh_flags = flags; | 404 | gh->gh_flags = flags; | 
| 392 | gh->gh_iflags &= 1 << HIF_ALLOCED; | 405 | gh->gh_iflags = 0; | 
| 393 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 406 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 
| 394 | } | 407 | } | 
| 395 | 408 | ||
| @@ -406,54 +419,8 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) | |||
| 406 | gh->gh_ip = 0; | 419 | gh->gh_ip = 0; | 
| 407 | } | 420 | } | 
| 408 | 421 | ||
| 409 | /** | 422 | static void gfs2_holder_wake(struct gfs2_holder *gh) | 
| 410 | * gfs2_holder_get - get a struct gfs2_holder structure | ||
| 411 | * @gl: the glock | ||
| 412 | * @state: the state we're requesting | ||
| 413 | * @flags: the modifier flags | ||
| 414 | * @gfp_flags: | ||
| 415 | * | ||
| 416 | * Figure out how big an impact this function has. Either: | ||
| 417 | * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd | ||
| 418 | * 2) Leave it like it is | ||
| 419 | * | ||
| 420 | * Returns: the holder structure, NULL on ENOMEM | ||
| 421 | */ | ||
| 422 | |||
| 423 | static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, | ||
| 424 | unsigned int state, | ||
| 425 | int flags, gfp_t gfp_flags) | ||
| 426 | { | ||
| 427 | struct gfs2_holder *gh; | ||
| 428 | |||
| 429 | gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags); | ||
| 430 | if (!gh) | ||
| 431 | return NULL; | ||
| 432 | |||
| 433 | gfs2_holder_init(gl, state, flags, gh); | ||
| 434 | set_bit(HIF_ALLOCED, &gh->gh_iflags); | ||
| 435 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | ||
| 436 | return gh; | ||
| 437 | } | ||
| 438 | |||
| 439 | /** | ||
| 440 | * gfs2_holder_put - get rid of a struct gfs2_holder structure | ||
| 441 | * @gh: the holder structure | ||
| 442 | * | ||
| 443 | */ | ||
| 444 | |||
| 445 | static void gfs2_holder_put(struct gfs2_holder *gh) | ||
| 446 | { | 423 | { | 
| 447 | gfs2_holder_uninit(gh); | ||
| 448 | kfree(gh); | ||
| 449 | } | ||
| 450 | |||
| 451 | static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) | ||
| 452 | { | ||
| 453 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { | ||
| 454 | gfs2_holder_put(gh); | ||
| 455 | return; | ||
| 456 | } | ||
| 457 | clear_bit(HIF_WAIT, &gh->gh_iflags); | 424 | clear_bit(HIF_WAIT, &gh->gh_iflags); | 
| 458 | smp_mb(); | 425 | smp_mb(); | 
| 459 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | 426 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | 
| @@ -519,7 +486,7 @@ static int rq_promote(struct gfs2_holder *gh) | |||
| 519 | gfs2_reclaim_glock(sdp); | 486 | gfs2_reclaim_glock(sdp); | 
| 520 | } | 487 | } | 
| 521 | 488 | ||
| 522 | gfs2_glock_xmote_th(gh); | 489 | gfs2_glock_xmote_th(gh->gh_gl, gh); | 
| 523 | spin_lock(&gl->gl_spin); | 490 | spin_lock(&gl->gl_spin); | 
| 524 | } | 491 | } | 
| 525 | return 1; | 492 | return 1; | 
| @@ -542,7 +509,7 @@ static int rq_promote(struct gfs2_holder *gh) | |||
| 542 | gh->gh_error = 0; | 509 | gh->gh_error = 0; | 
| 543 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 510 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 
| 544 | 511 | ||
| 545 | gfs2_holder_dispose_or_wake(gh); | 512 | gfs2_holder_wake(gh); | 
| 546 | 513 | ||
| 547 | return 0; | 514 | return 0; | 
| 548 | } | 515 | } | 
| @@ -554,32 +521,24 @@ static int rq_promote(struct gfs2_holder *gh) | |||
| 554 | * Returns: 1 if the queue is blocked | 521 | * Returns: 1 if the queue is blocked | 
| 555 | */ | 522 | */ | 
| 556 | 523 | ||
| 557 | static int rq_demote(struct gfs2_holder *gh) | 524 | static int rq_demote(struct gfs2_glock *gl) | 
| 558 | { | 525 | { | 
| 559 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 560 | |||
| 561 | if (!list_empty(&gl->gl_holders)) | 526 | if (!list_empty(&gl->gl_holders)) | 
| 562 | return 1; | 527 | return 1; | 
| 563 | 528 | ||
| 564 | if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) { | 529 | if (gl->gl_state == gl->gl_demote_state || | 
| 565 | list_del_init(&gh->gh_list); | 530 | gl->gl_state == LM_ST_UNLOCKED) { | 
| 566 | gh->gh_error = 0; | 531 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | 
| 567 | spin_unlock(&gl->gl_spin); | 532 | return 0; | 
| 568 | gfs2_holder_dispose_or_wake(gh); | ||
| 569 | spin_lock(&gl->gl_spin); | ||
| 570 | } else { | ||
| 571 | gl->gl_req_gh = gh; | ||
| 572 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 573 | spin_unlock(&gl->gl_spin); | ||
| 574 | |||
| 575 | if (gh->gh_state == LM_ST_UNLOCKED || | ||
| 576 | gl->gl_state != LM_ST_EXCLUSIVE) | ||
| 577 | gfs2_glock_drop_th(gl); | ||
| 578 | else | ||
| 579 | gfs2_glock_xmote_th(gh); | ||
| 580 | |||
| 581 | spin_lock(&gl->gl_spin); | ||
| 582 | } | 533 | } | 
| 534 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
| 535 | spin_unlock(&gl->gl_spin); | ||
| 536 | if (gl->gl_demote_state == LM_ST_UNLOCKED || | ||
| 537 | gl->gl_state != LM_ST_EXCLUSIVE) | ||
| 538 | gfs2_glock_drop_th(gl); | ||
| 539 | else | ||
| 540 | gfs2_glock_xmote_th(gl, NULL); | ||
| 541 | spin_lock(&gl->gl_spin); | ||
| 583 | 542 | ||
| 584 | return 0; | 543 | return 0; | 
| 585 | } | 544 | } | 
| @@ -607,16 +566,8 @@ static void run_queue(struct gfs2_glock *gl) | |||
| 607 | else | 566 | else | 
| 608 | gfs2_assert_warn(gl->gl_sbd, 0); | 567 | gfs2_assert_warn(gl->gl_sbd, 0); | 
| 609 | 568 | ||
| 610 | } else if (!list_empty(&gl->gl_waiters2) && | 569 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | 
| 611 | !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) { | 570 | blocked = rq_demote(gl); | 
| 612 | gh = list_entry(gl->gl_waiters2.next, | ||
| 613 | struct gfs2_holder, gh_list); | ||
| 614 | |||
| 615 | if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) | ||
| 616 | blocked = rq_demote(gh); | ||
| 617 | else | ||
| 618 | gfs2_assert_warn(gl->gl_sbd, 0); | ||
| 619 | |||
| 620 | } else if (!list_empty(&gl->gl_waiters3)) { | 571 | } else if (!list_empty(&gl->gl_waiters3)) { | 
| 621 | gh = list_entry(gl->gl_waiters3.next, | 572 | gh = list_entry(gl->gl_waiters3.next, | 
| 622 | struct gfs2_holder, gh_list); | 573 | struct gfs2_holder, gh_list); | 
| @@ -654,7 +605,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) | |||
| 654 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 605 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 
| 655 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); | 606 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); | 
| 656 | } else { | 607 | } else { | 
| 657 | gl->gl_owner = current; | 608 | gl->gl_owner_pid = current->pid; | 
| 658 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 609 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 
| 659 | clear_bit(HIF_WAIT, &gh.gh_iflags); | 610 | clear_bit(HIF_WAIT, &gh.gh_iflags); | 
| 660 | smp_mb(); | 611 | smp_mb(); | 
| @@ -681,7 +632,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) | |||
| 681 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 632 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 
| 682 | acquired = 0; | 633 | acquired = 0; | 
| 683 | } else { | 634 | } else { | 
| 684 | gl->gl_owner = current; | 635 | gl->gl_owner_pid = current->pid; | 
| 685 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 636 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 
| 686 | } | 637 | } | 
| 687 | spin_unlock(&gl->gl_spin); | 638 | spin_unlock(&gl->gl_spin); | 
| @@ -699,7 +650,7 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | |||
| 699 | { | 650 | { | 
| 700 | spin_lock(&gl->gl_spin); | 651 | spin_lock(&gl->gl_spin); | 
| 701 | clear_bit(GLF_LOCK, &gl->gl_flags); | 652 | clear_bit(GLF_LOCK, &gl->gl_flags); | 
| 702 | gl->gl_owner = NULL; | 653 | gl->gl_owner_pid = 0; | 
| 703 | gl->gl_ip = 0; | 654 | gl->gl_ip = 0; | 
| 704 | run_queue(gl); | 655 | run_queue(gl); | 
| 705 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | 656 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | 
| @@ -707,50 +658,24 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | |||
| 707 | } | 658 | } | 
| 708 | 659 | ||
| 709 | /** | 660 | /** | 
| 710 | * handle_callback - add a demote request to a lock's queue | 661 | * handle_callback - process a demote request | 
| 711 | * @gl: the glock | 662 | * @gl: the glock | 
| 712 | * @state: the state the caller wants us to change to | 663 | * @state: the state the caller wants us to change to | 
| 713 | * | 664 | * | 
| 714 | * Note: This may fail sliently if we are out of memory. | 665 | * There are only two requests that we are going to see in actual | 
| 666 | * practise: LM_ST_SHARED and LM_ST_UNLOCKED | ||
| 715 | */ | 667 | */ | 
| 716 | 668 | ||
| 717 | static void handle_callback(struct gfs2_glock *gl, unsigned int state) | 669 | static void handle_callback(struct gfs2_glock *gl, unsigned int state) | 
| 718 | { | 670 | { | 
| 719 | struct gfs2_holder *gh, *new_gh = NULL; | ||
| 720 | |||
| 721 | restart: | ||
| 722 | spin_lock(&gl->gl_spin); | 671 | spin_lock(&gl->gl_spin); | 
| 723 | 672 | if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { | |
| 724 | list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { | 673 | gl->gl_demote_state = state; | 
| 725 | if (test_bit(HIF_DEMOTE, &gh->gh_iflags) && | 674 | gl->gl_demote_time = jiffies; | 
| 726 | gl->gl_req_gh != gh) { | 675 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { | 
| 727 | if (gh->gh_state != state) | 676 | gl->gl_demote_state = state; | 
| 728 | gh->gh_state = LM_ST_UNLOCKED; | ||
| 729 | goto out; | ||
| 730 | } | ||
| 731 | } | ||
| 732 | |||
| 733 | if (new_gh) { | ||
| 734 | list_add_tail(&new_gh->gh_list, &gl->gl_waiters2); | ||
| 735 | new_gh = NULL; | ||
| 736 | } else { | ||
| 737 | spin_unlock(&gl->gl_spin); | ||
| 738 | |||
| 739 | new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS); | ||
| 740 | if (!new_gh) | ||
| 741 | return; | ||
| 742 | set_bit(HIF_DEMOTE, &new_gh->gh_iflags); | ||
| 743 | set_bit(HIF_DEALLOC, &new_gh->gh_iflags); | ||
| 744 | set_bit(HIF_WAIT, &new_gh->gh_iflags); | ||
| 745 | |||
| 746 | goto restart; | ||
| 747 | } | 677 | } | 
| 748 | |||
| 749 | out: | ||
| 750 | spin_unlock(&gl->gl_spin); | 678 | spin_unlock(&gl->gl_spin); | 
| 751 | |||
| 752 | if (new_gh) | ||
| 753 | gfs2_holder_put(new_gh); | ||
| 754 | } | 679 | } | 
| 755 | 680 | ||
| 756 | /** | 681 | /** | 
| @@ -810,56 +735,37 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 810 | 735 | ||
| 811 | /* Deal with each possible exit condition */ | 736 | /* Deal with each possible exit condition */ | 
| 812 | 737 | ||
| 813 | if (!gh) | 738 | if (!gh) { | 
| 814 | gl->gl_stamp = jiffies; | 739 | gl->gl_stamp = jiffies; | 
| 815 | else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { | 740 | if (ret & LM_OUT_CANCELED) | 
| 741 | op_done = 0; | ||
| 742 | else | ||
| 743 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 744 | } else { | ||
| 816 | spin_lock(&gl->gl_spin); | 745 | spin_lock(&gl->gl_spin); | 
| 817 | list_del_init(&gh->gh_list); | 746 | list_del_init(&gh->gh_list); | 
| 818 | gh->gh_error = -EIO; | 747 | gh->gh_error = -EIO; | 
| 819 | spin_unlock(&gl->gl_spin); | 748 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 
| 820 | } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) { | 749 | goto out; | 
| 821 | spin_lock(&gl->gl_spin); | 750 | gh->gh_error = GLR_CANCELED; | 
| 822 | list_del_init(&gh->gh_list); | 751 | if (ret & LM_OUT_CANCELED) | 
| 823 | if (gl->gl_state == gh->gh_state || | 752 | goto out; | 
| 824 | gl->gl_state == LM_ST_UNLOCKED) { | 753 | if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | 
| 754 | list_add_tail(&gh->gh_list, &gl->gl_holders); | ||
| 825 | gh->gh_error = 0; | 755 | gh->gh_error = 0; | 
| 826 | } else { | 756 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 
| 827 | if (gfs2_assert_warn(sdp, gh->gh_flags & | 757 | set_bit(HIF_FIRST, &gh->gh_iflags); | 
| 828 | (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1) | 758 | op_done = 0; | 
| 829 | fs_warn(sdp, "ret = 0x%.8X\n", ret); | 759 | goto out; | 
| 830 | gh->gh_error = GLR_TRYFAILED; | ||
| 831 | } | 760 | } | 
| 832 | spin_unlock(&gl->gl_spin); | ||
| 833 | |||
| 834 | if (ret & LM_OUT_CANCELED) | ||
| 835 | handle_callback(gl, LM_ST_UNLOCKED); | ||
| 836 | |||
| 837 | } else if (ret & LM_OUT_CANCELED) { | ||
| 838 | spin_lock(&gl->gl_spin); | ||
| 839 | list_del_init(&gh->gh_list); | ||
| 840 | gh->gh_error = GLR_CANCELED; | ||
| 841 | spin_unlock(&gl->gl_spin); | ||
| 842 | |||
| 843 | } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | ||
| 844 | spin_lock(&gl->gl_spin); | ||
| 845 | list_move_tail(&gh->gh_list, &gl->gl_holders); | ||
| 846 | gh->gh_error = 0; | ||
| 847 | set_bit(HIF_HOLDER, &gh->gh_iflags); | ||
| 848 | spin_unlock(&gl->gl_spin); | ||
| 849 | |||
| 850 | set_bit(HIF_FIRST, &gh->gh_iflags); | ||
| 851 | |||
| 852 | op_done = 0; | ||
| 853 | |||
| 854 | } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { | ||
| 855 | spin_lock(&gl->gl_spin); | ||
| 856 | list_del_init(&gh->gh_list); | ||
| 857 | gh->gh_error = GLR_TRYFAILED; | 761 | gh->gh_error = GLR_TRYFAILED; | 
| 858 | spin_unlock(&gl->gl_spin); | 762 | if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) | 
| 859 | 763 | goto out; | |
| 860 | } else { | 764 | gh->gh_error = -EINVAL; | 
| 861 | if (gfs2_assert_withdraw(sdp, 0) == -1) | 765 | if (gfs2_assert_withdraw(sdp, 0) == -1) | 
| 862 | fs_err(sdp, "ret = 0x%.8X\n", ret); | 766 | fs_err(sdp, "ret = 0x%.8X\n", ret); | 
| 767 | out: | ||
| 768 | spin_unlock(&gl->gl_spin); | ||
| 863 | } | 769 | } | 
| 864 | 770 | ||
| 865 | if (glops->go_xmote_bh) | 771 | if (glops->go_xmote_bh) | 
| @@ -877,7 +783,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 877 | gfs2_glock_put(gl); | 783 | gfs2_glock_put(gl); | 
| 878 | 784 | ||
| 879 | if (gh) | 785 | if (gh) | 
| 880 | gfs2_holder_dispose_or_wake(gh); | 786 | gfs2_holder_wake(gh); | 
| 881 | } | 787 | } | 
| 882 | 788 | ||
| 883 | /** | 789 | /** | 
| @@ -888,12 +794,11 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 888 | * | 794 | * | 
| 889 | */ | 795 | */ | 
| 890 | 796 | ||
| 891 | void gfs2_glock_xmote_th(struct gfs2_holder *gh) | 797 | void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | 
| 892 | { | 798 | { | 
| 893 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 894 | struct gfs2_sbd *sdp = gl->gl_sbd; | 799 | struct gfs2_sbd *sdp = gl->gl_sbd; | 
| 895 | int flags = gh->gh_flags; | 800 | int flags = gh ? gh->gh_flags : 0; | 
| 896 | unsigned state = gh->gh_state; | 801 | unsigned state = gh ? gh->gh_state : gl->gl_demote_state; | 
| 897 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 802 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 
| 898 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | | 803 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | | 
| 899 | LM_FLAG_NOEXP | LM_FLAG_ANY | | 804 | LM_FLAG_NOEXP | LM_FLAG_ANY | | 
| @@ -943,6 +848,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 943 | gfs2_assert_warn(sdp, !ret); | 848 | gfs2_assert_warn(sdp, !ret); | 
| 944 | 849 | ||
| 945 | state_change(gl, LM_ST_UNLOCKED); | 850 | state_change(gl, LM_ST_UNLOCKED); | 
| 851 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | ||
| 946 | 852 | ||
| 947 | if (glops->go_inval) | 853 | if (glops->go_inval) | 
| 948 | glops->go_inval(gl, DIO_METADATA); | 854 | glops->go_inval(gl, DIO_METADATA); | 
| @@ -964,7 +870,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 964 | gfs2_glock_put(gl); | 870 | gfs2_glock_put(gl); | 
| 965 | 871 | ||
| 966 | if (gh) | 872 | if (gh) | 
| 967 | gfs2_holder_dispose_or_wake(gh); | 873 | gfs2_holder_wake(gh); | 
| 968 | } | 874 | } | 
| 969 | 875 | ||
| 970 | /** | 876 | /** | 
| @@ -1097,18 +1003,32 @@ static int glock_wait_internal(struct gfs2_holder *gh) | |||
| 1097 | } | 1003 | } | 
| 1098 | 1004 | ||
| 1099 | static inline struct gfs2_holder * | 1005 | static inline struct gfs2_holder * | 
| 1100 | find_holder_by_owner(struct list_head *head, struct task_struct *owner) | 1006 | find_holder_by_owner(struct list_head *head, pid_t pid) | 
| 1101 | { | 1007 | { | 
| 1102 | struct gfs2_holder *gh; | 1008 | struct gfs2_holder *gh; | 
| 1103 | 1009 | ||
| 1104 | list_for_each_entry(gh, head, gh_list) { | 1010 | list_for_each_entry(gh, head, gh_list) { | 
| 1105 | if (gh->gh_owner == owner) | 1011 | if (gh->gh_owner_pid == pid) | 
| 1106 | return gh; | 1012 | return gh; | 
| 1107 | } | 1013 | } | 
| 1108 | 1014 | ||
| 1109 | return NULL; | 1015 | return NULL; | 
| 1110 | } | 1016 | } | 
| 1111 | 1017 | ||
| 1018 | static void print_dbg(struct glock_iter *gi, const char *fmt, ...) | ||
| 1019 | { | ||
| 1020 | va_list args; | ||
| 1021 | |||
| 1022 | va_start(args, fmt); | ||
| 1023 | if (gi) { | ||
| 1024 | vsprintf(gi->string, fmt, args); | ||
| 1025 | seq_printf(gi->seq, gi->string); | ||
| 1026 | } | ||
| 1027 | else | ||
| 1028 | vprintk(fmt, args); | ||
| 1029 | va_end(args); | ||
| 1030 | } | ||
| 1031 | |||
| 1112 | /** | 1032 | /** | 
| 1113 | * add_to_queue - Add a holder to the wait queue (but look for recursion) | 1033 | * add_to_queue - Add a holder to the wait queue (but look for recursion) | 
| 1114 | * @gh: the holder structure to add | 1034 | * @gh: the holder structure to add | 
| @@ -1120,24 +1040,24 @@ static void add_to_queue(struct gfs2_holder *gh) | |||
| 1120 | struct gfs2_glock *gl = gh->gh_gl; | 1040 | struct gfs2_glock *gl = gh->gh_gl; | 
| 1121 | struct gfs2_holder *existing; | 1041 | struct gfs2_holder *existing; | 
| 1122 | 1042 | ||
| 1123 | BUG_ON(!gh->gh_owner); | 1043 | BUG_ON(!gh->gh_owner_pid); | 
| 1124 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | 1044 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | 
| 1125 | BUG(); | 1045 | BUG(); | 
| 1126 | 1046 | ||
| 1127 | existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); | 1047 | existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); | 
| 1128 | if (existing) { | 1048 | if (existing) { | 
| 1129 | print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); | 1049 | print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); | 
| 1130 | printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid); | 1050 | printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); | 
| 1131 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 1051 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 
| 1132 | existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); | 1052 | existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); | 
| 1133 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | 1053 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | 
| 1134 | printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid); | 1054 | printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); | 
| 1135 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 1055 | printk(KERN_INFO "lock type : %d lock state : %d\n", | 
| 1136 | gl->gl_name.ln_type, gl->gl_state); | 1056 | gl->gl_name.ln_type, gl->gl_state); | 
| 1137 | BUG(); | 1057 | BUG(); | 
| 1138 | } | 1058 | } | 
| 1139 | 1059 | ||
| 1140 | existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner); | 1060 | existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); | 
| 1141 | if (existing) { | 1061 | if (existing) { | 
| 1142 | print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); | 1062 | print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); | 
| 1143 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | 1063 | print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); | 
| @@ -1267,9 +1187,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
| 1267 | if (glops->go_unlock) | 1187 | if (glops->go_unlock) | 
| 1268 | glops->go_unlock(gh); | 1188 | glops->go_unlock(gh); | 
| 1269 | 1189 | ||
| 1270 | gl->gl_stamp = jiffies; | ||
| 1271 | |||
| 1272 | spin_lock(&gl->gl_spin); | 1190 | spin_lock(&gl->gl_spin); | 
| 1191 | gl->gl_stamp = jiffies; | ||
| 1273 | } | 1192 | } | 
| 1274 | 1193 | ||
| 1275 | clear_bit(GLF_LOCK, &gl->gl_flags); | 1194 | clear_bit(GLF_LOCK, &gl->gl_flags); | 
| @@ -1841,6 +1760,15 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
| 1841 | * Diagnostic routines to help debug distributed deadlock | 1760 | * Diagnostic routines to help debug distributed deadlock | 
| 1842 | */ | 1761 | */ | 
| 1843 | 1762 | ||
| 1763 | static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, | ||
| 1764 | unsigned long address) | ||
| 1765 | { | ||
| 1766 | char buffer[KSYM_SYMBOL_LEN]; | ||
| 1767 | |||
| 1768 | sprint_symbol(buffer, address); | ||
| 1769 | print_dbg(gi, fmt, buffer); | ||
| 1770 | } | ||
| 1771 | |||
| 1844 | /** | 1772 | /** | 
| 1845 | * dump_holder - print information about a glock holder | 1773 | * dump_holder - print information about a glock holder | 
| 1846 | * @str: a string naming the type of holder | 1774 | * @str: a string naming the type of holder | 
| @@ -1849,31 +1777,37 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
| 1849 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1777 | * Returns: 0 on success, -ENOBUFS when we run out of space | 
| 1850 | */ | 1778 | */ | 
| 1851 | 1779 | ||
| 1852 | static int dump_holder(char *str, struct gfs2_holder *gh) | 1780 | static int dump_holder(struct glock_iter *gi, char *str, | 
| 1781 | struct gfs2_holder *gh) | ||
| 1853 | { | 1782 | { | 
| 1854 | unsigned int x; | 1783 | unsigned int x; | 
| 1855 | int error = -ENOBUFS; | 1784 | struct task_struct *gh_owner; | 
| 1856 | 1785 | ||
| 1857 | printk(KERN_INFO " %s\n", str); | 1786 | print_dbg(gi, " %s\n", str); | 
| 1858 | printk(KERN_INFO " owner = %ld\n", | 1787 | if (gh->gh_owner_pid) { | 
| 1859 | (gh->gh_owner) ? (long)gh->gh_owner->pid : -1); | 1788 | print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid); | 
| 1860 | printk(KERN_INFO " gh_state = %u\n", gh->gh_state); | 1789 | gh_owner = find_task_by_pid(gh->gh_owner_pid); | 
| 1861 | printk(KERN_INFO " gh_flags ="); | 1790 | if (gh_owner) | 
| 1791 | print_dbg(gi, "(%s)\n", gh_owner->comm); | ||
| 1792 | else | ||
| 1793 | print_dbg(gi, "(ended)\n"); | ||
| 1794 | } else | ||
| 1795 | print_dbg(gi, " owner = -1\n"); | ||
| 1796 | print_dbg(gi, " gh_state = %u\n", gh->gh_state); | ||
| 1797 | print_dbg(gi, " gh_flags ="); | ||
| 1862 | for (x = 0; x < 32; x++) | 1798 | for (x = 0; x < 32; x++) | 
| 1863 | if (gh->gh_flags & (1 << x)) | 1799 | if (gh->gh_flags & (1 << x)) | 
| 1864 | printk(" %u", x); | 1800 | print_dbg(gi, " %u", x); | 
| 1865 | printk(" \n"); | 1801 | print_dbg(gi, " \n"); | 
| 1866 | printk(KERN_INFO " error = %d\n", gh->gh_error); | 1802 | print_dbg(gi, " error = %d\n", gh->gh_error); | 
| 1867 | printk(KERN_INFO " gh_iflags ="); | 1803 | print_dbg(gi, " gh_iflags ="); | 
| 1868 | for (x = 0; x < 32; x++) | 1804 | for (x = 0; x < 32; x++) | 
| 1869 | if (test_bit(x, &gh->gh_iflags)) | 1805 | if (test_bit(x, &gh->gh_iflags)) | 
| 1870 | printk(" %u", x); | 1806 | print_dbg(gi, " %u", x); | 
| 1871 | printk(" \n"); | 1807 | print_dbg(gi, " \n"); | 
| 1872 | print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); | 1808 | gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); | 
| 1873 | |||
| 1874 | error = 0; | ||
| 1875 | 1809 | ||
| 1876 | return error; | 1810 | return 0; | 
| 1877 | } | 1811 | } | 
| 1878 | 1812 | ||
| 1879 | /** | 1813 | /** | 
| @@ -1883,25 +1817,20 @@ static int dump_holder(char *str, struct gfs2_holder *gh) | |||
| 1883 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1817 | * Returns: 0 on success, -ENOBUFS when we run out of space | 
| 1884 | */ | 1818 | */ | 
| 1885 | 1819 | ||
| 1886 | static int dump_inode(struct gfs2_inode *ip) | 1820 | static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) | 
| 1887 | { | 1821 | { | 
| 1888 | unsigned int x; | 1822 | unsigned int x; | 
| 1889 | int error = -ENOBUFS; | ||
| 1890 | 1823 | ||
| 1891 | printk(KERN_INFO " Inode:\n"); | 1824 | print_dbg(gi, " Inode:\n"); | 
| 1892 | printk(KERN_INFO " num = %llu %llu\n", | 1825 | print_dbg(gi, " num = %llu/%llu\n", | 
| 1893 | (unsigned long long)ip->i_num.no_formal_ino, | 1826 | ip->i_num.no_formal_ino, ip->i_num.no_addr); | 
| 1894 | (unsigned long long)ip->i_num.no_addr); | 1827 | print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); | 
| 1895 | printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode)); | 1828 | print_dbg(gi, " i_flags ="); | 
| 1896 | printk(KERN_INFO " i_flags ="); | ||
| 1897 | for (x = 0; x < 32; x++) | 1829 | for (x = 0; x < 32; x++) | 
| 1898 | if (test_bit(x, &ip->i_flags)) | 1830 | if (test_bit(x, &ip->i_flags)) | 
| 1899 | printk(" %u", x); | 1831 | print_dbg(gi, " %u", x); | 
| 1900 | printk(" \n"); | 1832 | print_dbg(gi, " \n"); | 
| 1901 | 1833 | return 0; | |
| 1902 | error = 0; | ||
| 1903 | |||
| 1904 | return error; | ||
| 1905 | } | 1834 | } | 
| 1906 | 1835 | ||
| 1907 | /** | 1836 | /** | 
| @@ -1912,74 +1841,86 @@ static int dump_inode(struct gfs2_inode *ip) | |||
| 1912 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1841 | * Returns: 0 on success, -ENOBUFS when we run out of space | 
| 1913 | */ | 1842 | */ | 
| 1914 | 1843 | ||
| 1915 | static int dump_glock(struct gfs2_glock *gl) | 1844 | static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | 
| 1916 | { | 1845 | { | 
| 1917 | struct gfs2_holder *gh; | 1846 | struct gfs2_holder *gh; | 
| 1918 | unsigned int x; | 1847 | unsigned int x; | 
| 1919 | int error = -ENOBUFS; | 1848 | int error = -ENOBUFS; | 
| 1849 | struct task_struct *gl_owner; | ||
| 1920 | 1850 | ||
| 1921 | spin_lock(&gl->gl_spin); | 1851 | spin_lock(&gl->gl_spin); | 
| 1922 | 1852 | ||
| 1923 | printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, | 1853 | print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, | 
| 1924 | (unsigned long long)gl->gl_name.ln_number); | 1854 | (unsigned long long)gl->gl_name.ln_number); | 
| 1925 | printk(KERN_INFO " gl_flags ="); | 1855 | print_dbg(gi, " gl_flags ="); | 
| 1926 | for (x = 0; x < 32; x++) { | 1856 | for (x = 0; x < 32; x++) { | 
| 1927 | if (test_bit(x, &gl->gl_flags)) | 1857 | if (test_bit(x, &gl->gl_flags)) | 
| 1928 | printk(" %u", x); | 1858 | print_dbg(gi, " %u", x); | 
| 1929 | } | 1859 | } | 
| 1930 | printk(" \n"); | 1860 | if (!test_bit(GLF_LOCK, &gl->gl_flags)) | 
| 1931 | printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref)); | 1861 | print_dbg(gi, " (unlocked)"); | 
| 1932 | printk(KERN_INFO " gl_state = %u\n", gl->gl_state); | 1862 | print_dbg(gi, " \n"); | 
| 1933 | printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm); | 1863 | print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); | 
| 1934 | print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip); | 1864 | print_dbg(gi, " gl_state = %u\n", gl->gl_state); | 
| 1935 | printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | 1865 | if (gl->gl_owner_pid) { | 
| 1936 | printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | 1866 | gl_owner = find_task_by_pid(gl->gl_owner_pid); | 
| 1937 | printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | 1867 | if (gl_owner) | 
| 1938 | printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no"); | 1868 | print_dbg(gi, " gl_owner = pid %d (%s)\n", | 
| 1939 | printk(KERN_INFO " le = %s\n", | 1869 | gl->gl_owner_pid, gl_owner->comm); | 
| 1870 | else | ||
| 1871 | print_dbg(gi, " gl_owner = %d (ended)\n", | ||
| 1872 | gl->gl_owner_pid); | ||
| 1873 | } else | ||
| 1874 | print_dbg(gi, " gl_owner = -1\n"); | ||
| 1875 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); | ||
| 1876 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | ||
| 1877 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | ||
| 1878 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | ||
| 1879 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | ||
| 1880 | print_dbg(gi, " le = %s\n", | ||
| 1940 | (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); | 1881 | (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); | 
| 1941 | printk(KERN_INFO " reclaim = %s\n", | 1882 | print_dbg(gi, " reclaim = %s\n", | 
| 1942 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | 1883 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | 
| 1943 | if (gl->gl_aspace) | 1884 | if (gl->gl_aspace) | 
| 1944 | printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, | 1885 | print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, | 
| 1945 | gl->gl_aspace->i_mapping->nrpages); | 1886 | gl->gl_aspace->i_mapping->nrpages); | 
| 1946 | else | 1887 | else | 
| 1947 | printk(KERN_INFO " aspace = no\n"); | 1888 | print_dbg(gi, " aspace = no\n"); | 
| 1948 | printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count)); | 1889 | print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); | 
| 1949 | if (gl->gl_req_gh) { | 1890 | if (gl->gl_req_gh) { | 
| 1950 | error = dump_holder("Request", gl->gl_req_gh); | 1891 | error = dump_holder(gi, "Request", gl->gl_req_gh); | 
| 1951 | if (error) | 1892 | if (error) | 
| 1952 | goto out; | 1893 | goto out; | 
| 1953 | } | 1894 | } | 
| 1954 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 1895 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 
| 1955 | error = dump_holder("Holder", gh); | 1896 | error = dump_holder(gi, "Holder", gh); | 
| 1956 | if (error) | 1897 | if (error) | 
| 1957 | goto out; | 1898 | goto out; | 
| 1958 | } | 1899 | } | 
| 1959 | list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { | 1900 | list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { | 
| 1960 | error = dump_holder("Waiter1", gh); | 1901 | error = dump_holder(gi, "Waiter1", gh); | 
| 1961 | if (error) | ||
| 1962 | goto out; | ||
| 1963 | } | ||
| 1964 | list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { | ||
| 1965 | error = dump_holder("Waiter2", gh); | ||
| 1966 | if (error) | 1902 | if (error) | 
| 1967 | goto out; | 1903 | goto out; | 
| 1968 | } | 1904 | } | 
| 1969 | list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { | 1905 | list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { | 
| 1970 | error = dump_holder("Waiter3", gh); | 1906 | error = dump_holder(gi, "Waiter3", gh); | 
| 1971 | if (error) | 1907 | if (error) | 
| 1972 | goto out; | 1908 | goto out; | 
| 1973 | } | 1909 | } | 
| 1910 | if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | ||
| 1911 | print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", | ||
| 1912 | gl->gl_demote_state, | ||
| 1913 | (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); | ||
| 1914 | } | ||
| 1974 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { | 1915 | if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { | 
| 1975 | if (!test_bit(GLF_LOCK, &gl->gl_flags) && | 1916 | if (!test_bit(GLF_LOCK, &gl->gl_flags) && | 
| 1976 | list_empty(&gl->gl_holders)) { | 1917 | list_empty(&gl->gl_holders)) { | 
| 1977 | error = dump_inode(gl->gl_object); | 1918 | error = dump_inode(gi, gl->gl_object); | 
| 1978 | if (error) | 1919 | if (error) | 
| 1979 | goto out; | 1920 | goto out; | 
| 1980 | } else { | 1921 | } else { | 
| 1981 | error = -ENOBUFS; | 1922 | error = -ENOBUFS; | 
| 1982 | printk(KERN_INFO " Inode: busy\n"); | 1923 | print_dbg(gi, " Inode: busy\n"); | 
| 1983 | } | 1924 | } | 
| 1984 | } | 1925 | } | 
| 1985 | 1926 | ||
| @@ -2014,7 +1955,7 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) | |||
| 2014 | if (gl->gl_sbd != sdp) | 1955 | if (gl->gl_sbd != sdp) | 
| 2015 | continue; | 1956 | continue; | 
| 2016 | 1957 | ||
| 2017 | error = dump_glock(gl); | 1958 | error = dump_glock(NULL, gl); | 
| 2018 | if (error) | 1959 | if (error) | 
| 2019 | break; | 1960 | break; | 
| 2020 | } | 1961 | } | 
| @@ -2043,3 +1984,189 @@ int __init gfs2_glock_init(void) | |||
| 2043 | return 0; | 1984 | return 0; | 
| 2044 | } | 1985 | } | 
| 2045 | 1986 | ||
| 1987 | static int gfs2_glock_iter_next(struct glock_iter *gi) | ||
| 1988 | { | ||
| 1989 | read_lock(gl_lock_addr(gi->hash)); | ||
| 1990 | while (1) { | ||
| 1991 | if (!gi->hb_list) { /* If we don't have a hash bucket yet */ | ||
| 1992 | gi->hb_list = &gl_hash_table[gi->hash].hb_list; | ||
| 1993 | if (hlist_empty(gi->hb_list)) { | ||
| 1994 | read_unlock(gl_lock_addr(gi->hash)); | ||
| 1995 | gi->hash++; | ||
| 1996 | read_lock(gl_lock_addr(gi->hash)); | ||
| 1997 | gi->hb_list = NULL; | ||
| 1998 | if (gi->hash >= GFS2_GL_HASH_SIZE) { | ||
| 1999 | read_unlock(gl_lock_addr(gi->hash)); | ||
| 2000 | return 1; | ||
| 2001 | } | ||
| 2002 | else | ||
| 2003 | continue; | ||
| 2004 | } | ||
| 2005 | if (!hlist_empty(gi->hb_list)) { | ||
| 2006 | gi->gl = list_entry(gi->hb_list->first, | ||
| 2007 | struct gfs2_glock, | ||
| 2008 | gl_list); | ||
| 2009 | } | ||
| 2010 | } else { | ||
| 2011 | if (gi->gl->gl_list.next == NULL) { | ||
| 2012 | read_unlock(gl_lock_addr(gi->hash)); | ||
| 2013 | gi->hash++; | ||
| 2014 | read_lock(gl_lock_addr(gi->hash)); | ||
| 2015 | gi->hb_list = NULL; | ||
| 2016 | continue; | ||
| 2017 | } | ||
| 2018 | gi->gl = list_entry(gi->gl->gl_list.next, | ||
| 2019 | struct gfs2_glock, gl_list); | ||
| 2020 | } | ||
| 2021 | if (gi->gl) | ||
| 2022 | break; | ||
| 2023 | } | ||
| 2024 | read_unlock(gl_lock_addr(gi->hash)); | ||
| 2025 | return 0; | ||
| 2026 | } | ||
| 2027 | |||
| 2028 | static void gfs2_glock_iter_free(struct glock_iter *gi) | ||
| 2029 | { | ||
| 2030 | kfree(gi); | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) | ||
| 2034 | { | ||
| 2035 | struct glock_iter *gi; | ||
| 2036 | |||
| 2037 | gi = kmalloc(sizeof (*gi), GFP_KERNEL); | ||
| 2038 | if (!gi) | ||
| 2039 | return NULL; | ||
| 2040 | |||
| 2041 | gi->sdp = sdp; | ||
| 2042 | gi->hash = 0; | ||
| 2043 | gi->gl = NULL; | ||
| 2044 | gi->hb_list = NULL; | ||
| 2045 | gi->seq = NULL; | ||
| 2046 | memset(gi->string, 0, sizeof(gi->string)); | ||
| 2047 | |||
| 2048 | if (gfs2_glock_iter_next(gi)) { | ||
| 2049 | gfs2_glock_iter_free(gi); | ||
| 2050 | return NULL; | ||
| 2051 | } | ||
| 2052 | |||
| 2053 | return gi; | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) | ||
| 2057 | { | ||
| 2058 | struct glock_iter *gi; | ||
| 2059 | loff_t n = *pos; | ||
| 2060 | |||
| 2061 | gi = gfs2_glock_iter_init(file->private); | ||
| 2062 | if (!gi) | ||
| 2063 | return NULL; | ||
| 2064 | |||
| 2065 | while (n--) { | ||
| 2066 | if (gfs2_glock_iter_next(gi)) { | ||
| 2067 | gfs2_glock_iter_free(gi); | ||
| 2068 | return NULL; | ||
| 2069 | } | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | return gi; | ||
| 2073 | } | ||
| 2074 | |||
| 2075 | static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, | ||
| 2076 | loff_t *pos) | ||
| 2077 | { | ||
| 2078 | struct glock_iter *gi = iter_ptr; | ||
| 2079 | |||
| 2080 | (*pos)++; | ||
| 2081 | |||
| 2082 | if (gfs2_glock_iter_next(gi)) { | ||
| 2083 | gfs2_glock_iter_free(gi); | ||
| 2084 | return NULL; | ||
| 2085 | } | ||
| 2086 | |||
| 2087 | return gi; | ||
| 2088 | } | ||
| 2089 | |||
| 2090 | static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) | ||
| 2091 | { | ||
| 2092 | /* nothing for now */ | ||
| 2093 | } | ||
| 2094 | |||
| 2095 | static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) | ||
| 2096 | { | ||
| 2097 | struct glock_iter *gi = iter_ptr; | ||
| 2098 | |||
| 2099 | gi->seq = file; | ||
| 2100 | dump_glock(gi, gi->gl); | ||
| 2101 | |||
| 2102 | return 0; | ||
| 2103 | } | ||
| 2104 | |||
| 2105 | static struct seq_operations gfs2_glock_seq_ops = { | ||
| 2106 | .start = gfs2_glock_seq_start, | ||
| 2107 | .next = gfs2_glock_seq_next, | ||
| 2108 | .stop = gfs2_glock_seq_stop, | ||
| 2109 | .show = gfs2_glock_seq_show, | ||
| 2110 | }; | ||
| 2111 | |||
| 2112 | static int gfs2_debugfs_open(struct inode *inode, struct file *file) | ||
| 2113 | { | ||
| 2114 | struct seq_file *seq; | ||
| 2115 | int ret; | ||
| 2116 | |||
| 2117 | ret = seq_open(file, &gfs2_glock_seq_ops); | ||
| 2118 | if (ret) | ||
| 2119 | return ret; | ||
| 2120 | |||
| 2121 | seq = file->private_data; | ||
| 2122 | seq->private = inode->i_private; | ||
| 2123 | |||
| 2124 | return 0; | ||
| 2125 | } | ||
| 2126 | |||
| 2127 | static const struct file_operations gfs2_debug_fops = { | ||
| 2128 | .owner = THIS_MODULE, | ||
| 2129 | .open = gfs2_debugfs_open, | ||
| 2130 | .read = seq_read, | ||
| 2131 | .llseek = seq_lseek, | ||
| 2132 | .release = seq_release | ||
| 2133 | }; | ||
| 2134 | |||
| 2135 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) | ||
| 2136 | { | ||
| 2137 | sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); | ||
| 2138 | if (!sdp->debugfs_dir) | ||
| 2139 | return -ENOMEM; | ||
| 2140 | sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", | ||
| 2141 | S_IFREG | S_IRUGO, | ||
| 2142 | sdp->debugfs_dir, sdp, | ||
| 2143 | &gfs2_debug_fops); | ||
| 2144 | if (!sdp->debugfs_dentry_glocks) | ||
| 2145 | return -ENOMEM; | ||
| 2146 | |||
| 2147 | return 0; | ||
| 2148 | } | ||
| 2149 | |||
| 2150 | void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) | ||
| 2151 | { | ||
| 2152 | if (sdp && sdp->debugfs_dir) { | ||
| 2153 | if (sdp->debugfs_dentry_glocks) { | ||
| 2154 | debugfs_remove(sdp->debugfs_dentry_glocks); | ||
| 2155 | sdp->debugfs_dentry_glocks = NULL; | ||
| 2156 | } | ||
| 2157 | debugfs_remove(sdp->debugfs_dir); | ||
| 2158 | sdp->debugfs_dir = NULL; | ||
| 2159 | } | ||
| 2160 | } | ||
| 2161 | |||
| 2162 | int gfs2_register_debugfs(void) | ||
| 2163 | { | ||
| 2164 | gfs2_root = debugfs_create_dir("gfs2", NULL); | ||
| 2165 | return gfs2_root ? 0 : -ENOMEM; | ||
| 2166 | } | ||
| 2167 | |||
| 2168 | void gfs2_unregister_debugfs(void) | ||
| 2169 | { | ||
| 2170 | debugfs_remove(gfs2_root); | ||
| 2171 | gfs2_root = NULL; | ||
| 2172 | } | ||
| diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index f50e40ceca43..11477ca3a3c0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -38,7 +38,7 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | |||
| 38 | /* Look in glock's list of holders for one with current task as owner */ | 38 | /* Look in glock's list of holders for one with current task as owner */ | 
| 39 | spin_lock(&gl->gl_spin); | 39 | spin_lock(&gl->gl_spin); | 
| 40 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 40 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 
| 41 | if (gh->gh_owner == current) { | 41 | if (gh->gh_owner_pid == current->pid) { | 
| 42 | locked = 1; | 42 | locked = 1; | 
| 43 | break; | 43 | break; | 
| 44 | } | 44 | } | 
| @@ -67,7 +67,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) | |||
| 67 | { | 67 | { | 
| 68 | int ret; | 68 | int ret; | 
| 69 | spin_lock(&gl->gl_spin); | 69 | spin_lock(&gl->gl_spin); | 
| 70 | ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3); | 70 | ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); | 
| 71 | spin_unlock(&gl->gl_spin); | 71 | spin_unlock(&gl->gl_spin); | 
| 72 | return ret; | 72 | return ret; | 
| 73 | } | 73 | } | 
| @@ -135,5 +135,9 @@ void gfs2_scand_internal(struct gfs2_sbd *sdp); | |||
| 135 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); | 135 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); | 
| 136 | 136 | ||
| 137 | int __init gfs2_glock_init(void); | 137 | int __init gfs2_glock_init(void); | 
| 138 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); | ||
| 139 | void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); | ||
| 140 | int gfs2_register_debugfs(void); | ||
| 141 | void gfs2_unregister_debugfs(void); | ||
| 138 | 142 | ||
| 139 | #endif /* __GLOCK_DOT_H__ */ | 143 | #endif /* __GLOCK_DOT_H__ */ | 
| diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 49f0dbf40d86..d995441373ab 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -115,11 +115,8 @@ enum { | |||
| 115 | /* Actions */ | 115 | /* Actions */ | 
| 116 | HIF_MUTEX = 0, | 116 | HIF_MUTEX = 0, | 
| 117 | HIF_PROMOTE = 1, | 117 | HIF_PROMOTE = 1, | 
| 118 | HIF_DEMOTE = 2, | ||
| 119 | 118 | ||
| 120 | /* States */ | 119 | /* States */ | 
| 121 | HIF_ALLOCED = 4, | ||
| 122 | HIF_DEALLOC = 5, | ||
| 123 | HIF_HOLDER = 6, | 120 | HIF_HOLDER = 6, | 
| 124 | HIF_FIRST = 7, | 121 | HIF_FIRST = 7, | 
| 125 | HIF_ABORTED = 9, | 122 | HIF_ABORTED = 9, | 
| @@ -130,7 +127,7 @@ struct gfs2_holder { | |||
| 130 | struct list_head gh_list; | 127 | struct list_head gh_list; | 
| 131 | 128 | ||
| 132 | struct gfs2_glock *gh_gl; | 129 | struct gfs2_glock *gh_gl; | 
| 133 | struct task_struct *gh_owner; | 130 | pid_t gh_owner_pid; | 
| 134 | unsigned int gh_state; | 131 | unsigned int gh_state; | 
| 135 | unsigned gh_flags; | 132 | unsigned gh_flags; | 
| 136 | 133 | ||
| @@ -142,8 +139,8 @@ struct gfs2_holder { | |||
| 142 | enum { | 139 | enum { | 
| 143 | GLF_LOCK = 1, | 140 | GLF_LOCK = 1, | 
| 144 | GLF_STICKY = 2, | 141 | GLF_STICKY = 2, | 
| 142 | GLF_DEMOTE = 3, | ||
| 145 | GLF_DIRTY = 5, | 143 | GLF_DIRTY = 5, | 
| 146 | GLF_SKIP_WAITERS2 = 6, | ||
| 147 | }; | 144 | }; | 
| 148 | 145 | ||
| 149 | struct gfs2_glock { | 146 | struct gfs2_glock { | 
| @@ -156,11 +153,12 @@ struct gfs2_glock { | |||
| 156 | 153 | ||
| 157 | unsigned int gl_state; | 154 | unsigned int gl_state; | 
| 158 | unsigned int gl_hash; | 155 | unsigned int gl_hash; | 
| 159 | struct task_struct *gl_owner; | 156 | unsigned int gl_demote_state; /* state requested by remote node */ | 
| 157 | unsigned long gl_demote_time; /* time of first demote request */ | ||
| 158 | pid_t gl_owner_pid; | ||
| 160 | unsigned long gl_ip; | 159 | unsigned long gl_ip; | 
| 161 | struct list_head gl_holders; | 160 | struct list_head gl_holders; | 
| 162 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 161 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 
| 163 | struct list_head gl_waiters2; /* HIF_DEMOTE */ | ||
| 164 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 162 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 
| 165 | 163 | ||
| 166 | const struct gfs2_glock_operations *gl_ops; | 164 | const struct gfs2_glock_operations *gl_ops; | 
| @@ -611,6 +609,8 @@ struct gfs2_sbd { | |||
| 611 | 609 | ||
| 612 | unsigned long sd_last_warning; | 610 | unsigned long sd_last_warning; | 
| 613 | struct vfsmount *sd_gfs2mnt; | 611 | struct vfsmount *sd_gfs2mnt; | 
| 612 | struct dentry *debugfs_dir; /* debugfs directory */ | ||
| 613 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ | ||
| 614 | }; | 614 | }; | 
| 615 | 615 | ||
| 616 | #endif /* __INCORE_DOT_H__ */ | 616 | #endif /* __INCORE_DOT_H__ */ | 
| diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index b167addf9fd1..c305255bfe8a 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c | |||
| @@ -151,7 +151,7 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, | |||
| 151 | 151 | ||
| 152 | /* make_strname - convert GFS lock numbers to a string */ | 152 | /* make_strname - convert GFS lock numbers to a string */ | 
| 153 | 153 | ||
| 154 | static inline void make_strname(struct lm_lockname *lockname, | 154 | static inline void make_strname(const struct lm_lockname *lockname, | 
| 155 | struct gdlm_strname *str) | 155 | struct gdlm_strname *str) | 
| 156 | { | 156 | { | 
| 157 | sprintf(str->name, "%8x%16llx", lockname->ln_type, | 157 | sprintf(str->name, "%8x%16llx", lockname->ln_type, | 
| @@ -169,6 +169,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
| 169 | return -ENOMEM; | 169 | return -ENOMEM; | 
| 170 | 170 | ||
| 171 | lp->lockname = *name; | 171 | lp->lockname = *name; | 
| 172 | make_strname(name, &lp->strname); | ||
| 172 | lp->ls = ls; | 173 | lp->ls = ls; | 
| 173 | lp->cur = DLM_LOCK_IV; | 174 | lp->cur = DLM_LOCK_IV; | 
| 174 | lp->lvb = NULL; | 175 | lp->lvb = NULL; | 
| @@ -227,7 +228,6 @@ void gdlm_put_lock(void *lock) | |||
| 227 | unsigned int gdlm_do_lock(struct gdlm_lock *lp) | 228 | unsigned int gdlm_do_lock(struct gdlm_lock *lp) | 
| 228 | { | 229 | { | 
| 229 | struct gdlm_ls *ls = lp->ls; | 230 | struct gdlm_ls *ls = lp->ls; | 
| 230 | struct gdlm_strname str; | ||
| 231 | int error, bast = 1; | 231 | int error, bast = 1; | 
| 232 | 232 | ||
| 233 | /* | 233 | /* | 
| @@ -249,8 +249,6 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) | |||
| 249 | if (test_bit(LFL_NOBAST, &lp->flags)) | 249 | if (test_bit(LFL_NOBAST, &lp->flags)) | 
| 250 | bast = 0; | 250 | bast = 0; | 
| 251 | 251 | ||
| 252 | make_strname(&lp->lockname, &str); | ||
| 253 | |||
| 254 | set_bit(LFL_ACTIVE, &lp->flags); | 252 | set_bit(LFL_ACTIVE, &lp->flags); | 
| 255 | 253 | ||
| 256 | log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type, | 254 | log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type, | 
| @@ -258,8 +256,8 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) | |||
| 258 | lp->cur, lp->req, lp->lkf); | 256 | lp->cur, lp->req, lp->lkf); | 
| 259 | 257 | ||
| 260 | error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf, | 258 | error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf, | 
| 261 | str.name, str.namelen, 0, gdlm_ast, lp, | 259 | lp->strname.name, lp->strname.namelen, 0, gdlm_ast, | 
| 262 | bast ? gdlm_bast : NULL); | 260 | lp, bast ? gdlm_bast : NULL); | 
| 263 | 261 | ||
| 264 | if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { | 262 | if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { | 
| 265 | lp->lksb.sb_status = -EAGAIN; | 263 | lp->lksb.sb_status = -EAGAIN; | 
| @@ -268,7 +266,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) | |||
| 268 | } | 266 | } | 
| 269 | 267 | ||
| 270 | if (error) { | 268 | if (error) { | 
| 271 | log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " | 269 | log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " | 
| 272 | "flags=%lx", ls->fsname, lp->lockname.ln_type, | 270 | "flags=%lx", ls->fsname, lp->lockname.ln_type, | 
| 273 | (unsigned long long)lp->lockname.ln_number, error, | 271 | (unsigned long long)lp->lockname.ln_number, error, | 
| 274 | lp->cur, lp->req, lp->lkf, lp->flags); | 272 | lp->cur, lp->req, lp->lkf, lp->flags); | 
| @@ -296,7 +294,7 @@ static unsigned int gdlm_do_unlock(struct gdlm_lock *lp) | |||
| 296 | error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp); | 294 | error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp); | 
| 297 | 295 | ||
| 298 | if (error) { | 296 | if (error) { | 
| 299 | log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " | 297 | log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " | 
| 300 | "flags=%lx", ls->fsname, lp->lockname.ln_type, | 298 | "flags=%lx", ls->fsname, lp->lockname.ln_type, | 
| 301 | (unsigned long long)lp->lockname.ln_number, error, | 299 | (unsigned long long)lp->lockname.ln_number, error, | 
| 302 | lp->cur, lp->req, lp->lkf, lp->flags); | 300 | lp->cur, lp->req, lp->lkf, lp->flags); | 
| diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index a87c7bf3c568..d074c6e6f9bf 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
| @@ -36,7 +36,7 @@ | |||
| 36 | 36 | ||
| 37 | #define GDLM_STRNAME_BYTES 24 | 37 | #define GDLM_STRNAME_BYTES 24 | 
| 38 | #define GDLM_LVB_SIZE 32 | 38 | #define GDLM_LVB_SIZE 32 | 
| 39 | #define GDLM_DROP_COUNT 200000 | 39 | #define GDLM_DROP_COUNT 0 | 
| 40 | #define GDLM_DROP_PERIOD 60 | 40 | #define GDLM_DROP_PERIOD 60 | 
| 41 | #define GDLM_NAME_LEN 128 | 41 | #define GDLM_NAME_LEN 128 | 
| 42 | 42 | ||
| @@ -106,6 +106,7 @@ enum { | |||
| 106 | struct gdlm_lock { | 106 | struct gdlm_lock { | 
| 107 | struct gdlm_ls *ls; | 107 | struct gdlm_ls *ls; | 
| 108 | struct lm_lockname lockname; | 108 | struct lm_lockname lockname; | 
| 109 | struct gdlm_strname strname; | ||
| 109 | char *lvb; | 110 | char *lvb; | 
| 110 | struct dlm_lksb lksb; | 111 | struct dlm_lksb lksb; | 
| 111 | 112 | ||
| diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 16bb4b4561ae..f82d84d05d23 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
| @@ -33,16 +33,17 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
| 33 | 33 | ||
| 34 | tr->tr_touched = 1; | 34 | tr->tr_touched = 1; | 
| 35 | 35 | ||
| 36 | if (!list_empty(&le->le_list)) | ||
| 37 | return; | ||
| 38 | |||
| 39 | gl = container_of(le, struct gfs2_glock, gl_le); | 36 | gl = container_of(le, struct gfs2_glock, gl_le); | 
| 40 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) | 37 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) | 
| 41 | return; | 38 | return; | 
| 42 | gfs2_glock_hold(gl); | ||
| 43 | set_bit(GLF_DIRTY, &gl->gl_flags); | ||
| 44 | 39 | ||
| 45 | gfs2_log_lock(sdp); | 40 | gfs2_log_lock(sdp); | 
| 41 | if (!list_empty(&le->le_list)){ | ||
| 42 | gfs2_log_unlock(sdp); | ||
| 43 | return; | ||
| 44 | } | ||
| 45 | gfs2_glock_hold(gl); | ||
| 46 | set_bit(GLF_DIRTY, &gl->gl_flags); | ||
| 46 | sdp->sd_log_num_gl++; | 47 | sdp->sd_log_num_gl++; | 
| 47 | list_add(&le->le_list, &sdp->sd_log_le_gl); | 48 | list_add(&le->le_list, &sdp->sd_log_le_gl); | 
| 48 | gfs2_log_unlock(sdp); | 49 | gfs2_log_unlock(sdp); | 
| @@ -415,13 +416,14 @@ static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
| 415 | 416 | ||
| 416 | tr->tr_touched = 1; | 417 | tr->tr_touched = 1; | 
| 417 | 418 | ||
| 418 | if (!list_empty(&le->le_list)) | ||
| 419 | return; | ||
| 420 | |||
| 421 | rgd = container_of(le, struct gfs2_rgrpd, rd_le); | 419 | rgd = container_of(le, struct gfs2_rgrpd, rd_le); | 
| 422 | gfs2_rgrp_bh_hold(rgd); | ||
| 423 | 420 | ||
| 424 | gfs2_log_lock(sdp); | 421 | gfs2_log_lock(sdp); | 
| 422 | if (!list_empty(&le->le_list)){ | ||
| 423 | gfs2_log_unlock(sdp); | ||
| 424 | return; | ||
| 425 | } | ||
| 426 | gfs2_rgrp_bh_hold(rgd); | ||
| 425 | sdp->sd_log_num_rg++; | 427 | sdp->sd_log_num_rg++; | 
| 426 | list_add(&le->le_list, &sdp->sd_log_le_rg); | 428 | list_add(&le->le_list, &sdp->sd_log_le_rg); | 
| 427 | gfs2_log_unlock(sdp); | 429 | gfs2_log_unlock(sdp); | 
| diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index e2bffae683cc..e460487c0557 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -43,7 +43,6 @@ static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned | |||
| 43 | spin_lock_init(&gl->gl_spin); | 43 | spin_lock_init(&gl->gl_spin); | 
| 44 | INIT_LIST_HEAD(&gl->gl_holders); | 44 | INIT_LIST_HEAD(&gl->gl_holders); | 
| 45 | INIT_LIST_HEAD(&gl->gl_waiters1); | 45 | INIT_LIST_HEAD(&gl->gl_waiters1); | 
| 46 | INIT_LIST_HEAD(&gl->gl_waiters2); | ||
| 47 | INIT_LIST_HEAD(&gl->gl_waiters3); | 46 | INIT_LIST_HEAD(&gl->gl_waiters3); | 
| 48 | gl->gl_lvb = NULL; | 47 | gl->gl_lvb = NULL; | 
| 49 | atomic_set(&gl->gl_lvb_count, 0); | 48 | atomic_set(&gl->gl_lvb_count, 0); | 
| @@ -101,6 +100,8 @@ static int __init init_gfs2_fs(void) | |||
| 101 | if (error) | 100 | if (error) | 
| 102 | goto fail_unregister; | 101 | goto fail_unregister; | 
| 103 | 102 | ||
| 103 | gfs2_register_debugfs(); | ||
| 104 | |||
| 104 | printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); | 105 | printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); | 
| 105 | 106 | ||
| 106 | return 0; | 107 | return 0; | 
| @@ -128,6 +129,7 @@ fail: | |||
| 128 | 129 | ||
| 129 | static void __exit exit_gfs2_fs(void) | 130 | static void __exit exit_gfs2_fs(void) | 
| 130 | { | 131 | { | 
| 132 | gfs2_unregister_debugfs(); | ||
| 131 | unregister_filesystem(&gfs2_fs_type); | 133 | unregister_filesystem(&gfs2_fs_type); | 
| 132 | unregister_filesystem(&gfs2meta_fs_type); | 134 | unregister_filesystem(&gfs2meta_fs_type); | 
| 133 | 135 | ||
| diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index 32caecd20300..4864659555d4 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> | 
| 14 | #include <linux/gfs2_ondisk.h> | 14 | #include <linux/gfs2_ondisk.h> | 
| 15 | #include <linux/lm_interface.h> | 15 | #include <linux/lm_interface.h> | 
| 16 | #include <linux/parser.h> | ||
| 16 | 17 | ||
| 17 | #include "gfs2.h" | 18 | #include "gfs2.h" | 
| 18 | #include "incore.h" | 19 | #include "incore.h" | 
| @@ -20,6 +21,52 @@ | |||
| 20 | #include "sys.h" | 21 | #include "sys.h" | 
| 21 | #include "util.h" | 22 | #include "util.h" | 
| 22 | 23 | ||
| 24 | enum { | ||
| 25 | Opt_lockproto, | ||
| 26 | Opt_locktable, | ||
| 27 | Opt_hostdata, | ||
| 28 | Opt_spectator, | ||
| 29 | Opt_ignore_local_fs, | ||
| 30 | Opt_localflocks, | ||
| 31 | Opt_localcaching, | ||
| 32 | Opt_debug, | ||
| 33 | Opt_nodebug, | ||
| 34 | Opt_upgrade, | ||
| 35 | Opt_num_glockd, | ||
| 36 | Opt_acl, | ||
| 37 | Opt_noacl, | ||
| 38 | Opt_quota_off, | ||
| 39 | Opt_quota_account, | ||
| 40 | Opt_quota_on, | ||
| 41 | Opt_suiddir, | ||
| 42 | Opt_nosuiddir, | ||
| 43 | Opt_data_writeback, | ||
| 44 | Opt_data_ordered, | ||
| 45 | }; | ||
| 46 | |||
| 47 | static match_table_t tokens = { | ||
| 48 | {Opt_lockproto, "lockproto=%s"}, | ||
| 49 | {Opt_locktable, "locktable=%s"}, | ||
| 50 | {Opt_hostdata, "hostdata=%s"}, | ||
| 51 | {Opt_spectator, "spectator"}, | ||
| 52 | {Opt_ignore_local_fs, "ignore_local_fs"}, | ||
| 53 | {Opt_localflocks, "localflocks"}, | ||
| 54 | {Opt_localcaching, "localcaching"}, | ||
| 55 | {Opt_debug, "debug"}, | ||
| 56 | {Opt_nodebug, "nodebug"}, | ||
| 57 | {Opt_upgrade, "upgrade"}, | ||
| 58 | {Opt_num_glockd, "num_glockd=%d"}, | ||
| 59 | {Opt_acl, "acl"}, | ||
| 60 | {Opt_noacl, "noacl"}, | ||
| 61 | {Opt_quota_off, "quota=off"}, | ||
| 62 | {Opt_quota_account, "quota=account"}, | ||
| 63 | {Opt_quota_on, "quota=on"}, | ||
| 64 | {Opt_suiddir, "suiddir"}, | ||
| 65 | {Opt_nosuiddir, "nosuiddir"}, | ||
| 66 | {Opt_data_writeback, "data=writeback"}, | ||
| 67 | {Opt_data_ordered, "data=ordered"} | ||
| 68 | }; | ||
| 69 | |||
| 23 | /** | 70 | /** | 
| 24 | * gfs2_mount_args - Parse mount options | 71 | * gfs2_mount_args - Parse mount options | 
| 25 | * @sdp: | 72 | * @sdp: | 
| @@ -54,146 +101,150 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
| 54 | process them */ | 101 | process them */ | 
| 55 | 102 | ||
| 56 | for (options = data; (o = strsep(&options, ",")); ) { | 103 | for (options = data; (o = strsep(&options, ",")); ) { | 
| 104 | int token, option; | ||
| 105 | substring_t tmp[MAX_OPT_ARGS]; | ||
| 106 | |||
| 57 | if (!*o) | 107 | if (!*o) | 
| 58 | continue; | 108 | continue; | 
| 59 | 109 | ||
| 60 | v = strchr(o, '='); | 110 | token = match_token(o, tokens, tmp); | 
| 61 | if (v) | 111 | switch (token) { | 
| 62 | *v++ = 0; | 112 | case Opt_lockproto: | 
| 113 | v = match_strdup(&tmp[0]); | ||
| 114 | if (!v) { | ||
| 115 | fs_info(sdp, "no memory for lockproto\n"); | ||
| 116 | error = -ENOMEM; | ||
| 117 | goto out_error; | ||
| 118 | } | ||
| 63 | 119 | ||
| 64 | if (!strcmp(o, "lockproto")) { | 120 | if (remount && strcmp(v, args->ar_lockproto)) { | 
| 65 | if (!v) | 121 | kfree(v); | 
| 66 | goto need_value; | ||
| 67 | if (remount && strcmp(v, args->ar_lockproto)) | ||
| 68 | goto cant_remount; | 122 | goto cant_remount; | 
| 123 | } | ||
| 124 | |||
| 69 | strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN); | 125 | strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN); | 
| 70 | args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0; | 126 | args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0; | 
| 71 | } | 127 | kfree(v); | 
| 128 | break; | ||
| 129 | case Opt_locktable: | ||
| 130 | v = match_strdup(&tmp[0]); | ||
| 131 | if (!v) { | ||
| 132 | fs_info(sdp, "no memory for locktable\n"); | ||
| 133 | error = -ENOMEM; | ||
| 134 | goto out_error; | ||
| 135 | } | ||
| 72 | 136 | ||
| 73 | else if (!strcmp(o, "locktable")) { | 137 | if (remount && strcmp(v, args->ar_locktable)) { | 
| 74 | if (!v) | 138 | kfree(v); | 
| 75 | goto need_value; | ||
| 76 | if (remount && strcmp(v, args->ar_locktable)) | ||
| 77 | goto cant_remount; | 139 | goto cant_remount; | 
| 140 | } | ||
| 141 | |||
| 78 | strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN); | 142 | strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN); | 
| 79 | args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; | 143 | args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; | 
| 80 | } | 144 | kfree(v); | 
| 145 | break; | ||
| 146 | case Opt_hostdata: | ||
| 147 | v = match_strdup(&tmp[0]); | ||
| 148 | if (!v) { | ||
| 149 | fs_info(sdp, "no memory for hostdata\n"); | ||
| 150 | error = -ENOMEM; | ||
| 151 | goto out_error; | ||
| 152 | } | ||
| 81 | 153 | ||
| 82 | else if (!strcmp(o, "hostdata")) { | 154 | if (remount && strcmp(v, args->ar_hostdata)) { | 
| 83 | if (!v) | 155 | kfree(v); | 
| 84 | goto need_value; | ||
| 85 | if (remount && strcmp(v, args->ar_hostdata)) | ||
| 86 | goto cant_remount; | 156 | goto cant_remount; | 
| 157 | } | ||
| 158 | |||
| 87 | strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN); | 159 | strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN); | 
| 88 | args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0; | 160 | args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0; | 
| 89 | } | 161 | kfree(v); | 
| 90 | 162 | break; | |
| 91 | else if (!strcmp(o, "spectator")) { | 163 | case Opt_spectator: | 
| 92 | if (remount && !args->ar_spectator) | 164 | if (remount && !args->ar_spectator) | 
| 93 | goto cant_remount; | 165 | goto cant_remount; | 
| 94 | args->ar_spectator = 1; | 166 | args->ar_spectator = 1; | 
| 95 | sdp->sd_vfs->s_flags |= MS_RDONLY; | 167 | sdp->sd_vfs->s_flags |= MS_RDONLY; | 
| 96 | } | 168 | break; | 
| 97 | 169 | case Opt_ignore_local_fs: | |
| 98 | else if (!strcmp(o, "ignore_local_fs")) { | ||
| 99 | if (remount && !args->ar_ignore_local_fs) | 170 | if (remount && !args->ar_ignore_local_fs) | 
| 100 | goto cant_remount; | 171 | goto cant_remount; | 
| 101 | args->ar_ignore_local_fs = 1; | 172 | args->ar_ignore_local_fs = 1; | 
| 102 | } | 173 | break; | 
| 103 | 174 | case Opt_localflocks: | |
| 104 | else if (!strcmp(o, "localflocks")) { | ||
| 105 | if (remount && !args->ar_localflocks) | 175 | if (remount && !args->ar_localflocks) | 
| 106 | goto cant_remount; | 176 | goto cant_remount; | 
| 107 | args->ar_localflocks = 1; | 177 | args->ar_localflocks = 1; | 
| 108 | } | 178 | break; | 
| 109 | 179 | case Opt_localcaching: | |
| 110 | else if (!strcmp(o, "localcaching")) { | ||
| 111 | if (remount && !args->ar_localcaching) | 180 | if (remount && !args->ar_localcaching) | 
| 112 | goto cant_remount; | 181 | goto cant_remount; | 
| 113 | args->ar_localcaching = 1; | 182 | args->ar_localcaching = 1; | 
| 114 | } | 183 | break; | 
| 115 | 184 | case Opt_debug: | |
| 116 | else if (!strcmp(o, "debug")) | ||
| 117 | args->ar_debug = 1; | 185 | args->ar_debug = 1; | 
| 118 | 186 | break; | |
| 119 | else if (!strcmp(o, "nodebug")) | 187 | case Opt_nodebug: | 
| 120 | args->ar_debug = 0; | 188 | args->ar_debug = 0; | 
| 121 | 189 | break; | |
| 122 | else if (!strcmp(o, "upgrade")) { | 190 | case Opt_upgrade: | 
| 123 | if (remount && !args->ar_upgrade) | 191 | if (remount && !args->ar_upgrade) | 
| 124 | goto cant_remount; | 192 | goto cant_remount; | 
| 125 | args->ar_upgrade = 1; | 193 | args->ar_upgrade = 1; | 
| 126 | } | 194 | break; | 
| 195 | case Opt_num_glockd: | ||
| 196 | if ((error = match_int(&tmp[0], &option))) { | ||
| 197 | fs_info(sdp, "problem getting num_glockd\n"); | ||
| 198 | goto out_error; | ||
| 199 | } | ||
| 127 | 200 | ||
| 128 | else if (!strcmp(o, "num_glockd")) { | 201 | if (remount && option != args->ar_num_glockd) | 
| 129 | unsigned int x; | ||
| 130 | if (!v) | ||
| 131 | goto need_value; | ||
| 132 | sscanf(v, "%u", &x); | ||
| 133 | if (remount && x != args->ar_num_glockd) | ||
| 134 | goto cant_remount; | 202 | goto cant_remount; | 
| 135 | if (!x || x > GFS2_GLOCKD_MAX) { | 203 | if (!option || option > GFS2_GLOCKD_MAX) { | 
| 136 | fs_info(sdp, "0 < num_glockd <= %u (not %u)\n", | 204 | fs_info(sdp, "0 < num_glockd <= %u (not %u)\n", | 
| 137 | GFS2_GLOCKD_MAX, x); | 205 | GFS2_GLOCKD_MAX, option); | 
| 138 | error = -EINVAL; | 206 | error = -EINVAL; | 
| 139 | break; | 207 | goto out_error; | 
| 140 | } | 208 | } | 
| 141 | args->ar_num_glockd = x; | 209 | args->ar_num_glockd = option; | 
| 142 | } | 210 | break; | 
| 143 | 211 | case Opt_acl: | |
| 144 | else if (!strcmp(o, "acl")) { | ||
| 145 | args->ar_posix_acl = 1; | 212 | args->ar_posix_acl = 1; | 
| 146 | sdp->sd_vfs->s_flags |= MS_POSIXACL; | 213 | sdp->sd_vfs->s_flags |= MS_POSIXACL; | 
| 147 | } | 214 | break; | 
| 148 | 215 | case Opt_noacl: | |
| 149 | else if (!strcmp(o, "noacl")) { | ||
| 150 | args->ar_posix_acl = 0; | 216 | args->ar_posix_acl = 0; | 
| 151 | sdp->sd_vfs->s_flags &= ~MS_POSIXACL; | 217 | sdp->sd_vfs->s_flags &= ~MS_POSIXACL; | 
| 152 | } | 218 | break; | 
| 153 | 219 | case Opt_quota_off: | |
| 154 | else if (!strcmp(o, "quota")) { | 220 | args->ar_quota = GFS2_QUOTA_OFF; | 
| 155 | if (!v) | 221 | break; | 
| 156 | goto need_value; | 222 | case Opt_quota_account: | 
| 157 | if (!strcmp(v, "off")) | 223 | args->ar_quota = GFS2_QUOTA_ACCOUNT; | 
| 158 | args->ar_quota = GFS2_QUOTA_OFF; | 224 | break; | 
| 159 | else if (!strcmp(v, "account")) | 225 | case Opt_quota_on: | 
| 160 | args->ar_quota = GFS2_QUOTA_ACCOUNT; | 226 | args->ar_quota = GFS2_QUOTA_ON; | 
| 161 | else if (!strcmp(v, "on")) | 227 | break; | 
| 162 | args->ar_quota = GFS2_QUOTA_ON; | 228 | case Opt_suiddir: | 
| 163 | else { | ||
| 164 | fs_info(sdp, "invalid value for quota\n"); | ||
| 165 | error = -EINVAL; | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | else if (!strcmp(o, "suiddir")) | ||
| 171 | args->ar_suiddir = 1; | 229 | args->ar_suiddir = 1; | 
| 172 | 230 | break; | |
| 173 | else if (!strcmp(o, "nosuiddir")) | 231 | case Opt_nosuiddir: | 
| 174 | args->ar_suiddir = 0; | 232 | args->ar_suiddir = 0; | 
| 175 | 233 | break; | |
| 176 | else if (!strcmp(o, "data")) { | 234 | case Opt_data_writeback: | 
| 177 | if (!v) | 235 | args->ar_data = GFS2_DATA_WRITEBACK; | 
| 178 | goto need_value; | 236 | break; | 
| 179 | if (!strcmp(v, "writeback")) | 237 | case Opt_data_ordered: | 
| 180 | args->ar_data = GFS2_DATA_WRITEBACK; | 238 | args->ar_data = GFS2_DATA_ORDERED; | 
| 181 | else if (!strcmp(v, "ordered")) | 239 | break; | 
| 182 | args->ar_data = GFS2_DATA_ORDERED; | 240 | default: | 
| 183 | else { | ||
| 184 | fs_info(sdp, "invalid value for data\n"); | ||
| 185 | error = -EINVAL; | ||
| 186 | break; | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | else { | ||
| 191 | fs_info(sdp, "unknown option: %s\n", o); | 241 | fs_info(sdp, "unknown option: %s\n", o); | 
| 192 | error = -EINVAL; | 242 | error = -EINVAL; | 
| 193 | break; | 243 | goto out_error; | 
| 194 | } | 244 | } | 
| 195 | } | 245 | } | 
| 196 | 246 | ||
| 247 | out_error: | ||
| 197 | if (error) | 248 | if (error) | 
| 198 | fs_info(sdp, "invalid mount option(s)\n"); | 249 | fs_info(sdp, "invalid mount option(s)\n"); | 
| 199 | 250 | ||
| @@ -202,10 +253,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
| 202 | 253 | ||
| 203 | return error; | 254 | return error; | 
| 204 | 255 | ||
| 205 | need_value: | ||
| 206 | fs_info(sdp, "need value for option %s\n", o); | ||
| 207 | return -EINVAL; | ||
| 208 | |||
| 209 | cant_remount: | 256 | cant_remount: | 
| 210 | fs_info(sdp, "can't remount with option %s\n", o); | 257 | fs_info(sdp, "can't remount with option %s\n", o); | 
| 211 | return -EINVAL; | 258 | return -EINVAL; | 
| diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index b3b7e8475359..30c15622174f 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
| @@ -197,7 +197,19 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | |||
| 197 | void *kaddr; | 197 | void *kaddr; | 
| 198 | int error; | 198 | int error; | 
| 199 | 199 | ||
| 200 | BUG_ON(page->index); | 200 | /* | 
| 201 | * Due to the order of unstuffing files and ->nopage(), we can be | ||
| 202 | * asked for a zero page in the case of a stuffed file being extended, | ||
| 203 | * so we need to supply one here. It doesn't happen often. | ||
| 204 | */ | ||
| 205 | if (unlikely(page->index)) { | ||
| 206 | kaddr = kmap_atomic(page, KM_USER0); | ||
| 207 | memset(kaddr, 0, PAGE_CACHE_SIZE); | ||
| 208 | kunmap_atomic(kaddr, KM_USER0); | ||
| 209 | flush_dcache_page(page); | ||
| 210 | SetPageUptodate(page); | ||
| 211 | return 0; | ||
| 212 | } | ||
| 201 | 213 | ||
| 202 | error = gfs2_meta_inode_buffer(ip, &dibh); | 214 | error = gfs2_meta_inode_buffer(ip, &dibh); | 
| 203 | if (error) | 215 | if (error) | 
| @@ -208,9 +220,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | |||
| 208 | ip->i_di.di_size); | 220 | ip->i_di.di_size); | 
| 209 | memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); | 221 | memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); | 
| 210 | kunmap_atomic(kaddr, KM_USER0); | 222 | kunmap_atomic(kaddr, KM_USER0); | 
| 211 | 223 | flush_dcache_page(page); | |
| 212 | brelse(dibh); | 224 | brelse(dibh); | 
| 213 | |||
| 214 | SetPageUptodate(page); | 225 | SetPageUptodate(page); | 
| 215 | 226 | ||
| 216 | return 0; | 227 | return 0; | 
| @@ -507,7 +518,9 @@ static int gfs2_commit_write(struct file *file, struct page *page, | |||
| 507 | gfs2_quota_unlock(ip); | 518 | gfs2_quota_unlock(ip); | 
| 508 | gfs2_alloc_put(ip); | 519 | gfs2_alloc_put(ip); | 
| 509 | } | 520 | } | 
| 521 | unlock_page(page); | ||
| 510 | gfs2_glock_dq_m(1, &ip->i_gh); | 522 | gfs2_glock_dq_m(1, &ip->i_gh); | 
| 523 | lock_page(page); | ||
| 511 | gfs2_holder_uninit(&ip->i_gh); | 524 | gfs2_holder_uninit(&ip->i_gh); | 
| 512 | return 0; | 525 | return 0; | 
| 513 | 526 | ||
| @@ -520,7 +533,9 @@ fail_endtrans: | |||
| 520 | gfs2_quota_unlock(ip); | 533 | gfs2_quota_unlock(ip); | 
| 521 | gfs2_alloc_put(ip); | 534 | gfs2_alloc_put(ip); | 
| 522 | } | 535 | } | 
| 536 | unlock_page(page); | ||
| 523 | gfs2_glock_dq_m(1, &ip->i_gh); | 537 | gfs2_glock_dq_m(1, &ip->i_gh); | 
| 538 | lock_page(page); | ||
| 524 | gfs2_holder_uninit(&ip->i_gh); | 539 | gfs2_holder_uninit(&ip->i_gh); | 
| 525 | fail_nounlock: | 540 | fail_nounlock: | 
| 526 | ClearPageUptodate(page); | 541 | ClearPageUptodate(page); | 
| diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ee54cb667083..2c5f8e7def0d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -690,6 +690,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
| 690 | if (error) | 690 | if (error) | 
| 691 | goto fail; | 691 | goto fail; | 
| 692 | 692 | ||
| 693 | gfs2_create_debugfs_file(sdp); | ||
| 694 | |||
| 693 | error = gfs2_sys_fs_add(sdp); | 695 | error = gfs2_sys_fs_add(sdp); | 
| 694 | if (error) | 696 | if (error) | 
| 695 | goto fail; | 697 | goto fail; | 
| @@ -754,6 +756,7 @@ fail_lm: | |||
| 754 | fail_sys: | 756 | fail_sys: | 
| 755 | gfs2_sys_fs_del(sdp); | 757 | gfs2_sys_fs_del(sdp); | 
| 756 | fail: | 758 | fail: | 
| 759 | gfs2_delete_debugfs_file(sdp); | ||
| 757 | kfree(sdp); | 760 | kfree(sdp); | 
| 758 | sb->s_fs_info = NULL; | 761 | sb->s_fs_info = NULL; | 
| 759 | return error; | 762 | return error; | 
| @@ -896,6 +899,7 @@ error: | |||
| 896 | 899 | ||
| 897 | static void gfs2_kill_sb(struct super_block *sb) | 900 | static void gfs2_kill_sb(struct super_block *sb) | 
| 898 | { | 901 | { | 
| 902 | gfs2_delete_debugfs_file(sb->s_fs_info); | ||
| 899 | kill_block_super(sb); | 903 | kill_block_super(sb); | 
| 900 | } | 904 | } | 
| 901 | 905 | ||
| diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index b89999d3a767..485ce3d49923 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
| @@ -284,6 +284,31 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 284 | } | 284 | } | 
| 285 | 285 | ||
| 286 | /** | 286 | /** | 
| 287 | * gfs2_drop_inode - Drop an inode (test for remote unlink) | ||
| 288 | * @inode: The inode to drop | ||
| 289 | * | ||
| 290 | * If we've received a callback on an iopen lock then its because a | ||
| 291 | * remote node tried to deallocate the inode but failed due to this node | ||
| 292 | * still having the inode open. Here we mark the link count zero | ||
| 293 | * since we know that it must have reached zero if the GLF_DEMOTE flag | ||
| 294 | * is set on the iopen glock. If we didn't do a disk read since the | ||
| 295 | * remote node removed the final link then we might otherwise miss | ||
| 296 | * this event. This check ensures that this node will deallocate the | ||
| 297 | * inode's blocks, or alternatively pass the baton on to another | ||
| 298 | * node for later deallocation. | ||
| 299 | */ | ||
| 300 | static void gfs2_drop_inode(struct inode *inode) | ||
| 301 | { | ||
| 302 | if (inode->i_private && inode->i_nlink) { | ||
| 303 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 304 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; | ||
| 305 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) | ||
| 306 | clear_nlink(inode); | ||
| 307 | } | ||
| 308 | generic_drop_inode(inode); | ||
| 309 | } | ||
| 310 | |||
| 311 | /** | ||
| 287 | * gfs2_clear_inode - Deallocate an inode when VFS is done with it | 312 | * gfs2_clear_inode - Deallocate an inode when VFS is done with it | 
| 288 | * @inode: The VFS inode | 313 | * @inode: The VFS inode | 
| 289 | * | 314 | * | 
| @@ -441,7 +466,7 @@ out_unlock: | |||
| 441 | out_uninit: | 466 | out_uninit: | 
| 442 | gfs2_holder_uninit(&ip->i_iopen_gh); | 467 | gfs2_holder_uninit(&ip->i_iopen_gh); | 
| 443 | gfs2_glock_dq_uninit(&gh); | 468 | gfs2_glock_dq_uninit(&gh); | 
| 444 | if (error) | 469 | if (error && error != GLR_TRYFAILED) | 
| 445 | fs_warn(sdp, "gfs2_delete_inode: %d\n", error); | 470 | fs_warn(sdp, "gfs2_delete_inode: %d\n", error); | 
| 446 | out: | 471 | out: | 
| 447 | truncate_inode_pages(&inode->i_data, 0); | 472 | truncate_inode_pages(&inode->i_data, 0); | 
| @@ -481,6 +506,7 @@ const struct super_operations gfs2_super_ops = { | |||
| 481 | .statfs = gfs2_statfs, | 506 | .statfs = gfs2_statfs, | 
| 482 | .remount_fs = gfs2_remount_fs, | 507 | .remount_fs = gfs2_remount_fs, | 
| 483 | .clear_inode = gfs2_clear_inode, | 508 | .clear_inode = gfs2_clear_inode, | 
| 509 | .drop_inode = gfs2_drop_inode, | ||
| 484 | .show_options = gfs2_show_options, | 510 | .show_options = gfs2_show_options, | 
| 485 | }; | 511 | }; | 
| 486 | 512 | ||
| diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8d9c08b5c4b6..1727f5012efe 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "trans.h" | 27 | #include "trans.h" | 
| 28 | #include "ops_file.h" | 28 | #include "ops_file.h" | 
| 29 | #include "util.h" | 29 | #include "util.h" | 
| 30 | #include "log.h" | ||
| 30 | 31 | ||
| 31 | #define BFITNOENT ((u32)~0) | 32 | #define BFITNOENT ((u32)~0) | 
| 32 | 33 | ||
| @@ -697,8 +698,6 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) | |||
| 697 | * @al: the struct gfs2_alloc structure describing the reservation | 698 | * @al: the struct gfs2_alloc structure describing the reservation | 
| 698 | * | 699 | * | 
| 699 | * If there's room for the requested blocks to be allocated from the RG: | 700 | * If there's room for the requested blocks to be allocated from the RG: | 
| 700 | * Sets the $al_reserved_data field in @al. | ||
| 701 | * Sets the $al_reserved_meta field in @al. | ||
| 702 | * Sets the $al_rgd field in @al. | 701 | * Sets the $al_rgd field in @al. | 
| 703 | * | 702 | * | 
| 704 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) | 703 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) | 
| @@ -709,6 +708,9 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | |||
| 709 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 708 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 
| 710 | int ret = 0; | 709 | int ret = 0; | 
| 711 | 710 | ||
| 711 | if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC) | ||
| 712 | return 0; | ||
| 713 | |||
| 712 | spin_lock(&sdp->sd_rindex_spin); | 714 | spin_lock(&sdp->sd_rindex_spin); | 
| 713 | if (rgd->rd_free_clone >= al->al_requested) { | 715 | if (rgd->rd_free_clone >= al->al_requested) { | 
| 714 | al->al_rgd = rgd; | 716 | al->al_rgd = rgd; | 
| @@ -941,9 +943,13 @@ static int get_local_rgrp(struct gfs2_inode *ip) | |||
| 941 | rgd = gfs2_rgrpd_get_first(sdp); | 943 | rgd = gfs2_rgrpd_get_first(sdp); | 
| 942 | 944 | ||
| 943 | if (rgd == begin) { | 945 | if (rgd == begin) { | 
| 944 | if (++loops >= 2 || !skipped) | 946 | if (++loops >= 3) | 
| 945 | return -ENOSPC; | 947 | return -ENOSPC; | 
| 948 | if (!skipped) | ||
| 949 | loops++; | ||
| 946 | flags = 0; | 950 | flags = 0; | 
| 951 | if (loops == 2) | ||
| 952 | gfs2_log_flush(sdp, NULL); | ||
| 947 | } | 953 | } | 
| 948 | } | 954 | } | 
| 949 | 955 | ||
| diff --git a/include/linux/dlm_device.h b/include/linux/dlm_device.h index 2a2dd189b9fd..c2735cab2ebf 100644 --- a/include/linux/dlm_device.h +++ b/include/linux/dlm_device.h | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | 19 | ||
| 20 | /* Version of the device interface */ | 20 | /* Version of the device interface */ | 
| 21 | #define DLM_DEVICE_VERSION_MAJOR 5 | 21 | #define DLM_DEVICE_VERSION_MAJOR 5 | 
| 22 | #define DLM_DEVICE_VERSION_MINOR 0 | 22 | #define DLM_DEVICE_VERSION_MINOR 1 | 
| 23 | #define DLM_DEVICE_VERSION_PATCH 0 | 23 | #define DLM_DEVICE_VERSION_PATCH 0 | 
| 24 | 24 | ||
| 25 | /* struct passed to the lock write */ | 25 | /* struct passed to the lock write */ | 
| @@ -44,6 +44,11 @@ struct dlm_lspace_params { | |||
| 44 | char name[0]; | 44 | char name[0]; | 
| 45 | }; | 45 | }; | 
| 46 | 46 | ||
| 47 | struct dlm_purge_params { | ||
| 48 | __u32 nodeid; | ||
| 49 | __u32 pid; | ||
| 50 | }; | ||
| 51 | |||
| 47 | struct dlm_write_request { | 52 | struct dlm_write_request { | 
| 48 | __u32 version[3]; | 53 | __u32 version[3]; | 
| 49 | __u8 cmd; | 54 | __u8 cmd; | 
| @@ -53,6 +58,7 @@ struct dlm_write_request { | |||
| 53 | union { | 58 | union { | 
| 54 | struct dlm_lock_params lock; | 59 | struct dlm_lock_params lock; | 
| 55 | struct dlm_lspace_params lspace; | 60 | struct dlm_lspace_params lspace; | 
| 61 | struct dlm_purge_params purge; | ||
| 56 | } i; | 62 | } i; | 
| 57 | }; | 63 | }; | 
| 58 | 64 | ||
| @@ -76,6 +82,7 @@ struct dlm_lock_result { | |||
| 76 | #define DLM_USER_QUERY 3 | 82 | #define DLM_USER_QUERY 3 | 
| 77 | #define DLM_USER_CREATE_LOCKSPACE 4 | 83 | #define DLM_USER_CREATE_LOCKSPACE 4 | 
| 78 | #define DLM_USER_REMOVE_LOCKSPACE 5 | 84 | #define DLM_USER_REMOVE_LOCKSPACE 5 | 
| 85 | #define DLM_USER_PURGE 6 | ||
| 79 | 86 | ||
| 80 | /* Arbitrary length restriction */ | 87 | /* Arbitrary length restriction */ | 
| 81 | #define MAX_LS_NAME_LEN 64 | 88 | #define MAX_LS_NAME_LEN 64 | 
