author		Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-07 11:09:00 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-07 11:09:00 -0500
commit		d3f8fd765e94b9137e1f27bbb0ac25289f9e565c (patch)
tree		a9ee7f05b3ef9c03292b101e1e2e0ed0e1c3e85a /fs
parent		0670afdf0e69e5e73c8358da9c39bf3a8807b03e (diff)
parent		a2cf822274b3d58a16a65c8338e299e18b3dc3a4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (57 commits)
  [GFS2] make gfs2_writepages() static
  [GFS2] Unlock page on prepare_write try lock failure
  [GFS2] nfsd readdirplus assertion failure
  [DLM] fix softlockup in dlm_recv
  [DLM] zero new user lvbs
  [DLM/GFS2] indent help text
  [GFS2] Fix unlink deadlocks
  [GFS2] Put back semaphore to avoid umount problem
  [GFS2] more CURRENT_TIME_SEC
  [GFS2/DLM] fix GFS2 circular dependency
  [GFS2/DLM] use sysfs
  [GFS2] make lock_dlm drop_count tunable in sysfs
  [GFS2] increase default lock limit
  [GFS2] Fix list corruption in lops.c
  [GFS2] Fix recursive locking attempt with NFS
  [DLM] can miss clearing resend flag
  [DLM] saved dlm message can be dropped
  [DLM] Make sock_sem into a mutex
  [GFS2] Fix typo in glock.c
  [GFS2] use CURRENT_TIME_SEC instead of get_seconds in gfs2
  ...
Diffstat (limited to 'fs')
-rw-r--r--	fs/dlm/Kconfig			18
-rw-r--r--	fs/dlm/config.c			154
-rw-r--r--	fs/dlm/config.h			17
-rw-r--r--	fs/dlm/dlm_internal.h		20
-rw-r--r--	fs/dlm/lock.c			87
-rw-r--r--	fs/dlm/lockspace.c		10
-rw-r--r--	fs/dlm/lowcomms-sctp.c		151
-rw-r--r--	fs/dlm/lowcomms-tcp.c		361
-rw-r--r--	fs/dlm/midcomms.c		4
-rw-r--r--	fs/dlm/rcom.c			85
-rw-r--r--	fs/dlm/recover.c		8
-rw-r--r--	fs/dlm/recoverd.c		22
-rw-r--r--	fs/dlm/user.c			9
-rw-r--r--	fs/dlm/util.c			4
-rw-r--r--	fs/gfs2/Kconfig			47
-rw-r--r--	fs/gfs2/bmap.c			10
-rw-r--r--	fs/gfs2/dir.c			25
-rw-r--r--	fs/gfs2/dir.h			21
-rw-r--r--	fs/gfs2/eattr.c			8
-rw-r--r--	fs/gfs2/glock.c			316
-rw-r--r--	fs/gfs2/glock.h			11
-rw-r--r--	fs/gfs2/glops.c			136
-rw-r--r--	fs/gfs2/incore.h		18
-rw-r--r--	fs/gfs2/inode.c			61
-rw-r--r--	fs/gfs2/lm.c			8
-rw-r--r--	fs/gfs2/locking/dlm/lock_dlm.h	2
-rw-r--r--	fs/gfs2/locking/dlm/main.c	6
-rw-r--r--	fs/gfs2/locking/dlm/mount.c	6
-rw-r--r--	fs/gfs2/locking/dlm/sysfs.c	13
-rw-r--r--	fs/gfs2/lops.c			14
-rw-r--r--	fs/gfs2/ops_address.c		134
-rw-r--r--	fs/gfs2/ops_dentry.c		16
-rw-r--r--	fs/gfs2/ops_export.c		15
-rw-r--r--	fs/gfs2/ops_file.c		52
-rw-r--r--	fs/gfs2/ops_inode.c		55
-rw-r--r--	fs/gfs2/ops_super.c		11
-rw-r--r--	fs/gfs2/ops_vm.c		24
-rw-r--r--	fs/gfs2/super.c			16
-rw-r--r--	fs/gfs2/sys.c			10
39 files changed, 866 insertions, 1119 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fef..6fa7b0d5c043 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
 
 config DLM
 	tristate "Distributed Lock Manager (DLM)"
-	depends on IPV6 || IPV6=n
+	depends on SYSFS && (IPV6 || IPV6=n)
 	select CONFIGFS_FS
 	select IP_SCTP if DLM_SCTP
 	help
 	  A general purpose distributed lock manager for kernel or userspace
 	  applications.
 
 choice
 	prompt "Select DLM communications protocol"
 	depends on DLM
 	default DLM_TCP
 	help
 	  The DLM can use TCP or SCTP for its network communications.
 	  SCTP supports multi-homed operations whereas TCP doesn't.
 	  However, SCTP seems to have stability problems at the moment.
 
 config DLM_TCP
 	bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
 	bool "DLM debugging"
 	depends on DLM
 	help
 	  Under the debugfs mount point, the name of each lockspace will
 	  appear as a file in the "dlm" directory.  The output is the
 	  list of resources and locks the local node knows about.
 
 endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbfa..8665c88e5af2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
 static void drop_node(struct config_group *, struct config_item *);
 static void release_node(struct config_item *);
 
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+			    char *buf);
+static ssize_t store_cluster(struct config_item *i,
+			     struct configfs_attribute *a,
+			     const char *buf, size_t len);
 static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
 			 char *buf);
 static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
 static ssize_t node_weight_read(struct node *nd, char *buf);
 static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
 
+struct cluster {
+	struct config_group group;
+	unsigned int cl_tcp_port;
+	unsigned int cl_buffer_size;
+	unsigned int cl_rsbtbl_size;
+	unsigned int cl_lkbtbl_size;
+	unsigned int cl_dirtbl_size;
+	unsigned int cl_recover_timer;
+	unsigned int cl_toss_secs;
+	unsigned int cl_scan_secs;
+	unsigned int cl_log_debug;
+};
+
+enum {
+	CLUSTER_ATTR_TCP_PORT = 0,
+	CLUSTER_ATTR_BUFFER_SIZE,
+	CLUSTER_ATTR_RSBTBL_SIZE,
+	CLUSTER_ATTR_LKBTBL_SIZE,
+	CLUSTER_ATTR_DIRTBL_SIZE,
+	CLUSTER_ATTR_RECOVER_TIMER,
+	CLUSTER_ATTR_TOSS_SECS,
+	CLUSTER_ATTR_SCAN_SECS,
+	CLUSTER_ATTR_LOG_DEBUG,
+};
+
+struct cluster_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct cluster *, char *);
+	ssize_t (*store)(struct cluster *, const char *, size_t);
+};
+
+static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+			   unsigned int *info_field, int check_zero,
+			   const char *buf, size_t len)
+{
+	unsigned int x;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	x = simple_strtoul(buf, NULL, 0);
+
+	if (check_zero && !x)
+		return -EINVAL;
+
+	*cl_field = x;
+	*info_field = x;
+
+	return len;
+}
+
+#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
+	.attr   = { .ca_name = __stringify(_name), \
+		    .ca_mode = _mode, \
+		    .ca_owner = THIS_MODULE }, \
+	.show   = _read, \
+	.store  = _write, \
+}
+
+#define CLUSTER_ATTR(name, check_zero) \
+static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+{ \
+	return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
+			   check_zero, buf, len); \
+} \
+static ssize_t name##_read(struct cluster *cl, char *buf) \
+{ \
+	return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
+} \
+static struct cluster_attribute cluster_attr_##name = \
+__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
+
+CLUSTER_ATTR(tcp_port, 1);
+CLUSTER_ATTR(buffer_size, 1);
+CLUSTER_ATTR(rsbtbl_size, 1);
+CLUSTER_ATTR(lkbtbl_size, 1);
+CLUSTER_ATTR(dirtbl_size, 1);
+CLUSTER_ATTR(recover_timer, 1);
+CLUSTER_ATTR(toss_secs, 1);
+CLUSTER_ATTR(scan_secs, 1);
+CLUSTER_ATTR(log_debug, 0);
+
+static struct configfs_attribute *cluster_attrs[] = {
+	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+	[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
+	[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
+	[CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
+	[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
+	[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
+	[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
+	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
+	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+	NULL,
+};
+
 enum {
 	COMM_ATTR_NODEID = 0,
 	COMM_ATTR_LOCAL,
@@ -152,10 +252,6 @@ struct clusters {
 	struct configfs_subsystem subsys;
 };
 
-struct cluster {
-	struct config_group group;
-};
-
 struct spaces {
 	struct config_group ss_group;
 };
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
 
 static struct configfs_item_operations cluster_ops = {
 	.release = release_cluster,
+	.show_attribute = show_cluster,
+	.store_attribute = store_cluster,
 };
 
 static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
 
 static struct config_item_type cluster_type = {
 	.ct_item_ops = &cluster_ops,
+	.ct_attrs = cluster_attrs,
 	.ct_owner = THIS_MODULE,
 };
 
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->group.default_groups[1] = &cms->cs_group;
 	cl->group.default_groups[2] = NULL;
 
+	cl->cl_tcp_port = dlm_config.ci_tcp_port;
+	cl->cl_buffer_size = dlm_config.ci_buffer_size;
+	cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
+	cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
+	cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
+	cl->cl_recover_timer = dlm_config.ci_recover_timer;
+	cl->cl_toss_secs = dlm_config.ci_toss_secs;
+	cl->cl_scan_secs = dlm_config.ci_scan_secs;
+	cl->cl_log_debug = dlm_config.ci_log_debug;
+
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
 	return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
  * Functions for user space to read/write attributes
  */
 
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+			    char *buf)
+{
+	struct cluster *cl = to_cluster(i);
+	struct cluster_attribute *cla =
+			container_of(a, struct cluster_attribute, attr);
+	return cla->show ? cla->show(cl, buf) : 0;
+}
+
+static ssize_t store_cluster(struct config_item *i,
+			     struct configfs_attribute *a,
+			     const char *buf, size_t len)
+{
+	struct cluster *cl = to_cluster(i);
+	struct cluster_attribute *cla =
+			container_of(a, struct cluster_attribute, attr);
+	return cla->store ? cla->store(cl, buf, len) : -EINVAL;
+}
+
 static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
 			 char *buf)
 {
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_RECOVER_TIMER      5
 #define DEFAULT_TOSS_SECS         10
 #define DEFAULT_SCAN_SECS          5
+#define DEFAULT_LOG_DEBUG          0
 
 struct dlm_config_info dlm_config = {
-	.tcp_port = DEFAULT_TCP_PORT,
-	.buffer_size = DEFAULT_BUFFER_SIZE,
-	.rsbtbl_size = DEFAULT_RSBTBL_SIZE,
-	.lkbtbl_size = DEFAULT_LKBTBL_SIZE,
-	.dirtbl_size = DEFAULT_DIRTBL_SIZE,
-	.recover_timer = DEFAULT_RECOVER_TIMER,
-	.toss_secs = DEFAULT_TOSS_SECS,
-	.scan_secs = DEFAULT_SCAN_SECS
+	.ci_tcp_port = DEFAULT_TCP_PORT,
+	.ci_buffer_size = DEFAULT_BUFFER_SIZE,
+	.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
+	.ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
+	.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
+	.ci_recover_timer = DEFAULT_RECOVER_TIMER,
+	.ci_toss_secs = DEFAULT_TOSS_SECS,
+	.ci_scan_secs = DEFAULT_SCAN_SECS,
+	.ci_log_debug = DEFAULT_LOG_DEBUG
 };
 
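
Note: the CLUSTER_ATTR() macro above generates, for each tunable, a pair of
read/write handlers plus a cluster_attribute that wires them into configfs.
As a reading aid, this is roughly what CLUSTER_ATTR(toss_secs, 1) expands to
(a sketch of the preprocessor output, not literal patch content):

static ssize_t toss_secs_write(struct cluster *cl, const char *buf, size_t len)
{
	/* check_zero == 1: a zero value is rejected with -EINVAL */
	return cluster_set(cl, &cl->cl_toss_secs, &dlm_config.ci_toss_secs,
			   1, buf, len);
}

static ssize_t toss_secs_read(struct cluster *cl, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_toss_secs);
}

static struct cluster_attribute cluster_attr_toss_secs =
	__CONFIGFS_ATTR(toss_secs, 0644, toss_secs_read, toss_secs_write);

A write updates both the per-cluster copy (cl_toss_secs) and the global
dlm_config.ci_toss_secs that the rest of the DLM reads.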
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a9..1e978611a96e 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
 #define DLM_MAX_ADDR_COUNT 3
 
 struct dlm_config_info {
-	int tcp_port;
-	int buffer_size;
-	int rsbtbl_size;
-	int lkbtbl_size;
-	int dirtbl_size;
-	int recover_timer;
-	int toss_secs;
-	int scan_secs;
+	int ci_tcp_port;
+	int ci_buffer_size;
+	int ci_rsbtbl_size;
+	int ci_lkbtbl_size;
+	int ci_dirtbl_size;
+	int ci_recover_timer;
+	int ci_toss_secs;
+	int ci_scan_secs;
+	int ci_log_debug;
 };
 
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc0..61d93201e1b2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
 #include <asm/uaccess.h>
 
 #include <linux/dlm.h>
+#include "config.h"
 
 #define DLM_LOCKSPACE_LEN	64
 
@@ -69,12 +70,12 @@ struct dlm_mhandle;
 #define log_error(ls, fmt, args...) \
 	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
 
-#define DLM_LOG_DEBUG
-#ifdef DLM_LOG_DEBUG
-#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
-#else
-#define log_debug(ls, fmt, args...)
-#endif
+#define log_debug(ls, fmt, args...) \
+do { \
+	if (dlm_config.ci_log_debug) \
+		printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
+		       (ls)->ls_name , ##args); \
+} while (0)
 
 #define DLM_ASSERT(x, do) \
 { \
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
 
 /* dlm_header is first element of all structs sent between nodes */
 
-#define DLM_HEADER_MAJOR	0x00020000
-#define DLM_HEADER_MINOR	0x00000001
+#define DLM_HEADER_MAJOR	0x00030000
+#define DLM_HEADER_MINOR	0x00000000
 
 #define DLM_MSG			1
 #define DLM_RCOM		2
@@ -386,6 +387,8 @@ struct dlm_rcom {
 	uint32_t		rc_type;	/* DLM_RCOM_ */
 	int			rc_result;	/* multi-purpose */
 	uint64_t		rc_id;		/* match reply with request */
+	uint64_t		rc_seq;		/* sender's ls_recover_seq */
+	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
 	char			rc_buf[0];
 };
 
@@ -523,6 +526,7 @@ struct dlm_user_proc {
 	spinlock_t		asts_spin;
 	struct list_head	locks;
 	spinlock_t		locks_spin;
+	struct list_head	unlocking;
 	wait_queue_head_t	wait;
 };
 
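
Note: log_debug() changes here from a compile-time switch (the old
DLM_LOG_DEBUG #ifdef) to a runtime test of dlm_config.ci_log_debug, which is
why dlm_internal.h now pulls in config.h. An illustration of the effect (the
call site and message below are made up for this example):

/* Always compiled in after this patch; printed at KERN_DEBUG only while
 * the cluster's log_debug configfs attribute is set non-zero. */
log_debug(ls, "remove lkb %x from waiters", lkb->lkb_id);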
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb6..e725005fafd0 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
 	mutex_unlock(&ls->ls_waiters_mutex);
 }
 
+/* We clear the RESEND flag because we might be taking an lkb off the waiters
+   list as part of process_requestqueue (e.g. a lookup that has an optimized
+   request reply on the requestqueue) between dlm_recover_waiters_pre() which
+   set RESEND and dlm_recover_waiters_post() */
+
 static int _remove_from_waiters(struct dlm_lkb *lkb)
 {
 	int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
 		goto out;
 	}
 	lkb->lkb_wait_type = 0;
+	lkb->lkb_flags &= ~DLM_IFL_RESEND;
 	list_del(&lkb->lkb_wait_reply);
 	unhold_lkb(lkb);
  out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
 		list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
 					    res_hashchain) {
 			if (!time_after_eq(jiffies, r->res_toss_time +
-					   dlm_config.toss_secs * HZ))
+					   dlm_config.ci_toss_secs * HZ))
 				continue;
 			found = 1;
 			break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
 	if (lkb->lkb_astaddr)
 		ms->m_asts |= AST_COMP;
 
-	if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
-		memcpy(ms->m_extra, r->res_name, r->res_length);
+	/* compare with switch in create_message; send_remove() doesn't
+	   use send_args() */
 
-	else if (lkb->lkb_lvbptr)
+	switch (ms->m_type) {
+	case DLM_MSG_REQUEST:
+	case DLM_MSG_LOOKUP:
+		memcpy(ms->m_extra, r->res_name, r->res_length);
+		break;
+	case DLM_MSG_CONVERT:
+	case DLM_MSG_UNLOCK:
+	case DLM_MSG_REQUEST_REPLY:
+	case DLM_MSG_CONVERT_REPLY:
+	case DLM_MSG_GRANT:
+		if (!lkb->lkb_lvbptr)
+			break;
 		memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
-
+		break;
+	}
 }
 
 static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 
 	DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
 
-	if (receive_lvb(ls, lkb, ms))
-		return -ENOMEM;
+	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
+		/* lkb was just created so there won't be an lvb yet */
+		lkb->lkb_lvbptr = allocate_lvb(ls);
+		if (!lkb->lkb_lvbptr)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 {
 	struct dlm_message *ms = (struct dlm_message *) hd;
 	struct dlm_ls *ls;
-	int error;
+	int error = 0;
 
 	if (!recovery)
 		dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
  out:
 	dlm_put_lockspace(ls);
 	dlm_astd_wake();
-	return 0;
+	return error;
 }
 
 
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
 	if (middle_conversion(lkb)) {
 		hold_lkb(lkb);
 		ls->ls_stub_ms.m_result = -EINPROGRESS;
+		ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 		_remove_from_waiters(lkb);
 		_receive_convert_reply(lkb, &ls->ls_stub_ms);
 
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 		case DLM_MSG_UNLOCK:
 			hold_lkb(lkb);
 			ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+			ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 			_remove_from_waiters(lkb);
 			_receive_unlock_reply(lkb, &ls->ls_stub_ms);
 			dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 		case DLM_MSG_CANCEL:
 			hold_lkb(lkb);
 			ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+			ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 			_remove_from_waiters(lkb);
 			_receive_cancel_reply(lkb, &ls->ls_stub_ms);
 			dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 	lock_rsb(r);
 
 	switch (error) {
+	case -EBADR:
+		/* There's a chance the new master received our lock before
+		   dlm_recover_master_reply(), this wouldn't happen if we did
+		   a barrier between recover_masters and recover_locks. */
+		log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
+			  (unsigned long)r, r->res_name);
+		dlm_send_rcom_lock(r, lkb);
+		goto out;
 	case -EEXIST:
 		log_debug(ls, "master copy exists %x", lkb->lkb_id);
 		/* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 	/* an ack for dlm_recover_locks() which waits for replies from
 	   all the locks it sends to new masters */
 	dlm_recovered_lock(r);
-
+ out:
 	unlock_rsb(r);
 	put_rsb(r);
 	dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	}
 
 	if (flags & DLM_LKF_VALBLK) {
-		ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+		ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
 		if (!ua->lksb.sb_lvbptr) {
 			kfree(ua);
 			__put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	ua = (struct dlm_user_args *)lkb->lkb_astparam;
 
 	if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
-		ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+		ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
 		if (!ua->lksb.sb_lvbptr) {
 			error = -ENOMEM;
 			goto out_put;
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 		goto out_put;
 
 	spin_lock(&ua->proc->locks_spin);
-	list_del_init(&lkb->lkb_ownqueue);
+	/* dlm_user_add_ast() may have already taken lkb off the proc list */
+	if (!list_empty(&lkb->lkb_ownqueue))
+		list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
 	spin_unlock(&ua->proc->locks_spin);
-
-	/* this removes the reference for the proc->locks list added by
-	   dlm_user_request */
-	unhold_lkb(lkb);
  out_put:
 	dlm_put_lkb(lkb);
  out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	/* this lkb was removed from the WAITING queue */
 	if (lkb->lkb_grmode == DLM_LOCK_IV) {
 		spin_lock(&ua->proc->locks_spin);
-		list_del_init(&lkb->lkb_ownqueue);
+		list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
 		spin_unlock(&ua->proc->locks_spin);
-		unhold_lkb(lkb);
 	}
  out_put:
 	dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	mutex_lock(&ls->ls_clear_proc_locks);
 
 	list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
-		if (lkb->lkb_ast_type) {
-			list_del(&lkb->lkb_astqueue);
-			unhold_lkb(lkb);
-		}
-
 		list_del_init(&lkb->lkb_ownqueue);
 
 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 
 		dlm_put_lkb(lkb);
 	}
+
+	/* in-progress unlocks */
+	list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
+		list_del_init(&lkb->lkb_ownqueue);
+		lkb->lkb_flags |= DLM_IFL_DEAD;
+		dlm_put_lkb(lkb);
+	}
+
+	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+		list_del(&lkb->lkb_astqueue);
+		dlm_put_lkb(lkb);
+	}
+
 	mutex_unlock(&ls->ls_clear_proc_locks);
 	unlock_recovery(ls);
 }
+
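
Note: the dlm_user_unlock(), dlm_user_cancel() and dlm_clear_proc_locks()
hunks above cooperate around the new proc->unlocking list. The lifecycle they
implement, sketched with the names from this patch (simplified; the
hold_lkb()/dlm_put_lkb() reference counting is omitted):

/* 1. Unlock/cancel: park the lkb on proc->unlocking so it is no longer
 *    visible on proc->locks; the list_empty() test covers the case where
 *    dlm_user_add_ast() already removed it. */
spin_lock(&ua->proc->locks_spin);
if (!list_empty(&lkb->lkb_ownqueue))
	list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);

/* 2. Process exit: dlm_clear_proc_locks() then drains proc->locks,
 *    proc->unlocking and proc->asts, dropping one reference per entry, so
 *    an unlock still in flight can be neither leaked nor freed twice. */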
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8d..f40817b53c6f 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
 	while (!kthread_should_stop()) {
 		list_for_each_entry(ls, &lslist, ls_list)
 			dlm_scan_rsbs(ls);
-		schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
+		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
 	}
 	return 0;
 }
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	ls->ls_count = 0;
 	ls->ls_flags = 0;
 
-	size = dlm_config.rsbtbl_size;
+	size = dlm_config.ci_rsbtbl_size;
 	ls->ls_rsbtbl_size = size;
 
 	ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		rwlock_init(&ls->ls_rsbtbl[i].lock);
 	}
 
-	size = dlm_config.lkbtbl_size;
+	size = dlm_config.ci_lkbtbl_size;
 	ls->ls_lkbtbl_size = size;
 
 	ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		ls->ls_lkbtbl[i].counter = 1;
 	}
 
-	size = dlm_config.dirtbl_size;
+	size = dlm_config.ci_dirtbl_size;
 	ls->ls_dirtbl_size = size;
 
 	ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	mutex_init(&ls->ls_requestqueue_mutex);
 	mutex_init(&ls->ls_clear_proc_locks);
 
-	ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+	ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
 	if (!ls->ls_recover_buf)
 		goto out_dirfree;
 
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a9285..dc83a9d979b5 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
 	struct list_head	writequeue; /* outgoing writequeue_entries */
 	spinlock_t		writequeue_lock;
 	int			nodeid;
+	struct work_struct	swork; /* Send workqueue */
+	struct work_struct	lwork; /* Locking workqueue */
 };
 
 static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
 	atomic_t		waiting_requests;
 	struct cbuf		cb;
 	int			eagain_flag;
+	struct work_struct	work; /* Send workqueue */
 };
 
 /* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
 static LIST_HEAD(write_nodes);
 static DEFINE_SPINLOCK(write_nodes_lock);
 
+
 /* Maximum number of incoming messages to process before
  * doing a schedule()
  */
 #define MAX_RX_MSG_COUNT 25
 
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
+static struct workqueue_struct *lock_workqueue;
 
 /* The SCTP connection */
 static struct connection sctp_con;
 
+static void process_send_sockets(struct work_struct *work);
+static void process_recv_sockets(struct work_struct *work);
+static void process_lock_request(struct work_struct *work);
 
 static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 {
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
 		spin_lock_init(&ni->lock);
 		INIT_LIST_HEAD(&ni->writequeue);
 		spin_lock_init(&ni->writequeue_lock);
+		INIT_WORK(&ni->lwork, process_lock_request);
+		INIT_WORK(&ni->swork, process_send_sockets);
 		ni->nodeid = nodeid;
 
 		if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
 /* Data or notification available on socket */
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
-	atomic_inc(&sctp_con.waiting_requests);
 	if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
-		return;
-
-	wake_up_interruptible(&lowcomms_recv_wait);
+		queue_work(recv_workqueue, &sctp_con.work);
 }
 
 
@@ -361,10 +367,10 @@ static void init_failed(void)
 			spin_lock_bh(&write_nodes_lock);
 			list_add_tail(&ni->write_list, &write_nodes);
 			spin_unlock_bh(&write_nodes_lock);
+			queue_work(send_workqueue, &ni->swork);
 			}
 		}
 	}
-	wake_up_process(send_task);
 }
 
/* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 				spin_lock_bh(&write_nodes_lock);
 				list_add_tail(&ni->write_list, &write_nodes);
 				spin_unlock_bh(&write_nodes_lock);
+				queue_work(send_workqueue, &ni->swork);
 			}
-			wake_up_process(send_task);
 		}
 		break;
 
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
 				spin_lock_bh(&write_nodes_lock);
 				list_add_tail(&ni->write_list, &write_nodes);
 				spin_unlock_bh(&write_nodes_lock);
+				queue_work(send_workqueue, &ni->swork);
 			}
-			wake_up_process(send_task);
 		}
 	}
 
@@ -590,6 +596,7 @@ static int receive_from_sock(void)
 		return 0;
 
 	cbuf_add(&sctp_con.cb, ret);
+	// PJC: TODO: Add to node's workqueue....can we ??
 	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
 					  page_address(sctp_con.rx_page),
 					  sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
 
 	if (result < 0)
 		log_print("Can't bind to port %d addr number %d",
-			  dlm_config.tcp_port, num);
+			  dlm_config.ci_tcp_port, num);
 
 	return result;
 }
@@ -711,7 +718,7 @@ static int init_sock(void)
 	/* Bind to all interfaces. */
 	for (i = 0; i < dlm_local_count; i++) {
 		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
-		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+		make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
 
 		result = add_bind_addr(&localaddr, addr_len, num);
 		if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
 		spin_lock_bh(&write_nodes_lock);
 		list_add_tail(&ni->write_list, &write_nodes);
 		spin_unlock_bh(&write_nodes_lock);
-		wake_up_process(send_task);
+
+		queue_work(send_workqueue, &ni->swork);
 	}
 	return;
 
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
 		return;
 	}
 
-	make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+	make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
 
 	outmessage.msg_name = &rem_addr;
 	outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
 	return 0;
 }
 
-static int write_list_empty(void)
+// PJC: The work queue function for receiving.
+static void process_recv_sockets(struct work_struct *work)
 {
-	int status;
-
-	spin_lock_bh(&write_nodes_lock);
-	status = list_empty(&write_nodes);
-	spin_unlock_bh(&write_nodes_lock);
-
-	return status;
-}
-
-static int dlm_recvd(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	while (!kthread_should_stop()) {
+	if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+		int ret;
 		int count = 0;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&lowcomms_recv_wait, &wait);
-		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
-			cond_resched();
-		remove_wait_queue(&lowcomms_recv_wait, &wait);
-		set_current_state(TASK_RUNNING);
-
-		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
-			int ret;
-
-			do {
-				ret = receive_from_sock();
+		do {
+			ret = receive_from_sock();
 
-				/* Don't starve out everyone else */
-				if (++count >= MAX_RX_MSG_COUNT) {
-					cond_resched();
-					count = 0;
-				}
-			} while (!kthread_should_stop() && ret >=0);
-		}
-		cond_resched();
+			/* Don't starve out everyone else */
+			if (++count >= MAX_RX_MSG_COUNT) {
+				cond_resched();
+				count = 0;
+			}
+		} while (!kthread_should_stop() && ret >=0);
 	}
-
-	return 0;
+	cond_resched();
 }
 
-static int dlm_sendd(void *data)
+// PJC: the work queue function for sending
+static void process_send_sockets(struct work_struct *work)
 {
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (write_list_empty())
-			cond_resched();
-		set_current_state(TASK_RUNNING);
-
-		if (sctp_con.eagain_flag) {
-			sctp_con.eagain_flag = 0;
-			refill_write_queue();
-		}
-		process_output_queue();
+	if (sctp_con.eagain_flag) {
+		sctp_con.eagain_flag = 0;
+		refill_write_queue();
 	}
+	process_output_queue();
+}
 
-	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	return 0;
+// PJC: Process lock requests from a particular node.
+// TODO: can we optimise this out on UP ??
+static void process_lock_request(struct work_struct *work)
+{
 }
 
 static void daemons_stop(void)
 {
-	kthread_stop(recv_task);
-	kthread_stop(send_task);
+	destroy_workqueue(recv_workqueue);
+	destroy_workqueue(send_workqueue);
+	destroy_workqueue(lock_workqueue);
 }
 
 static int daemons_start(void)
 {
-	struct task_struct *p;
 	int error;
+	recv_workqueue = create_workqueue("dlm_recv");
+	error = IS_ERR(recv_workqueue);
+	if (error) {
+		log_print("can't start dlm_recv %d", error);
+		return error;
+	}
 
-	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
-	error = IS_ERR(p);
+	send_workqueue = create_singlethread_workqueue("dlm_send");
+	error = IS_ERR(send_workqueue);
 	if (error) {
-		log_print("can't start dlm_recvd %d", error);
+		log_print("can't start dlm_send %d", error);
+		destroy_workqueue(recv_workqueue);
 		return error;
 	}
-	recv_task = p;
 
-	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
-	error = IS_ERR(p);
+	lock_workqueue = create_workqueue("dlm_rlock");
+	error = IS_ERR(lock_workqueue);
 	if (error) {
-		log_print("can't start dlm_sendd %d", error);
-		kthread_stop(recv_task);
+		log_print("can't start dlm_rlock %d", error);
+		destroy_workqueue(send_workqueue);
+		destroy_workqueue(recv_workqueue);
 		return error;
 	}
-	send_task = p;
 
 	return 0;
 }
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
 {
 	int error;
 
+	INIT_WORK(&sctp_con.work, process_recv_sockets);
+
 	error = init_sock();
 	if (error)
 		goto fail_sock;
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
 	for (i = 0; i < dlm_local_count; i++)
 		kfree(dlm_local_addr[i]);
 }
-
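
Note: both lowcomms files replace their dlm_recvd/dlm_sendd kernel threads
with workqueues. A minimal self-contained sketch of that conversion pattern
(all names below are illustrative, not from the patch; note also that
create_workqueue()/create_singlethread_workqueue() return NULL on failure
rather than an ERR_PTR, so this sketch tests for NULL):

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static void example_handler(struct work_struct *work)
{
	/* runs in process context whenever the work item is queued */
}

static int example_start(void)
{
	example_wq = create_singlethread_workqueue("example");
	if (!example_wq)
		return -ENOMEM;
	INIT_WORK(&example_work, example_handler);
	return 0;
}

static void example_kick(void)
{
	/* producers queue work instead of wake_up_process(task) */
	queue_work(example_wq, &example_work);
}

static void example_stop(void)
{
	/* flushes pending work, then frees the queue */
	destroy_workqueue(example_wq);
}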
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42a..f1efd17b2614 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
96struct connection { 96struct connection {
97 struct socket *sock; /* NULL if not connected */ 97 struct socket *sock; /* NULL if not connected */
98 uint32_t nodeid; /* So we know who we are in the list */ 98 uint32_t nodeid; /* So we know who we are in the list */
99 struct rw_semaphore sock_sem; /* Stop connect races */ 99 struct mutex sock_mutex;
100 struct list_head read_list; /* On this list when ready for reading */
101 struct list_head write_list; /* On this list when ready for writing */
102 struct list_head state_list; /* On this list when ready to connect */
103 unsigned long flags; /* bit 1,2 = We are on the read/write lists */ 100 unsigned long flags; /* bit 1,2 = We are on the read/write lists */
104#define CF_READ_PENDING 1 101#define CF_READ_PENDING 1
105#define CF_WRITE_PENDING 2 102#define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
112 struct page *rx_page; 109 struct page *rx_page;
113 struct cbuf cb; 110 struct cbuf cb;
114 int retries; 111 int retries;
115 atomic_t waiting_requests;
116#define MAX_CONNECT_RETRIES 3 112#define MAX_CONNECT_RETRIES 3
117 struct connection *othercon; 113 struct connection *othercon;
114 struct work_struct rwork; /* Receive workqueue */
115 struct work_struct swork; /* Send workqueue */
118}; 116};
119#define sock2con(x) ((struct connection *)(x)->sk_user_data) 117#define sock2con(x) ((struct connection *)(x)->sk_user_data)
120 118
@@ -131,14 +129,9 @@ struct writequeue_entry {
131 129
132static struct sockaddr_storage dlm_local_addr; 130static struct sockaddr_storage dlm_local_addr;
133 131
134/* Manage daemons */ 132/* Work queues */
135static struct task_struct *recv_task; 133static struct workqueue_struct *recv_workqueue;
136static struct task_struct *send_task; 134static struct workqueue_struct *send_workqueue;
137
138static wait_queue_t lowcomms_send_waitq_head;
139static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
140static wait_queue_t lowcomms_recv_waitq_head;
141static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
142 135
143/* An array of pointers to connections, indexed by NODEID */ 136/* An array of pointers to connections, indexed by NODEID */
144static struct connection **connections; 137static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
146static struct kmem_cache *con_cache; 139static struct kmem_cache *con_cache;
147static int conn_array_size; 140static int conn_array_size;
148 141
149/* List of sockets that have reads pending */ 142static void process_recv_sockets(struct work_struct *work);
150static LIST_HEAD(read_sockets); 143static void process_send_sockets(struct work_struct *work);
151static DEFINE_SPINLOCK(read_sockets_lock);
152
153/* List of sockets which have writes pending */
154static LIST_HEAD(write_sockets);
155static DEFINE_SPINLOCK(write_sockets_lock);
156
157/* List of sockets which have connects pending */
158static LIST_HEAD(state_sockets);
159static DEFINE_SPINLOCK(state_sockets_lock);
160 144
161static struct connection *nodeid2con(int nodeid, gfp_t allocation) 145static struct connection *nodeid2con(int nodeid, gfp_t allocation)
162{ 146{
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
186 goto finish; 170 goto finish;
187 171
188 con->nodeid = nodeid; 172 con->nodeid = nodeid;
189 init_rwsem(&con->sock_sem); 173 mutex_init(&con->sock_mutex);
190 INIT_LIST_HEAD(&con->writequeue); 174 INIT_LIST_HEAD(&con->writequeue);
191 spin_lock_init(&con->writequeue_lock); 175 spin_lock_init(&con->writequeue_lock);
176 INIT_WORK(&con->swork, process_send_sockets);
177 INIT_WORK(&con->rwork, process_recv_sockets);
192 178
193 connections[nodeid] = con; 179 connections[nodeid] = con;
194 } 180 }
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
203{ 189{
204 struct connection *con = sock2con(sk); 190 struct connection *con = sock2con(sk);
205 191
206 atomic_inc(&con->waiting_requests); 192 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
207 if (test_and_set_bit(CF_READ_PENDING, &con->flags)) 193 queue_work(recv_workqueue, &con->rwork);
208 return;
209
210 spin_lock_bh(&read_sockets_lock);
211 list_add_tail(&con->read_list, &read_sockets);
212 spin_unlock_bh(&read_sockets_lock);
213
214 wake_up_interruptible(&lowcomms_recv_waitq);
215} 194}
216 195
217static void lowcomms_write_space(struct sock *sk) 196static void lowcomms_write_space(struct sock *sk)
218{ 197{
219 struct connection *con = sock2con(sk); 198 struct connection *con = sock2con(sk);
220 199
221 if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 200 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
222 return; 201 queue_work(send_workqueue, &con->swork);
223
224 spin_lock_bh(&write_sockets_lock);
225 list_add_tail(&con->write_list, &write_sockets);
226 spin_unlock_bh(&write_sockets_lock);
227
228 wake_up_interruptible(&lowcomms_send_waitq);
229} 202}
230 203
231static inline void lowcomms_connect_sock(struct connection *con) 204static inline void lowcomms_connect_sock(struct connection *con)
232{ 205{
233 if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) 206 if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
234 return; 207 queue_work(send_workqueue, &con->swork);
235
236 spin_lock_bh(&state_sockets_lock);
237 list_add_tail(&con->state_list, &state_sockets);
238 spin_unlock_bh(&state_sockets_lock);
239
240 wake_up_interruptible(&lowcomms_send_waitq);
241} 208}
242 209
243static void lowcomms_state_change(struct sock *sk) 210static void lowcomms_state_change(struct sock *sk)
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
279/* Close a remote connection and tidy up */ 246/* Close a remote connection and tidy up */
280static void close_connection(struct connection *con, bool and_other) 247static void close_connection(struct connection *con, bool and_other)
281{ 248{
282 down_write(&con->sock_sem); 249 mutex_lock(&con->sock_mutex);
283 250
284 if (con->sock) { 251 if (con->sock) {
285 sock_release(con->sock); 252 sock_release(con->sock);
@@ -294,7 +261,7 @@ static void close_connection(struct connection *con, bool and_other)
294 con->rx_page = NULL; 261 con->rx_page = NULL;
295 } 262 }
296 con->retries = 0; 263 con->retries = 0;
297 up_write(&con->sock_sem); 264 mutex_unlock(&con->sock_mutex);
298} 265}
299 266
300/* Data received from remote end */ 267/* Data received from remote end */
@@ -308,10 +275,13 @@ static int receive_from_sock(struct connection *con)
308 int r; 275 int r;
309 int call_again_soon = 0; 276 int call_again_soon = 0;
310 277
311 down_read(&con->sock_sem); 278 mutex_lock(&con->sock_mutex);
279
280 if (con->sock == NULL) {
281 ret = -EAGAIN;
282 goto out_close;
283 }
312 284
313 if (con->sock == NULL)
314 goto out;
315 if (con->rx_page == NULL) { 285 if (con->rx_page == NULL) {
316 /* 286 /*
317 * This doesn't need to be atomic, but I think it should 287 * This doesn't need to be atomic, but I think it should
@@ -359,6 +329,9 @@ static int receive_from_sock(struct connection *con)
359 329
360 if (ret <= 0) 330 if (ret <= 0)
361 goto out_close; 331 goto out_close;
332 if (ret == -EAGAIN)
333 goto out_resched;
334
362 if (ret == len) 335 if (ret == len)
363 call_again_soon = 1; 336 call_again_soon = 1;
364 cbuf_add(&con->cb, ret); 337 cbuf_add(&con->cb, ret);
@@ -381,24 +354,26 @@ static int receive_from_sock(struct connection *con)
381 con->rx_page = NULL; 354 con->rx_page = NULL;
382 } 355 }
383 356
384out:
385 if (call_again_soon) 357 if (call_again_soon)
386 goto out_resched; 358 goto out_resched;
387 up_read(&con->sock_sem); 359 mutex_unlock(&con->sock_mutex);
388 return 0; 360 return 0;
389 361
390out_resched: 362out_resched:
391 lowcomms_data_ready(con->sock->sk, 0); 363 if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
392 up_read(&con->sock_sem); 364 queue_work(recv_workqueue, &con->rwork);
393 cond_resched(); 365 mutex_unlock(&con->sock_mutex);
394 return 0; 366 return -EAGAIN;
395 367
396out_close: 368out_close:
397 up_read(&con->sock_sem); 369 mutex_unlock(&con->sock_mutex);
398 if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { 370 if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
399 close_connection(con, false); 371 close_connection(con, false);
400 /* Reconnect when there is something to send */ 372 /* Reconnect when there is something to send */
401 } 373 }
374 /* Don't return success if we really got EOF */
375 if (ret == 0)
376 ret = -EAGAIN;
402 377
403 return ret; 378 return ret;
404} 379}
@@ -412,6 +387,7 @@ static int accept_from_sock(struct connection *con)
412 int len; 387 int len;
413 int nodeid; 388 int nodeid;
414 struct connection *newcon; 389 struct connection *newcon;
390 struct connection *addcon;
415 391
416 memset(&peeraddr, 0, sizeof(peeraddr)); 392 memset(&peeraddr, 0, sizeof(peeraddr));
417 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, 393 result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
@@ -419,7 +395,7 @@ static int accept_from_sock(struct connection *con)
419 if (result < 0) 395 if (result < 0)
420 return -ENOMEM; 396 return -ENOMEM;
421 397
422 down_read(&con->sock_sem); 398 mutex_lock_nested(&con->sock_mutex, 0);
423 399
424 result = -ENOTCONN; 400 result = -ENOTCONN;
425 if (con->sock == NULL) 401 if (con->sock == NULL)
@@ -445,7 +421,7 @@ static int accept_from_sock(struct connection *con)
445 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 421 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
446 printk("dlm: connect from non cluster node\n"); 422 printk("dlm: connect from non cluster node\n");
447 sock_release(newsock); 423 sock_release(newsock);
448 up_read(&con->sock_sem); 424 mutex_unlock(&con->sock_mutex);
449 return -1; 425 return -1;
450 } 426 }
451 427
@@ -462,7 +438,7 @@ static int accept_from_sock(struct connection *con)
462 result = -ENOMEM; 438 result = -ENOMEM;
463 goto accept_err; 439 goto accept_err;
464 } 440 }
465 down_write(&newcon->sock_sem); 441 mutex_lock_nested(&newcon->sock_mutex, 1);
466 if (newcon->sock) { 442 if (newcon->sock) {
467 struct connection *othercon = newcon->othercon; 443 struct connection *othercon = newcon->othercon;
468 444
@@ -470,41 +446,45 @@ static int accept_from_sock(struct connection *con)
470 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); 446 othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
471 if (!othercon) { 447 if (!othercon) {
472 printk("dlm: failed to allocate incoming socket\n"); 448 printk("dlm: failed to allocate incoming socket\n");
473 up_write(&newcon->sock_sem); 449 mutex_unlock(&newcon->sock_mutex);
474 result = -ENOMEM; 450 result = -ENOMEM;
475 goto accept_err; 451 goto accept_err;
476 } 452 }
477 othercon->nodeid = nodeid; 453 othercon->nodeid = nodeid;
478 othercon->rx_action = receive_from_sock; 454 othercon->rx_action = receive_from_sock;
479 init_rwsem(&othercon->sock_sem); 455 mutex_init(&othercon->sock_mutex);
456 INIT_WORK(&othercon->swork, process_send_sockets);
457 INIT_WORK(&othercon->rwork, process_recv_sockets);
480 set_bit(CF_IS_OTHERCON, &othercon->flags); 458 set_bit(CF_IS_OTHERCON, &othercon->flags);
481 newcon->othercon = othercon; 459 newcon->othercon = othercon;
482 } 460 }
483 othercon->sock = newsock; 461 othercon->sock = newsock;
484 newsock->sk->sk_user_data = othercon; 462 newsock->sk->sk_user_data = othercon;
485 add_sock(newsock, othercon); 463 add_sock(newsock, othercon);
464 addcon = othercon;
486 } 465 }
487 else { 466 else {
488 newsock->sk->sk_user_data = newcon; 467 newsock->sk->sk_user_data = newcon;
489 newcon->rx_action = receive_from_sock; 468 newcon->rx_action = receive_from_sock;
490 add_sock(newsock, newcon); 469 add_sock(newsock, newcon);
491 470 addcon = newcon;
492 } 471 }
493 472
494 up_write(&newcon->sock_sem); 473 mutex_unlock(&newcon->sock_mutex);
495 474
496 /* 475 /*
497 * Add it to the active queue in case we got data 476 * Add it to the active queue in case we got data
498 * beween processing the accept adding the socket 477 * beween processing the accept adding the socket
499 * to the read_sockets list 478 * to the read_sockets list
500 */ 479 */
501 lowcomms_data_ready(newsock->sk, 0); 480 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
502 up_read(&con->sock_sem); 481 queue_work(recv_workqueue, &addcon->rwork);
482 mutex_unlock(&con->sock_mutex);
503 483
504 return 0; 484 return 0;
505 485
506accept_err: 486accept_err:
507 up_read(&con->sock_sem); 487 mutex_unlock(&con->sock_mutex);
508 sock_release(newsock); 488 sock_release(newsock);
509 489
510 if (result != -EAGAIN) 490 if (result != -EAGAIN)
@@ -525,7 +505,7 @@ static void connect_to_sock(struct connection *con)
525 return; 505 return;
526 } 506 }
527 507
528 down_write(&con->sock_sem); 508 mutex_lock(&con->sock_mutex);
529 if (con->retries++ > MAX_CONNECT_RETRIES) 509 if (con->retries++ > MAX_CONNECT_RETRIES)
530 goto out; 510 goto out;
531 511
@@ -548,7 +528,7 @@ static void connect_to_sock(struct connection *con)
548 sock->sk->sk_user_data = con; 528 sock->sk->sk_user_data = con;
549 con->rx_action = receive_from_sock; 529 con->rx_action = receive_from_sock;
550 530
551 make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); 531 make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
552 532
553 add_sock(sock, con); 533 add_sock(sock, con);
554 534
@@ -577,7 +557,7 @@ out_err:
577 result = 0; 557 result = 0;
578 } 558 }
579out: 559out:
580 up_write(&con->sock_sem); 560 mutex_unlock(&con->sock_mutex);
581 return; 561 return;
582} 562}
583 563
@@ -616,10 +596,10 @@ static struct socket *create_listen_sock(struct connection *con,
616 con->sock = sock; 596 con->sock = sock;
617 597
618 /* Bind to our port */ 598 /* Bind to our port */
619 make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); 599 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
620 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); 600 result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
621 if (result < 0) { 601 if (result < 0) {
622 printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); 602 printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port);
623 sock_release(sock); 603 sock_release(sock);
624 sock = NULL; 604 sock = NULL;
625 con->sock = NULL; 605 con->sock = NULL;
@@ -638,7 +618,7 @@ static struct socket *create_listen_sock(struct connection *con,
638 618
639 result = sock->ops->listen(sock, 5); 619 result = sock->ops->listen(sock, 5);
640 if (result < 0) { 620 if (result < 0) {
641 printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); 621 printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port);
642 sock_release(sock); 622 sock_release(sock);
643 sock = NULL; 623 sock = NULL;
644 goto create_out; 624 goto create_out;
@@ -709,6 +689,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
709 if (!con) 689 if (!con)
710 return NULL; 690 return NULL;
711 691
692 spin_lock(&con->writequeue_lock);
712 e = list_entry(con->writequeue.prev, struct writequeue_entry, list); 693 e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
713 if ((&e->list == &con->writequeue) || 694 if ((&e->list == &con->writequeue) ||
714 (PAGE_CACHE_SIZE - e->end < len)) { 695 (PAGE_CACHE_SIZE - e->end < len)) {
@@ -747,6 +728,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
747 struct connection *con = e->con; 728 struct connection *con = e->con;
748 int users; 729 int users;
749 730
731 spin_lock(&con->writequeue_lock);
750 users = --e->users; 732 users = --e->users;
751 if (users) 733 if (users)
752 goto out; 734 goto out;
@@ -754,12 +736,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
754 kunmap(e->page); 736 kunmap(e->page);
755 spin_unlock(&con->writequeue_lock); 737 spin_unlock(&con->writequeue_lock);
756 738
757 if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { 739 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
758 spin_lock_bh(&write_sockets_lock); 740 queue_work(send_workqueue, &con->swork);
759 list_add_tail(&con->write_list, &write_sockets);
760 spin_unlock_bh(&write_sockets_lock);
761
762 wake_up_interruptible(&lowcomms_send_waitq);
763 } 741 }
764 return; 742 return;
765 743
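
The two hunks above take writequeue_lock around both the tail-entry lookup in dlm_lowcomms_get_buffer() and the reference drop in dlm_lowcomms_commit_buffer(), closing a race where a concurrent commit could free the entry a get was about to reuse. A minimal sketch of that reserve/commit discipline, with illustrative names (my_conn, my_entry) rather than the DLM's own:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct my_entry {
	struct list_head list;
	int users;		/* outstanding reservations on this entry */
};

struct my_conn {
	spinlock_t lock;
	struct list_head queue;
};

/* Reserve the tail entry: the list walk and the users++ must both
 * happen under the lock, or a concurrent commit can free the entry
 * between the lookup and the reference bump. (The real code
 * allocates a fresh entry when the queue is empty or full; this
 * sketch just bails out.) */
static struct my_entry *my_get(struct my_conn *c)
{
	struct my_entry *e;

	spin_lock(&c->lock);
	e = list_entry(c->queue.prev, struct my_entry, list);
	if (&e->list == &c->queue) {
		spin_unlock(&c->lock);
		return NULL;
	}
	e->users++;
	spin_unlock(&c->lock);
	return e;
}

/* Commit: drop the reference under the same lock. */
static void my_commit(struct my_conn *c, struct my_entry *e)
{
	spin_lock(&c->lock);
	if (--e->users == 0) {
		list_del(&e->list);
		spin_unlock(&c->lock);
		kfree(e);
		return;
	}
	spin_unlock(&c->lock);
}
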
@@ -783,7 +761,7 @@ static void send_to_sock(struct connection *con)
783 struct writequeue_entry *e; 761 struct writequeue_entry *e;
784 int len, offset; 762 int len, offset;
785 763
786 down_read(&con->sock_sem); 764 mutex_lock(&con->sock_mutex);
787 if (con->sock == NULL) 765 if (con->sock == NULL)
788 goto out_connect; 766 goto out_connect;
789 767
@@ -800,6 +778,7 @@ static void send_to_sock(struct connection *con)
800 offset = e->offset; 778 offset = e->offset;
801 BUG_ON(len == 0 && e->users == 0); 779 BUG_ON(len == 0 && e->users == 0);
802 spin_unlock(&con->writequeue_lock); 780 spin_unlock(&con->writequeue_lock);
781 kmap(e->page);
803 782
804 ret = 0; 783 ret = 0;
805 if (len) { 784 if (len) {
@@ -828,18 +807,18 @@ static void send_to_sock(struct connection *con)
828 } 807 }
829 spin_unlock(&con->writequeue_lock); 808 spin_unlock(&con->writequeue_lock);
830out: 809out:
831 up_read(&con->sock_sem); 810 mutex_unlock(&con->sock_mutex);
832 return; 811 return;
833 812
834send_error: 813send_error:
835 up_read(&con->sock_sem); 814 mutex_unlock(&con->sock_mutex);
836 close_connection(con, false); 815 close_connection(con, false);
837 lowcomms_connect_sock(con); 816 lowcomms_connect_sock(con);
838 return; 817 return;
839 818
840out_connect: 819out_connect:
841 up_read(&con->sock_sem); 820 mutex_unlock(&con->sock_mutex);
842 lowcomms_connect_sock(con); 821 connect_to_sock(con);
843 return; 822 return;
844} 823}
845 824
@@ -872,7 +851,6 @@ int dlm_lowcomms_close(int nodeid)
872 if (con) { 851 if (con) {
873 clean_one_writequeue(con); 852 clean_one_writequeue(con);
874 close_connection(con, true); 853 close_connection(con, true);
875 atomic_set(&con->waiting_requests, 0);
876 } 854 }
877 return 0; 855 return 0;
878 856
@@ -880,102 +858,29 @@ out:
880 return -1; 858 return -1;
881} 859}
882 860
883/* API send message call, may queue the request */
884/* N.B. This is the old interface - use the new one for new calls */
885int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
886{
887 struct writequeue_entry *e;
888 char *b;
889
890 e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
891 if (e) {
892 memcpy(b, buf, len);
893 dlm_lowcomms_commit_buffer(e);
894 return 0;
895 }
896 return -ENOBUFS;
897}
898
899/* Look for activity on active sockets */ 861/* Look for activity on active sockets */
900static void process_sockets(void) 862static void process_recv_sockets(struct work_struct *work)
901{ 863{
902 struct list_head *list; 864 struct connection *con = container_of(work, struct connection, rwork);
903 struct list_head *temp; 865 int err;
904 int count = 0;
905
906 spin_lock_bh(&read_sockets_lock);
907 list_for_each_safe(list, temp, &read_sockets) {
908 866
909 struct connection *con = 867 clear_bit(CF_READ_PENDING, &con->flags);
910 list_entry(list, struct connection, read_list); 868 do {
911 list_del(&con->read_list); 869 err = con->rx_action(con);
912 clear_bit(CF_READ_PENDING, &con->flags); 870 } while (!err);
913
914 spin_unlock_bh(&read_sockets_lock);
915
916 /* This can reach zero if we are processing requests
917 * as they come in.
918 */
919 if (atomic_read(&con->waiting_requests) == 0) {
920 spin_lock_bh(&read_sockets_lock);
921 continue;
922 }
923
924 do {
925 con->rx_action(con);
926
927 /* Don't starve out everyone else */
928 if (++count >= MAX_RX_MSG_COUNT) {
929 cond_resched();
930 count = 0;
931 }
932
933 } while (!atomic_dec_and_test(&con->waiting_requests) &&
934 !kthread_should_stop());
935
936 spin_lock_bh(&read_sockets_lock);
937 }
938 spin_unlock_bh(&read_sockets_lock);
939} 871}
940 872
941/* Try to send any messages that are pending
942 */
943static void process_output_queue(void)
944{
945 struct list_head *list;
946 struct list_head *temp;
947
948 spin_lock_bh(&write_sockets_lock);
949 list_for_each_safe(list, temp, &write_sockets) {
950 struct connection *con =
951 list_entry(list, struct connection, write_list);
952 clear_bit(CF_WRITE_PENDING, &con->flags);
953 list_del(&con->write_list);
954
955 spin_unlock_bh(&write_sockets_lock);
956 send_to_sock(con);
957 spin_lock_bh(&write_sockets_lock);
958 }
959 spin_unlock_bh(&write_sockets_lock);
960}
961 873
962static void process_state_queue(void) 874static void process_send_sockets(struct work_struct *work)
963{ 875{
964 struct list_head *list; 876 struct connection *con = container_of(work, struct connection, swork);
965 struct list_head *temp;
966
967 spin_lock_bh(&state_sockets_lock);
968 list_for_each_safe(list, temp, &state_sockets) {
969 struct connection *con =
970 list_entry(list, struct connection, state_list);
971 list_del(&con->state_list);
972 clear_bit(CF_CONNECT_PENDING, &con->flags);
973 spin_unlock_bh(&state_sockets_lock);
974 877
878 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
975 connect_to_sock(con); 879 connect_to_sock(con);
976 spin_lock_bh(&state_sockets_lock);
977 } 880 }
978 spin_unlock_bh(&state_sockets_lock); 881
882 clear_bit(CF_WRITE_PENDING, &con->flags);
883 send_to_sock(con);
979} 884}
980 885
981 886
@@ -992,109 +897,33 @@ static void clean_writequeues(void)
992 } 897 }
993} 898}
994 899
995static int read_list_empty(void) 900static void work_stop(void)
996{ 901{
997 int status; 902 destroy_workqueue(recv_workqueue);
998 903 destroy_workqueue(send_workqueue);
999 spin_lock_bh(&read_sockets_lock);
1000 status = list_empty(&read_sockets);
1001 spin_unlock_bh(&read_sockets_lock);
1002
1003 return status;
1004}
1005
1006/* DLM Transport comms receive daemon */
1007static int dlm_recvd(void *data)
1008{
1009 init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
1010 add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
1011
1012 while (!kthread_should_stop()) {
1013 set_current_state(TASK_INTERRUPTIBLE);
1014 if (read_list_empty())
1015 cond_resched();
1016 set_current_state(TASK_RUNNING);
1017
1018 process_sockets();
1019 }
1020
1021 return 0;
1022} 904}
1023 905
1024static int write_and_state_lists_empty(void) 906static int work_start(void)
1025{ 907{
1026 int status;
1027
1028 spin_lock_bh(&write_sockets_lock);
1029 status = list_empty(&write_sockets);
1030 spin_unlock_bh(&write_sockets_lock);
1031
1032 spin_lock_bh(&state_sockets_lock);
1033 if (list_empty(&state_sockets) == 0)
1034 status = 0;
1035 spin_unlock_bh(&state_sockets_lock);
1036
1037 return status;
1038}
1039
1040/* DLM Transport send daemon */
1041static int dlm_sendd(void *data)
1042{
1043 init_waitqueue_entry(&lowcomms_send_waitq_head, current);
1044 add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
1045
1046 while (!kthread_should_stop()) {
1047 set_current_state(TASK_INTERRUPTIBLE);
1048 if (write_and_state_lists_empty())
1049 cond_resched();
1050 set_current_state(TASK_RUNNING);
1051
1052 process_state_queue();
1053 process_output_queue();
1054 }
1055
1056 return 0;
1057}
1058
1059static void daemons_stop(void)
1060{
1061 kthread_stop(recv_task);
1062 kthread_stop(send_task);
1063}
1064
1065static int daemons_start(void)
1066{
1067 struct task_struct *p;
1068 int error; 908 int error;
1069 909 recv_workqueue = create_workqueue("dlm_recv");
1070 p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); 910 error = IS_ERR(recv_workqueue);
1071 error = IS_ERR(p);
1072 if (error) { 911 if (error) {
1073 log_print("can't start dlm_recvd %d", error); 912 log_print("can't start dlm_recv %d", error);
1074 return error; 913 return error;
1075 } 914 }
1076 recv_task = p;
1077 915
1078 p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); 916 send_workqueue = create_singlethread_workqueue("dlm_send");
1079 error = IS_ERR(p); 917 error = IS_ERR(send_workqueue);
1080 if (error) { 918 if (error) {
1081 log_print("can't start dlm_sendd %d", error); 919 log_print("can't start dlm_send %d", error);
1082 kthread_stop(recv_task); 920 destroy_workqueue(recv_workqueue);
1083 return error; 921 return error;
1084 } 922 }
1085 send_task = p;
1086 923
1087 return 0; 924 return 0;
1088} 925}
1089 926
1090/*
1091 * Return the largest buffer size we can cope with.
1092 */
1093int lowcomms_max_buffer_size(void)
1094{
1095 return PAGE_CACHE_SIZE;
1096}
1097
1098void dlm_lowcomms_stop(void) 927void dlm_lowcomms_stop(void)
1099{ 928{
1100 int i; 929 int i;
@@ -1107,7 +936,7 @@ void dlm_lowcomms_stop(void)
1107 connections[i]->flags |= 0xFF; 936 connections[i]->flags |= 0xFF;
1108 } 937 }
1109 938
1110 daemons_stop(); 939 work_stop();
1111 clean_writequeues(); 940 clean_writequeues();
1112 941
1113 for (i = 0; i < conn_array_size; i++) { 942 for (i = 0; i < conn_array_size; i++) {
@@ -1159,7 +988,7 @@ int dlm_lowcomms_start(void)
1159 if (error) 988 if (error)
1160 goto fail_unlisten; 989 goto fail_unlisten;
1161 990
1162 error = daemons_start(); 991 error = work_start();
1163 if (error) 992 if (error)
1164 goto fail_unlisten; 993 goto fail_unlisten;
1165 994
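
This completes lowcomms-tcp.c's move from the dlm_recvd/dlm_sendd kernel threads and their hand-rolled pending lists to per-connection work items on dedicated workqueues. A sketch of the 2.6.20 work_struct pattern involved, using illustrative names (conn, demo_*) rather than the driver's:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>

#define CF_READ_PENDING 1

static struct workqueue_struct *recv_wq;

struct conn {
	unsigned long flags;
	struct work_struct rwork;
};

static void conn_recv(struct work_struct *work)
{
	/* Recover the owning connection from the embedded work item. */
	struct conn *c = container_of(work, struct conn, rwork);

	clear_bit(CF_READ_PENDING, &c->flags);
	/* ... loop calling the receive action until it reports no data ... */
}

static void conn_init(struct conn *c)
{
	c->flags = 0;
	INIT_WORK(&c->rwork, conn_recv);
}

/* Socket data-ready hook: queue at most one work item per connection. */
static void conn_data_ready(struct conn *c)
{
	if (!test_and_set_bit(CF_READ_PENDING, &c->flags))
		queue_work(recv_wq, &c->rwork);
}

static int __init demo_init(void)
{
	recv_wq = create_workqueue("demo_recv");
	return recv_wq ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(recv_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Note that in this kernel create_workqueue() returns NULL on failure, which is why the sketch tests for NULL rather than IS_ERR().
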
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index c9b1c3d535f4..a5126e0c68a6 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
82 if (msglen < sizeof(struct dlm_header)) 82 if (msglen < sizeof(struct dlm_header))
83 break; 83 break;
84 err = -E2BIG; 84 err = -E2BIG;
85 if (msglen > dlm_config.buffer_size) { 85 if (msglen > dlm_config.ci_buffer_size) {
86 log_print("message size %d from %d too big, buf len %d", 86 log_print("message size %d from %d too big, buf len %d",
87 msglen, nodeid, len); 87 msglen, nodeid, len);
88 break; 88 break;
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
103 103
104 if (msglen > sizeof(__tmp) && 104 if (msglen > sizeof(__tmp) &&
105 msg == (struct dlm_header *) __tmp) { 105 msg == (struct dlm_header *) __tmp) {
106 msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); 106 msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
107 if (msg == NULL) 107 if (msg == NULL)
108 return ret; 108 return ret;
109 } 109 }
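
The midcomms.c checks above clamp each incoming message to the configured ci_buffer_size and fall back to kmalloc() when a message is too large for the on-stack scratch buffer. A self-contained sketch of that copy-through parse, assuming a 4-byte length header purely for illustration (the real code parses struct dlm_header):

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>

#define MAX_REC 4096	/* stands in for dlm_config.ci_buffer_size */

static int process_buf(const char *base, unsigned int len)
{
	unsigned char __tmp[64];	/* small on-stack scratch area */
	const char *p = base;
	char *heap = NULL;
	unsigned int msglen;
	int err = 0;

	while (len >= sizeof(msglen)) {
		memcpy(&msglen, p, sizeof(msglen));	/* record length header */
		if (msglen < sizeof(msglen) || msglen > MAX_REC) {
			err = -E2BIG;		/* runt or oversized record */
			break;
		}
		if (msglen > len)
			break;			/* partial record: wait for more data */
		if (msglen <= sizeof(__tmp)) {
			memcpy(__tmp, p, msglen);
			/* dispatch(__tmp, msglen); */
		} else {
			if (!heap)
				heap = kmalloc(MAX_REC, GFP_KERNEL);
			if (!heap) {
				err = -ENOMEM;
				break;
			}
			memcpy(heap, p, msglen);
			/* dispatch(heap, msglen); */
		}
		p += msglen;
		len -= msglen;
	}
	kfree(heap);		/* kfree(NULL) is a no-op */
	return err;
}
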
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 4cc31be9cd9d..6bfbd6153809 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
56 56
57 rc->rc_type = type; 57 rc->rc_type = type;
58 58
59 spin_lock(&ls->ls_recover_lock);
60 rc->rc_seq = ls->ls_recover_seq;
61 spin_unlock(&ls->ls_recover_lock);
62
59 *mh_ret = mh; 63 *mh_ret = mh;
60 *rc_ret = rc; 64 *rc_ret = rc;
61 return 0; 65 return 0;
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
78 rf->rf_lsflags = ls->ls_exflags; 82 rf->rf_lsflags = ls->ls_exflags;
79} 83}
80 84
81static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) 85static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
82{ 86{
87 struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
88
89 if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
90 log_error(ls, "version mismatch: %x nodeid %d: %x",
91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
92 rc->rc_header.h_version);
93 return -EINVAL;
94 }
95
83 if (rf->rf_lvblen != ls->ls_lvblen || 96 if (rf->rf_lvblen != ls->ls_lvblen ||
84 rf->rf_lsflags != ls->ls_exflags) { 97 rf->rf_lsflags != ls->ls_exflags) {
85 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
125 goto out; 138 goto out;
126 139
127 allow_sync_reply(ls, &rc->rc_id); 140 allow_sync_reply(ls, &rc->rc_id);
128 memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); 141 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
129 142
130 send_rcom(ls, mh, rc); 143 send_rcom(ls, mh, rc);
131 144
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
141 log_debug(ls, "remote node %d not ready", nodeid); 154 log_debug(ls, "remote node %d not ready", nodeid);
142 rc->rc_result = 0; 155 rc->rc_result = 0;
143 } else 156 } else
144 error = check_config(ls, (struct rcom_config *) rc->rc_buf, 157 error = check_config(ls, rc, nodeid);
145 nodeid);
146 /* the caller looks at rc_result for the remote recovery status */ 158 /* the caller looks at rc_result for the remote recovery status */
147 out: 159 out:
148 return error; 160 return error;
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
159 if (error) 171 if (error)
160 return; 172 return;
161 rc->rc_id = rc_in->rc_id; 173 rc->rc_id = rc_in->rc_id;
174 rc->rc_seq_reply = rc_in->rc_seq;
162 rc->rc_result = dlm_recover_status(ls); 175 rc->rc_result = dlm_recover_status(ls);
163 make_config(ls, (struct rcom_config *) rc->rc_buf); 176 make_config(ls, (struct rcom_config *) rc->rc_buf);
164 177
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
200 if (nodeid == dlm_our_nodeid()) { 213 if (nodeid == dlm_our_nodeid()) {
201 dlm_copy_master_names(ls, last_name, last_len, 214 dlm_copy_master_names(ls, last_name, last_len,
202 ls->ls_recover_buf + len, 215 ls->ls_recover_buf + len,
203 dlm_config.buffer_size - len, nodeid); 216 dlm_config.ci_buffer_size - len, nodeid);
204 goto out; 217 goto out;
205 } 218 }
206 219
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
210 memcpy(rc->rc_buf, last_name, last_len); 223 memcpy(rc->rc_buf, last_name, last_len);
211 224
212 allow_sync_reply(ls, &rc->rc_id); 225 allow_sync_reply(ls, &rc->rc_id);
213 memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); 226 memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
214 227
215 send_rcom(ls, mh, rc); 228 send_rcom(ls, mh, rc);
216 229
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
224{ 237{
225 struct dlm_rcom *rc; 238 struct dlm_rcom *rc;
226 struct dlm_mhandle *mh; 239 struct dlm_mhandle *mh;
227 int error, inlen, outlen; 240 int error, inlen, outlen, nodeid;
228 int nodeid = rc_in->rc_header.h_nodeid;
229 uint32_t status = dlm_recover_status(ls);
230
231 /*
232 * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
233 * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
234 * It could only happen in rare cases where we get a late NAMES
235 * message from a previous instance of recovery.
236 */
237
238 if (!(status & DLM_RS_NODES)) {
239 log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
240 return;
241 }
242 241
243 nodeid = rc_in->rc_header.h_nodeid; 242 nodeid = rc_in->rc_header.h_nodeid;
244 inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); 243 inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
245 outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); 244 outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
246 245
247 error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); 246 error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
248 if (error) 247 if (error)
249 return; 248 return;
250 rc->rc_id = rc_in->rc_id; 249 rc->rc_id = rc_in->rc_id;
250 rc->rc_seq_reply = rc_in->rc_seq;
251 251
252 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, 252 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
253 nodeid); 253 nodeid);
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
294 ret_nodeid = error; 294 ret_nodeid = error;
295 rc->rc_result = ret_nodeid; 295 rc->rc_result = ret_nodeid;
296 rc->rc_id = rc_in->rc_id; 296 rc->rc_id = rc_in->rc_id;
297 rc->rc_seq_reply = rc_in->rc_seq;
297 298
298 send_rcom(ls, mh, rc); 299 send_rcom(ls, mh, rc);
299} 300}
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
375 376
376 memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); 377 memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
377 rc->rc_id = rc_in->rc_id; 378 rc->rc_id = rc_in->rc_id;
379 rc->rc_seq_reply = rc_in->rc_seq;
378 380
379 send_rcom(ls, mh, rc); 381 send_rcom(ls, mh, rc);
380} 382}
381 383
382static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) 384static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
383{ 385{
384 uint32_t status = dlm_recover_status(ls);
385
386 if (!(status & DLM_RS_DIR)) {
387 log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
388 rc_in->rc_header.h_nodeid);
389 return;
390 }
391
392 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
393} 387}
394 388
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
415 409
416 rc->rc_type = DLM_RCOM_STATUS_REPLY; 410 rc->rc_type = DLM_RCOM_STATUS_REPLY;
417 rc->rc_id = rc_in->rc_id; 411 rc->rc_id = rc_in->rc_id;
412 rc->rc_seq_reply = rc_in->rc_seq;
418 rc->rc_result = -ESRCH; 413 rc->rc_result = -ESRCH;
419 414
420 rf = (struct rcom_config *) rc->rc_buf; 415 rf = (struct rcom_config *) rc->rc_buf;
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
426 return 0; 421 return 0;
427} 422}
428 423
424static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
425{
426 uint64_t seq;
427 int rv = 0;
428
429 switch (rc->rc_type) {
430 case DLM_RCOM_STATUS_REPLY:
431 case DLM_RCOM_NAMES_REPLY:
432 case DLM_RCOM_LOOKUP_REPLY:
433 case DLM_RCOM_LOCK_REPLY:
434 spin_lock(&ls->ls_recover_lock);
435 seq = ls->ls_recover_seq;
436 spin_unlock(&ls->ls_recover_lock);
437 if (rc->rc_seq_reply != seq) {
438 log_debug(ls, "ignoring old reply %x from %d "
439 "seq_reply %llx expect %llx",
440 rc->rc_type, rc->rc_header.h_nodeid,
441 (unsigned long long)rc->rc_seq_reply,
442 (unsigned long long)seq);
443 rv = 1;
444 }
445 }
446 return rv;
447}
448
429/* Called by dlm_recvd; corresponds to dlm_receive_message() but special 449/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
430 recovery-only comms are sent through here. */ 450 recovery-only comms are sent through here. */
431 451
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
449 } 469 }
450 470
451 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 471 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
452 log_error(ls, "ignoring recovery message %x from %d", 472 log_debug(ls, "ignoring recovery message %x from %d",
453 rc->rc_type, nodeid); 473 rc->rc_type, nodeid);
454 goto out; 474 goto out;
455 } 475 }
456 476
477 if (is_old_reply(ls, rc))
478 goto out;
479
457 if (nodeid != rc->rc_header.h_nodeid) { 480 if (nodeid != rc->rc_header.h_nodeid) {
458 log_error(ls, "bad rcom nodeid %d from %d", 481 log_error(ls, "bad rcom nodeid %d from %d",
459 rc->rc_header.h_nodeid, nodeid); 482 rc->rc_header.h_nodeid, nodeid);
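
rcom.c now stamps every recovery request with the lockspace's current ls_recover_seq, has every reply echo it back in rc_seq_reply, and drops replies whose echoed sequence is stale; this replaces the per-type status checks removed above. The core of that filter, sketched with illustrative names:

#include <linux/types.h>
#include <linux/spinlock.h>

struct recov_state {
	spinlock_t lock;
	u64 seq;		/* bumped each time recovery restarts */
};

/* Requesting node: tag the outgoing message with the live sequence. */
static void stamp_request(struct recov_state *rs, u64 *rc_seq)
{
	spin_lock(&rs->lock);
	*rc_seq = rs->seq;
	spin_unlock(&rs->lock);
}

/* Replying node: echo the request's sequence back unchanged. */
static void stamp_reply(u64 req_seq, u64 *rc_seq_reply)
{
	*rc_seq_reply = req_seq;
}

/* Requesting node again: non-zero means "old reply, ignore it". */
static int reply_is_old(struct recov_state *rs, u64 rc_seq_reply)
{
	u64 cur;

	spin_lock(&rs->lock);
	cur = rs->seq;
	spin_unlock(&rs->lock);
	return rc_seq_reply != cur;
}
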
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index cf9f6831bab5..c2cc7694cd16 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -44,7 +44,7 @@
44static void dlm_wait_timer_fn(unsigned long data) 44static void dlm_wait_timer_fn(unsigned long data)
45{ 45{
46 struct dlm_ls *ls = (struct dlm_ls *) data; 46 struct dlm_ls *ls = (struct dlm_ls *) data;
47 mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); 47 mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
48 wake_up(&ls->ls_wait_general); 48 wake_up(&ls->ls_wait_general);
49} 49}
50 50
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
55 init_timer(&ls->ls_timer); 55 init_timer(&ls->ls_timer);
56 ls->ls_timer.function = dlm_wait_timer_fn; 56 ls->ls_timer.function = dlm_wait_timer_fn;
57 ls->ls_timer.data = (long) ls; 57 ls->ls_timer.data = (long) ls;
58 ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); 58 ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
59 add_timer(&ls->ls_timer); 59 add_timer(&ls->ls_timer);
60 60
61 wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); 61 wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
397 397
398 if (dlm_no_directory(ls)) 398 if (dlm_no_directory(ls))
399 count += recover_master_static(r); 399 count += recover_master_static(r);
400 else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { 400 else if (!is_master(r) &&
401 (dlm_is_removed(ls, r->res_nodeid) ||
402 rsb_flag(r, RSB_NEW_MASTER))) {
401 recover_master(r); 403 recover_master(r);
402 count++; 404 count++;
403 } 405 }
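
dlm_wait_function() pairs wait_event() with a self-rearming timer so the wait condition is re-tested periodically even when no wake-up arrives. A sketch of that pattern with the 2.6-era timer API, using a hypothetical five-second period where the real code uses ci_recover_timer:

#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wait_head);
static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	/* Re-arm ourselves, then poke anyone sleeping on the queue. */
	mod_timer(&demo_timer, jiffies + 5 * HZ);
	wake_up(&demo_wait_head);
}

static void demo_wait_for(int (*testfn)(void))
{
	init_timer(&demo_timer);
	demo_timer.function = demo_timer_fn;
	demo_timer.data = 0;
	demo_timer.expires = jiffies + 5 * HZ;
	add_timer(&demo_timer);

	/* Sleeps until testfn() is true; the timer guarantees the
	 * condition is re-evaluated at least every five seconds. */
	wait_event(demo_wait_head, testfn());

	del_timer_sync(&demo_timer);
}
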
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 650536aa5139..3cb636d60249 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
77 77
78 error = dlm_recover_members(ls, rv, &neg); 78 error = dlm_recover_members(ls, rv, &neg);
79 if (error) { 79 if (error) {
80 log_error(ls, "recover_members failed %d", error); 80 log_debug(ls, "recover_members failed %d", error);
81 goto fail; 81 goto fail;
82 } 82 }
83 start = jiffies; 83 start = jiffies;
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
89 89
90 error = dlm_recover_directory(ls); 90 error = dlm_recover_directory(ls);
91 if (error) { 91 if (error) {
92 log_error(ls, "recover_directory failed %d", error); 92 log_debug(ls, "recover_directory failed %d", error);
93 goto fail; 93 goto fail;
94 } 94 }
95 95
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
99 99
100 error = dlm_recover_directory_wait(ls); 100 error = dlm_recover_directory_wait(ls);
101 if (error) { 101 if (error) {
102 log_error(ls, "recover_directory_wait failed %d", error); 102 log_debug(ls, "recover_directory_wait failed %d", error);
103 goto fail; 103 goto fail;
104 } 104 }
105 105
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
129 129
130 error = dlm_recover_masters(ls); 130 error = dlm_recover_masters(ls);
131 if (error) { 131 if (error) {
132 log_error(ls, "recover_masters failed %d", error); 132 log_debug(ls, "recover_masters failed %d", error);
133 goto fail; 133 goto fail;
134 } 134 }
135 135
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
139 139
140 error = dlm_recover_locks(ls); 140 error = dlm_recover_locks(ls);
141 if (error) { 141 if (error) {
142 log_error(ls, "recover_locks failed %d", error); 142 log_debug(ls, "recover_locks failed %d", error);
143 goto fail; 143 goto fail;
144 } 144 }
145 145
146 error = dlm_recover_locks_wait(ls); 146 error = dlm_recover_locks_wait(ls);
147 if (error) { 147 if (error) {
148 log_error(ls, "recover_locks_wait failed %d", error); 148 log_debug(ls, "recover_locks_wait failed %d", error);
149 goto fail; 149 goto fail;
150 } 150 }
151 151
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
166 166
167 error = dlm_recover_locks_wait(ls); 167 error = dlm_recover_locks_wait(ls);
168 if (error) { 168 if (error) {
169 log_error(ls, "recover_locks_wait failed %d", error); 169 log_debug(ls, "recover_locks_wait failed %d", error);
170 goto fail; 170 goto fail;
171 } 171 }
172 } 172 }
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
184 dlm_set_recover_status(ls, DLM_RS_DONE); 184 dlm_set_recover_status(ls, DLM_RS_DONE);
185 error = dlm_recover_done_wait(ls); 185 error = dlm_recover_done_wait(ls);
186 if (error) { 186 if (error) {
187 log_error(ls, "recover_done_wait failed %d", error); 187 log_debug(ls, "recover_done_wait failed %d", error);
188 goto fail; 188 goto fail;
189 } 189 }
190 190
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
192 192
193 error = enable_locking(ls, rv->seq); 193 error = enable_locking(ls, rv->seq);
194 if (error) { 194 if (error) {
195 log_error(ls, "enable_locking failed %d", error); 195 log_debug(ls, "enable_locking failed %d", error);
196 goto fail; 196 goto fail;
197 } 197 }
198 198
199 error = dlm_process_requestqueue(ls); 199 error = dlm_process_requestqueue(ls);
200 if (error) { 200 if (error) {
201 log_error(ls, "process_requestqueue failed %d", error); 201 log_debug(ls, "process_requestqueue failed %d", error);
202 goto fail; 202 goto fail;
203 } 203 }
204 204
205 error = dlm_recover_waiters_post(ls); 205 error = dlm_recover_waiters_post(ls);
206 if (error) { 206 if (error) {
207 log_error(ls, "recover_waiters_post failed %d", error); 207 log_debug(ls, "recover_waiters_post failed %d", error);
208 goto fail; 208 goto fail;
209 } 209 }
210 210
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index c37e93e4f2df..d378b7fe2a1e 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
180 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) 180 ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
181 remove_ownqueue = 1; 181 remove_ownqueue = 1;
182 182
183 /* unlocks or cancels of waiting requests need to be removed from the
184 proc's unlocking list; again, there must be a better way... */
185
186 if (ua->lksb.sb_status == -DLM_EUNLOCK ||
187 (ua->lksb.sb_status == -DLM_ECANCEL &&
188 lkb->lkb_grmode == DLM_LOCK_IV))
189 remove_ownqueue = 1;
190
183 /* We want to copy the lvb to userspace when the completion 191 /* We want to copy the lvb to userspace when the completion
184 ast is read if the status is 0, the lock has an lvb and 192 ast is read if the status is 0, the lock has an lvb and
185 lvb_ops says we should. We could probably have set_lvb_lock() 193 lvb_ops says we should. We could probably have set_lvb_lock()
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
523 proc->lockspace = ls->ls_local_handle; 531 proc->lockspace = ls->ls_local_handle;
524 INIT_LIST_HEAD(&proc->asts); 532 INIT_LIST_HEAD(&proc->asts);
525 INIT_LIST_HEAD(&proc->locks); 533 INIT_LIST_HEAD(&proc->locks);
534 INIT_LIST_HEAD(&proc->unlocking);
526 spin_lock_init(&proc->asts_spin); 535 spin_lock_init(&proc->asts_spin);
527 spin_lock_init(&proc->locks_spin); 536 spin_lock_init(&proc->locks_spin);
528 init_waitqueue_head(&proc->wait); 537 init_waitqueue_head(&proc->wait);
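
The new user.c block implements the rule its comment states: a completion whose status says the lock is gone, either unlocked or cancelled before it was ever granted, must also leave the per-process queue. Reduced to a sketch with stand-in status values (the real code tests -DLM_EUNLOCK and -DLM_ECANCEL against lkb_grmode):

#include <linux/list.h>

#define ST_UNLOCKED  1		/* stand-in for -DLM_EUNLOCK */
#define ST_CANCELLED 2		/* stand-in for -DLM_ECANCEL */

struct demo_lock {
	struct list_head ownqueue;
	int grmode;		/* -1: never granted (like DLM_LOCK_IV) */
	int status;
};

static void demo_complete(struct demo_lock *lk)
{
	int remove = 0;

	if (lk->status == ST_UNLOCKED ||
	    (lk->status == ST_CANCELLED && lk->grmode == -1))
		remove = 1;

	/* The lock no longer exists, so its per-process entry must go. */
	if (remove && !list_empty(&lk->ownqueue))
		list_del_init(&lk->ownqueue);
}
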
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 767197db9944..963889cf6740 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
134 rc->rc_type = cpu_to_le32(rc->rc_type); 134 rc->rc_type = cpu_to_le32(rc->rc_type);
135 rc->rc_result = cpu_to_le32(rc->rc_result); 135 rc->rc_result = cpu_to_le32(rc->rc_result);
136 rc->rc_id = cpu_to_le64(rc->rc_id); 136 rc->rc_id = cpu_to_le64(rc->rc_id);
137 rc->rc_seq = cpu_to_le64(rc->rc_seq);
138 rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
137 139
138 if (type == DLM_RCOM_LOCK) 140 if (type == DLM_RCOM_LOCK)
139 rcom_lock_out((struct rcom_lock *) rc->rc_buf); 141 rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
151 rc->rc_type = le32_to_cpu(rc->rc_type); 153 rc->rc_type = le32_to_cpu(rc->rc_type);
152 rc->rc_result = le32_to_cpu(rc->rc_result); 154 rc->rc_result = le32_to_cpu(rc->rc_result);
153 rc->rc_id = le64_to_cpu(rc->rc_id); 155 rc->rc_id = le64_to_cpu(rc->rc_id);
156 rc->rc_seq = le64_to_cpu(rc->rc_seq);
157 rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
154 158
155 if (rc->rc_type == DLM_RCOM_LOCK) 159 if (rc->rc_type == DLM_RCOM_LOCK)
156 rcom_lock_in((struct rcom_lock *) rc->rc_buf); 160 rcom_lock_in((struct rcom_lock *) rc->rc_buf);
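
util.c keeps the on-wire format little-endian by swapping every field on the way out and back on the way in, so the new rc_seq/rc_seq_reply fields must appear in both converters; forgetting one side silently corrupts the field on big-endian nodes. The discipline in miniature, with an illustrative struct:

#include <linux/types.h>
#include <asm/byteorder.h>

struct demo_msg {
	__u32 type;
	__u64 seq;
	__u64 seq_reply;
};

static void demo_msg_out(struct demo_msg *m)	/* host -> wire */
{
	m->type = cpu_to_le32(m->type);
	m->seq = cpu_to_le64(m->seq);
	m->seq_reply = cpu_to_le64(m->seq_reply);
}

static void demo_msg_in(struct demo_msg *m)	/* wire -> host */
{
	m->type = le32_to_cpu(m->type);
	m->seq = le64_to_cpu(m->seq);
	m->seq_reply = le64_to_cpu(m->seq_reply);
}
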
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 6a2ffa2db14f..de8e64c03f73 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,44 +4,43 @@ config GFS2_FS
4 select FS_POSIX_ACL 4 select FS_POSIX_ACL
5 select CRC32 5 select CRC32
6 help 6 help
7 A cluster filesystem. 7 A cluster filesystem.
8 8
9 Allows a cluster of computers to simultaneously use a block device 9 Allows a cluster of computers to simultaneously use a block device
10 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads 10 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
11 and writes to the block device like a local filesystem, but also uses 11 and writes to the block device like a local filesystem, but also uses
12 a lock module to allow the computers to coordinate their I/O so 12 a lock module to allow the computers to coordinate their I/O so
13 filesystem consistency is maintained. One of the nifty features of 13 filesystem consistency is maintained. One of the nifty features of
14 GFS is perfect consistency -- changes made to the filesystem on one 14 GFS is perfect consistency -- changes made to the filesystem on one
15 machine show up immediately on all other machines in the cluster. 15 machine show up immediately on all other machines in the cluster.
16 16
17 To use the GFS2 filesystem, you will need to enable one or more of 17 To use the GFS2 filesystem, you will need to enable one or more of
18 the below locking modules. Documentation and utilities for GFS2 can 18 the below locking modules. Documentation and utilities for GFS2 can
19 be found here: http://sources.redhat.com/cluster 19 be found here: http://sources.redhat.com/cluster
20 20
21config GFS2_FS_LOCKING_NOLOCK 21config GFS2_FS_LOCKING_NOLOCK
22 tristate "GFS2 \"nolock\" locking module" 22 tristate "GFS2 \"nolock\" locking module"
23 depends on GFS2_FS 23 depends on GFS2_FS
24 help 24 help
25 Single node locking module for GFS2. 25 Single node locking module for GFS2.
26 26
27 Use this module if you want to use GFS2 on a single node without 27 Use this module if you want to use GFS2 on a single node without
28 its clustering features. You can still take advantage of the 28 its clustering features. You can still take advantage of the
29 large file support, and upgrade to running a full cluster later on 29 large file support, and upgrade to running a full cluster later on
30 if required. 30 if required.
31 31
32 If you will only be using GFS2 in cluster mode, you do not need this 32 If you will only be using GFS2 in cluster mode, you do not need this
33 module. 33 module.
34 34
35config GFS2_FS_LOCKING_DLM 35config GFS2_FS_LOCKING_DLM
36 tristate "GFS2 DLM locking module" 36 tristate "GFS2 DLM locking module"
37 depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n) 37 depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
38 select IP_SCTP if DLM_SCTP 38 select IP_SCTP if DLM_SCTP
39 select CONFIGFS_FS 39 select CONFIGFS_FS
40 select DLM 40 select DLM
41 help 41 help
42 Multiple node locking module for GFS2 42 Multiple node locking module for GFS2
43
44 Most users of GFS2 will require this module. It provides the locking
45 interface between GFS2 and the DLM, which is required to use GFS2
46 in a cluster environment.
47 43
44 Most users of GFS2 will require this module. It provides the locking
45 interface between GFS2 and the DLM, which is required to use GFS2
46 in a cluster environment.
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8240c1ff94f4..113f6c9110c7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
773 gfs2_free_data(ip, bstart, blen); 773 gfs2_free_data(ip, bstart, blen);
774 } 774 }
775 775
776 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 776 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
777 777
778 gfs2_dinode_out(ip, dibh->b_data); 778 gfs2_dinode_out(ip, dibh->b_data);
779 779
@@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
848 } 848 }
849 849
850 ip->i_di.di_size = size; 850 ip->i_di.di_size = size;
851 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 851 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
852 852
853 error = gfs2_meta_inode_buffer(ip, &dibh); 853 error = gfs2_meta_inode_buffer(ip, &dibh);
854 if (error) 854 if (error)
@@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
963 963
964 if (gfs2_is_stuffed(ip)) { 964 if (gfs2_is_stuffed(ip)) {
965 ip->i_di.di_size = size; 965 ip->i_di.di_size = size;
966 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 966 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
967 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 967 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
968 gfs2_dinode_out(ip, dibh->b_data); 968 gfs2_dinode_out(ip, dibh->b_data);
969 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); 969 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
975 975
976 if (!error) { 976 if (!error) {
977 ip->i_di.di_size = size; 977 ip->i_di.di_size = size;
978 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 978 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
979 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; 979 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
980 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 980 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
981 gfs2_dinode_out(ip, dibh->b_data); 981 gfs2_dinode_out(ip, dibh->b_data);
@@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip)
1048 ip->i_num.no_addr; 1048 ip->i_num.no_addr;
1049 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1049 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1050 } 1050 }
1051 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 1051 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1052 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; 1052 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1053 1053
1054 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1054 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
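
The get_seconds() to CURRENT_TIME_SEC conversions in this file (and in dir.c and eattr.c below) are not just cosmetic: assigning only tv_sec leaves whatever tv_nsec was already there, while CURRENT_TIME_SEC writes a whole struct timespec with tv_nsec zeroed. Side by side, as a sketch:

#include <linux/fs.h>
#include <linux/time.h>

/* Old pattern: only tv_sec is written, so a stale tv_nsec survives. */
static void demo_touch_old(struct inode *inode)
{
	inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = get_seconds();
}

/* New pattern: CURRENT_TIME_SEC is a full timespec with tv_nsec = 0. */
static void demo_touch_new(struct inode *inode)
{
	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
}
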
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0fdcb7713cd9..c93ca8f361b5 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_di.di_size < offset + size) 132 if (ip->i_di.di_size < offset + size)
133 ip->i_di.di_size = offset + size; 133 ip->i_di.di_size = offset + size;
134 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 134 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
135 gfs2_dinode_out(ip, dibh->b_data); 135 gfs2_dinode_out(ip, dibh->b_data);
136 136
137 brelse(dibh); 137 brelse(dibh);
@@ -229,7 +229,7 @@ out:
229 229
230 if (ip->i_di.di_size < offset + copied) 230 if (ip->i_di.di_size < offset + copied)
231 ip->i_di.di_size = offset + copied; 231 ip->i_di.di_size = offset + copied;
232 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
@@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b)
1198 */ 1198 */
1199 1199
1200static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, 1200static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1201 void *opaque, gfs2_filldir_t filldir, 1201 void *opaque, filldir_t filldir,
1202 const struct gfs2_dirent **darr, u32 entries, 1202 const struct gfs2_dirent **darr, u32 entries,
1203 int *copied) 1203 int *copied)
1204{ 1204{
1205 const struct gfs2_dirent *dent, *dent_next; 1205 const struct gfs2_dirent *dent, *dent_next;
1206 struct gfs2_inum_host inum;
1207 u64 off, off_next; 1206 u64 off, off_next;
1208 unsigned int x, y; 1207 unsigned int x, y;
1209 int run = 0; 1208 int run = 0;
@@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1240 *offset = off; 1239 *offset = off;
1241 } 1240 }
1242 1241
1243 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1244
1245 error = filldir(opaque, (const char *)(dent + 1), 1242 error = filldir(opaque, (const char *)(dent + 1),
1246 be16_to_cpu(dent->de_name_len), 1243 be16_to_cpu(dent->de_name_len),
1247 off, &inum, 1244 off, be64_to_cpu(dent->de_inum.no_addr),
1248 be16_to_cpu(dent->de_type)); 1245 be16_to_cpu(dent->de_type));
1249 if (error) 1246 if (error)
1250 return 1; 1247 return 1;
@@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1262} 1259}
1263 1260
1264static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, 1261static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1265 gfs2_filldir_t filldir, int *copied, 1262 filldir_t filldir, int *copied, unsigned *depth,
1266 unsigned *depth, u64 leaf_no) 1263 u64 leaf_no)
1267{ 1264{
1268 struct gfs2_inode *ip = GFS2_I(inode); 1265 struct gfs2_inode *ip = GFS2_I(inode);
1269 struct buffer_head *bh; 1266 struct buffer_head *bh;
@@ -1343,7 +1340,7 @@ out:
1343 */ 1340 */
1344 1341
1345static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, 1342static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1346 gfs2_filldir_t filldir) 1343 filldir_t filldir)
1347{ 1344{
1348 struct gfs2_inode *dip = GFS2_I(inode); 1345 struct gfs2_inode *dip = GFS2_I(inode);
1349 struct gfs2_sbd *sdp = GFS2_SB(inode); 1346 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1402,7 +1399,7 @@ out:
1402} 1399}
1403 1400
1404int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 1401int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1405 gfs2_filldir_t filldir) 1402 filldir_t filldir)
1406{ 1403{
1407 struct gfs2_inode *dip = GFS2_I(inode); 1404 struct gfs2_inode *dip = GFS2_I(inode);
1408 struct dirent_gather g; 1405 struct dirent_gather g;
@@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1568 break; 1565 break;
1569 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1566 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1570 ip->i_di.di_entries++; 1567 ip->i_di.di_entries++;
1571 ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); 1568 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1572 gfs2_dinode_out(ip, bh->b_data); 1569 gfs2_dinode_out(ip, bh->b_data);
1573 brelse(bh); 1570 brelse(bh);
1574 error = 0; 1571 error = 0;
@@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1654 gfs2_consist_inode(dip); 1651 gfs2_consist_inode(dip);
1655 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1652 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1656 dip->i_di.di_entries--; 1653 dip->i_di.di_entries--;
1657 dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); 1654 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
1658 gfs2_dinode_out(dip, bh->b_data); 1655 gfs2_dinode_out(dip, bh->b_data);
1659 brelse(bh); 1656 brelse(bh);
1660 mark_inode_dirty(&dip->i_inode); 1657 mark_inode_dirty(&dip->i_inode);
@@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1702 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1699 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1703 } 1700 }
1704 1701
1705 dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); 1702 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
1706 gfs2_dinode_out(dip, bh->b_data); 1703 gfs2_dinode_out(dip, bh->b_data);
1707 brelse(bh); 1704 brelse(bh);
1708 return 0; 1705 return 0;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index b21b33668a5b..48fe89046bba 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,30 +16,13 @@ struct inode;
16struct gfs2_inode; 16struct gfs2_inode;
17struct gfs2_inum; 17struct gfs2_inum;
18 18
19/**
20 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
21 * @opaque: opaque data used by the function
22 * @name: the name of the directory entry
23 * @length: the length of the name
24 * @offset: the entry's offset in the directory
25 * @inum: the inode number the entry points to
26 * @type: the type of inode the entry points to
27 *
28 * Returns: 0 on success, 1 if buffer full
29 */
30
31typedef int (*gfs2_filldir_t) (void *opaque,
32 const char *name, unsigned int length,
33 u64 offset,
34 struct gfs2_inum_host *inum, unsigned int type);
35
36int gfs2_dir_search(struct inode *dir, const struct qstr *filename, 19int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
37 struct gfs2_inum_host *inum, unsigned int *type); 20 struct gfs2_inum_host *inum, unsigned int *type);
38int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 21int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
39 const struct gfs2_inum_host *inum, unsigned int type); 22 const struct gfs2_inum_host *inum, unsigned int type);
40int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 23int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
41int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, 24int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
42 gfs2_filldir_t filldir); 25 filldir_t filldir);
43int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 26int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
44 struct gfs2_inum_host *new_inum, unsigned int new_type); 27 struct gfs2_inum_host *new_inum, unsigned int new_type);
45 28
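
With the private gfs2_filldir_t typedef gone, the directory code hands entries straight to the VFS filldir_t callback, passing the raw inode address instead of a gfs2_inum_host. For reference, a minimal callback matching the 2.6.20 filldir_t shape (illustrative names; the signature is my reading of that era's linux/fs.h):

#include <linux/fs.h>

struct demo_ctx {
	int count;
};

/* filldir_t: (void *, const char *, int, loff_t, u64, unsigned) */
static int demo_filldir(void *opaque, const char *name, int namelen,
			loff_t offset, u64 ino, unsigned int d_type)
{
	struct demo_ctx *ctx = opaque;

	ctx->count++;		/* just count entries into the cursor */
	return 0;		/* non-zero would stop the directory walk */
}
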
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index ebebbdcd7057..0c83c7f4dda8 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
301 301
302 error = gfs2_meta_inode_buffer(ip, &dibh); 302 error = gfs2_meta_inode_buffer(ip, &dibh);
303 if (!error) { 303 if (!error) {
304 ip->i_inode.i_ctime.tv_sec = get_seconds(); 304 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
305 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 305 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
306 gfs2_dinode_out(ip, dibh->b_data); 306 gfs2_dinode_out(ip, dibh->b_data);
307 brelse(dibh); 307 brelse(dibh);
@@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
718 (er->er_mode & S_IFMT)); 718 (er->er_mode & S_IFMT));
719 ip->i_inode.i_mode = er->er_mode; 719 ip->i_inode.i_mode = er->er_mode;
720 } 720 }
721 ip->i_inode.i_ctime.tv_sec = get_seconds(); 721 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
722 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 722 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
723 gfs2_dinode_out(ip, dibh->b_data); 723 gfs2_dinode_out(ip, dibh->b_data);
724 brelse(dibh); 724 brelse(dibh);
@@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
853 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); 853 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
854 ip->i_inode.i_mode = er->er_mode; 854 ip->i_inode.i_mode = er->er_mode;
855 } 855 }
856 ip->i_inode.i_ctime.tv_sec = get_seconds(); 856 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
857 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 857 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
858 gfs2_dinode_out(ip, dibh->b_data); 858 gfs2_dinode_out(ip, dibh->b_data);
859 brelse(dibh); 859 brelse(dibh);
@@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1134 1134
1135 error = gfs2_meta_inode_buffer(ip, &dibh); 1135 error = gfs2_meta_inode_buffer(ip, &dibh);
1136 if (!error) { 1136 if (!error) {
1137 ip->i_inode.i_ctime.tv_sec = get_seconds(); 1137 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
1138 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1138 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1139 gfs2_dinode_out(ip, dibh->b_data); 1139 gfs2_dinode_out(ip, dibh->b_data);
1140 brelse(dibh); 1140 brelse(dibh);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 438146904b58..6618c1190252 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -19,6 +19,8 @@
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/lm_interface.h> 21#include <linux/lm_interface.h>
22#include <linux/wait.h>
23#include <linux/rwsem.h>
22#include <asm/uaccess.h> 24#include <asm/uaccess.h>
23 25
24#include "gfs2.h" 26#include "gfs2.h"
@@ -33,11 +35,6 @@
33#include "super.h" 35#include "super.h"
34#include "util.h" 36#include "util.h"
35 37
36struct greedy {
37 struct gfs2_holder gr_gh;
38 struct delayed_work gr_work;
39};
40
41struct gfs2_gl_hash_bucket { 38struct gfs2_gl_hash_bucket {
42 struct hlist_head hb_list; 39 struct hlist_head hb_list;
43}; 40};
@@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl);
47static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); 44static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
48static int dump_glock(struct gfs2_glock *gl); 45static int dump_glock(struct gfs2_glock *gl);
49static int dump_inode(struct gfs2_inode *ip); 46static int dump_inode(struct gfs2_inode *ip);
47static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
48static void gfs2_glock_drop_th(struct gfs2_glock *gl);
49static DECLARE_RWSEM(gfs2_umount_flush_sem);
50 50
51#define GFS2_GL_HASH_SHIFT 15 51#define GFS2_GL_HASH_SHIFT 15
52#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 52#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -213,30 +213,6 @@ out:
213} 213}
214 214
215/** 215/**
216 * queue_empty - check to see if a glock's queue is empty
217 * @gl: the glock
218 * @head: the head of the queue to check
219 *
220 * This function protects the list in the event that a process already
221 * has a holder on the list and is adding a second holder for itself.
222 * The glmutex lock is what generally prevents processes from working
223 * on the same glock at once, but the special case of adding a second
224 * holder for yourself ("recursive" locking) doesn't involve locking
225 * glmutex, making the spin lock necessary.
226 *
227 * Returns: 1 if the queue is empty
228 */
229
230static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
231{
232 int empty;
233 spin_lock(&gl->gl_spin);
234 empty = list_empty(head);
235 spin_unlock(&gl->gl_spin);
236 return empty;
237}
238
239/**
240 * search_bucket() - Find struct gfs2_glock by lock number 216 * search_bucket() - Find struct gfs2_glock by lock number
241 * @bucket: the bucket to search 217 * @bucket: the bucket to search
242 * @name: The lock name 218 * @name: The lock name
@@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
395 gh->gh_flags = flags; 371 gh->gh_flags = flags;
396 gh->gh_error = 0; 372 gh->gh_error = 0;
397 gh->gh_iflags = 0; 373 gh->gh_iflags = 0;
398 init_completion(&gh->gh_wait);
399
400 if (gh->gh_state == LM_ST_EXCLUSIVE)
401 gh->gh_flags |= GL_LOCAL_EXCL;
402
403 gfs2_glock_hold(gl); 374 gfs2_glock_hold(gl);
404} 375}
405 376
@@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
417{ 388{
418 gh->gh_state = state; 389 gh->gh_state = state;
419 gh->gh_flags = flags; 390 gh->gh_flags = flags;
420 if (gh->gh_state == LM_ST_EXCLUSIVE)
421 gh->gh_flags |= GL_LOCAL_EXCL;
422
423 gh->gh_iflags &= 1 << HIF_ALLOCED; 391 gh->gh_iflags &= 1 << HIF_ALLOCED;
424 gh->gh_ip = (unsigned long)__builtin_return_address(0); 392 gh->gh_ip = (unsigned long)__builtin_return_address(0);
425} 393}
@@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh)
479 kfree(gh); 447 kfree(gh);
480} 448}
481 449
450static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh)
451{
452 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) {
453 gfs2_holder_put(gh);
454 return;
455 }
456 clear_bit(HIF_WAIT, &gh->gh_iflags);
457 smp_mb();
458 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
459}
460
461static int holder_wait(void *word)
462{
463 schedule();
464 return 0;
465}
466
467static void wait_on_holder(struct gfs2_holder *gh)
468{
469 might_sleep();
470 wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
471}
472
482/** 473/**
483 * rq_mutex - process a mutex request in the queue 474 * rq_mutex - process a mutex request in the queue
484 * @gh: the glock holder 475 * @gh: the glock holder
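
The block above is the heart of the completion-to-bit-wait conversion: waiters sleep on HIF_WAIT via wait_on_bit(), and wakers clear the bit, issue a memory barrier, then call wake_up_bit(), the sequence gfs2_holder_dispose_or_wake() and rq_mutex() now follow. The bare pattern, sketched with illustrative names:

#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/bitops.h>

#define DEMO_WAIT 0	/* stand-in for HIF_WAIT */

static int demo_wait_action(void *word)
{
	schedule();	/* uninterruptible wait: no signal handling needed */
	return 0;
}

static void demo_wait(unsigned long *flags)
{
	might_sleep();
	wait_on_bit(flags, DEMO_WAIT, demo_wait_action,
		    TASK_UNINTERRUPTIBLE);
}

static void demo_wake(unsigned long *flags)
{
	clear_bit(DEMO_WAIT, flags);
	smp_mb();	/* order the clear before the waiter's bit re-test */
	wake_up_bit(flags, DEMO_WAIT);
}
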
@@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh)
493 list_del_init(&gh->gh_list); 484 list_del_init(&gh->gh_list);
494 /* gh->gh_error never examined. */ 485 /* gh->gh_error never examined. */
495 set_bit(GLF_LOCK, &gl->gl_flags); 486 set_bit(GLF_LOCK, &gl->gl_flags);
496 complete(&gh->gh_wait); 487 clear_bit(HIF_WAIT, &gh->gh_iflags);
488 smp_mb();
489 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
497 490
498 return 1; 491 return 1;
499} 492}
@@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh)
511{ 504{
512 struct gfs2_glock *gl = gh->gh_gl; 505 struct gfs2_glock *gl = gh->gh_gl;
513 struct gfs2_sbd *sdp = gl->gl_sbd; 506 struct gfs2_sbd *sdp = gl->gl_sbd;
514 const struct gfs2_glock_operations *glops = gl->gl_ops;
515 507
516 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { 508 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
517 if (list_empty(&gl->gl_holders)) { 509 if (list_empty(&gl->gl_holders)) {
@@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh)
526 gfs2_reclaim_glock(sdp); 518 gfs2_reclaim_glock(sdp);
527 } 519 }
528 520
529 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); 521 gfs2_glock_xmote_th(gh);
530 spin_lock(&gl->gl_spin); 522 spin_lock(&gl->gl_spin);
531 } 523 }
532 return 1; 524 return 1;
@@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh)
537 set_bit(GLF_LOCK, &gl->gl_flags); 529 set_bit(GLF_LOCK, &gl->gl_flags);
538 } else { 530 } else {
539 struct gfs2_holder *next_gh; 531 struct gfs2_holder *next_gh;
540 if (gh->gh_flags & GL_LOCAL_EXCL) 532 if (gh->gh_state == LM_ST_EXCLUSIVE)
541 return 1; 533 return 1;
542 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, 534 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
543 gh_list); 535 gh_list);
544 if (next_gh->gh_flags & GL_LOCAL_EXCL) 536 if (next_gh->gh_state == LM_ST_EXCLUSIVE)
545 return 1; 537 return 1;
546 } 538 }
547 539
@@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh)
549 gh->gh_error = 0; 541 gh->gh_error = 0;
550 set_bit(HIF_HOLDER, &gh->gh_iflags); 542 set_bit(HIF_HOLDER, &gh->gh_iflags);
551 543
552 complete(&gh->gh_wait); 544 gfs2_holder_dispose_or_wake(gh);
553 545
554 return 0; 546 return 0;
555} 547}
@@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh)
564static int rq_demote(struct gfs2_holder *gh) 556static int rq_demote(struct gfs2_holder *gh)
565{ 557{
566 struct gfs2_glock *gl = gh->gh_gl; 558 struct gfs2_glock *gl = gh->gh_gl;
567 const struct gfs2_glock_operations *glops = gl->gl_ops;
568 559
569 if (!list_empty(&gl->gl_holders)) 560 if (!list_empty(&gl->gl_holders))
570 return 1; 561 return 1;
@@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh)
573 list_del_init(&gh->gh_list); 564 list_del_init(&gh->gh_list);
574 gh->gh_error = 0; 565 gh->gh_error = 0;
575 spin_unlock(&gl->gl_spin); 566 spin_unlock(&gl->gl_spin);
576 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) 567 gfs2_holder_dispose_or_wake(gh);
577 gfs2_holder_put(gh);
578 else
579 complete(&gh->gh_wait);
580 spin_lock(&gl->gl_spin); 568 spin_lock(&gl->gl_spin);
581 } else { 569 } else {
582 gl->gl_req_gh = gh; 570 gl->gl_req_gh = gh;
@@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh)
585 573
586 if (gh->gh_state == LM_ST_UNLOCKED || 574 if (gh->gh_state == LM_ST_UNLOCKED ||
587 gl->gl_state != LM_ST_EXCLUSIVE) 575 gl->gl_state != LM_ST_EXCLUSIVE)
588 glops->go_drop_th(gl); 576 gfs2_glock_drop_th(gl);
589 else 577 else
590 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); 578 gfs2_glock_xmote_th(gh);
591 579
592 spin_lock(&gl->gl_spin); 580 spin_lock(&gl->gl_spin);
593 } 581 }
@@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh)
596} 584}
597 585
598/** 586/**
599 * rq_greedy - process a queued request to drop greedy status
600 * @gh: the glock holder
601 *
602 * Returns: 1 if the queue is blocked
603 */
604
605static int rq_greedy(struct gfs2_holder *gh)
606{
607 struct gfs2_glock *gl = gh->gh_gl;
608
609 list_del_init(&gh->gh_list);
610 /* gh->gh_error never examined. */
611 clear_bit(GLF_GREEDY, &gl->gl_flags);
612 spin_unlock(&gl->gl_spin);
613
614 gfs2_holder_uninit(gh);
615 kfree(container_of(gh, struct greedy, gr_gh));
616
617 spin_lock(&gl->gl_spin);
618
619 return 0;
620}
621
622/**
623 * run_queue - process holder structures on a glock 587 * run_queue - process holder structures on a glock
624 * @gl: the glock 588 * @gl: the glock
625 * 589 *
@@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl)
649 613
650 if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) 614 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
651 blocked = rq_demote(gh); 615 blocked = rq_demote(gh);
652 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
653 blocked = rq_greedy(gh);
654 else 616 else
655 gfs2_assert_warn(gl->gl_sbd, 0); 617 gfs2_assert_warn(gl->gl_sbd, 0);
656 618
@@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
684 646
685 gfs2_holder_init(gl, 0, 0, &gh); 647 gfs2_holder_init(gl, 0, 0, &gh);
686 set_bit(HIF_MUTEX, &gh.gh_iflags); 648 set_bit(HIF_MUTEX, &gh.gh_iflags);
649 if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags))
650 BUG();
687 651
688 spin_lock(&gl->gl_spin); 652 spin_lock(&gl->gl_spin);
689 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 653 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
@@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
691 } else { 655 } else {
692 gl->gl_owner = current; 656 gl->gl_owner = current;
693 gl->gl_ip = (unsigned long)__builtin_return_address(0); 657 gl->gl_ip = (unsigned long)__builtin_return_address(0);
694 complete(&gh.gh_wait); 658 clear_bit(HIF_WAIT, &gh.gh_iflags);
659 smp_mb();
660 wake_up_bit(&gh.gh_iflags, HIF_WAIT);
695 } 661 }
696 spin_unlock(&gl->gl_spin); 662 spin_unlock(&gl->gl_spin);
697 663
698 wait_for_completion(&gh.gh_wait); 664 wait_on_holder(&gh);
699 gfs2_holder_uninit(&gh); 665 gfs2_holder_uninit(&gh);
700} 666}
701 667
@@ -774,6 +740,7 @@ restart:
774 return; 740 return;
775 set_bit(HIF_DEMOTE, &new_gh->gh_iflags); 741 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
776 set_bit(HIF_DEALLOC, &new_gh->gh_iflags); 742 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
743 set_bit(HIF_WAIT, &new_gh->gh_iflags);
777 744
778 goto restart; 745 goto restart;
779 } 746 }
@@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
825 int op_done = 1; 792 int op_done = 1;
826 793
827 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 794 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
828 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 795 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
829 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); 796 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
830 797
831 state_change(gl, ret & LM_OUT_ST_MASK); 798 state_change(gl, ret & LM_OUT_ST_MASK);
@@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
908 875
909 gfs2_glock_put(gl); 876 gfs2_glock_put(gl);
910 877
911 if (gh) { 878 if (gh)
912 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) 879 gfs2_holder_dispose_or_wake(gh);
913 gfs2_holder_put(gh);
914 else
915 complete(&gh->gh_wait);
916 }
917} 880}
918 881
919/** 882/**
@@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
924 * 887 *
925 */ 888 */
926 889
927void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) 890void gfs2_glock_xmote_th(struct gfs2_holder *gh)
928{ 891{
892 struct gfs2_glock *gl = gh->gh_gl;
929 struct gfs2_sbd *sdp = gl->gl_sbd; 893 struct gfs2_sbd *sdp = gl->gl_sbd;
894 int flags = gh->gh_flags;
895 unsigned state = gh->gh_state;
930 const struct gfs2_glock_operations *glops = gl->gl_ops; 896 const struct gfs2_glock_operations *glops = gl->gl_ops;
931 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | 897 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
932 LM_FLAG_NOEXP | LM_FLAG_ANY | 898 LM_FLAG_NOEXP | LM_FLAG_ANY |
933 LM_FLAG_PRIORITY); 899 LM_FLAG_PRIORITY);
934 unsigned int lck_ret; 900 unsigned int lck_ret;
935 901
902 if (glops->go_xmote_th)
903 glops->go_xmote_th(gl);
904
936 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 905 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
937 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 906 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
938 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); 907 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
939 gfs2_assert_warn(sdp, state != gl->gl_state); 908 gfs2_assert_warn(sdp, state != gl->gl_state);
940 909
941 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
942 glops->go_sync(gl);
943
944 gfs2_glock_hold(gl); 910 gfs2_glock_hold(gl);
945 gl->gl_req_bh = xmote_bh; 911 gl->gl_req_bh = xmote_bh;
946 912
@@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
971 const struct gfs2_glock_operations *glops = gl->gl_ops; 937 const struct gfs2_glock_operations *glops = gl->gl_ops;
972 struct gfs2_holder *gh = gl->gl_req_gh; 938 struct gfs2_holder *gh = gl->gl_req_gh;
973 939
974 clear_bit(GLF_PREFETCH, &gl->gl_flags);
975
976 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 940 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
977 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 941 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
978 gfs2_assert_warn(sdp, !ret); 942 gfs2_assert_warn(sdp, !ret);
979 943
980 state_change(gl, LM_ST_UNLOCKED); 944 state_change(gl, LM_ST_UNLOCKED);
@@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
1001 965
1002 gfs2_glock_put(gl); 966 gfs2_glock_put(gl);
1003 967
1004 if (gh) { 968 if (gh)
1005 if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) 969 gfs2_holder_dispose_or_wake(gh);
1006 gfs2_holder_put(gh);
1007 else
1008 complete(&gh->gh_wait);
1009 }
1010} 970}
1011 971
1012/** 972/**
@@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
1015 * 975 *
1016 */ 976 */
1017 977
1018void gfs2_glock_drop_th(struct gfs2_glock *gl) 978static void gfs2_glock_drop_th(struct gfs2_glock *gl)
1019{ 979{
1020 struct gfs2_sbd *sdp = gl->gl_sbd; 980 struct gfs2_sbd *sdp = gl->gl_sbd;
1021 const struct gfs2_glock_operations *glops = gl->gl_ops; 981 const struct gfs2_glock_operations *glops = gl->gl_ops;
1022 unsigned int ret; 982 unsigned int ret;
1023 983
984 if (glops->go_drop_th)
985 glops->go_drop_th(gl);
986
1024 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); 987 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1025 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); 988 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
1026 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); 989 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1027 990
1028 if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
1029 glops->go_sync(gl);
1030
1031 gfs2_glock_hold(gl); 991 gfs2_glock_hold(gl);
1032 gl->gl_req_bh = drop_bh; 992 gl->gl_req_bh = drop_bh;
1033 993
@@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh)
1107 if (gh->gh_flags & LM_FLAG_PRIORITY) 1067 if (gh->gh_flags & LM_FLAG_PRIORITY)
1108 do_cancels(gh); 1068 do_cancels(gh);
1109 1069
1110 wait_for_completion(&gh->gh_wait); 1070 wait_on_holder(gh);
1111
1112 if (gh->gh_error) 1071 if (gh->gh_error)
1113 return gh->gh_error; 1072 return gh->gh_error;
1114 1073
@@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh)
1164 struct gfs2_holder *existing; 1123 struct gfs2_holder *existing;
1165 1124
1166 BUG_ON(!gh->gh_owner); 1125 BUG_ON(!gh->gh_owner);
1126 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1127 BUG();
1167 1128
1168 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); 1129 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1169 if (existing) { 1130 if (existing) {
@@ -1227,8 +1188,6 @@ restart:
1227 } 1188 }
1228 } 1189 }
1229 1190
1230 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1231
1232 return error; 1191 return error;
1233} 1192}
1234 1193
@@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1321} 1280}
1322 1281
1323/** 1282/**
1324 * gfs2_glock_prefetch - Try to prefetch a glock
1325 * @gl: the glock
1326 * @state: the state to prefetch in
1327 * @flags: flags passed to go_xmote_th()
1328 *
1329 */
1330
1331static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
1332 int flags)
1333{
1334 const struct gfs2_glock_operations *glops = gl->gl_ops;
1335
1336 spin_lock(&gl->gl_spin);
1337
1338 if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
1339 !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
1340 !list_empty(&gl->gl_waiters3) ||
1341 relaxed_state_ok(gl->gl_state, state, flags)) {
1342 spin_unlock(&gl->gl_spin);
1343 return;
1344 }
1345
1346 set_bit(GLF_PREFETCH, &gl->gl_flags);
1347 set_bit(GLF_LOCK, &gl->gl_flags);
1348 spin_unlock(&gl->gl_spin);
1349
1350 glops->go_xmote_th(gl, state, flags);
1351}
1352
1353static void greedy_work(struct work_struct *work)
1354{
1355 struct greedy *gr = container_of(work, struct greedy, gr_work.work);
1356 struct gfs2_holder *gh = &gr->gr_gh;
1357 struct gfs2_glock *gl = gh->gh_gl;
1358 const struct gfs2_glock_operations *glops = gl->gl_ops;
1359
1360 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1361
1362 if (glops->go_greedy)
1363 glops->go_greedy(gl);
1364
1365 spin_lock(&gl->gl_spin);
1366
1367 if (list_empty(&gl->gl_waiters2)) {
1368 clear_bit(GLF_GREEDY, &gl->gl_flags);
1369 spin_unlock(&gl->gl_spin);
1370 gfs2_holder_uninit(gh);
1371 kfree(gr);
1372 } else {
1373 gfs2_glock_hold(gl);
1374 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1375 run_queue(gl);
1376 spin_unlock(&gl->gl_spin);
1377 gfs2_glock_put(gl);
1378 }
1379}
1380
1381/**
1382 * gfs2_glock_be_greedy -
1383 * @gl:
1384 * @time:
1385 *
1386 * Returns: 0 if go_greedy will be called, 1 otherwise
1387 */
1388
1389int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1390{
1391 struct greedy *gr;
1392 struct gfs2_holder *gh;
1393
1394 if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
1395 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1396 return 1;
1397
1398 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1399 if (!gr) {
1400 clear_bit(GLF_GREEDY, &gl->gl_flags);
1401 return 1;
1402 }
1403 gh = &gr->gr_gh;
1404
1405 gfs2_holder_init(gl, 0, 0, gh);
1406 set_bit(HIF_GREEDY, &gh->gh_iflags);
1407 INIT_DELAYED_WORK(&gr->gr_work, greedy_work);
1408
1409 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1410 schedule_delayed_work(&gr->gr_work, time);
1411
1412 return 0;
1413}
1414
1415/**
1416 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it 1283 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1417 * @gh: the holder structure 1284 * @gh: the holder structure
1418 * 1285 *
@@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b)
1470 return 1; 1337 return 1;
1471 if (a->ln_number < b->ln_number) 1338 if (a->ln_number < b->ln_number)
1472 return -1; 1339 return -1;
1473 if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) 1340 BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
1474 return 1;
1475 if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL))
1476 return 1;
1477 return 0; 1341 return 0;
1478} 1342}
1479 1343
@@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1618} 1482}
1619 1483
1620/** 1484/**
1621 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1622 * @sdp: the filesystem
1623 * @number: the lock number
1624 * @glops: the glock operations for the type of glock
1625 * @state: the state to acquire the glock in
1626 * @flags: modifier flags for the acquisition
1627 *
1628 * Returns: errno
1629 */
1630
1631void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
1632 const struct gfs2_glock_operations *glops,
1633 unsigned int state, int flags)
1634{
1635 struct gfs2_glock *gl;
1636 int error;
1637
1638 if (atomic_read(&sdp->sd_reclaim_count) <
1639 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1640 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1641 if (!error) {
1642 gfs2_glock_prefetch(gl, state, flags);
1643 gfs2_glock_put(gl);
1644 }
1645 }
1646}
1647
1648/**
1649 * gfs2_lvb_hold - attach a LVB from a glock 1485 * gfs2_lvb_hold - attach a LVB from a glock
1650 * @gl: The glock in question 1486 * @gl: The glock in question
1651 * 1487 *
@@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1703 if (!gl) 1539 if (!gl)
1704 return; 1540 return;
1705 1541
1706 if (gl->gl_ops->go_callback)
1707 gl->gl_ops->go_callback(gl, state);
1708 handle_callback(gl, state); 1542 handle_callback(gl, state);
1709 1543
1710 spin_lock(&gl->gl_spin); 1544 spin_lock(&gl->gl_spin);
@@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1746 struct lm_async_cb *async = data; 1580 struct lm_async_cb *async = data;
1747 struct gfs2_glock *gl; 1581 struct gfs2_glock *gl;
1748 1582
1583 down_read(&gfs2_umount_flush_sem);
1749 gl = gfs2_glock_find(sdp, &async->lc_name); 1584 gl = gfs2_glock_find(sdp, &async->lc_name);
1750 if (gfs2_assert_warn(sdp, gl)) 1585 if (gfs2_assert_warn(sdp, gl))
1751 return; 1586 return;
1752 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1587 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1753 gl->gl_req_bh(gl, async->lc_ret); 1588 gl->gl_req_bh(gl, async->lc_ret);
1754 gfs2_glock_put(gl); 1589 gfs2_glock_put(gl);
1590 up_read(&gfs2_umount_flush_sem);
1755 return; 1591 return;
1756 } 1592 }
1757 1593
@@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1781 1617
1782static int demote_ok(struct gfs2_glock *gl) 1618static int demote_ok(struct gfs2_glock *gl)
1783{ 1619{
1784 struct gfs2_sbd *sdp = gl->gl_sbd;
1785 const struct gfs2_glock_operations *glops = gl->gl_ops; 1620 const struct gfs2_glock_operations *glops = gl->gl_ops;
1786 int demote = 1; 1621 int demote = 1;
1787 1622
1788 if (test_bit(GLF_STICKY, &gl->gl_flags)) 1623 if (test_bit(GLF_STICKY, &gl->gl_flags))
1789 demote = 0; 1624 demote = 0;
1790 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
1791 demote = time_after_eq(jiffies, gl->gl_stamp +
1792 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
1793 else if (glops->go_demote_ok) 1625 else if (glops->go_demote_ok)
1794 demote = glops->go_demote_ok(gl); 1626 demote = glops->go_demote_ok(gl);
1795 1627
@@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1845 atomic_inc(&sdp->sd_reclaimed); 1677 atomic_inc(&sdp->sd_reclaimed);
1846 1678
1847 if (gfs2_glmutex_trylock(gl)) { 1679 if (gfs2_glmutex_trylock(gl)) {
1848 if (queue_empty(gl, &gl->gl_holders) && 1680 if (list_empty(&gl->gl_holders) &&
1849 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1681 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1850 handle_callback(gl, LM_ST_UNLOCKED); 1682 handle_callback(gl, LM_ST_UNLOCKED);
1851 gfs2_glmutex_unlock(gl); 1683 gfs2_glmutex_unlock(gl);
@@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl)
1909 return; 1741 return;
1910 1742
1911 if (gfs2_glmutex_trylock(gl)) { 1743 if (gfs2_glmutex_trylock(gl)) {
1912 if (queue_empty(gl, &gl->gl_holders) && 1744 if (list_empty(&gl->gl_holders) &&
1913 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1745 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1914 goto out_schedule; 1746 goto out_schedule;
1915 gfs2_glmutex_unlock(gl); 1747 gfs2_glmutex_unlock(gl);
@@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl)
1958 } 1790 }
1959 1791
1960 if (gfs2_glmutex_trylock(gl)) { 1792 if (gfs2_glmutex_trylock(gl)) {
1961 if (queue_empty(gl, &gl->gl_holders) && 1793 if (list_empty(&gl->gl_holders) &&
1962 gl->gl_state != LM_ST_UNLOCKED) 1794 gl->gl_state != LM_ST_UNLOCKED)
1963 handle_callback(gl, LM_ST_UNLOCKED); 1795 handle_callback(gl, LM_ST_UNLOCKED);
1964 gfs2_glmutex_unlock(gl); 1796 gfs2_glmutex_unlock(gl);
@@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
2000 t = jiffies; 1832 t = jiffies;
2001 } 1833 }
2002 1834
1835 down_write(&gfs2_umount_flush_sem);
2003 invalidate_inodes(sdp->sd_vfs); 1836 invalidate_inodes(sdp->sd_vfs);
1837 up_write(&gfs2_umount_flush_sem);
2004 msleep(10); 1838 msleep(10);
2005 } 1839 }
2006} 1840}
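The down_read()/up_read() pair added to gfs2_glock_cb() and the down_write()/up_write() pair around invalidate_inodes() implement the "[GFS2/DLM] fix GFS2 circular dependency" change: lock-module callbacks may run concurrently with each other, but unmount excludes them all while it flushes inodes. Schematically, assuming gfs2_umount_flush_sem is a file-scope rwsem in glock.c:

static DECLARE_RWSEM(gfs2_umount_flush_sem);

/* callback path: many readers may be in flight at once */
down_read(&gfs2_umount_flush_sem);
/* ... gfs2_glock_find() + gl->gl_req_bh() as in the hunk above ... */
up_read(&gfs2_umount_flush_sem);

/* umount path: the writer excludes every callback */
down_write(&gfs2_umount_flush_sem);
invalidate_inodes(sdp->sd_vfs);
up_write(&gfs2_umount_flush_sem);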
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index fb39108fc05c..f50e40ceca43 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -20,7 +20,6 @@
20#define LM_FLAG_ANY 0x00000008 20#define LM_FLAG_ANY 0x00000008
21#define LM_FLAG_PRIORITY 0x00000010 */ 21#define LM_FLAG_PRIORITY 0x00000010 */
22 22
23#define GL_LOCAL_EXCL 0x00000020
24#define GL_ASYNC 0x00000040 23#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080 24#define GL_EXACT 0x00000080
26#define GL_SKIP 0x00000100 25#define GL_SKIP 0x00000100
@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
83void gfs2_holder_reinit(unsigned int state, unsigned flags, 82void gfs2_holder_reinit(unsigned int state, unsigned flags,
84 struct gfs2_holder *gh); 83 struct gfs2_holder *gh);
85void gfs2_holder_uninit(struct gfs2_holder *gh); 84void gfs2_holder_uninit(struct gfs2_holder *gh);
86
87void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
88void gfs2_glock_drop_th(struct gfs2_glock *gl);
89
90int gfs2_glock_nq(struct gfs2_holder *gh); 85int gfs2_glock_nq(struct gfs2_holder *gh);
91int gfs2_glock_poll(struct gfs2_holder *gh); 86int gfs2_glock_poll(struct gfs2_holder *gh);
92int gfs2_glock_wait(struct gfs2_holder *gh); 87int gfs2_glock_wait(struct gfs2_holder *gh);
93void gfs2_glock_dq(struct gfs2_holder *gh); 88void gfs2_glock_dq(struct gfs2_holder *gh);
94 89
95int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
96
97void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 90void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
98int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 91int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
99 u64 number, const struct gfs2_glock_operations *glops, 92 u64 number, const struct gfs2_glock_operations *glops,
@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
103void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 96void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
104void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 97void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
105 98
106void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
107 const struct gfs2_glock_operations *glops,
108 unsigned int state, int flags);
109
110/** 99/**
111 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock 100 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
112 * @gl: the glock 101 * @gl: the glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index b068d10bcb6e..c4b0391b7aa2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
117 117
118static void meta_go_sync(struct gfs2_glock *gl) 118static void meta_go_sync(struct gfs2_glock *gl)
119{ 119{
120 if (gl->gl_state != LM_ST_EXCLUSIVE)
121 return;
122
120 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { 123 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
121 gfs2_log_flush(gl->gl_sbd, gl); 124 gfs2_log_flush(gl->gl_sbd, gl);
122 gfs2_meta_sync(gl); 125 gfs2_meta_sync(gl);
123 gfs2_ail_empty_gl(gl); 126 gfs2_ail_empty_gl(gl);
124 } 127 }
125
126} 128}
127 129
128/** 130/**
@@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
142} 144}
143 145
144/** 146/**
147 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
148 * @gl: the glock protecting the inode
149 *
150 */
151
152static void inode_go_sync(struct gfs2_glock *gl)
153{
154 struct gfs2_inode *ip = gl->gl_object;
155
156 if (ip && !S_ISREG(ip->i_inode.i_mode))
157 ip = NULL;
158
159 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
160 gfs2_log_flush(gl->gl_sbd, gl);
161 if (ip)
162 filemap_fdatawrite(ip->i_inode.i_mapping);
163 gfs2_meta_sync(gl);
164 if (ip) {
165 struct address_space *mapping = ip->i_inode.i_mapping;
166 int error = filemap_fdatawait(mapping);
167 if (error == -ENOSPC)
168 set_bit(AS_ENOSPC, &mapping->flags);
169 else if (error)
170 set_bit(AS_EIO, &mapping->flags);
171 }
172 clear_bit(GLF_DIRTY, &gl->gl_flags);
173 gfs2_ail_empty_gl(gl);
174 }
175}
176
177/**
145 * inode_go_xmote_th - promote/demote a glock 178 * inode_go_xmote_th - promote/demote a glock
146 * @gl: the glock 179 * @gl: the glock
147 * @state: the requested state 180 * @state: the requested state
@@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
149 * 182 *
150 */ 183 */
151 184
152static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, 185static void inode_go_xmote_th(struct gfs2_glock *gl)
153 int flags)
154{ 186{
155 if (gl->gl_state != LM_ST_UNLOCKED) 187 if (gl->gl_state != LM_ST_UNLOCKED)
156 gfs2_pte_inval(gl); 188 gfs2_pte_inval(gl);
157 gfs2_glock_xmote_th(gl, state, flags); 189 if (gl->gl_state == LM_ST_EXCLUSIVE)
190 inode_go_sync(gl);
158} 191}
159 192
160/** 193/**
@@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl)
189static void inode_go_drop_th(struct gfs2_glock *gl) 222static void inode_go_drop_th(struct gfs2_glock *gl)
190{ 223{
191 gfs2_pte_inval(gl); 224 gfs2_pte_inval(gl);
192 gfs2_glock_drop_th(gl); 225 if (gl->gl_state == LM_ST_EXCLUSIVE)
193} 226 inode_go_sync(gl);
194
195/**
196 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
197 * @gl: the glock protecting the inode
198 *
199 */
200
201static void inode_go_sync(struct gfs2_glock *gl)
202{
203 struct gfs2_inode *ip = gl->gl_object;
204
205 if (ip && !S_ISREG(ip->i_inode.i_mode))
206 ip = NULL;
207
208 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
209 gfs2_log_flush(gl->gl_sbd, gl);
210 if (ip)
211 filemap_fdatawrite(ip->i_inode.i_mapping);
212 gfs2_meta_sync(gl);
213 if (ip) {
214 struct address_space *mapping = ip->i_inode.i_mapping;
215 int error = filemap_fdatawait(mapping);
216 if (error == -ENOSPC)
217 set_bit(AS_ENOSPC, &mapping->flags);
218 else if (error)
219 set_bit(AS_EIO, &mapping->flags);
220 }
221 clear_bit(GLF_DIRTY, &gl->gl_flags);
222 gfs2_ail_empty_gl(gl);
223 }
224} 227}
225 228
226/** 229/**
@@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
295 298
296 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && 299 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
297 (gl->gl_state == LM_ST_EXCLUSIVE) && 300 (gl->gl_state == LM_ST_EXCLUSIVE) &&
298 (gh->gh_flags & GL_LOCAL_EXCL)) 301 (gh->gh_state == LM_ST_EXCLUSIVE))
299 error = gfs2_truncatei_resume(ip); 302 error = gfs2_truncatei_resume(ip);
300 303
301 return error; 304 return error;
@@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh)
319} 322}
320 323
321/** 324/**
322 * inode_greedy -
323 * @gl: the glock
324 *
325 */
326
327static void inode_greedy(struct gfs2_glock *gl)
328{
329 struct gfs2_sbd *sdp = gl->gl_sbd;
330 struct gfs2_inode *ip = gl->gl_object;
331 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
332 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
333 unsigned int new_time;
334
335 spin_lock(&ip->i_spin);
336
337 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
338 new_time = ip->i_greedy + quantum;
339 if (new_time > max)
340 new_time = max;
341 } else {
342 new_time = ip->i_greedy - quantum;
343 if (!new_time || new_time > max)
344 new_time = 1;
345 }
346
347 ip->i_greedy = new_time;
348
349 spin_unlock(&ip->i_spin);
350
351 iput(&ip->i_inode);
352}
353
354/**
355 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock 325 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
356 * @gl: the glock 326 * @gl: the glock
357 * 327 *
@@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
398 * 368 *
399 */ 369 */
400 370
401static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, 371static void trans_go_xmote_th(struct gfs2_glock *gl)
402 int flags)
403{ 372{
404 struct gfs2_sbd *sdp = gl->gl_sbd; 373 struct gfs2_sbd *sdp = gl->gl_sbd;
405 374
@@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
408 gfs2_meta_syncfs(sdp); 377 gfs2_meta_syncfs(sdp);
409 gfs2_log_shutdown(sdp); 378 gfs2_log_shutdown(sdp);
410 } 379 }
411
412 gfs2_glock_xmote_th(gl, state, flags);
413} 380}
414 381
415/** 382/**
@@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl)
461 gfs2_meta_syncfs(sdp); 428 gfs2_meta_syncfs(sdp);
462 gfs2_log_shutdown(sdp); 429 gfs2_log_shutdown(sdp);
463 } 430 }
464
465 gfs2_glock_drop_th(gl);
466} 431}
467 432
468/** 433/**
@@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl)
478} 443}
479 444
480const struct gfs2_glock_operations gfs2_meta_glops = { 445const struct gfs2_glock_operations gfs2_meta_glops = {
481 .go_xmote_th = gfs2_glock_xmote_th, 446 .go_xmote_th = meta_go_sync,
482 .go_drop_th = gfs2_glock_drop_th, 447 .go_drop_th = meta_go_sync,
483 .go_type = LM_TYPE_META, 448 .go_type = LM_TYPE_META,
484}; 449};
485 450
@@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
487 .go_xmote_th = inode_go_xmote_th, 452 .go_xmote_th = inode_go_xmote_th,
488 .go_xmote_bh = inode_go_xmote_bh, 453 .go_xmote_bh = inode_go_xmote_bh,
489 .go_drop_th = inode_go_drop_th, 454 .go_drop_th = inode_go_drop_th,
490 .go_sync = inode_go_sync,
491 .go_inval = inode_go_inval, 455 .go_inval = inode_go_inval,
492 .go_demote_ok = inode_go_demote_ok, 456 .go_demote_ok = inode_go_demote_ok,
493 .go_lock = inode_go_lock, 457 .go_lock = inode_go_lock,
494 .go_unlock = inode_go_unlock, 458 .go_unlock = inode_go_unlock,
495 .go_greedy = inode_greedy,
496 .go_type = LM_TYPE_INODE, 459 .go_type = LM_TYPE_INODE,
497}; 460};
498 461
499const struct gfs2_glock_operations gfs2_rgrp_glops = { 462const struct gfs2_glock_operations gfs2_rgrp_glops = {
500 .go_xmote_th = gfs2_glock_xmote_th,
501 .go_drop_th = gfs2_glock_drop_th,
502 .go_sync = meta_go_sync,
503 .go_inval = meta_go_inval, 463 .go_inval = meta_go_inval,
504 .go_demote_ok = rgrp_go_demote_ok, 464 .go_demote_ok = rgrp_go_demote_ok,
505 .go_lock = rgrp_go_lock, 465 .go_lock = rgrp_go_lock,
@@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = {
515}; 475};
516 476
517const struct gfs2_glock_operations gfs2_iopen_glops = { 477const struct gfs2_glock_operations gfs2_iopen_glops = {
518 .go_xmote_th = gfs2_glock_xmote_th,
519 .go_drop_th = gfs2_glock_drop_th,
520 .go_type = LM_TYPE_IOPEN, 478 .go_type = LM_TYPE_IOPEN,
521}; 479};
522 480
523const struct gfs2_glock_operations gfs2_flock_glops = { 481const struct gfs2_glock_operations gfs2_flock_glops = {
524 .go_xmote_th = gfs2_glock_xmote_th,
525 .go_drop_th = gfs2_glock_drop_th,
526 .go_type = LM_TYPE_FLOCK, 482 .go_type = LM_TYPE_FLOCK,
527}; 483};
528 484
529const struct gfs2_glock_operations gfs2_nondisk_glops = { 485const struct gfs2_glock_operations gfs2_nondisk_glops = {
530 .go_xmote_th = gfs2_glock_xmote_th,
531 .go_drop_th = gfs2_glock_drop_th,
532 .go_type = LM_TYPE_NONDISK, 486 .go_type = LM_TYPE_NONDISK,
533}; 487};
534 488
535const struct gfs2_glock_operations gfs2_quota_glops = { 489const struct gfs2_glock_operations gfs2_quota_glops = {
536 .go_xmote_th = gfs2_glock_xmote_th,
537 .go_drop_th = gfs2_glock_drop_th,
538 .go_demote_ok = quota_go_demote_ok, 490 .go_demote_ok = quota_go_demote_ok,
539 .go_type = LM_TYPE_QUOTA, 491 .go_type = LM_TYPE_QUOTA,
540}; 492};
541 493
542const struct gfs2_glock_operations gfs2_journal_glops = { 494const struct gfs2_glock_operations gfs2_journal_glops = {
543 .go_xmote_th = gfs2_glock_xmote_th,
544 .go_drop_th = gfs2_glock_drop_th,
545 .go_type = LM_TYPE_JOURNAL, 495 .go_type = LM_TYPE_JOURNAL,
546}; 496};
547 497
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 734421edae85..12c80fd28db5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -101,17 +101,14 @@ struct gfs2_bufdata {
101}; 101};
102 102
103struct gfs2_glock_operations { 103struct gfs2_glock_operations {
104 void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); 104 void (*go_xmote_th) (struct gfs2_glock *gl);
105 void (*go_xmote_bh) (struct gfs2_glock *gl); 105 void (*go_xmote_bh) (struct gfs2_glock *gl);
106 void (*go_drop_th) (struct gfs2_glock *gl); 106 void (*go_drop_th) (struct gfs2_glock *gl);
107 void (*go_drop_bh) (struct gfs2_glock *gl); 107 void (*go_drop_bh) (struct gfs2_glock *gl);
108 void (*go_sync) (struct gfs2_glock *gl);
109 void (*go_inval) (struct gfs2_glock *gl, int flags); 108 void (*go_inval) (struct gfs2_glock *gl, int flags);
110 int (*go_demote_ok) (struct gfs2_glock *gl); 109 int (*go_demote_ok) (struct gfs2_glock *gl);
111 int (*go_lock) (struct gfs2_holder *gh); 110 int (*go_lock) (struct gfs2_holder *gh);
112 void (*go_unlock) (struct gfs2_holder *gh); 111 void (*go_unlock) (struct gfs2_holder *gh);
113 void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
114 void (*go_greedy) (struct gfs2_glock *gl);
115 const int go_type; 112 const int go_type;
116}; 113};
117 114
@@ -120,7 +117,6 @@ enum {
120 HIF_MUTEX = 0, 117 HIF_MUTEX = 0,
121 HIF_PROMOTE = 1, 118 HIF_PROMOTE = 1,
122 HIF_DEMOTE = 2, 119 HIF_DEMOTE = 2,
123 HIF_GREEDY = 3,
124 120
125 /* States */ 121 /* States */
126 HIF_ALLOCED = 4, 122 HIF_ALLOCED = 4,
@@ -128,6 +124,7 @@ enum {
128 HIF_HOLDER = 6, 124 HIF_HOLDER = 6,
129 HIF_FIRST = 7, 125 HIF_FIRST = 7,
130 HIF_ABORTED = 9, 126 HIF_ABORTED = 9,
127 HIF_WAIT = 10,
131}; 128};
132 129
133struct gfs2_holder { 130struct gfs2_holder {
@@ -140,17 +137,14 @@ struct gfs2_holder {
140 137
141 int gh_error; 138 int gh_error;
142 unsigned long gh_iflags; 139 unsigned long gh_iflags;
143 struct completion gh_wait;
144 unsigned long gh_ip; 140 unsigned long gh_ip;
145}; 141};
146 142
147enum { 143enum {
148 GLF_LOCK = 1, 144 GLF_LOCK = 1,
149 GLF_STICKY = 2, 145 GLF_STICKY = 2,
150 GLF_PREFETCH = 3,
151 GLF_DIRTY = 5, 146 GLF_DIRTY = 5,
152 GLF_SKIP_WAITERS2 = 6, 147 GLF_SKIP_WAITERS2 = 6,
153 GLF_GREEDY = 7,
154}; 148};
155 149
156struct gfs2_glock { 150struct gfs2_glock {
@@ -167,7 +161,7 @@ struct gfs2_glock {
167 unsigned long gl_ip; 161 unsigned long gl_ip;
168 struct list_head gl_holders; 162 struct list_head gl_holders;
169 struct list_head gl_waiters1; /* HIF_MUTEX */ 163 struct list_head gl_waiters1; /* HIF_MUTEX */
170 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ 164 struct list_head gl_waiters2; /* HIF_DEMOTE */
171 struct list_head gl_waiters3; /* HIF_PROMOTE */ 165 struct list_head gl_waiters3; /* HIF_PROMOTE */
172 166
173 const struct gfs2_glock_operations *gl_ops; 167 const struct gfs2_glock_operations *gl_ops;
@@ -236,7 +230,6 @@ struct gfs2_inode {
236 230
237 spinlock_t i_spin; 231 spinlock_t i_spin;
238 struct rw_semaphore i_rw_mutex; 232 struct rw_semaphore i_rw_mutex;
239 unsigned int i_greedy;
240 unsigned long i_last_pfault; 233 unsigned long i_last_pfault;
241 234
242 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; 235 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
@@ -418,17 +411,12 @@ struct gfs2_tune {
418 unsigned int gt_atime_quantum; /* Min secs between atime updates */ 411 unsigned int gt_atime_quantum; /* Min secs between atime updates */
419 unsigned int gt_new_files_jdata; 412 unsigned int gt_new_files_jdata;
420 unsigned int gt_new_files_directio; 413 unsigned int gt_new_files_directio;
421 unsigned int gt_max_atomic_write; /* Split big writes into this size */
422 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 414 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
423 unsigned int gt_lockdump_size; 415 unsigned int gt_lockdump_size;
424 unsigned int gt_stall_secs; /* Detects trouble! */ 416 unsigned int gt_stall_secs; /* Detects trouble! */
425 unsigned int gt_complain_secs; 417 unsigned int gt_complain_secs;
426 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ 418 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
427 unsigned int gt_entries_per_readdir; 419 unsigned int gt_entries_per_readdir;
428 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
429 unsigned int gt_greedy_default;
430 unsigned int gt_greedy_quantum;
431 unsigned int gt_greedy_max;
432 unsigned int gt_statfs_quantum; 420 unsigned int gt_statfs_quantum;
433 unsigned int gt_statfs_slow; 421 unsigned int gt_statfs_slow;
434}; 422};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d122074c45e1..0d6831a40565 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -287,10 +287,8 @@ out:
287 * 287 *
288 * Returns: errno 288 * Returns: errno
289 */ 289 */
290
291int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 290int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
292{ 291{
293 struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
294 struct buffer_head *dibh; 292 struct buffer_head *dibh;
295 u32 nlink; 293 u32 nlink;
296 int error; 294 int error;
@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
315 else 313 else
316 drop_nlink(&ip->i_inode); 314 drop_nlink(&ip->i_inode);
317 315
318 ip->i_inode.i_ctime.tv_sec = get_seconds(); 316 ip->i_inode.i_ctime = CURRENT_TIME_SEC;
319 317
320 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 318 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
321 gfs2_dinode_out(ip, dibh->b_data); 319 gfs2_dinode_out(ip, dibh->b_data);
322 brelse(dibh); 320 brelse(dibh);
323 mark_inode_dirty(&ip->i_inode); 321 mark_inode_dirty(&ip->i_inode);
324 322
325 if (ip->i_inode.i_nlink == 0) { 323 if (ip->i_inode.i_nlink == 0)
326 struct gfs2_rgrpd *rgd;
327 struct gfs2_holder ri_gh, rg_gh;
328
329 error = gfs2_rindex_hold(sdp, &ri_gh);
330 if (error)
331 goto out;
332 error = -EIO;
333 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
334 if (!rgd)
335 goto out_norgrp;
336 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
337 if (error)
338 goto out_norgrp;
339
340 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 324 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
341 gfs2_glock_dq_uninit(&rg_gh); 325
342out_norgrp:
343 gfs2_glock_dq_uninit(&ri_gh);
344 }
345out:
346 return error; 326 return error;
347} 327}
348 328
349struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 329struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
350{ 330{
351 struct qstr qstr; 331 struct qstr qstr;
332 struct inode *inode;
352 gfs2_str2qstr(&qstr, name); 333 gfs2_str2qstr(&qstr, name);
353 return gfs2_lookupi(dip, &qstr, 1, NULL); 334 inode = gfs2_lookupi(dip, &qstr, 1, NULL);
335 /* gfs2_lookupi has inconsistent callers: vfs
336 * related routines expect NULL for no entry found,
337 * gfs2_lookup_simple callers expect ENOENT
338 * and do not check for NULL.
339 */
340 if (inode == NULL)
341 return ERR_PTR(-ENOENT);
342 else
343 return inode;
354} 344}
355 345
356 346
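The CURRENT_TIME_SEC conversion above is type-correct rather than cosmetic: i_ctime is a struct timespec, and the macro (from include/linux/time.h in this kernel series) expands to a whole-second timespec built from the same get_seconds() counter the old code stored directly:

#define CURRENT_TIME_SEC	((struct timespec) { get_seconds(), 0 })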
@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
361 * @is_root: If 1, ignore the caller's permissions 351 * @is_root: If 1, ignore the caller's permissions
362 * @i_gh: An uninitialized holder for the new inode glock 352 * @i_gh: An uninitialized holder for the new inode glock
363 * 353 *
364 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless 354 * This can be called via the VFS filldir function when NFS is doing
365 * @is_root is true. 355 * a readdirplus and the inode which it intends to stat isn't
356 * already in cache. In this case we must not take the directory glock
357 * again, since the readdir call will have already taken that lock.
366 * 358 *
367 * Returns: errno 359 * Returns: errno
368 */ 360 */
@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
375 struct gfs2_holder d_gh; 367 struct gfs2_holder d_gh;
376 struct gfs2_inum_host inum; 368 struct gfs2_inum_host inum;
377 unsigned int type; 369 unsigned int type;
378 int error = 0; 370 int error;
379 struct inode *inode = NULL; 371 struct inode *inode = NULL;
372 int unlock = 0;
380 373
381 if (!name->len || name->len > GFS2_FNAMESIZE) 374 if (!name->len || name->len > GFS2_FNAMESIZE)
382 return ERR_PTR(-ENAMETOOLONG); 375 return ERR_PTR(-ENAMETOOLONG);
@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
388 return dir; 381 return dir;
389 } 382 }
390 383
391 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 384 if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
392 if (error) 385 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
393 return ERR_PTR(error); 386 if (error)
387 return ERR_PTR(error);
388 unlock = 1;
389 }
394 390
395 if (!is_root) { 391 if (!is_root) {
396 error = permission(dir, MAY_EXEC, NULL); 392 error = permission(dir, MAY_EXEC, NULL);
@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
405 inode = gfs2_inode_lookup(sb, &inum, type); 401 inode = gfs2_inode_lookup(sb, &inum, type);
406 402
407out: 403out:
408 gfs2_glock_dq_uninit(&d_gh); 404 if (unlock)
405 gfs2_glock_dq_uninit(&d_gh);
409 if (error == -ENOENT) 406 if (error == -ENOENT)
410 return NULL; 407 return NULL;
411 return inode; 408 return inode ? inode : ERR_PTR(error);
412} 409}
413 410
414static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 411static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
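gfs2_lookupi() now takes the directory glock only when the calling task does not already hold it, which is the fix for the NFS readdirplus recursion described in the comment above (gfs2_drevalidate() in ops_dentry.c below gets the same treatment). The helper is defined outside the quoted hunks; a plausible sketch, assuming it scans the glock's granted-holder list for the current task:

static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;
	int locked = 0;

	/* Look for a holder owned by the current task. */
	spin_lock(&gl->gl_spin);
	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (gh->gh_owner == current) {
			locked = 1;
			break;
		}
	}
	spin_unlock(&gl->gl_spin);
	return locked;
}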
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
index effe4a337c1d..e30673dd37e0 100644
--- a/fs/gfs2/lm.c
+++ b/fs/gfs2/lm.c
@@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
104 vprintk(fmt, args); 104 vprintk(fmt, args);
105 va_end(args); 105 va_end(args);
106 106
107 fs_err(sdp, "about to withdraw from the cluster\n"); 107 fs_err(sdp, "about to withdraw this file system\n");
108 BUG_ON(sdp->sd_args.ar_debug); 108 BUG_ON(sdp->sd_args.ar_debug);
109 109
110
111 fs_err(sdp, "waiting for outstanding I/O\n");
112
113 /* FIXME: suspend dm device so oustanding bio's complete
114 and all further io requests fail */
115
116 fs_err(sdp, "telling LM to withdraw\n"); 110 fs_err(sdp, "telling LM to withdraw\n");
117 gfs2_withdraw_lockproto(&sdp->sd_lockstruct); 111 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
118 fs_err(sdp, "withdrawn\n"); 112 fs_err(sdp, "withdrawn\n");
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 33af707a4d3f..a87c7bf3c568 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -36,7 +36,7 @@
36 36
37#define GDLM_STRNAME_BYTES 24 37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32 38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000 39#define GDLM_DROP_COUNT 200000
40#define GDLM_DROP_PERIOD 60 40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128 41#define GDLM_NAME_LEN 128
42 42
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
index 2194b1d5b5ec..a0e7eda643ed 100644
--- a/fs/gfs2/locking/dlm/main.c
+++ b/fs/gfs2/locking/dlm/main.c
@@ -11,9 +11,6 @@
11 11
12#include "lock_dlm.h" 12#include "lock_dlm.h"
13 13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops; 14extern struct lm_lockops gdlm_ops;
18 15
19static int __init init_lock_dlm(void) 16static int __init init_lock_dlm(void)
@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void)
40 return error; 37 return error;
41 } 38 }
42 39
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO 40 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); 41 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0; 42 return 0;
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index cdd1694e889b..1d8faa3da8af 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -9,8 +9,6 @@
9 9
10#include "lock_dlm.h" 10#include "lock_dlm.h"
11 11
12int gdlm_drop_count;
13int gdlm_drop_period;
14const struct lm_lockops gdlm_ops; 12const struct lm_lockops gdlm_ops;
15 13
16 14
@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
24 if (!ls) 22 if (!ls)
25 return NULL; 23 return NULL;
26 24
27 ls->drop_locks_count = gdlm_drop_count; 25 ls->drop_locks_count = GDLM_DROP_COUNT;
28 ls->drop_locks_period = gdlm_drop_period; 26 ls->drop_locks_period = GDLM_DROP_PERIOD;
29 ls->fscb = cb; 27 ls->fscb = cb;
30 ls->sdp = sdp; 28 ls->sdp = sdp;
31 ls->fsflags = flags; 29 ls->fsflags = flags;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 29ae06f94944..4746b884662d 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
116 return sprintf(buf, "%d\n", ls->recover_jid_status); 116 return sprintf(buf, "%d\n", ls->recover_jid_status);
117} 117}
118 118
119static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
120{
121 return sprintf(buf, "%d\n", ls->drop_locks_count);
122}
123
124static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
125{
126 ls->drop_locks_count = simple_strtol(buf, NULL, 0);
127 return len;
128}
129
119struct gdlm_attr { 130struct gdlm_attr {
120 struct attribute attr; 131 struct attribute attr;
121 ssize_t (*show)(struct gdlm_ls *, char *); 132 ssize_t (*show)(struct gdlm_ls *, char *);
@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL);
135GDLM_ATTR(recover, 0644, recover_show, recover_store); 146GDLM_ATTR(recover, 0644, recover_show, recover_store);
136GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 147GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
137GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); 148GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
149GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
138 150
139static struct attribute *gdlm_attrs[] = { 151static struct attribute *gdlm_attrs[] = {
140 &gdlm_attr_proto_name.attr, 152 &gdlm_attr_proto_name.attr,
@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = {
147 &gdlm_attr_recover.attr, 159 &gdlm_attr_recover.attr,
148 &gdlm_attr_recover_done.attr, 160 &gdlm_attr_recover_done.attr,
149 &gdlm_attr_recover_status.attr, 161 &gdlm_attr_recover_status.attr,
162 &gdlm_attr_drop_count.attr,
150 NULL, 163 NULL,
151}; 164};
152 165
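This is the "[GFS2] make lock_dlm drop_count tunable in sysfs" change: the previously module-global drop count becomes a per-lockspace attribute. The GDLM_ATTR() helper used above is defined earlier in sysfs.c; assuming it follows the usual kernel pattern, it is roughly:

#define GDLM_ATTR(_name, _mode, _show, _store) \
static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name, _mode, _show, _store)

With that, drop_count appears alongside the recover* files and can be rewritten at runtime; GDLM_DROP_COUNT (raised to 200000 in the lock_dlm.h hunk above) only supplies the initial value set in init_gdlm() in mount.c.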
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4d7f94d8c7bd..16bb4b4561ae 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
70 struct gfs2_trans *tr; 70 struct gfs2_trans *tr;
71 71
72 if (!list_empty(&bd->bd_list_tr)) 72 gfs2_log_lock(sdp);
73 if (!list_empty(&bd->bd_list_tr)) {
74 gfs2_log_unlock(sdp);
73 return; 75 return;
74 76 }
75 tr = current->journal_info; 77 tr = current->journal_info;
76 tr->tr_touched = 1; 78 tr->tr_touched = 1;
77 tr->tr_num_buf++; 79 tr->tr_num_buf++;
78 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 80 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
81 gfs2_log_unlock(sdp);
79 82
80 if (!list_empty(&le->le_list)) 83 if (!list_empty(&le->le_list))
81 return; 84 return;
@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
84 87
85 gfs2_meta_check(sdp, bd->bd_bh); 88 gfs2_meta_check(sdp, bd->bd_bh);
86 gfs2_pin(sdp, bd->bd_bh); 89 gfs2_pin(sdp, bd->bd_bh);
87
88 gfs2_log_lock(sdp); 90 gfs2_log_lock(sdp);
89 sdp->sd_log_num_buf++; 91 sdp->sd_log_num_buf++;
90 list_add(&le->le_list, &sdp->sd_log_le_buf); 92 list_add(&le->le_list, &sdp->sd_log_le_buf);
@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
98 struct list_head *head = &tr->tr_list_buf; 100 struct list_head *head = &tr->tr_list_buf;
99 struct gfs2_bufdata *bd; 101 struct gfs2_bufdata *bd;
100 102
103 gfs2_log_lock(sdp);
101 while (!list_empty(head)) { 104 while (!list_empty(head)) {
102 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); 105 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
103 list_del_init(&bd->bd_list_tr); 106 list_del_init(&bd->bd_list_tr);
104 tr->tr_num_buf--; 107 tr->tr_num_buf--;
105 } 108 }
109 gfs2_log_unlock(sdp);
106 gfs2_assert_warn(sdp, !tr->tr_num_buf); 110 gfs2_assert_warn(sdp, !tr->tr_num_buf);
107} 111}
108 112
@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
462 struct address_space *mapping = bd->bd_bh->b_page->mapping; 466 struct address_space *mapping = bd->bd_bh->b_page->mapping;
463 struct gfs2_inode *ip = GFS2_I(mapping->host); 467 struct gfs2_inode *ip = GFS2_I(mapping->host);
464 468
469 gfs2_log_lock(sdp);
465 tr->tr_touched = 1; 470 tr->tr_touched = 1;
466 if (list_empty(&bd->bd_list_tr) && 471 if (list_empty(&bd->bd_list_tr) &&
467 (ip->i_di.di_flags & GFS2_DIF_JDATA)) { 472 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
468 tr->tr_num_buf++; 473 tr->tr_num_buf++;
469 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 474 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
475 gfs2_log_unlock(sdp);
470 gfs2_pin(sdp, bd->bd_bh); 476 gfs2_pin(sdp, bd->bd_bh);
471 tr->tr_num_buf_new++; 477 tr->tr_num_buf_new++;
478 } else {
479 gfs2_log_unlock(sdp);
472 } 480 }
473 gfs2_trans_add_gl(bd->bd_gl); 481 gfs2_trans_add_gl(bd->bd_gl);
474 gfs2_log_lock(sdp); 482 gfs2_log_lock(sdp);
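The common thread in these lops.c hunks is that an unlocked list_empty() test followed by a list_add() is a check-then-act race: two CPUs can both see the list empty and both link the same bufdata in, corrupting the list. Schematically, using the names from the hunks above:

/* Racy (old shape): nothing pins the list between test and insert. */
if (list_empty(&bd->bd_list_tr))
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);

/* Fixed shape: the log lock covers both the test and the insert. */
gfs2_log_lock(sdp);
if (list_empty(&bd->bd_list_tr))
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
gfs2_log_unlock(sdp);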
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10d..56e33590b656 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/mpage.h> 17#include <linux/mpage.h>
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/writeback.h>
19#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h> 21#include <linux/lm_interface.h>
21 22
@@ -157,6 +158,32 @@ out_ignore:
157} 158}
158 159
159/** 160/**
161 * gfs2_writepages - Write a bunch of dirty pages back to disk
162 * @mapping: The mapping to write
163 * @wbc: Write-back control
164 *
165 * For journaled files and/or ordered writes this just falls back to the
166 * kernel's default writepages path for now. We will probably want to change
167 * that eventually (i.e. when we look at allocate on flush).
168 *
169 * For the data=writeback case though we can already ignore buffer heads
170 * and write whole extents at once. This is a big reduction in the
171 * number of I/O requests we send and the bmap calls we make in this case.
172 */
173static int gfs2_writepages(struct address_space *mapping,
174 struct writeback_control *wbc)
175{
176 struct inode *inode = mapping->host;
177 struct gfs2_inode *ip = GFS2_I(inode);
178 struct gfs2_sbd *sdp = GFS2_SB(inode);
179
180 if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
181 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
182
183 return generic_writepages(mapping, wbc);
184}
185
186/**
160 * stuffed_readpage - Fill in a Linux page with stuffed file data 187 * stuffed_readpage - Fill in a Linux page with stuffed file data
161 * @ip: the inode 188 * @ip: the inode
162 * @page: the page 189 * @page: the page
@@ -256,7 +283,7 @@ out_unlock:
256 * the page lock and the glock) and return having done no I/O. It's 283 * the page lock and the glock) and return having done no I/O. It's
257 * obviously not something we'd want to do on too regular a basis. 284 * obviously not something we'd want to do on too regular a basis.
258 * Any I/O we ignore at this time will be done via readpage later. 285 * Any I/O we ignore at this time will be done via readpage later.
259 * 2. We have to handle stuffed files here too. 286 * 2. We don't handle stuffed files here; we let readpage do the honours.
260 * 3. mpage_readpages() does most of the heavy lifting in the common case. 287 * 3. mpage_readpages() does most of the heavy lifting in the common case.
261 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. 288 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
262 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as 289 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
269 struct gfs2_inode *ip = GFS2_I(inode); 296 struct gfs2_inode *ip = GFS2_I(inode);
270 struct gfs2_sbd *sdp = GFS2_SB(inode); 297 struct gfs2_sbd *sdp = GFS2_SB(inode);
271 struct gfs2_holder gh; 298 struct gfs2_holder gh;
272 unsigned page_idx; 299 int ret = 0;
273 int ret;
274 int do_unlock = 0; 300 int do_unlock = 0;
275 301
276 if (likely(file != &gfs2_internal_file_sentinel)) { 302 if (likely(file != &gfs2_internal_file_sentinel)) {
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
289 goto out_unlock; 315 goto out_unlock;
290 } 316 }
291skip_lock: 317skip_lock:
292 if (gfs2_is_stuffed(ip)) { 318 if (!gfs2_is_stuffed(ip))
293 struct pagevec lru_pvec;
294 pagevec_init(&lru_pvec, 0);
295 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
296 struct page *page = list_entry(pages->prev, struct page, lru);
297 prefetchw(&page->flags);
298 list_del(&page->lru);
299 if (!add_to_page_cache(page, mapping,
300 page->index, GFP_KERNEL)) {
301 ret = stuffed_readpage(ip, page);
302 unlock_page(page);
303 if (!pagevec_add(&lru_pvec, page))
304 __pagevec_lru_add(&lru_pvec);
305 } else {
306 page_cache_release(page);
307 }
308 }
309 pagevec_lru_add(&lru_pvec);
310 ret = 0;
311 } else {
312 /* What we really want to do .... */
313 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); 319 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
314 }
315 320
316 if (do_unlock) { 321 if (do_unlock) {
317 gfs2_glock_dq_m(1, &gh); 322 gfs2_glock_dq_m(1, &gh);
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
356 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); 361 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
357 error = gfs2_glock_nq_atime(&ip->i_gh); 362 error = gfs2_glock_nq_atime(&ip->i_gh);
358 if (unlikely(error)) { 363 if (unlikely(error)) {
359 if (error == GLR_TRYFAILED) 364 if (error == GLR_TRYFAILED) {
365 unlock_page(page);
360 error = AOP_TRUNCATED_PAGE; 366 error = AOP_TRUNCATED_PAGE;
367 }
361 goto out_uninit; 368 goto out_uninit;
362 } 369 }
363 370
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
594 return; 601 return;
595} 602}
596 603
604/**
605 * gfs2_ok_for_dio - check that dio is valid on this file
606 * @ip: The inode
607 * @rw: READ or WRITE
608 * @offset: The offset at which we are reading or writing
609 *
610 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
611 * 1 (to accept the i/o request)
612 */
613static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
614{
615 /*
616 * Should we return an error here? I can't see that O_DIRECT for
617 * a journaled file makes any sense. For now we'll silently fall
618 * back to buffered I/O, likewise we do the same for stuffed
619 * files since they are (a) small and (b) unaligned.
620 */
621 if (gfs2_is_jdata(ip))
622 return 0;
623
624 if (gfs2_is_stuffed(ip))
625 return 0;
626
627 if (offset > i_size_read(&ip->i_inode))
628 return 0;
629 return 1;
630}
631
632
633
597static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 634static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
598 const struct iovec *iov, loff_t offset, 635 const struct iovec *iov, loff_t offset,
599 unsigned long nr_segs) 636 unsigned long nr_segs)
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
604 struct gfs2_holder gh; 641 struct gfs2_holder gh;
605 int rv; 642 int rv;
606 643
607 if (rw == READ)
608 mutex_lock(&inode->i_mutex);
609 /* 644 /*
610 * Shared lock, even if it's a write, since we do no allocation 645 * Deferred lock, even if it's a write, since we do no allocation
611 * on this path. All we need to change is atime. 646 * on this path. All we need to change is atime, and this lock mode
647 * ensures that other nodes have flushed their buffered read caches
648 * (i.e. their page cache entries for this inode). We do not,
649 * unfortunately have the option of only flushing a range like
650 * the VFS does.
612 */ 651 */
613 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 652 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
614 rv = gfs2_glock_nq_atime(&gh); 653 rv = gfs2_glock_nq_atime(&gh);
615 if (rv) 654 if (rv)
616 goto out; 655 return rv;
617 656 rv = gfs2_ok_for_dio(ip, rw, offset);
618 if (offset > i_size_read(inode)) 657 if (rv != 1)
619 goto out; 658 goto out; /* dio not valid, fall back to buffered i/o */
620 659
621 /* 660 rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
622 * Should we return an error here? I can't see that O_DIRECT for 661 iov, offset, nr_segs,
623 * a journaled file makes any sense. For now we'll silently fall 662 gfs2_get_block_direct, NULL);
624 * back to buffered I/O, likewise we do the same for stuffed
625 * files since they are (a) small and (b) unaligned.
626 */
627 if (gfs2_is_jdata(ip))
628 goto out;
629
630 if (gfs2_is_stuffed(ip))
631 goto out;
632
633 rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
634 inode->i_sb->s_bdev,
635 iov, offset, nr_segs,
636 gfs2_get_block_direct, NULL);
637out: 663out:
638 gfs2_glock_dq_m(1, &gh); 664 gfs2_glock_dq_m(1, &gh);
639 gfs2_holder_uninit(&gh); 665 gfs2_holder_uninit(&gh);
640 if (rw == READ)
641 mutex_unlock(&inode->i_mutex);
642
643 return rv; 666 return rv;
644} 667}
645 668
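Switching the direct-I/O glock from LM_ST_SHARED to LM_ST_DEFERRED achieves the cache flushing described in the comment because of how lock states map to DLM modes: deferred is granted as concurrent-write, which conflicts with the protected-read mode held by cached readers on other nodes, so their grants (and page-cache copies) must be dropped first. A sketch of the mapping, stated as an assumption about lock_dlm's conversion table:

static s16 make_mode(s16 lmstate)
{
	switch (lmstate) {
	case LM_ST_UNLOCKED:
		return DLM_LOCK_NL;	/* null */
	case LM_ST_EXCLUSIVE:
		return DLM_LOCK_EX;	/* exclusive */
	case LM_ST_DEFERRED:
		return DLM_LOCK_CW;	/* concurrent write */
	case LM_ST_SHARED:
		return DLM_LOCK_PR;	/* protected read */
	}
	return -1;
}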
@@ -763,6 +786,7 @@ out:
763 786
764const struct address_space_operations gfs2_file_aops = { 787const struct address_space_operations gfs2_file_aops = {
765 .writepage = gfs2_writepage, 788 .writepage = gfs2_writepage,
789 .writepages = gfs2_writepages,
766 .readpage = gfs2_readpage, 790 .readpage = gfs2_readpage,
767 .readpages = gfs2_readpages, 791 .readpages = gfs2_readpages,
768 .sync_page = block_sync_page, 792 .sync_page = block_sync_page,
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index d355899585d8..9187eb174b43 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
46 struct gfs2_inum_host inum; 46 struct gfs2_inum_host inum;
47 unsigned int type; 47 unsigned int type;
48 int error; 48 int error;
 49 int had_lock = 0;
49 50
50 if (inode && is_bad_inode(inode)) 51 if (inode && is_bad_inode(inode))
51 goto invalid; 52 goto invalid;
@@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
53 if (sdp->sd_args.ar_localcaching) 54 if (sdp->sd_args.ar_localcaching)
54 goto valid; 55 goto valid;
55 56
56 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 57 had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
57 if (error) 58 if (!had_lock) {
58 goto fail; 59 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
60 if (error)
61 goto fail;
62 }
59 63
60 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); 64 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
61 switch (error) { 65 switch (error) {
@@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
82 } 86 }
83 87
84valid_gunlock: 88valid_gunlock:
85 gfs2_glock_dq_uninit(&d_gh); 89 if (!had_lock)
90 gfs2_glock_dq_uninit(&d_gh);
86valid: 91valid:
87 dput(parent); 92 dput(parent);
88 return 1; 93 return 1;
89 94
90invalid_gunlock: 95invalid_gunlock:
91 gfs2_glock_dq_uninit(&d_gh); 96 if (!had_lock)
97 gfs2_glock_dq_uninit(&d_gh);
92invalid: 98invalid:
93 if (inode && S_ISDIR(inode->i_mode)) { 99 if (inode && S_ISDIR(inode->i_mode)) {
94 if (have_submounts(dentry)) 100 if (have_submounts(dentry))
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b4e7b8775315..4855e8cca622 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,6 +22,7 @@
22#include "glock.h" 22#include "glock.h"
23#include "glops.h" 23#include "glops.h"
24#include "inode.h" 24#include "inode.h"
25#include "ops_dentry.h"
25#include "ops_export.h" 26#include "ops_export.h"
26#include "rgrp.h" 27#include "rgrp.h"
27#include "util.h" 28#include "util.h"
@@ -112,13 +113,12 @@ struct get_name_filldir {
112 char *name; 113 char *name;
113}; 114};
114 115
115static int get_name_filldir(void *opaque, const char *name, unsigned int length, 116static int get_name_filldir(void *opaque, const char *name, int length,
116 u64 offset, struct gfs2_inum_host *inum, 117 loff_t offset, u64 inum, unsigned int type)
117 unsigned int type)
118{ 118{
119 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; 119 struct get_name_filldir *gnfd = opaque;
120 120
121 if (!gfs2_inum_equal(inum, &gnfd->inum)) 121 if (inum != gnfd->inum.no_addr)
122 return 0; 122 return 0;
123 123
124 memcpy(gnfd->name, name, length); 124 memcpy(gnfd->name, name, length);
@@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
189 return ERR_PTR(-ENOMEM); 189 return ERR_PTR(-ENOMEM);
190 } 190 }
191 191
192 dentry->d_op = &gfs2_dops;
192 return dentry; 193 return dentry;
193} 194}
194 195
@@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
215 } 216 }
216 217
217 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, 218 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
218 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, 219 LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
219 &i_gh);
220 if (error) 220 if (error)
221 return ERR_PTR(error); 221 return ERR_PTR(error);
222 222
@@ -269,6 +269,7 @@ out_inode:
269 return ERR_PTR(-ENOMEM); 269 return ERR_PTR(-ENOMEM);
270 } 270 }
271 271
272 dentry->d_op = &gfs2_dops;
272 return dentry; 273 return dentry;
273 274
274fail_rgd: 275fail_rgd:
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index faa07e4b97d0..c996aa739a05 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -43,15 +43,6 @@
43#include "util.h" 43#include "util.h"
44#include "eaops.h" 44#include "eaops.h"
45 45
46/* For regular, non-NFS */
47struct filldir_reg {
48 struct gfs2_sbd *fdr_sbd;
49 int fdr_prefetch;
50
51 filldir_t fdr_filldir;
52 void *fdr_opaque;
53};
54
55/* 46/*
56 * Most fields left uninitialised to catch anybody who tries to 47 * Most fields left uninitialised to catch anybody who tries to
57 * use them. f_flags set to prevent file_accessed() from touching 48 * use them. f_flags set to prevent file_accessed() from touching
@@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
 }
 
 /**
- * filldir_func - Report a directory entry to the caller of gfs2_dir_read()
- * @opaque: opaque data used by the function
- * @name: the name of the directory entry
- * @length: the length of the name
- * @offset: the entry's offset in the directory
- * @inum: the inode number the entry points to
- * @type: the type of inode the entry points to
- *
- * Returns: 0 on success, 1 if buffer full
- */
-
-static int filldir_func(void *opaque, const char *name, unsigned int length,
-                        u64 offset, struct gfs2_inum_host *inum,
-                        unsigned int type)
-{
-        struct filldir_reg *fdr = (struct filldir_reg *)opaque;
-        struct gfs2_sbd *sdp = fdr->fdr_sbd;
-        int error;
-
-        error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
-                                 inum->no_addr, type);
-        if (error)
-                return 1;
-
-        if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
-                gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
-                                        LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
-                gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
-                                        LM_ST_SHARED, LM_FLAG_TRY);
-        }
-
-        return 0;
-}
-
-/**
  * gfs2_readdir - Read directory entries from a directory
  * @file: The directory to read from
  * @dirent: Buffer for dirents
@@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
 {
         struct inode *dir = file->f_mapping->host;
         struct gfs2_inode *dip = GFS2_I(dir);
-        struct filldir_reg fdr;
         struct gfs2_holder d_gh;
         u64 offset = file->f_pos;
         int error;
 
-        fdr.fdr_sbd = GFS2_SB(dir);
-        fdr.fdr_prefetch = 1;
-        fdr.fdr_filldir = filldir;
-        fdr.fdr_opaque = dirent;
-
         gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
         error = gfs2_glock_nq_atime(&d_gh);
         if (error) {
@@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
                 return error;
         }
 
-        error = gfs2_dir_read(dir, &offset, &fdr, filldir_func);
+        error = gfs2_dir_read(dir, &offset, dirent, filldir);
 
         gfs2_glock_dq_uninit(&d_gh);
 
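
With struct filldir_reg and filldir_func gone, gfs2_readdir() hands the VFS-supplied buffer and callback straight to gfs2_dir_read(). A sketch of the slimmed-down function, assembled from the hunks above; the error-path gfs2_holder_uninit() and the final f_pos update are assumed from the elided context:

static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
{
        struct inode *dir = file->f_mapping->host;
        struct gfs2_inode *dip = GFS2_I(dir);
        struct gfs2_holder d_gh;
        u64 offset = file->f_pos;
        int error;

        gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
        error = gfs2_glock_nq_atime(&d_gh);
        if (error) {
                gfs2_holder_uninit(&d_gh); /* assumed from elided context */
                return error;
        }

        /* The VFS callback is now handed through unmodified. */
        error = gfs2_dir_read(dir, &offset, dirent, filldir);

        gfs2_glock_dq_uninit(&d_gh);

        file->f_pos = offset; /* assumed: persist the updated position */

        return error;
}
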
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 636dda4c7d38..f40a84807d75 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
         struct gfs2_inode *dip = GFS2_I(dir);
         struct gfs2_sbd *sdp = GFS2_SB(dir);
         struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_holder ghs[2];
+        struct gfs2_holder ghs[3];
+        struct gfs2_rgrpd *rgd;
+        struct gfs2_holder ri_gh;
         int error;
 
+        error = gfs2_rindex_hold(sdp, &ri_gh);
+        if (error)
+                return error;
+
         gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
         gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-        error = gfs2_glock_nq_m(2, ghs);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+
+        error = gfs2_glock_nq_m(3, ghs);
         if (error)
                 goto out;
 
@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 out_end_trans:
         gfs2_trans_end(sdp);
 out_gunlock:
-        gfs2_glock_dq_m(2, ghs);
+        gfs2_glock_dq_m(3, ghs);
 out:
         gfs2_holder_uninit(ghs);
         gfs2_holder_uninit(ghs + 1);
+        gfs2_holder_uninit(ghs + 2);
+        gfs2_glock_dq_uninit(&ri_gh);
         return error;
 }
 
@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
         struct gfs2_inode *dip = GFS2_I(dir);
         struct gfs2_sbd *sdp = GFS2_SB(dir);
         struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_holder ghs[2];
+        struct gfs2_holder ghs[3];
+        struct gfs2_rgrpd *rgd;
+        struct gfs2_holder ri_gh;
         int error;
 
+
+        error = gfs2_rindex_hold(sdp, &ri_gh);
+        if (error)
+                return error;
         gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
         gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-        error = gfs2_glock_nq_m(2, ghs);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+        error = gfs2_glock_nq_m(3, ghs);
         if (error)
                 goto out;
 
@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
         gfs2_trans_end(sdp);
 
 out_gunlock:
-        gfs2_glock_dq_m(2, ghs);
+        gfs2_glock_dq_m(3, ghs);
 out:
         gfs2_holder_uninit(ghs);
         gfs2_holder_uninit(ghs + 1);
+        gfs2_holder_uninit(ghs + 2);
+        gfs2_glock_dq_uninit(&ri_gh);
         return error;
 }
 
@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
         struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
         struct gfs2_inode *nip = NULL;
         struct gfs2_sbd *sdp = GFS2_SB(odir);
-        struct gfs2_holder ghs[4], r_gh;
+        struct gfs2_holder ghs[5], r_gh;
+        struct gfs2_rgrpd *nrgd;
         unsigned int num_gh;
         int dir_rename = 0;
         int alloc_required;
@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
         if (nip) {
                 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
                 num_gh++;
+                /* grab the resource lock for unlink flag twiddling
+                 * this is the case of the target file already existing
+                 * so we unlink before doing the rename
+                 */
+                nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+                if (nrgd)
+                        gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
         }
 
         error = gfs2_glock_nq_m(num_gh, ghs);
@@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
                                          al->al_rgd->rd_ri.ri_length +
                                          4 * RES_DINODE + 4 * RES_LEAF +
-                                         RES_STATFS + RES_QUOTA, 0);
+                                         RES_STATFS + RES_QUOTA + 4, 0);
                 if (error)
                         goto out_ipreserv;
         } else {
                 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
-                                         5 * RES_LEAF, 0);
+                                         5 * RES_LEAF + 4, 0);
                 if (error)
                         goto out_gunlock;
         }
@@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
         error = gfs2_meta_inode_buffer(ip, &dibh);
         if (error)
                 goto out_end_trans;
-        ip->i_inode.i_ctime.tv_sec = get_seconds();
+        ip->i_inode.i_ctime = CURRENT_TIME_SEC;
         gfs2_trans_add_bh(ip->i_gl, dibh, 1);
         gfs2_dinode_out(ip, dibh->b_data);
         brelse(dibh);
@@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
         }
 
         generic_fillattr(inode, stat);
-        if (unlock);
+        if (unlock)
                 gfs2_glock_dq_uninit(&gh);
 
         return 0;
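
The unlink and rmdir hunks apply the same fix: the resource-group index and the victim inode's resource group are now locked alongside the directory and inode glocks, so everything the unlink path will touch is queued together up front rather than acquired mid-operation. A sketch of the shared acquisition sequence, with error unwinding elided and identifiers as in the hunks above:

struct gfs2_holder ghs[3], ri_gh;
struct gfs2_rgrpd *rgd;
int error;

/* 1. Pin the resource-group index so the rgd lookup below is stable. */
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
        return error;

/* 2. Directory and victim-inode glocks, both exclusive. */
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

/* 3. The resource group holding the inode's block, also exclusive,
 *    since its metadata is updated when the link goes away. */
rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);

/* 4. Queue all three holders in a single batch. */
error = gfs2_glock_nq_m(3, ghs);
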
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 7685b46f934b..47369d011214 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb)
         struct gfs2_sbd *sdp = sb->s_fs_info;
         int error;
 
+        if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+                return;
+
         for (;;) {
                 error = gfs2_freeze_fs(sdp);
                 if (!error)
@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode)
         }
 
         error = gfs2_dinode_dealloc(ip);
+        /*
+         * Must do this before unlock to avoid trying to write back
+         * potentially dirty data now that inode no longer exists
+         * on disk.
+         */
+        truncate_inode_pages(&inode->i_data, 0);
 
 out_unlock:
         gfs2_glock_dq(&ip->i_iopen_gh);
@@ -443,14 +452,12 @@ out:
 
 static struct inode *gfs2_alloc_inode(struct super_block *sb)
 {
-        struct gfs2_sbd *sdp = sb->s_fs_info;
         struct gfs2_inode *ip;
 
         ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
         if (ip) {
                 ip->i_flags = 0;
                 ip->i_gl = NULL;
-                ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
                 ip->i_last_pfault = jiffies;
         }
         return &ip->i_inode;
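
Two of the ops_super.c changes guard teardown paths: freeze becomes a no-op on a withdrawn filesystem, and (per the new comment above) the page cache is dropped while the glock is still held so writeback cannot target an inode that no longer exists on disk. A sketch of the freeze guard, with the retry loop body elided as in the context above; the rationale for the early return is inferred, not stated in the patch:

static void gfs2_write_super_lockfs(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error;

        /* A withdrawn (shut-down) filesystem cannot be frozen; bailing
         * out here presumably avoids retrying gfs2_freeze_fs() forever. */
        if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
                return;

        for (;;) {
                error = gfs2_freeze_fs(sdp);
                if (!error)
                        break;
                /* ... retry/complain path elided, as in the context above */
        }
}
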
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 45a5f11fc39a..14b380fb0602 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -28,34 +28,13 @@
 #include "trans.h"
 #include "util.h"
 
-static void pfault_be_greedy(struct gfs2_inode *ip)
-{
-        unsigned int time;
-
-        spin_lock(&ip->i_spin);
-        time = ip->i_greedy;
-        ip->i_last_pfault = jiffies;
-        spin_unlock(&ip->i_spin);
-
-        igrab(&ip->i_inode);
-        if (gfs2_glock_be_greedy(ip->i_gl, time))
-                iput(&ip->i_inode);
-}
-
 static struct page *gfs2_private_nopage(struct vm_area_struct *area,
                                         unsigned long address, int *type)
 {
         struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
-        struct page *result;
 
         set_bit(GIF_PAGED, &ip->i_flags);
-
-        result = filemap_nopage(area, address, type);
-
-        if (result && result != NOPAGE_OOM)
-                pfault_be_greedy(ip);
-
-        return result;
+        return filemap_nopage(area, address, type);
 }
 
 static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
                 set_page_dirty(result);
         }
 
-        pfault_be_greedy(ip);
 out:
         gfs2_glock_dq_uninit(&i_gh);
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 43a24f2e5905..70f424fcf1cd 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt)
         gt->gt_atime_quantum = 3600;
         gt->gt_new_files_jdata = 0;
         gt->gt_new_files_directio = 0;
-        gt->gt_max_atomic_write = 4 << 20;
         gt->gt_max_readahead = 1 << 18;
         gt->gt_lockdump_size = 131072;
         gt->gt_stall_secs = 600;
         gt->gt_complain_secs = 10;
         gt->gt_reclaim_limit = 5000;
         gt->gt_entries_per_readdir = 32;
-        gt->gt_prefetch_secs = 10;
-        gt->gt_greedy_default = HZ / 10;
-        gt->gt_greedy_quantum = HZ / 40;
-        gt->gt_greedy_max = HZ / 4;
         gt->gt_statfs_quantum = 30;
         gt->gt_statfs_slow = 0;
 }
@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
         mutex_lock(&sdp->sd_jindex_mutex);
 
         for (;;) {
-                error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
-                                           GL_LOCAL_EXCL, ji_gh);
+                error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
                 if (error)
                         break;
 
@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
         struct gfs2_log_header_host head;
         int error;
 
-        error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
-                                   GL_LOCAL_EXCL, &t_gh);
+        error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
         if (error)
                 return error;
 
@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
         gfs2_quota_sync(sdp);
         gfs2_statfs_sync(sdp);
 
-        error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
-                                   GL_LOCAL_EXCL | GL_NOCACHE,
-                                   &t_gh);
+        error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+                                   &t_gh);
         if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
                 return error;
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 983eaf1e06be..d01f9f0fda26 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0);
 TUNE_ATTR(max_readahead, 0);
 TUNE_ATTR(complain_secs, 0);
 TUNE_ATTR(reclaim_limit, 0);
-TUNE_ATTR(prefetch_secs, 0);
 TUNE_ATTR(statfs_slow, 0);
 TUNE_ATTR(new_files_jdata, 0);
 TUNE_ATTR(new_files_directio, 0);
 TUNE_ATTR(quota_simul_sync, 1);
 TUNE_ATTR(quota_cache_secs, 1);
-TUNE_ATTR(max_atomic_write, 1);
 TUNE_ATTR(stall_secs, 1);
-TUNE_ATTR(greedy_default, 1);
-TUNE_ATTR(greedy_quantum, 1);
-TUNE_ATTR(greedy_max, 1);
 TUNE_ATTR(statfs_quantum, 1);
 TUNE_ATTR_DAEMON(scand_secs, scand_process);
 TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = {
         &tune_attr_max_readahead.attr,
         &tune_attr_complain_secs.attr,
         &tune_attr_reclaim_limit.attr,
-        &tune_attr_prefetch_secs.attr,
         &tune_attr_statfs_slow.attr,
         &tune_attr_quota_simul_sync.attr,
         &tune_attr_quota_cache_secs.attr,
-        &tune_attr_max_atomic_write.attr,
         &tune_attr_stall_secs.attr,
-        &tune_attr_greedy_default.attr,
-        &tune_attr_greedy_quantum.attr,
-        &tune_attr_greedy_max.attr,
         &tune_attr_statfs_quantum.attr,
         &tune_attr_scand_secs.attr,
         &tune_attr_recoverd_secs.attr,