author		Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-07 11:09:00 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-07 11:09:00 -0500
commit		d3f8fd765e94b9137e1f27bbb0ac25289f9e565c (patch)
tree		a9ee7f05b3ef9c03292b101e1e2e0ed0e1c3e85a
parent		0670afdf0e69e5e73c8358da9c39bf3a8807b03e (diff)
parent		a2cf822274b3d58a16a65c8338e299e18b3dc3a4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (57 commits)
[GFS2] make gfs2_writepages() static
[GFS2] Unlock page on prepare_write try lock failure
[GFS2] nfsd readdirplus assertion failure
[DLM] fix softlockup in dlm_recv
[DLM] zero new user lvbs
[DLM/GFS2] indent help text
[GFS2] Fix unlink deadlocks
[GFS2] Put back semaphore to avoid umount problem
[GFS2] more CURRENT_TIME_SEC
[GFS2/DLM] fix GFS2 circular dependency
[GFS2/DLM] use sysfs
[GFS2] make lock_dlm drop_count tunable in sysfs
[GFS2] increase default lock limit
[GFS2] Fix list corruption in lops.c
[GFS2] Fix recursive locking attempt with NFS
[DLM] can miss clearing resend flag
[DLM] saved dlm message can be dropped
[DLM] Make sock_sem into a mutex
[GFS2] Fix typo in glock.c
[GFS2] use CURRENT_TIME_SEC instead of get_seconds in gfs2
...
39 files changed, 866 insertions(+), 1119 deletions(-)
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fef..6fa7b0d5c043 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
 
 config DLM
 	tristate "Distributed Lock Manager (DLM)"
-	depends on IPV6 || IPV6=n
+	depends on SYSFS && (IPV6 || IPV6=n)
 	select CONFIGFS_FS
 	select IP_SCTP if DLM_SCTP
 	help
 	  A general purpose distributed lock manager for kernel or userspace
 	  applications.
 
 choice
 	prompt "Select DLM communications protocol"
 	depends on DLM
 	default DLM_TCP
 	help
 	  The DLM Can use TCP or SCTP for it's network communications.
 	  SCTP supports multi-homed operations whereas TCP doesn't.
 	  However, SCTP seems to have stability problems at the moment.
 
 config DLM_TCP
 	bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
 	bool "DLM debugging"
 	depends on DLM
 	help
 	  Under the debugfs mount point, the name of each lockspace will
 	  appear as a file in the "dlm" directory. The output is the
 	  list of resource and locks the local node knows about.
 
 endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbfa..8665c88e5af2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
 static void drop_node(struct config_group *, struct config_item *);
 static void release_node(struct config_item *);
 
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+			    char *buf);
+static ssize_t store_cluster(struct config_item *i,
+			     struct configfs_attribute *a,
+			     const char *buf, size_t len);
 static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
 			 char *buf);
 static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
 static ssize_t node_weight_read(struct node *nd, char *buf);
 static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
 
+struct cluster {
+	struct config_group group;
+	unsigned int cl_tcp_port;
+	unsigned int cl_buffer_size;
+	unsigned int cl_rsbtbl_size;
+	unsigned int cl_lkbtbl_size;
+	unsigned int cl_dirtbl_size;
+	unsigned int cl_recover_timer;
+	unsigned int cl_toss_secs;
+	unsigned int cl_scan_secs;
+	unsigned int cl_log_debug;
+};
+
+enum {
+	CLUSTER_ATTR_TCP_PORT = 0,
+	CLUSTER_ATTR_BUFFER_SIZE,
+	CLUSTER_ATTR_RSBTBL_SIZE,
+	CLUSTER_ATTR_LKBTBL_SIZE,
+	CLUSTER_ATTR_DIRTBL_SIZE,
+	CLUSTER_ATTR_RECOVER_TIMER,
+	CLUSTER_ATTR_TOSS_SECS,
+	CLUSTER_ATTR_SCAN_SECS,
+	CLUSTER_ATTR_LOG_DEBUG,
+};
+
+struct cluster_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct cluster *, char *);
+	ssize_t (*store)(struct cluster *, const char *, size_t);
+};
+
+static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+			   unsigned int *info_field, int check_zero,
+			   const char *buf, size_t len)
+{
+	unsigned int x;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	x = simple_strtoul(buf, NULL, 0);
+
+	if (check_zero && !x)
+		return -EINVAL;
+
+	*cl_field = x;
+	*info_field = x;
+
+	return len;
+}
+
+#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
+	.attr   = { .ca_name = __stringify(_name), \
+		    .ca_mode = _mode, \
+		    .ca_owner = THIS_MODULE }, \
+	.show   = _read, \
+	.store  = _write, \
+}
+
+#define CLUSTER_ATTR(name, check_zero) \
+static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+{ \
+	return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
+			   check_zero, buf, len); \
+} \
+static ssize_t name##_read(struct cluster *cl, char *buf) \
+{ \
+	return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
+} \
+static struct cluster_attribute cluster_attr_##name = \
+__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
+
+CLUSTER_ATTR(tcp_port, 1);
+CLUSTER_ATTR(buffer_size, 1);
+CLUSTER_ATTR(rsbtbl_size, 1);
+CLUSTER_ATTR(lkbtbl_size, 1);
+CLUSTER_ATTR(dirtbl_size, 1);
+CLUSTER_ATTR(recover_timer, 1);
+CLUSTER_ATTR(toss_secs, 1);
+CLUSTER_ATTR(scan_secs, 1);
+CLUSTER_ATTR(log_debug, 0);
+
+static struct configfs_attribute *cluster_attrs[] = {
+	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+	[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
+	[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
+	[CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
+	[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
+	[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
+	[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
+	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
+	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+	NULL,
+};
+
 enum {
 	COMM_ATTR_NODEID = 0,
 	COMM_ATTR_LOCAL,
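To make the macro machinery above easier to follow: hand-expanding CLUSTER_ATTR(tcp_port, 1) yields roughly the following. This is only an illustration of what the preprocessor generates, not extra code in the tree; a write goes through cluster_set(), which updates both the per-cluster copy and the global dlm_config, and check_zero = 1 makes a zero value be rejected with -EINVAL.

	static ssize_t tcp_port_write(struct cluster *cl, const char *buf, size_t len)
	{
		return cluster_set(cl, &cl->cl_tcp_port, &dlm_config.ci_tcp_port,
				   1, buf, len);
	}
	static ssize_t tcp_port_read(struct cluster *cl, char *buf)
	{
		return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_tcp_port);
	}
	static struct cluster_attribute cluster_attr_tcp_port = {
		.attr  = { .ca_name = "tcp_port",	/* __stringify(tcp_port) */
			   .ca_mode = 0644,
			   .ca_owner = THIS_MODULE },
		.show  = tcp_port_read,
		.store = tcp_port_write,
	};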
@@ -152,10 +252,6 @@ struct clusters {
 	struct configfs_subsystem subsys;
 };
 
-struct cluster {
-	struct config_group group;
-};
-
 struct spaces {
 	struct config_group ss_group;
 };
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
 
 static struct configfs_item_operations cluster_ops = {
 	.release = release_cluster,
+	.show_attribute = show_cluster,
+	.store_attribute = store_cluster,
 };
 
 static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
 
 static struct config_item_type cluster_type = {
 	.ct_item_ops = &cluster_ops,
+	.ct_attrs = cluster_attrs,
 	.ct_owner = THIS_MODULE,
 };
 
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->group.default_groups[1] = &cms->cs_group;
 	cl->group.default_groups[2] = NULL;
 
+	cl->cl_tcp_port = dlm_config.ci_tcp_port;
+	cl->cl_buffer_size = dlm_config.ci_buffer_size;
+	cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
+	cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
+	cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
+	cl->cl_recover_timer = dlm_config.ci_recover_timer;
+	cl->cl_toss_secs = dlm_config.ci_toss_secs;
+	cl->cl_scan_secs = dlm_config.ci_scan_secs;
+	cl->cl_log_debug = dlm_config.ci_log_debug;
+
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
 	return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
  * Functions for user space to read/write attributes
  */
 
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+			    char *buf)
+{
+	struct cluster *cl = to_cluster(i);
+	struct cluster_attribute *cla =
+			container_of(a, struct cluster_attribute, attr);
+	return cla->show ? cla->show(cl, buf) : 0;
+}
+
+static ssize_t store_cluster(struct config_item *i,
+			     struct configfs_attribute *a,
+			     const char *buf, size_t len)
+{
+	struct cluster *cl = to_cluster(i);
+	struct cluster_attribute *cla =
+			container_of(a, struct cluster_attribute, attr);
+	return cla->store ? cla->store(cl, buf, len) : -EINVAL;
+}
+
 static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
 			 char *buf)
 {
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_RECOVER_TIMER 5
 #define DEFAULT_TOSS_SECS 10
 #define DEFAULT_SCAN_SECS 5
+#define DEFAULT_LOG_DEBUG 0
 
 struct dlm_config_info dlm_config = {
-	.tcp_port = DEFAULT_TCP_PORT,
-	.buffer_size = DEFAULT_BUFFER_SIZE,
-	.rsbtbl_size = DEFAULT_RSBTBL_SIZE,
-	.lkbtbl_size = DEFAULT_LKBTBL_SIZE,
-	.dirtbl_size = DEFAULT_DIRTBL_SIZE,
-	.recover_timer = DEFAULT_RECOVER_TIMER,
-	.toss_secs = DEFAULT_TOSS_SECS,
-	.scan_secs = DEFAULT_SCAN_SECS
+	.ci_tcp_port = DEFAULT_TCP_PORT,
+	.ci_buffer_size = DEFAULT_BUFFER_SIZE,
+	.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
+	.ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
+	.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
+	.ci_recover_timer = DEFAULT_RECOVER_TIMER,
+	.ci_toss_secs = DEFAULT_TOSS_SECS,
+	.ci_scan_secs = DEFAULT_SCAN_SECS,
+	.ci_log_debug = DEFAULT_LOG_DEBUG
 };
 
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a9..1e978611a96e 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
 #define DLM_MAX_ADDR_COUNT 3
 
 struct dlm_config_info {
-	int tcp_port;
-	int buffer_size;
-	int rsbtbl_size;
-	int lkbtbl_size;
-	int dirtbl_size;
-	int recover_timer;
-	int toss_secs;
-	int scan_secs;
+	int ci_tcp_port;
+	int ci_buffer_size;
+	int ci_rsbtbl_size;
+	int ci_lkbtbl_size;
+	int ci_dirtbl_size;
+	int ci_recover_timer;
+	int ci_toss_secs;
+	int ci_scan_secs;
+	int ci_log_debug;
 };
 
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc0..61d93201e1b2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
 #include <asm/uaccess.h>
 
 #include <linux/dlm.h>
+#include "config.h"
 
 #define DLM_LOCKSPACE_LEN 64
 
@@ -69,12 +70,12 @@ struct dlm_mhandle;
 #define log_error(ls, fmt, args...) \
 	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
 
-#define DLM_LOG_DEBUG
-#ifdef DLM_LOG_DEBUG
-#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
-#else
-#define log_debug(ls, fmt, args...)
-#endif
+#define log_debug(ls, fmt, args...) \
+do { \
+	if (dlm_config.ci_log_debug) \
+		printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
+		       (ls)->ls_name , ##args); \
+} while (0)
 
 #define DLM_ASSERT(x, do) \
 { \
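The old compile-time DLM_LOG_DEBUG switch becomes a runtime test of dlm_config.ci_log_debug, which the new configfs log_debug attribute can toggle on a live system. A rough sketch of what a call site now does (the message is hypothetical):

	log_debug(ls, "remove %x", id);
	/* expands to approximately:
	 *	if (dlm_config.ci_log_debug)
	 *		printk(KERN_DEBUG "dlm: %s: remove %x\n", (ls)->ls_name, id);
	 * so debug output can be switched on without rebuilding, at the cost
	 * of one branch per call site. */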
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
 
 /* dlm_header is first element of all structs sent between nodes */
 
-#define DLM_HEADER_MAJOR	0x00020000
-#define DLM_HEADER_MINOR	0x00000001
+#define DLM_HEADER_MAJOR	0x00030000
+#define DLM_HEADER_MINOR	0x00000000
 
 #define DLM_MSG			1
 #define DLM_RCOM		2
@@ -386,6 +387,8 @@ struct dlm_rcom {
 	uint32_t		rc_type;	/* DLM_RCOM_ */
 	int			rc_result;	/* multi-purpose */
 	uint64_t		rc_id;		/* match reply with request */
+	uint64_t		rc_seq;		/* sender's ls_recover_seq */
+	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
 	char			rc_buf[0];
 };
 
@@ -523,6 +526,7 @@ struct dlm_user_proc {
 	spinlock_t		asts_spin;
 	struct list_head	locks;
 	spinlock_t		locks_spin;
+	struct list_head	unlocking;
 	wait_queue_head_t	wait;
 };
 
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb6..e725005fafd0 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
 	mutex_unlock(&ls->ls_waiters_mutex);
 }
 
+/* We clear the RESEND flag because we might be taking an lkb off the waiters
+   list as part of process_requestqueue (e.g. a lookup that has an optimized
+   request reply on the requestqueue) between dlm_recover_waiters_pre() which
+   set RESEND and dlm_recover_waiters_post() */
+
 static int _remove_from_waiters(struct dlm_lkb *lkb)
 {
 	int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
 		goto out;
 	}
 	lkb->lkb_wait_type = 0;
+	lkb->lkb_flags &= ~DLM_IFL_RESEND;
 	list_del(&lkb->lkb_wait_reply);
 	unhold_lkb(lkb);
 out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
 		list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
 					    res_hashchain) {
 			if (!time_after_eq(jiffies, r->res_toss_time +
-					   dlm_config.toss_secs * HZ))
+					   dlm_config.ci_toss_secs * HZ))
 				continue;
 			found = 1;
 			break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
 	if (lkb->lkb_astaddr)
 		ms->m_asts |= AST_COMP;
 
-	if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
-		memcpy(ms->m_extra, r->res_name, r->res_length);
+	/* compare with switch in create_message; send_remove() doesn't
+	   use send_args() */
 
-	else if (lkb->lkb_lvbptr)
+	switch (ms->m_type) {
+	case DLM_MSG_REQUEST:
+	case DLM_MSG_LOOKUP:
+		memcpy(ms->m_extra, r->res_name, r->res_length);
+		break;
+	case DLM_MSG_CONVERT:
+	case DLM_MSG_UNLOCK:
+	case DLM_MSG_REQUEST_REPLY:
+	case DLM_MSG_CONVERT_REPLY:
+	case DLM_MSG_GRANT:
+		if (!lkb->lkb_lvbptr)
+			break;
 		memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
-
+		break;
+	}
 }
 
 static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 
 	DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
 
-	if (receive_lvb(ls, lkb, ms))
-		return -ENOMEM;
+	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
+		/* lkb was just created so there won't be an lvb yet */
+		lkb->lkb_lvbptr = allocate_lvb(ls);
+		if (!lkb->lkb_lvbptr)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 {
 	struct dlm_message *ms = (struct dlm_message *) hd;
 	struct dlm_ls *ls;
-	int error;
+	int error = 0;
 
 	if (!recovery)
 		dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 out:
 	dlm_put_lockspace(ls);
 	dlm_astd_wake();
-	return 0;
+	return error;
 }
 
 
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
 	if (middle_conversion(lkb)) {
 		hold_lkb(lkb);
 		ls->ls_stub_ms.m_result = -EINPROGRESS;
+		ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 		_remove_from_waiters(lkb);
 		_receive_convert_reply(lkb, &ls->ls_stub_ms);
 
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 		case DLM_MSG_UNLOCK:
 			hold_lkb(lkb);
 			ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+			ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 			_remove_from_waiters(lkb);
 			_receive_unlock_reply(lkb, &ls->ls_stub_ms);
 			dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 		case DLM_MSG_CANCEL:
 			hold_lkb(lkb);
 			ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+			ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 			_remove_from_waiters(lkb);
 			_receive_cancel_reply(lkb, &ls->ls_stub_ms);
 			dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 	lock_rsb(r);
 
 	switch (error) {
+	case -EBADR:
+		/* There's a chance the new master received our lock before
+		   dlm_recover_master_reply(), this wouldn't happen if we did
+		   a barrier between recover_masters and recover_locks. */
+		log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
+			  (unsigned long)r, r->res_name);
+		dlm_send_rcom_lock(r, lkb);
+		goto out;
 	case -EEXIST:
 		log_debug(ls, "master copy exists %x", lkb->lkb_id);
 		/* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 	/* an ack for dlm_recover_locks() which waits for replies from
 	   all the locks it sends to new masters */
 	dlm_recovered_lock(r);
-
+out:
 	unlock_rsb(r);
 	put_rsb(r);
 	dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	}
 
 	if (flags & DLM_LKF_VALBLK) {
-		ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+		ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
 		if (!ua->lksb.sb_lvbptr) {
 			kfree(ua);
 			__put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	ua = (struct dlm_user_args *)lkb->lkb_astparam;
 
 	if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
-		ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+		ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
 		if (!ua->lksb.sb_lvbptr) {
 			error = -ENOMEM;
 			goto out_put;
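Both dlm_user_request() and dlm_user_convert() now allocate the user lock value block with kzalloc() instead of kmalloc() (the "[DLM] zero new user lvbs" change in the shortlog). kzalloc() is kmalloc() plus zeroing, so userspace can never read stale kernel heap contents out of a fresh LVB; a minimal sketch of the effect:

	static char *new_user_lvb(void)
	{
		/* kzalloc() == kmalloc() + zeroing: the block arrives as all
		   zeroes, so no stale kernel heap data can leak to userspace */
		return kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
	}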
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 		goto out_put;
 
 	spin_lock(&ua->proc->locks_spin);
-	list_del_init(&lkb->lkb_ownqueue);
+	/* dlm_user_add_ast() may have already taken lkb off the proc list */
+	if (!list_empty(&lkb->lkb_ownqueue))
+		list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
 	spin_unlock(&ua->proc->locks_spin);
-
-	/* this removes the reference for the proc->locks list added by
-	   dlm_user_request */
-	unhold_lkb(lkb);
 out_put:
 	dlm_put_lkb(lkb);
 out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	/* this lkb was removed from the WAITING queue */
 	if (lkb->lkb_grmode == DLM_LOCK_IV) {
 		spin_lock(&ua->proc->locks_spin);
-		list_del_init(&lkb->lkb_ownqueue);
+		list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
 		spin_unlock(&ua->proc->locks_spin);
-		unhold_lkb(lkb);
 	}
 out_put:
 	dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	mutex_lock(&ls->ls_clear_proc_locks);
 
 	list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
-		if (lkb->lkb_ast_type) {
-			list_del(&lkb->lkb_astqueue);
-			unhold_lkb(lkb);
-		}
-
 		list_del_init(&lkb->lkb_ownqueue);
 
 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 
 		dlm_put_lkb(lkb);
 	}
+
+	/* in-progress unlocks */
+	list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
+		list_del_init(&lkb->lkb_ownqueue);
+		lkb->lkb_flags |= DLM_IFL_DEAD;
+		dlm_put_lkb(lkb);
+	}
+
+	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+		list_del(&lkb->lkb_astqueue);
+		dlm_put_lkb(lkb);
+	}
+
 	mutex_unlock(&ls->ls_clear_proc_locks);
 	unlock_recovery(ls);
 }
+
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8d..f40817b53c6f 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
 	while (!kthread_should_stop()) {
 		list_for_each_entry(ls, &lslist, ls_list)
 			dlm_scan_rsbs(ls);
-		schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
+		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
 	}
 	return 0;
 }
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	ls->ls_count = 0;
 	ls->ls_flags = 0;
 
-	size = dlm_config.rsbtbl_size;
+	size = dlm_config.ci_rsbtbl_size;
 	ls->ls_rsbtbl_size = size;
 
 	ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		rwlock_init(&ls->ls_rsbtbl[i].lock);
 	}
 
-	size = dlm_config.lkbtbl_size;
+	size = dlm_config.ci_lkbtbl_size;
 	ls->ls_lkbtbl_size = size;
 
 	ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		ls->ls_lkbtbl[i].counter = 1;
 	}
 
-	size = dlm_config.dirtbl_size;
+	size = dlm_config.ci_dirtbl_size;
 	ls->ls_dirtbl_size = size;
 
 	ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	mutex_init(&ls->ls_requestqueue_mutex);
 	mutex_init(&ls->ls_clear_proc_locks);
 
-	ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+	ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
 	if (!ls->ls_recover_buf)
 		goto out_dirfree;
 
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a9285..dc83a9d979b5 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
 	struct list_head	writequeue;	/* outgoing writequeue_entries */
 	spinlock_t		writequeue_lock;
 	int			nodeid;
+	struct work_struct	swork;		/* Send workqueue */
+	struct work_struct	lwork;		/* Locking workqueue */
 };
 
 static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
 	atomic_t		waiting_requests;
 	struct cbuf		cb;
 	int			eagain_flag;
+	struct work_struct	work;		/* Send workqueue */
 };
 
 /* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
 static LIST_HEAD(write_nodes);
 static DEFINE_SPINLOCK(write_nodes_lock);
 
+
 /* Maximum number of incoming messages to process before
  * doing a schedule()
  */
 #define MAX_RX_MSG_COUNT 25
 
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
+static struct workqueue_struct *lock_workqueue;
 
 /* The SCTP connection */
 static struct connection sctp_con;
 
+static void process_send_sockets(struct work_struct *work);
+static void process_recv_sockets(struct work_struct *work);
+static void process_lock_request(struct work_struct *work);
 
 static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 {
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
 		spin_lock_init(&ni->lock);
 		INIT_LIST_HEAD(&ni->writequeue);
 		spin_lock_init(&ni->writequeue_lock);
+		INIT_WORK(&ni->lwork, process_lock_request);
+		INIT_WORK(&ni->swork, process_send_sockets);
 		ni->nodeid = nodeid;
 
 		if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
 /* Data or notification available on socket */
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
-	atomic_inc(&sctp_con.waiting_requests);
 	if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
-		return;
-
-	wake_up_interruptible(&lowcomms_recv_wait);
+		queue_work(recv_workqueue, &sctp_con.work);
 }
 
 
@@ -361,10 +367,10 @@ static void init_failed(void)
 				spin_lock_bh(&write_nodes_lock);
 				list_add_tail(&ni->write_list, &write_nodes);
 				spin_unlock_bh(&write_nodes_lock);
+				queue_work(send_workqueue, &ni->swork);
 			}
 		}
 	}
-	wake_up_process(send_task);
 }
 
 /* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 			spin_lock_bh(&write_nodes_lock);
 			list_add_tail(&ni->write_list, &write_nodes);
 			spin_unlock_bh(&write_nodes_lock);
+			queue_work(send_workqueue, &ni->swork);
 		}
-		wake_up_process(send_task);
 	}
 	break;
 
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
 			spin_lock_bh(&write_nodes_lock);
 			list_add_tail(&ni->write_list, &write_nodes);
 			spin_unlock_bh(&write_nodes_lock);
+			queue_work(send_workqueue, &ni->swork);
 		}
-		wake_up_process(send_task);
 	}
 }
 
@@ -590,6 +596,7 @@
 		return 0;
 
 	cbuf_add(&sctp_con.cb, ret);
+	// PJC: TODO: Add to node's workqueue....can we ??
 	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
 					  page_address(sctp_con.rx_page),
 					  sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
 
 	if (result < 0)
 		log_print("Can't bind to port %d addr number %d",
-			  dlm_config.tcp_port, num);
+			  dlm_config.ci_tcp_port, num);
 
 	return result;
 }
@@ -711,7 +718,7 @@ static int init_sock(void)
 	/* Bind to all interfaces. */
 	for (i = 0; i < dlm_local_count; i++) {
 		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
-		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+		make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
 
 		result = add_bind_addr(&localaddr, addr_len, num);
 		if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
 		spin_lock_bh(&write_nodes_lock);
 		list_add_tail(&ni->write_list, &write_nodes);
 		spin_unlock_bh(&write_nodes_lock);
-		wake_up_process(send_task);
+
+		queue_work(send_workqueue, &ni->swork);
 	}
 	return;
 
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
 		return;
 	}
 
-	make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+	make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
 
 	outmessage.msg_name = &rem_addr;
 	outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
 	return 0;
 }
 
-static int write_list_empty(void)
+// PJC: The work queue function for receiving.
+static void process_recv_sockets(struct work_struct *work)
 {
-	int status;
-
-	spin_lock_bh(&write_nodes_lock);
-	status = list_empty(&write_nodes);
-	spin_unlock_bh(&write_nodes_lock);
-
-	return status;
-}
-
-static int dlm_recvd(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	while (!kthread_should_stop()) {
+	if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+		int ret;
 		int count = 0;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&lowcomms_recv_wait, &wait);
-		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
-			cond_resched();
-		remove_wait_queue(&lowcomms_recv_wait, &wait);
-		set_current_state(TASK_RUNNING);
-
-		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
-			int ret;
-
-			do {
-				ret = receive_from_sock();
+		do {
+			ret = receive_from_sock();
 
-				/* Don't starve out everyone else */
-				if (++count >= MAX_RX_MSG_COUNT) {
-					cond_resched();
-					count = 0;
-				}
-			} while (!kthread_should_stop() && ret >=0);
-		}
-		cond_resched();
+			/* Don't starve out everyone else */
+			if (++count >= MAX_RX_MSG_COUNT) {
+				cond_resched();
+				count = 0;
+			}
+		} while (!kthread_should_stop() && ret >=0);
 	}
-
-	return 0;
+	cond_resched();
 }
 
-static int dlm_sendd(void *data)
+// PJC: the work queue function for sending
+static void process_send_sockets(struct work_struct *work)
 {
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (write_list_empty())
-			cond_resched();
-		set_current_state(TASK_RUNNING);
-
-		if (sctp_con.eagain_flag) {
-			sctp_con.eagain_flag = 0;
-			refill_write_queue();
-		}
-		process_output_queue();
+	if (sctp_con.eagain_flag) {
+		sctp_con.eagain_flag = 0;
+		refill_write_queue();
 	}
+	process_output_queue();
+}
 
-	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	return 0;
+// PJC: Process lock requests from a particular node.
+// TODO: can we optimise this out on UP ??
+static void process_lock_request(struct work_struct *work)
+{
 }
 
 static void daemons_stop(void)
 {
-	kthread_stop(recv_task);
-	kthread_stop(send_task);
+	destroy_workqueue(recv_workqueue);
+	destroy_workqueue(send_workqueue);
+	destroy_workqueue(lock_workqueue);
 }
 
 static int daemons_start(void)
 {
-	struct task_struct *p;
 	int error;
+	recv_workqueue = create_workqueue("dlm_recv");
+	error = IS_ERR(recv_workqueue);
+	if (error) {
+		log_print("can't start dlm_recv %d", error);
+		return error;
+	}
 
-	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
-	error = IS_ERR(p);
+	send_workqueue = create_singlethread_workqueue("dlm_send");
+	error = IS_ERR(send_workqueue);
 	if (error) {
-		log_print("can't start dlm_recvd %d", error);
+		log_print("can't start dlm_send %d", error);
+		destroy_workqueue(recv_workqueue);
 		return error;
 	}
-	recv_task = p;
 
-	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
-	error = IS_ERR(p);
+	lock_workqueue = create_workqueue("dlm_rlock");
+	error = IS_ERR(lock_workqueue);
 	if (error) {
-		log_print("can't start dlm_sendd %d", error);
-		kthread_stop(recv_task);
+		log_print("can't start dlm_rlock %d", error);
+		destroy_workqueue(send_workqueue);
+		destroy_workqueue(recv_workqueue);
 		return error;
 	}
-	send_task = p;
 
 	return 0;
 }
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
 {
 	int error;
 
+	INIT_WORK(&sctp_con.work, process_recv_sockets);
+
 	error = init_sock();
 	if (error)
 		goto fail_sock;
@@ -1224,4 +1208,3 @@
 	for (i = 0; i < dlm_local_count; i++)
 		kfree(dlm_local_addr[i]);
 }
-
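The rewrite above retires the dlm_recvd/dlm_sendd kernel threads along with their wait queues and hand-rolled sleep/wake loops: event sources now queue work items, and destroy_workqueue() at shutdown flushes anything still pending. Reduced to a minimal sketch with made-up names (the IS_ERR() error idiom is copied from daemons_start() above, not a claim about every kernel version):

	static struct workqueue_struct *wq;
	static struct work_struct item;

	static void item_fn(struct work_struct *work)
	{
		/* runs in process context on a workqueue thread */
	}

	static int start(void)
	{
		wq = create_workqueue("example");
		if (IS_ERR(wq))
			return PTR_ERR(wq);
		INIT_WORK(&item, item_fn);
		return 0;
	}

	static void on_event(void)	/* e.g. a socket data-ready callback */
	{
		queue_work(wq, &item);	/* replaces wake_up_process(task) */
	}

	static void stop(void)
	{
		destroy_workqueue(wq);	/* flushes pending work, then frees */
	}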
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42a..f1efd17b2614 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
 struct connection {
 	struct socket *sock;	/* NULL if not connected */
 	uint32_t nodeid;	/* So we know who we are in the list */
-	struct rw_semaphore sock_sem; /* Stop connect races */
-	struct list_head read_list;   /* On this list when ready for reading */
-	struct list_head write_list;  /* On this list when ready for writing */
-	struct list_head state_list;  /* On this list when ready to connect */
+	struct mutex sock_mutex;
 	unsigned long flags;	/* bit 1,2 = We are on the read/write lists */
 #define CF_READ_PENDING 1
 #define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
 	struct page *rx_page;
 	struct cbuf cb;
 	int retries;
-	atomic_t waiting_requests;
 #define MAX_CONNECT_RETRIES 3
 	struct connection *othercon;
+	struct work_struct rwork; /* Receive workqueue */
+	struct work_struct swork; /* Send workqueue */
 };
 #define sock2con(x) ((struct connection *)(x)->sk_user_data)
 
@@ -131,14 +129,9 @@ struct writequeue_entry {
 
 static struct sockaddr_storage dlm_local_addr;
 
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-
-static wait_queue_t lowcomms_send_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
-static wait_queue_t lowcomms_recv_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
 
 /* An array of pointers to connections, indexed by NODEID */
 static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
 static DECLARE_MUTEX(connections_lock);
 static struct kmem_cache *con_cache;
 static int conn_array_size;
-/* List of sockets that have reads pending */
-static LIST_HEAD(read_sockets);
-static DEFINE_SPINLOCK(read_sockets_lock);
-
-/* List of sockets which have writes pending */
-static LIST_HEAD(write_sockets);
-static DEFINE_SPINLOCK(write_sockets_lock);
-
-/* List of sockets which have connects pending */
-static LIST_HEAD(state_sockets);
-static DEFINE_SPINLOCK(state_sockets_lock);
+static void process_recv_sockets(struct work_struct *work);
+static void process_send_sockets(struct work_struct *work);
 
 static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 {
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 			goto finish;
 
 		con->nodeid = nodeid;
-		init_rwsem(&con->sock_sem);
+		mutex_init(&con->sock_mutex);
 		INIT_LIST_HEAD(&con->writequeue);
 		spin_lock_init(&con->writequeue_lock);
+		INIT_WORK(&con->swork, process_send_sockets);
+		INIT_WORK(&con->rwork, process_recv_sockets);
 
 		connections[nodeid] = con;
 	}
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
 	struct connection *con = sock2con(sk);
 
-	atomic_inc(&con->waiting_requests);
-	if (test_and_set_bit(CF_READ_PENDING, &con->flags))
-		return;
-
-	spin_lock_bh(&read_sockets_lock);
-	list_add_tail(&con->read_list, &read_sockets);
-	spin_unlock_bh(&read_sockets_lock);
-
-	wake_up_interruptible(&lowcomms_recv_waitq);
+	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+		queue_work(recv_workqueue, &con->rwork);
 }
 
 static void lowcomms_write_space(struct sock *sk)
 {
 	struct connection *con = sock2con(sk);
 
-	if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
-		return;
-
-	spin_lock_bh(&write_sockets_lock);
-	list_add_tail(&con->write_list, &write_sockets);
-	spin_unlock_bh(&write_sockets_lock);
-
-	wake_up_interruptible(&lowcomms_send_waitq);
+	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+		queue_work(send_workqueue, &con->swork);
 }
 
 static inline void lowcomms_connect_sock(struct connection *con)
 {
-	if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
-		return;
-
-	spin_lock_bh(&state_sockets_lock);
-	list_add_tail(&con->state_list, &state_sockets);
-	spin_unlock_bh(&state_sockets_lock);
-
-	wake_up_interruptible(&lowcomms_send_waitq);
+	if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
+		queue_work(send_workqueue, &con->swork);
 }
 
 static void lowcomms_state_change(struct sock *sk)
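All three callbacks above converge on one idiom: a per-connection flag bit doubles as the "work already queued" guard. test_and_set_bit() atomically sets the bit and returns its previous value, so only the caller that takes it from 0 to 1 queues the work item; the worker clears the bit again before processing (the SCTP side does this with test_and_clear_bit()), reopening the gate. In isolation:

	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
		queue_work(recv_workqueue, &con->rwork);
	/* a concurrent caller sees the bit already set and skips queue_work(),
	   so each connection is queued at most once per worker pass */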
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 /* Close a remote connection and tidy up */
 static void close_connection(struct connection *con, bool and_other)
 {
-	down_write(&con->sock_sem);
+	mutex_lock(&con->sock_mutex);
 
 	if (con->sock) {
 		sock_release(con->sock);
@@ -294,7 +261,7 @@ static void close_connection(struct connection *con, bool and_other)
 		con->rx_page = NULL;
 	}
 	con->retries = 0;
-	up_write(&con->sock_sem);
+	mutex_unlock(&con->sock_mutex);
 }
 
 /* Data received from remote end */
@@ -308,10 +275,13 @@ static int receive_from_sock(struct connection *con)
 	int r;
 	int call_again_soon = 0;
 
-	down_read(&con->sock_sem);
+	mutex_lock(&con->sock_mutex);
+
+	if (con->sock == NULL) {
+		ret = -EAGAIN;
+		goto out_close;
+	}
 
-	if (con->sock == NULL)
-		goto out;
 	if (con->rx_page == NULL) {
 		/*
 		 * This doesn't need to be atomic, but I think it should
@@ -359,6 +329,9 @@
 
 	if (ret <= 0)
 		goto out_close;
+	if (ret == -EAGAIN)
+		goto out_resched;
+
 	if (ret == len)
 		call_again_soon = 1;
 	cbuf_add(&con->cb, ret);
@@ -381,24 +354,26 @@ static int receive_from_sock(struct connection *con) | |||
381 | con->rx_page = NULL; | 354 | con->rx_page = NULL; |
382 | } | 355 | } |
383 | 356 | ||
384 | out: | ||
385 | if (call_again_soon) | 357 | if (call_again_soon) |
386 | goto out_resched; | 358 | goto out_resched; |
387 | up_read(&con->sock_sem); | 359 | mutex_unlock(&con->sock_mutex); |
388 | return 0; | 360 | return 0; |
389 | 361 | ||
390 | out_resched: | 362 | out_resched: |
391 | lowcomms_data_ready(con->sock->sk, 0); | 363 | if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) |
392 | up_read(&con->sock_sem); | 364 | queue_work(recv_workqueue, &con->rwork); |
393 | cond_resched(); | 365 | mutex_unlock(&con->sock_mutex); |
394 | return 0; | 366 | return -EAGAIN; |
395 | 367 | ||
396 | out_close: | 368 | out_close: |
397 | up_read(&con->sock_sem); | 369 | mutex_unlock(&con->sock_mutex); |
398 | if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { | 370 | if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { |
399 | close_connection(con, false); | 371 | close_connection(con, false); |
400 | /* Reconnect when there is something to send */ | 372 | /* Reconnect when there is something to send */ |
401 | } | 373 | } |
374 | /* Don't return success if we really got EOF */ | ||
375 | if (ret == 0) | ||
376 | ret = -EAGAIN; | ||
402 | 377 | ||
403 | return ret; | 378 | return ret; |
404 | } | 379 | } |
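Note: receive_from_sock() also changes its exit contract: out_resched re-queues the connection's read work and returns -EAGAIN so the caller's while (!err) loop yields instead of spinning, and a clean EOF (ret == 0) is rewritten to -EAGAIN so it is never mistaken for success. The re-queue guard deserves a sketch of its own, assuming the CF_READ_PENDING convention used above:

    #include <linux/workqueue.h>

    #define CF_READ_PENDING 1               /* illustrative bit number */

    struct rx_stub {
            unsigned long flags;
            struct work_struct rwork;
    };

    static struct workqueue_struct *recv_wq;

    /* Queue the per-connection read work at most once. The handler
     * clears CF_READ_PENDING before it starts reading, so a
     * data_ready callback that fires mid-read re-queues the work
     * rather than being lost. */
    static void request_read(struct rx_stub *con)
    {
            if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
                    queue_work(recv_wq, &con->rwork);
    }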
@@ -412,6 +387,7 @@ static int accept_from_sock(struct connection *con) | |||
412 | int len; | 387 | int len; |
413 | int nodeid; | 388 | int nodeid; |
414 | struct connection *newcon; | 389 | struct connection *newcon; |
390 | struct connection *addcon; | ||
415 | 391 | ||
416 | memset(&peeraddr, 0, sizeof(peeraddr)); | 392 | memset(&peeraddr, 0, sizeof(peeraddr)); |
417 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, | 393 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, |
@@ -419,7 +395,7 @@ static int accept_from_sock(struct connection *con) | |||
419 | if (result < 0) | 395 | if (result < 0) |
420 | return -ENOMEM; | 396 | return -ENOMEM; |
421 | 397 | ||
422 | down_read(&con->sock_sem); | 398 | mutex_lock_nested(&con->sock_mutex, 0); |
423 | 399 | ||
424 | result = -ENOTCONN; | 400 | result = -ENOTCONN; |
425 | if (con->sock == NULL) | 401 | if (con->sock == NULL) |
@@ -445,7 +421,7 @@ static int accept_from_sock(struct connection *con) | |||
445 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { | 421 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { |
446 | printk("dlm: connect from non cluster node\n"); | 422 | printk("dlm: connect from non cluster node\n"); |
447 | sock_release(newsock); | 423 | sock_release(newsock); |
448 | up_read(&con->sock_sem); | 424 | mutex_unlock(&con->sock_mutex); |
449 | return -1; | 425 | return -1; |
450 | } | 426 | } |
451 | 427 | ||
@@ -462,7 +438,7 @@ static int accept_from_sock(struct connection *con) | |||
462 | result = -ENOMEM; | 438 | result = -ENOMEM; |
463 | goto accept_err; | 439 | goto accept_err; |
464 | } | 440 | } |
465 | down_write(&newcon->sock_sem); | 441 | mutex_lock_nested(&newcon->sock_mutex, 1); |
466 | if (newcon->sock) { | 442 | if (newcon->sock) { |
467 | struct connection *othercon = newcon->othercon; | 443 | struct connection *othercon = newcon->othercon; |
468 | 444 | ||
@@ -470,41 +446,45 @@ static int accept_from_sock(struct connection *con) | |||
470 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); | 446 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); |
471 | if (!othercon) { | 447 | if (!othercon) { |
472 | printk("dlm: failed to allocate incoming socket\n"); | 448 | printk("dlm: failed to allocate incoming socket\n"); |
473 | up_write(&newcon->sock_sem); | 449 | mutex_unlock(&newcon->sock_mutex); |
474 | result = -ENOMEM; | 450 | result = -ENOMEM; |
475 | goto accept_err; | 451 | goto accept_err; |
476 | } | 452 | } |
477 | othercon->nodeid = nodeid; | 453 | othercon->nodeid = nodeid; |
478 | othercon->rx_action = receive_from_sock; | 454 | othercon->rx_action = receive_from_sock; |
479 | init_rwsem(&othercon->sock_sem); | 455 | mutex_init(&othercon->sock_mutex); |
456 | INIT_WORK(&othercon->swork, process_send_sockets); | ||
457 | INIT_WORK(&othercon->rwork, process_recv_sockets); | ||
480 | set_bit(CF_IS_OTHERCON, &othercon->flags); | 458 | set_bit(CF_IS_OTHERCON, &othercon->flags); |
481 | newcon->othercon = othercon; | 459 | newcon->othercon = othercon; |
482 | } | 460 | } |
483 | othercon->sock = newsock; | 461 | othercon->sock = newsock; |
484 | newsock->sk->sk_user_data = othercon; | 462 | newsock->sk->sk_user_data = othercon; |
485 | add_sock(newsock, othercon); | 463 | add_sock(newsock, othercon); |
464 | addcon = othercon; | ||
486 | } | 465 | } |
487 | else { | 466 | else { |
488 | newsock->sk->sk_user_data = newcon; | 467 | newsock->sk->sk_user_data = newcon; |
489 | newcon->rx_action = receive_from_sock; | 468 | newcon->rx_action = receive_from_sock; |
490 | add_sock(newsock, newcon); | 469 | add_sock(newsock, newcon); |
491 | 470 | addcon = newcon; | |
492 | } | 471 | } |
493 | 472 | ||
494 | up_write(&newcon->sock_sem); | 473 | mutex_unlock(&newcon->sock_mutex); |
495 | 474 | ||
496 | /* | 475 | /* |
497 | * Add it to the active queue in case we got data | 476 | * Add it to the active queue in case we got data |
498 | * between processing the accept and adding the socket | 477 | * between processing the accept and adding the socket |

499 | * to the read_sockets list | 478 | * to the read_sockets list |
500 | */ | 479 | */ |
501 | lowcomms_data_ready(newsock->sk, 0); | 480 | if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) |
502 | up_read(&con->sock_sem); | 481 | queue_work(recv_workqueue, &addcon->rwork); |
482 | mutex_unlock(&con->sock_mutex); | ||
503 | 483 | ||
504 | return 0; | 484 | return 0; |
505 | 485 | ||
506 | accept_err: | 486 | accept_err: |
507 | up_read(&con->sock_sem); | 487 | mutex_unlock(&con->sock_mutex); |
508 | sock_release(newsock); | 488 | sock_release(newsock); |
509 | 489 | ||
510 | if (result != -EAGAIN) | 490 | if (result != -EAGAIN) |
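Note: the two mutex_lock_nested() calls in accept_from_sock() are lockdep annotations. The listening connection's mutex (subclass 0) is held while the accepted peer's mutex (subclass 1) is taken; without distinct subclasses, lockdep would flag holding two locks of the same class as a potential recursive deadlock. Sketched with hypothetical names, reusing conn_stub from the sketch above:

    static void accept_sketch(struct conn_stub *listener, struct conn_stub *peer)
    {
            mutex_lock_nested(&listener->sock_mutex, 0);
            mutex_lock_nested(&peer->sock_mutex, 1);  /* distinct lockdep subclass */
            /* move the accepted socket onto peer here */
            mutex_unlock(&peer->sock_mutex);
            mutex_unlock(&listener->sock_mutex);
    }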
@@ -525,7 +505,7 @@ static void connect_to_sock(struct connection *con) | |||
525 | return; | 505 | return; |
526 | } | 506 | } |
527 | 507 | ||
528 | down_write(&con->sock_sem); | 508 | mutex_lock(&con->sock_mutex); |
529 | if (con->retries++ > MAX_CONNECT_RETRIES) | 509 | if (con->retries++ > MAX_CONNECT_RETRIES) |
530 | goto out; | 510 | goto out; |
531 | 511 | ||
@@ -548,7 +528,7 @@ static void connect_to_sock(struct connection *con) | |||
548 | sock->sk->sk_user_data = con; | 528 | sock->sk->sk_user_data = con; |
549 | con->rx_action = receive_from_sock; | 529 | con->rx_action = receive_from_sock; |
550 | 530 | ||
551 | make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); | 531 | make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); |
552 | 532 | ||
553 | add_sock(sock, con); | 533 | add_sock(sock, con); |
554 | 534 | ||
@@ -577,7 +557,7 @@ out_err: | |||
577 | result = 0; | 557 | result = 0; |
578 | } | 558 | } |
579 | out: | 559 | out: |
580 | up_write(&con->sock_sem); | 560 | mutex_unlock(&con->sock_mutex); |
581 | return; | 561 | return; |
582 | } | 562 | } |
583 | 563 | ||
@@ -616,10 +596,10 @@ static struct socket *create_listen_sock(struct connection *con, | |||
616 | con->sock = sock; | 596 | con->sock = sock; |
617 | 597 | ||
618 | /* Bind to our port */ | 598 | /* Bind to our port */ |
619 | make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); | 599 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); |
620 | result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); | 600 | result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); |
621 | if (result < 0) { | 601 | if (result < 0) { |
622 | printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); | 602 | printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port); |
623 | sock_release(sock); | 603 | sock_release(sock); |
624 | sock = NULL; | 604 | sock = NULL; |
625 | con->sock = NULL; | 605 | con->sock = NULL; |
@@ -638,7 +618,7 @@ static struct socket *create_listen_sock(struct connection *con, | |||
638 | 618 | ||
639 | result = sock->ops->listen(sock, 5); | 619 | result = sock->ops->listen(sock, 5); |
640 | if (result < 0) { | 620 | if (result < 0) { |
641 | printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); | 621 | printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port); |
642 | sock_release(sock); | 622 | sock_release(sock); |
643 | sock = NULL; | 623 | sock = NULL; |
644 | goto create_out; | 624 | goto create_out; |
@@ -709,6 +689,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, | |||
709 | if (!con) | 689 | if (!con) |
710 | return NULL; | 690 | return NULL; |
711 | 691 | ||
692 | spin_lock(&con->writequeue_lock); | ||
712 | e = list_entry(con->writequeue.prev, struct writequeue_entry, list); | 693 | e = list_entry(con->writequeue.prev, struct writequeue_entry, list); |
713 | if ((&e->list == &con->writequeue) || | 694 | if ((&e->list == &con->writequeue) || |
714 | (PAGE_CACHE_SIZE - e->end < len)) { | 695 | (PAGE_CACHE_SIZE - e->end < len)) { |
@@ -747,6 +728,7 @@ void dlm_lowcomms_commit_buffer(void *mh) | |||
747 | struct connection *con = e->con; | 728 | struct connection *con = e->con; |
748 | int users; | 729 | int users; |
749 | 730 | ||
731 | spin_lock(&con->writequeue_lock); | ||
750 | users = --e->users; | 732 | users = --e->users; |
751 | if (users) | 733 | if (users) |
752 | goto out; | 734 | goto out; |
@@ -754,12 +736,8 @@ void dlm_lowcomms_commit_buffer(void *mh) | |||
754 | kunmap(e->page); | 736 | kunmap(e->page); |
755 | spin_unlock(&con->writequeue_lock); | 737 | spin_unlock(&con->writequeue_lock); |
756 | 738 | ||
757 | if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { | 739 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { |
758 | spin_lock_bh(&write_sockets_lock); | 740 | queue_work(send_workqueue, &con->swork); |
759 | list_add_tail(&con->write_list, &write_sockets); | ||
760 | spin_unlock_bh(&write_sockets_lock); | ||
761 | |||
762 | wake_up_interruptible(&lowcomms_send_waitq); | ||
763 | } | 741 | } |
764 | return; | 742 | return; |
765 | 743 | ||
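Note: dlm_lowcomms_get_buffer() and dlm_lowcomms_commit_buffer() now both take writequeue_lock before touching the entry, closing a window where two committers could each read a stale users count. A sketch of the locked decrement, with illustrative types:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct wq_entry {
            struct list_head list;
            int users;              /* reservations not yet committed */
    };

    /* Decrement under the queue lock; an unlocked --e->users lets two
     * committers race so that neither one schedules the send. */
    static bool commit_entry(spinlock_t *wq_lock, struct wq_entry *e)
    {
            bool last;

            spin_lock(wq_lock);
            last = (--e->users == 0);
            spin_unlock(wq_lock);
            return last;            /* caller queues send work when true */
    }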
@@ -783,7 +761,7 @@ static void send_to_sock(struct connection *con) | |||
783 | struct writequeue_entry *e; | 761 | struct writequeue_entry *e; |
784 | int len, offset; | 762 | int len, offset; |
785 | 763 | ||
786 | down_read(&con->sock_sem); | 764 | mutex_lock(&con->sock_mutex); |
787 | if (con->sock == NULL) | 765 | if (con->sock == NULL) |
788 | goto out_connect; | 766 | goto out_connect; |
789 | 767 | ||
@@ -800,6 +778,7 @@ static void send_to_sock(struct connection *con) | |||
800 | offset = e->offset; | 778 | offset = e->offset; |
801 | BUG_ON(len == 0 && e->users == 0); | 779 | BUG_ON(len == 0 && e->users == 0); |
802 | spin_unlock(&con->writequeue_lock); | 780 | spin_unlock(&con->writequeue_lock); |
781 | kmap(e->page); | ||
803 | 782 | ||
804 | ret = 0; | 783 | ret = 0; |
805 | if (len) { | 784 | if (len) { |
@@ -828,18 +807,18 @@ static void send_to_sock(struct connection *con) | |||
828 | } | 807 | } |
829 | spin_unlock(&con->writequeue_lock); | 808 | spin_unlock(&con->writequeue_lock); |
830 | out: | 809 | out: |
831 | up_read(&con->sock_sem); | 810 | mutex_unlock(&con->sock_mutex); |
832 | return; | 811 | return; |
833 | 812 | ||
834 | send_error: | 813 | send_error: |
835 | up_read(&con->sock_sem); | 814 | mutex_unlock(&con->sock_mutex); |
836 | close_connection(con, false); | 815 | close_connection(con, false); |
837 | lowcomms_connect_sock(con); | 816 | lowcomms_connect_sock(con); |
838 | return; | 817 | return; |
839 | 818 | ||
840 | out_connect: | 819 | out_connect: |
841 | up_read(&con->sock_sem); | 820 | mutex_unlock(&con->sock_mutex); |
842 | lowcomms_connect_sock(con); | 821 | connect_to_sock(con); |
843 | return; | 822 | return; |
844 | } | 823 | } |
845 | 824 | ||
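Note: the kmap(e->page) added to send_to_sock() pairs with the kunmap() in the commit path: on highmem configurations the write-queue page has no permanent kernel mapping, so each access needs its own map/unmap. out_connect likewise calls connect_to_sock() directly now that the code already runs in the send work item. The mapping pattern, roughly:

    #include <linux/highmem.h>

    static void send_page_sketch(struct page *pg, int offset, int len)
    {
            char *kaddr = kmap(pg); /* highmem pages need an explicit mapping */

            /* ... hand kaddr + offset / len to the socket layer ... */
            kunmap(pg);
    }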
@@ -872,7 +851,6 @@ int dlm_lowcomms_close(int nodeid) | |||
872 | if (con) { | 851 | if (con) { |
873 | clean_one_writequeue(con); | 852 | clean_one_writequeue(con); |
874 | close_connection(con, true); | 853 | close_connection(con, true); |
875 | atomic_set(&con->waiting_requests, 0); | ||
876 | } | 854 | } |
877 | return 0; | 855 | return 0; |
878 | 856 | ||
@@ -880,102 +858,29 @@ out: | |||
880 | return -1; | 858 | return -1; |
881 | } | 859 | } |
882 | 860 | ||
883 | /* API send message call, may queue the request */ | ||
884 | /* N.B. This is the old interface - use the new one for new calls */ | ||
885 | int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation) | ||
886 | { | ||
887 | struct writequeue_entry *e; | ||
888 | char *b; | ||
889 | |||
890 | e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b); | ||
891 | if (e) { | ||
892 | memcpy(b, buf, len); | ||
893 | dlm_lowcomms_commit_buffer(e); | ||
894 | return 0; | ||
895 | } | ||
896 | return -ENOBUFS; | ||
897 | } | ||
898 | |||
899 | /* Look for activity on active sockets */ | 861 | /* Look for activity on active sockets */ |
900 | static void process_sockets(void) | 862 | static void process_recv_sockets(struct work_struct *work) |
901 | { | 863 | { |
902 | struct list_head *list; | 864 | struct connection *con = container_of(work, struct connection, rwork); |
903 | struct list_head *temp; | 865 | int err; |
904 | int count = 0; | ||
905 | |||
906 | spin_lock_bh(&read_sockets_lock); | ||
907 | list_for_each_safe(list, temp, &read_sockets) { | ||
908 | 866 | ||
909 | struct connection *con = | 867 | clear_bit(CF_READ_PENDING, &con->flags); |
910 | list_entry(list, struct connection, read_list); | 868 | do { |
911 | list_del(&con->read_list); | 869 | err = con->rx_action(con); |
912 | clear_bit(CF_READ_PENDING, &con->flags); | 870 | } while (!err); |
913 | |||
914 | spin_unlock_bh(&read_sockets_lock); | ||
915 | |||
916 | /* This can reach zero if we are processing requests | ||
917 | * as they come in. | ||
918 | */ | ||
919 | if (atomic_read(&con->waiting_requests) == 0) { | ||
920 | spin_lock_bh(&read_sockets_lock); | ||
921 | continue; | ||
922 | } | ||
923 | |||
924 | do { | ||
925 | con->rx_action(con); | ||
926 | |||
927 | /* Don't starve out everyone else */ | ||
928 | if (++count >= MAX_RX_MSG_COUNT) { | ||
929 | cond_resched(); | ||
930 | count = 0; | ||
931 | } | ||
932 | |||
933 | } while (!atomic_dec_and_test(&con->waiting_requests) && | ||
934 | !kthread_should_stop()); | ||
935 | |||
936 | spin_lock_bh(&read_sockets_lock); | ||
937 | } | ||
938 | spin_unlock_bh(&read_sockets_lock); | ||
939 | } | 871 | } |
940 | 872 | ||
941 | /* Try to send any messages that are pending | ||
942 | */ | ||
943 | static void process_output_queue(void) | ||
944 | { | ||
945 | struct list_head *list; | ||
946 | struct list_head *temp; | ||
947 | |||
948 | spin_lock_bh(&write_sockets_lock); | ||
949 | list_for_each_safe(list, temp, &write_sockets) { | ||
950 | struct connection *con = | ||
951 | list_entry(list, struct connection, write_list); | ||
952 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
953 | list_del(&con->write_list); | ||
954 | |||
955 | spin_unlock_bh(&write_sockets_lock); | ||
956 | send_to_sock(con); | ||
957 | spin_lock_bh(&write_sockets_lock); | ||
958 | } | ||
959 | spin_unlock_bh(&write_sockets_lock); | ||
960 | } | ||
961 | 873 | ||
962 | static void process_state_queue(void) | 874 | static void process_send_sockets(struct work_struct *work) |
963 | { | 875 | { |
964 | struct list_head *list; | 876 | struct connection *con = container_of(work, struct connection, swork); |
965 | struct list_head *temp; | ||
966 | |||
967 | spin_lock_bh(&state_sockets_lock); | ||
968 | list_for_each_safe(list, temp, &state_sockets) { | ||
969 | struct connection *con = | ||
970 | list_entry(list, struct connection, state_list); | ||
971 | list_del(&con->state_list); | ||
972 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
973 | spin_unlock_bh(&state_sockets_lock); | ||
974 | 877 | ||
878 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
975 | connect_to_sock(con); | 879 | connect_to_sock(con); |
976 | spin_lock_bh(&state_sockets_lock); | ||
977 | } | 880 | } |
978 | spin_unlock_bh(&state_sockets_lock); | 881 | |
882 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
883 | send_to_sock(con); | ||
979 | } | 884 | } |
980 | 885 | ||
981 | 886 | ||
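Note: this hunk is the heart of the commit. The dlm_recvd/dlm_sendd kthreads, the read/write/state socket lists, and the spinlocks guarding them are all replaced by two workqueues with a work item embedded in each connection, so container_of() recovers the connection from the work pointer and no shared list is needed at all. The pattern in minimal form, with illustrative names:

    #include <linux/workqueue.h>

    struct wq_conn {
            struct work_struct rwork;       /* receive work */
            struct work_struct swork;       /* send work */
    };

    static void recv_handler(struct work_struct *work)
    {
            struct wq_conn *con = container_of(work, struct wq_conn, rwork);

            /* drain con's socket until its rx action reports -EAGAIN */
    }

    static void send_handler(struct work_struct *work)
    {
            struct wq_conn *con = container_of(work, struct wq_conn, swork);

            /* connect if pending, then flush con's write queue */
    }

    static void conn_init(struct wq_conn *con)
    {
            INIT_WORK(&con->rwork, recv_handler);
            INIT_WORK(&con->swork, send_handler);
    }

Fairness against a flood of messages, which the old MAX_RX_MSG_COUNT/cond_resched() logic handled by hand, now falls out of the workqueue scheduler.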
@@ -992,109 +897,33 @@ static void clean_writequeues(void) | |||
992 | } | 897 | } |
993 | } | 898 | } |
994 | 899 | ||
995 | static int read_list_empty(void) | 900 | static void work_stop(void) |
996 | { | 901 | { |
997 | int status; | 902 | destroy_workqueue(recv_workqueue); |
998 | 903 | destroy_workqueue(send_workqueue); | |
999 | spin_lock_bh(&read_sockets_lock); | ||
1000 | status = list_empty(&read_sockets); | ||
1001 | spin_unlock_bh(&read_sockets_lock); | ||
1002 | |||
1003 | return status; | ||
1004 | } | ||
1005 | |||
1006 | /* DLM Transport comms receive daemon */ | ||
1007 | static int dlm_recvd(void *data) | ||
1008 | { | ||
1009 | init_waitqueue_entry(&lowcomms_recv_waitq_head, current); | ||
1010 | add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); | ||
1011 | |||
1012 | while (!kthread_should_stop()) { | ||
1013 | set_current_state(TASK_INTERRUPTIBLE); | ||
1014 | if (read_list_empty()) | ||
1015 | cond_resched(); | ||
1016 | set_current_state(TASK_RUNNING); | ||
1017 | |||
1018 | process_sockets(); | ||
1019 | } | ||
1020 | |||
1021 | return 0; | ||
1022 | } | 904 | } |
1023 | 905 | ||
1024 | static int write_and_state_lists_empty(void) | 906 | static int work_start(void) |
1025 | { | 907 | { |
1026 | int status; | ||
1027 | |||
1028 | spin_lock_bh(&write_sockets_lock); | ||
1029 | status = list_empty(&write_sockets); | ||
1030 | spin_unlock_bh(&write_sockets_lock); | ||
1031 | |||
1032 | spin_lock_bh(&state_sockets_lock); | ||
1033 | if (list_empty(&state_sockets) == 0) | ||
1034 | status = 0; | ||
1035 | spin_unlock_bh(&state_sockets_lock); | ||
1036 | |||
1037 | return status; | ||
1038 | } | ||
1039 | |||
1040 | /* DLM Transport send daemon */ | ||
1041 | static int dlm_sendd(void *data) | ||
1042 | { | ||
1043 | init_waitqueue_entry(&lowcomms_send_waitq_head, current); | ||
1044 | add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head); | ||
1045 | |||
1046 | while (!kthread_should_stop()) { | ||
1047 | set_current_state(TASK_INTERRUPTIBLE); | ||
1048 | if (write_and_state_lists_empty()) | ||
1049 | cond_resched(); | ||
1050 | set_current_state(TASK_RUNNING); | ||
1051 | |||
1052 | process_state_queue(); | ||
1053 | process_output_queue(); | ||
1054 | } | ||
1055 | |||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
1059 | static void daemons_stop(void) | ||
1060 | { | ||
1061 | kthread_stop(recv_task); | ||
1062 | kthread_stop(send_task); | ||
1063 | } | ||
1064 | |||
1065 | static int daemons_start(void) | ||
1066 | { | ||
1067 | struct task_struct *p; | ||
1068 | int error; | 908 | int error; |
1069 | 909 | recv_workqueue = create_workqueue("dlm_recv"); | |
1070 | p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); | 910 | error = IS_ERR(recv_workqueue); |
1071 | error = IS_ERR(p); | ||
1072 | if (error) { | 911 | if (error) { |
1073 | log_print("can't start dlm_recvd %d", error); | 912 | log_print("can't start dlm_recv %d", error); |
1074 | return error; | 913 | return error; |
1075 | } | 914 | } |
1076 | recv_task = p; | ||
1077 | 915 | ||
1078 | p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); | 916 | send_workqueue = create_singlethread_workqueue("dlm_send"); |
1079 | error = IS_ERR(p); | 917 | error = IS_ERR(send_workqueue); |
1080 | if (error) { | 918 | if (error) { |
1081 | log_print("can't start dlm_sendd %d", error); | 919 | log_print("can't start dlm_send %d", error); |
1082 | kthread_stop(recv_task); | 920 | destroy_workqueue(recv_workqueue); |
1083 | return error; | 921 | return error; |
1084 | } | 922 | } |
1085 | send_task = p; | ||
1086 | 923 | ||
1087 | return 0; | 924 | return 0; |
1088 | } | 925 | } |
1089 | 926 | ||
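Note: work_start()/work_stop() replace daemons_start()/daemons_stop(). One caveat for anyone reusing the pattern: create_workqueue() conventionally signals failure with a NULL return rather than an ERR_PTR, so a NULL test is the usual idiom. A sketch of the lifecycle under that assumption:

    static struct workqueue_struct *ex_recv_wq, *ex_send_wq;

    static int start_queues(void)
    {
            ex_recv_wq = create_workqueue("ex_recv");
            if (!ex_recv_wq)                /* NULL, not ERR_PTR, on failure */
                    return -ENOMEM;

            ex_send_wq = create_singlethread_workqueue("ex_send");
            if (!ex_send_wq) {
                    destroy_workqueue(ex_recv_wq);
                    return -ENOMEM;
            }
            return 0;
    }

    static void stop_queues(void)
    {
            destroy_workqueue(ex_recv_wq);  /* flushes pending work first */
            destroy_workqueue(ex_send_wq);
    }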
1090 | /* | ||
1091 | * Return the largest buffer size we can cope with. | ||
1092 | */ | ||
1093 | int lowcomms_max_buffer_size(void) | ||
1094 | { | ||
1095 | return PAGE_CACHE_SIZE; | ||
1096 | } | ||
1097 | |||
1098 | void dlm_lowcomms_stop(void) | 927 | void dlm_lowcomms_stop(void) |
1099 | { | 928 | { |
1100 | int i; | 929 | int i; |
@@ -1107,7 +936,7 @@ void dlm_lowcomms_stop(void) | |||
1107 | connections[i]->flags |= 0xFF; | 936 | connections[i]->flags |= 0xFF; |
1108 | } | 937 | } |
1109 | 938 | ||
1110 | daemons_stop(); | 939 | work_stop(); |
1111 | clean_writequeues(); | 940 | clean_writequeues(); |
1112 | 941 | ||
1113 | for (i = 0; i < conn_array_size; i++) { | 942 | for (i = 0; i < conn_array_size; i++) { |
@@ -1159,7 +988,7 @@ int dlm_lowcomms_start(void) | |||
1159 | if (error) | 988 | if (error) |
1160 | goto fail_unlisten; | 989 | goto fail_unlisten; |
1161 | 990 | ||
1162 | error = daemons_start(); | 991 | error = work_start(); |
1163 | if (error) | 992 | if (error) |
1164 | goto fail_unlisten; | 993 | goto fail_unlisten; |
1165 | 994 | ||
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index c9b1c3d535f4..a5126e0c68a6 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c | |||
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, | |||
82 | if (msglen < sizeof(struct dlm_header)) | 82 | if (msglen < sizeof(struct dlm_header)) |
83 | break; | 83 | break; |
84 | err = -E2BIG; | 84 | err = -E2BIG; |
85 | if (msglen > dlm_config.buffer_size) { | 85 | if (msglen > dlm_config.ci_buffer_size) { |
86 | log_print("message size %d from %d too big, buf len %d", | 86 | log_print("message size %d from %d too big, buf len %d", |
87 | msglen, nodeid, len); | 87 | msglen, nodeid, len); |
88 | break; | 88 | break; |
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, | |||
103 | 103 | ||
104 | if (msglen > sizeof(__tmp) && | 104 | if (msglen > sizeof(__tmp) && |
105 | msg == (struct dlm_header *) __tmp) { | 105 | msg == (struct dlm_header *) __tmp) { |
106 | msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); | 106 | msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); |
107 | if (msg == NULL) | 107 | if (msg == NULL) |
108 | return ret; | 108 | return ret; |
109 | } | 109 | } |
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 4cc31be9cd9d..6bfbd6153809 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, | |||
56 | 56 | ||
57 | rc->rc_type = type; | 57 | rc->rc_type = type; |
58 | 58 | ||
59 | spin_lock(&ls->ls_recover_lock); | ||
60 | rc->rc_seq = ls->ls_recover_seq; | ||
61 | spin_unlock(&ls->ls_recover_lock); | ||
62 | |||
59 | *mh_ret = mh; | 63 | *mh_ret = mh; |
60 | *rc_ret = rc; | 64 | *rc_ret = rc; |
61 | return 0; | 65 | return 0; |
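Note: every rcom sent out is now stamped with the lockspace's current recovery sequence number, read under ls_recover_lock because dlm_recoverd can bump the counter concurrently and a bare 64-bit load can tear on 32-bit machines. The read half, sketched:

    static u64 read_seq(spinlock_t *lock, const u64 *seq)
    {
            u64 v;

            spin_lock(lock);        /* unlocked u64 reads can tear on 32-bit */
            v = *seq;
            spin_unlock(lock);
            return v;
    }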
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf) | |||
78 | rf->rf_lsflags = ls->ls_exflags; | 82 | rf->rf_lsflags = ls->ls_exflags; |
79 | } | 83 | } |
80 | 84 | ||
81 | static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) | 85 | static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
82 | { | 86 | { |
87 | struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; | ||
88 | |||
89 | if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { | ||
90 | log_error(ls, "version mismatch: %x nodeid %d: %x", | ||
91 | DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, | ||
92 | rc->rc_header.h_version); | ||
93 | return -EINVAL; | ||
94 | } | ||
95 | |||
83 | if (rf->rf_lvblen != ls->ls_lvblen || | 96 | if (rf->rf_lvblen != ls->ls_lvblen || |
84 | rf->rf_lsflags != ls->ls_exflags) { | 97 | rf->rf_lsflags != ls->ls_exflags) { |
85 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", | 98 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", |
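Note: check_config() now receives the whole dlm_rcom so it can reject a peer on header version before even parsing the config payload; masking with 0xFFFF0000 compares only the major number, so nodes may differ in minor revision. The test, generalized:

    /* high 16 bits = major (must match), low 16 = minor (may differ) */
    static int versions_compatible(unsigned int ours, unsigned int theirs)
    {
            return (ours & 0xFFFF0000) == (theirs & 0xFFFF0000);
    }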
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | |||
125 | goto out; | 138 | goto out; |
126 | 139 | ||
127 | allow_sync_reply(ls, &rc->rc_id); | 140 | allow_sync_reply(ls, &rc->rc_id); |
128 | memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); | 141 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); |
129 | 142 | ||
130 | send_rcom(ls, mh, rc); | 143 | send_rcom(ls, mh, rc); |
131 | 144 | ||
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | |||
141 | log_debug(ls, "remote node %d not ready", nodeid); | 154 | log_debug(ls, "remote node %d not ready", nodeid); |
142 | rc->rc_result = 0; | 155 | rc->rc_result = 0; |
143 | } else | 156 | } else |
144 | error = check_config(ls, (struct rcom_config *) rc->rc_buf, | 157 | error = check_config(ls, rc, nodeid); |
145 | nodeid); | ||
146 | /* the caller looks at rc_result for the remote recovery status */ | 158 | /* the caller looks at rc_result for the remote recovery status */ |
147 | out: | 159 | out: |
148 | return error; | 160 | return error; |
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
159 | if (error) | 171 | if (error) |
160 | return; | 172 | return; |
161 | rc->rc_id = rc_in->rc_id; | 173 | rc->rc_id = rc_in->rc_id; |
174 | rc->rc_seq_reply = rc_in->rc_seq; | ||
162 | rc->rc_result = dlm_recover_status(ls); | 175 | rc->rc_result = dlm_recover_status(ls); |
163 | make_config(ls, (struct rcom_config *) rc->rc_buf); | 176 | make_config(ls, (struct rcom_config *) rc->rc_buf); |
164 | 177 | ||
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | |||
200 | if (nodeid == dlm_our_nodeid()) { | 213 | if (nodeid == dlm_our_nodeid()) { |
201 | dlm_copy_master_names(ls, last_name, last_len, | 214 | dlm_copy_master_names(ls, last_name, last_len, |
202 | ls->ls_recover_buf + len, | 215 | ls->ls_recover_buf + len, |
203 | dlm_config.buffer_size - len, nodeid); | 216 | dlm_config.ci_buffer_size - len, nodeid); |
204 | goto out; | 217 | goto out; |
205 | } | 218 | } |
206 | 219 | ||
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | |||
210 | memcpy(rc->rc_buf, last_name, last_len); | 223 | memcpy(rc->rc_buf, last_name, last_len); |
211 | 224 | ||
212 | allow_sync_reply(ls, &rc->rc_id); | 225 | allow_sync_reply(ls, &rc->rc_id); |
213 | memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); | 226 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); |
214 | 227 | ||
215 | send_rcom(ls, mh, rc); | 228 | send_rcom(ls, mh, rc); |
216 | 229 | ||
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
224 | { | 237 | { |
225 | struct dlm_rcom *rc; | 238 | struct dlm_rcom *rc; |
226 | struct dlm_mhandle *mh; | 239 | struct dlm_mhandle *mh; |
227 | int error, inlen, outlen; | 240 | int error, inlen, outlen, nodeid; |
228 | int nodeid = rc_in->rc_header.h_nodeid; | ||
229 | uint32_t status = dlm_recover_status(ls); | ||
230 | |||
231 | /* | ||
232 | * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while | ||
233 | * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes). | ||
234 | * It could only happen in rare cases where we get a late NAMES | ||
235 | * message from a previous instance of recovery. | ||
236 | */ | ||
237 | |||
238 | if (!(status & DLM_RS_NODES)) { | ||
239 | log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid); | ||
240 | return; | ||
241 | } | ||
242 | 241 | ||
243 | nodeid = rc_in->rc_header.h_nodeid; | 242 | nodeid = rc_in->rc_header.h_nodeid; |
244 | inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); | 243 | inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); |
245 | outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); | 244 | outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); |
246 | 245 | ||
247 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); | 246 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); |
248 | if (error) | 247 | if (error) |
249 | return; | 248 | return; |
250 | rc->rc_id = rc_in->rc_id; | 249 | rc->rc_id = rc_in->rc_id; |
250 | rc->rc_seq_reply = rc_in->rc_seq; | ||
251 | 251 | ||
252 | dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, | 252 | dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, |
253 | nodeid); | 253 | nodeid); |
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
294 | ret_nodeid = error; | 294 | ret_nodeid = error; |
295 | rc->rc_result = ret_nodeid; | 295 | rc->rc_result = ret_nodeid; |
296 | rc->rc_id = rc_in->rc_id; | 296 | rc->rc_id = rc_in->rc_id; |
297 | rc->rc_seq_reply = rc_in->rc_seq; | ||
297 | 298 | ||
298 | send_rcom(ls, mh, rc); | 299 | send_rcom(ls, mh, rc); |
299 | } | 300 | } |
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
375 | 376 | ||
376 | memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); | 377 | memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); |
377 | rc->rc_id = rc_in->rc_id; | 378 | rc->rc_id = rc_in->rc_id; |
379 | rc->rc_seq_reply = rc_in->rc_seq; | ||
378 | 380 | ||
379 | send_rcom(ls, mh, rc); | 381 | send_rcom(ls, mh, rc); |
380 | } | 382 | } |
381 | 383 | ||
382 | static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | 384 | static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) |
383 | { | 385 | { |
384 | uint32_t status = dlm_recover_status(ls); | ||
385 | |||
386 | if (!(status & DLM_RS_DIR)) { | ||
387 | log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u", | ||
388 | rc_in->rc_header.h_nodeid); | ||
389 | return; | ||
390 | } | ||
391 | |||
392 | dlm_recover_process_copy(ls, rc_in); | 386 | dlm_recover_process_copy(ls, rc_in); |
393 | } | 387 | } |
394 | 388 | ||
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | |||
415 | 409 | ||
416 | rc->rc_type = DLM_RCOM_STATUS_REPLY; | 410 | rc->rc_type = DLM_RCOM_STATUS_REPLY; |
417 | rc->rc_id = rc_in->rc_id; | 411 | rc->rc_id = rc_in->rc_id; |
412 | rc->rc_seq_reply = rc_in->rc_seq; | ||
418 | rc->rc_result = -ESRCH; | 413 | rc->rc_result = -ESRCH; |
419 | 414 | ||
420 | rf = (struct rcom_config *) rc->rc_buf; | 415 | rf = (struct rcom_config *) rc->rc_buf; |
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | |||
426 | return 0; | 421 | return 0; |
427 | } | 422 | } |
428 | 423 | ||
424 | static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
425 | { | ||
426 | uint64_t seq; | ||
427 | int rv = 0; | ||
428 | |||
429 | switch (rc->rc_type) { | ||
430 | case DLM_RCOM_STATUS_REPLY: | ||
431 | case DLM_RCOM_NAMES_REPLY: | ||
432 | case DLM_RCOM_LOOKUP_REPLY: | ||
433 | case DLM_RCOM_LOCK_REPLY: | ||
434 | spin_lock(&ls->ls_recover_lock); | ||
435 | seq = ls->ls_recover_seq; | ||
436 | spin_unlock(&ls->ls_recover_lock); | ||
437 | if (rc->rc_seq_reply != seq) { | ||
438 | log_debug(ls, "ignoring old reply %x from %d " | ||
439 | "seq_reply %llx expect %llx", | ||
440 | rc->rc_type, rc->rc_header.h_nodeid, | ||
441 | (unsigned long long)rc->rc_seq_reply, | ||
442 | (unsigned long long)seq); | ||
443 | rv = 1; | ||
444 | } | ||
445 | } | ||
446 | return rv; | ||
447 | } | ||
448 | |||
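Note: is_old_reply() is the receive-side half of the new sequence protocol: dlm_rcom_status() and friends stamp rc_seq when they send, every reply handler echoes it into rc_seq_reply, and a reply whose echoed sequence no longer matches the lockspace's current recovery generation is logged and dropped. That replaces the coarser DLM_RS_NODES/DLM_RS_DIR status checks deleted from receive_rcom_names() and receive_rcom_lock_reply(). The round trip, in compact illustrative form:

    struct rcom_stub {
            u64 seq;                /* stamped by the requester */
            u64 seq_reply;          /* echoed back by the responder */
    };

    static void stamp_request(struct rcom_stub *req, u64 current_seq)
    {
            req->seq = current_seq;
    }

    static void echo_into_reply(struct rcom_stub *reply,
                                const struct rcom_stub *req)
    {
            reply->seq_reply = req->seq;
    }

    /* On receive: anything from an older recovery pass is ignored. */
    static bool stale(const struct rcom_stub *reply, u64 current_seq)
    {
            return reply->seq_reply != current_seq;
    }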
429 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special | 449 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special |
430 | recovery-only comms are sent through here. */ | 450 | recovery-only comms are sent through here. */ |
431 | 451 | ||
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) | |||
449 | } | 469 | } |
450 | 470 | ||
451 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { | 471 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { |
452 | log_error(ls, "ignoring recovery message %x from %d", | 472 | log_debug(ls, "ignoring recovery message %x from %d", |
453 | rc->rc_type, nodeid); | 473 | rc->rc_type, nodeid); |
454 | goto out; | 474 | goto out; |
455 | } | 475 | } |
456 | 476 | ||
477 | if (is_old_reply(ls, rc)) | ||
478 | goto out; | ||
479 | |||
457 | if (nodeid != rc->rc_header.h_nodeid) { | 480 | if (nodeid != rc->rc_header.h_nodeid) { |
458 | log_error(ls, "bad rcom nodeid %d from %d", | 481 | log_error(ls, "bad rcom nodeid %d from %d", |
459 | rc->rc_header.h_nodeid, nodeid); | 482 | rc->rc_header.h_nodeid, nodeid); |
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index cf9f6831bab5..c2cc7694cd16 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -44,7 +44,7 @@ | |||
44 | static void dlm_wait_timer_fn(unsigned long data) | 44 | static void dlm_wait_timer_fn(unsigned long data) |
45 | { | 45 | { |
46 | struct dlm_ls *ls = (struct dlm_ls *) data; | 46 | struct dlm_ls *ls = (struct dlm_ls *) data; |
47 | mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); | 47 | mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ)); |
48 | wake_up(&ls->ls_wait_general); | 48 | wake_up(&ls->ls_wait_general); |
49 | } | 49 | } |
50 | 50 | ||
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) | |||
55 | init_timer(&ls->ls_timer); | 55 | init_timer(&ls->ls_timer); |
56 | ls->ls_timer.function = dlm_wait_timer_fn; | 56 | ls->ls_timer.function = dlm_wait_timer_fn; |
57 | ls->ls_timer.data = (long) ls; | 57 | ls->ls_timer.data = (long) ls; |
58 | ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); | 58 | ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ); |
59 | add_timer(&ls->ls_timer); | 59 | add_timer(&ls->ls_timer); |
60 | 60 | ||
61 | wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); | 61 | wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); |
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
397 | 397 | ||
398 | if (dlm_no_directory(ls)) | 398 | if (dlm_no_directory(ls)) |
399 | count += recover_master_static(r); | 399 | count += recover_master_static(r); |
400 | else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { | 400 | else if (!is_master(r) && |
401 | (dlm_is_removed(ls, r->res_nodeid) || | ||
402 | rsb_flag(r, RSB_NEW_MASTER))) { | ||
401 | recover_master(r); | 403 | recover_master(r); |
402 | count++; | 404 | count++; |
403 | } | 405 | } |
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 650536aa5139..3cb636d60249 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
77 | 77 | ||
78 | error = dlm_recover_members(ls, rv, &neg); | 78 | error = dlm_recover_members(ls, rv, &neg); |
79 | if (error) { | 79 | if (error) { |
80 | log_error(ls, "recover_members failed %d", error); | 80 | log_debug(ls, "recover_members failed %d", error); |
81 | goto fail; | 81 | goto fail; |
82 | } | 82 | } |
83 | start = jiffies; | 83 | start = jiffies; |
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
89 | 89 | ||
90 | error = dlm_recover_directory(ls); | 90 | error = dlm_recover_directory(ls); |
91 | if (error) { | 91 | if (error) { |
92 | log_error(ls, "recover_directory failed %d", error); | 92 | log_debug(ls, "recover_directory failed %d", error); |
93 | goto fail; | 93 | goto fail; |
94 | } | 94 | } |
95 | 95 | ||
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
99 | 99 | ||
100 | error = dlm_recover_directory_wait(ls); | 100 | error = dlm_recover_directory_wait(ls); |
101 | if (error) { | 101 | if (error) { |
102 | log_error(ls, "recover_directory_wait failed %d", error); | 102 | log_debug(ls, "recover_directory_wait failed %d", error); |
103 | goto fail; | 103 | goto fail; |
104 | } | 104 | } |
105 | 105 | ||
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
129 | 129 | ||
130 | error = dlm_recover_masters(ls); | 130 | error = dlm_recover_masters(ls); |
131 | if (error) { | 131 | if (error) { |
132 | log_error(ls, "recover_masters failed %d", error); | 132 | log_debug(ls, "recover_masters failed %d", error); |
133 | goto fail; | 133 | goto fail; |
134 | } | 134 | } |
135 | 135 | ||
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
139 | 139 | ||
140 | error = dlm_recover_locks(ls); | 140 | error = dlm_recover_locks(ls); |
141 | if (error) { | 141 | if (error) { |
142 | log_error(ls, "recover_locks failed %d", error); | 142 | log_debug(ls, "recover_locks failed %d", error); |
143 | goto fail; | 143 | goto fail; |
144 | } | 144 | } |
145 | 145 | ||
146 | error = dlm_recover_locks_wait(ls); | 146 | error = dlm_recover_locks_wait(ls); |
147 | if (error) { | 147 | if (error) { |
148 | log_error(ls, "recover_locks_wait failed %d", error); | 148 | log_debug(ls, "recover_locks_wait failed %d", error); |
149 | goto fail; | 149 | goto fail; |
150 | } | 150 | } |
151 | 151 | ||
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
166 | 166 | ||
167 | error = dlm_recover_locks_wait(ls); | 167 | error = dlm_recover_locks_wait(ls); |
168 | if (error) { | 168 | if (error) { |
169 | log_error(ls, "recover_locks_wait failed %d", error); | 169 | log_debug(ls, "recover_locks_wait failed %d", error); |
170 | goto fail; | 170 | goto fail; |
171 | } | 171 | } |
172 | } | 172 | } |
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
184 | dlm_set_recover_status(ls, DLM_RS_DONE); | 184 | dlm_set_recover_status(ls, DLM_RS_DONE); |
185 | error = dlm_recover_done_wait(ls); | 185 | error = dlm_recover_done_wait(ls); |
186 | if (error) { | 186 | if (error) { |
187 | log_error(ls, "recover_done_wait failed %d", error); | 187 | log_debug(ls, "recover_done_wait failed %d", error); |
188 | goto fail; | 188 | goto fail; |
189 | } | 189 | } |
190 | 190 | ||
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
192 | 192 | ||
193 | error = enable_locking(ls, rv->seq); | 193 | error = enable_locking(ls, rv->seq); |
194 | if (error) { | 194 | if (error) { |
195 | log_error(ls, "enable_locking failed %d", error); | 195 | log_debug(ls, "enable_locking failed %d", error); |
196 | goto fail; | 196 | goto fail; |
197 | } | 197 | } |
198 | 198 | ||
199 | error = dlm_process_requestqueue(ls); | 199 | error = dlm_process_requestqueue(ls); |
200 | if (error) { | 200 | if (error) { |
201 | log_error(ls, "process_requestqueue failed %d", error); | 201 | log_debug(ls, "process_requestqueue failed %d", error); |
202 | goto fail; | 202 | goto fail; |
203 | } | 203 | } |
204 | 204 | ||
205 | error = dlm_recover_waiters_post(ls); | 205 | error = dlm_recover_waiters_post(ls); |
206 | if (error) { | 206 | if (error) { |
207 | log_error(ls, "recover_waiters_post failed %d", error); | 207 | log_debug(ls, "recover_waiters_post failed %d", error); |
208 | goto fail; | 208 | goto fail; |
209 | } | 209 | } |
210 | 210 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index c37e93e4f2df..d378b7fe2a1e 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
180 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) | 180 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) |
181 | remove_ownqueue = 1; | 181 | remove_ownqueue = 1; |
182 | 182 | ||
183 | /* unlocks or cancels of waiting requests need to be removed from the | ||
184 | proc's unlocking list, again there must be a better way... */ | ||
185 | |||
186 | if (ua->lksb.sb_status == -DLM_EUNLOCK || | ||
187 | (ua->lksb.sb_status == -DLM_ECANCEL && | ||
188 | lkb->lkb_grmode == DLM_LOCK_IV)) | ||
189 | remove_ownqueue = 1; | ||
190 | |||
183 | /* We want to copy the lvb to userspace when the completion | 191 | /* We want to copy the lvb to userspace when the completion |
184 | ast is read if the status is 0, the lock has an lvb and | 192 | ast is read if the status is 0, the lock has an lvb and |
185 | lvb_ops says we should. We could probably have set_lvb_lock() | 193 | lvb_ops says we should. We could probably have set_lvb_lock() |
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file) | |||
523 | proc->lockspace = ls->ls_local_handle; | 531 | proc->lockspace = ls->ls_local_handle; |
524 | INIT_LIST_HEAD(&proc->asts); | 532 | INIT_LIST_HEAD(&proc->asts); |
525 | INIT_LIST_HEAD(&proc->locks); | 533 | INIT_LIST_HEAD(&proc->locks); |
534 | INIT_LIST_HEAD(&proc->unlocking); | ||
526 | spin_lock_init(&proc->asts_spin); | 535 | spin_lock_init(&proc->asts_spin); |
527 | spin_lock_init(&proc->locks_spin); | 536 | spin_lock_init(&proc->locks_spin); |
528 | init_waitqueue_head(&proc->wait); | 537 | init_waitqueue_head(&proc->wait); |
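Note: the added test in dlm_user_add_ast() pulls a lock off the per-process list when its completion says the lkb is gone: -DLM_EUNLOCK means an explicit unlock finished, and -DLM_ECANCEL with lkb_grmode == DLM_LOCK_IV means a cancel killed a request that was never granted. Assuming those status conventions, the condition reads:

    /* DLM_LOCK_IV is the invalid grant mode: the lock was cancelled
     * before it was ever granted, so nothing remains to track. */
    static bool drop_from_ownqueue(int sb_status, int grmode)
    {
            return sb_status == -DLM_EUNLOCK ||
                   (sb_status == -DLM_ECANCEL && grmode == DLM_LOCK_IV);
    }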
diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 767197db9944..963889cf6740 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c | |||
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc) | |||
134 | rc->rc_type = cpu_to_le32(rc->rc_type); | 134 | rc->rc_type = cpu_to_le32(rc->rc_type); |
135 | rc->rc_result = cpu_to_le32(rc->rc_result); | 135 | rc->rc_result = cpu_to_le32(rc->rc_result); |
136 | rc->rc_id = cpu_to_le64(rc->rc_id); | 136 | rc->rc_id = cpu_to_le64(rc->rc_id); |
137 | rc->rc_seq = cpu_to_le64(rc->rc_seq); | ||
138 | rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); | ||
137 | 139 | ||
138 | if (type == DLM_RCOM_LOCK) | 140 | if (type == DLM_RCOM_LOCK) |
139 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); | 141 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); |
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc) | |||
151 | rc->rc_type = le32_to_cpu(rc->rc_type); | 153 | rc->rc_type = le32_to_cpu(rc->rc_type); |
152 | rc->rc_result = le32_to_cpu(rc->rc_result); | 154 | rc->rc_result = le32_to_cpu(rc->rc_result); |
153 | rc->rc_id = le64_to_cpu(rc->rc_id); | 155 | rc->rc_id = le64_to_cpu(rc->rc_id); |
156 | rc->rc_seq = le64_to_cpu(rc->rc_seq); | ||
157 | rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); | ||
154 | 158 | ||
155 | if (rc->rc_type == DLM_RCOM_LOCK) | 159 | if (rc->rc_type == DLM_RCOM_LOCK) |
156 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); | 160 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); |
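Note: the new rc_seq/rc_seq_reply fields get matching conversions in dlm_rcom_out() and dlm_rcom_in(); every field added to an on-wire structure needs both halves, or nodes of different endianness will disagree about its value. A sparse-friendly variant of the same pairing:

    #include <linux/types.h>

    struct seq_wire {
            __le64 seq;             /* always little-endian on the wire */
    };

    static void seq_out(struct seq_wire *w, u64 host_seq)
    {
            w->seq = cpu_to_le64(host_seq);         /* host -> wire */
    }

    static u64 seq_in(const struct seq_wire *w)
    {
            return le64_to_cpu(w->seq);             /* wire -> host */
    }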
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 6a2ffa2db14f..de8e64c03f73 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -4,44 +4,43 @@ config GFS2_FS | |||
4 | select FS_POSIX_ACL | 4 | select FS_POSIX_ACL |
5 | select CRC32 | 5 | select CRC32 |
6 | help | 6 | help |
7 | A cluster filesystem. | 7 | A cluster filesystem. |
8 | 8 | ||
9 | Allows a cluster of computers to simultaneously use a block device | 9 | Allows a cluster of computers to simultaneously use a block device |
10 | that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads | 10 | that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads |
11 | and writes to the block device like a local filesystem, but also uses | 11 | and writes to the block device like a local filesystem, but also uses |
12 | a lock module to allow the computers to coordinate their I/O so | 12 | a lock module to allow the computers to coordinate their I/O so |
13 | filesystem consistency is maintained. One of the nifty features of | 13 | filesystem consistency is maintained. One of the nifty features of |
14 | GFS is perfect consistency -- changes made to the filesystem on one | 14 | GFS is perfect consistency -- changes made to the filesystem on one |
15 | machine show up immediately on all other machines in the cluster. | 15 | machine show up immediately on all other machines in the cluster. |
16 | 16 | ||
17 | To use the GFS2 filesystem, you will need to enable one or more of | 17 | To use the GFS2 filesystem, you will need to enable one or more of |
18 | the below locking modules. Documentation and utilities for GFS2 can | 18 | the below locking modules. Documentation and utilities for GFS2 can |
19 | be found here: http://sources.redhat.com/cluster | 19 | be found here: http://sources.redhat.com/cluster |
20 | 20 | ||
21 | config GFS2_FS_LOCKING_NOLOCK | 21 | config GFS2_FS_LOCKING_NOLOCK |
22 | tristate "GFS2 \"nolock\" locking module" | 22 | tristate "GFS2 \"nolock\" locking module" |
23 | depends on GFS2_FS | 23 | depends on GFS2_FS |
24 | help | 24 | help |
25 | Single node locking module for GFS2. | 25 | Single node locking module for GFS2. |
26 | 26 | ||
27 | Use this module if you want to use GFS2 on a single node without | 27 | Use this module if you want to use GFS2 on a single node without |
28 | its clustering features. You can still take advantage of the | 28 | its clustering features. You can still take advantage of the |
29 | large file support, and upgrade to running a full cluster later on | 29 | large file support, and upgrade to running a full cluster later on |
30 | if required. | 30 | if required. |
31 | 31 | ||
32 | If you will only be using GFS2 in cluster mode, you do not need this | 32 | If you will only be using GFS2 in cluster mode, you do not need this |
33 | module. | 33 | module. |
34 | 34 | ||
35 | config GFS2_FS_LOCKING_DLM | 35 | config GFS2_FS_LOCKING_DLM |
36 | tristate "GFS2 DLM locking module" | 36 | tristate "GFS2 DLM locking module" |
37 | depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n) | 37 | depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n) |
38 | select IP_SCTP if DLM_SCTP | 38 | select IP_SCTP if DLM_SCTP |
39 | select CONFIGFS_FS | 39 | select CONFIGFS_FS |
40 | select DLM | 40 | select DLM |
41 | help | 41 | help |
42 | Multiple node locking module for GFS2 | 42 | Multiple node locking module for GFS2 |
43 | |||
44 | Most users of GFS2 will require this module. It provides the locking | ||
45 | interface between GFS2 and the DLM, which is required to use GFS2 | ||
46 | in a cluster environment. | ||
47 | 43 | ||
44 | Most users of GFS2 will require this module. It provides the locking | ||
45 | interface between GFS2 and the DLM, which is required to use GFS2 | ||
46 | in a cluster environment. | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8240c1ff94f4..113f6c9110c7 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
773 | gfs2_free_data(ip, bstart, blen); | 773 | gfs2_free_data(ip, bstart, blen); |
774 | } | 774 | } |
775 | 775 | ||
776 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 776 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
777 | 777 | ||
778 | gfs2_dinode_out(ip, dibh->b_data); | 778 | gfs2_dinode_out(ip, dibh->b_data); |
779 | 779 | ||
@@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) | |||
848 | } | 848 | } |
849 | 849 | ||
850 | ip->i_di.di_size = size; | 850 | ip->i_di.di_size = size; |
851 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 851 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
852 | 852 | ||
853 | error = gfs2_meta_inode_buffer(ip, &dibh); | 853 | error = gfs2_meta_inode_buffer(ip, &dibh); |
854 | if (error) | 854 | if (error) |
@@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
963 | 963 | ||
964 | if (gfs2_is_stuffed(ip)) { | 964 | if (gfs2_is_stuffed(ip)) { |
965 | ip->i_di.di_size = size; | 965 | ip->i_di.di_size = size; |
966 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 966 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
967 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 967 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
968 | gfs2_dinode_out(ip, dibh->b_data); | 968 | gfs2_dinode_out(ip, dibh->b_data); |
969 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); | 969 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); |
@@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
975 | 975 | ||
976 | if (!error) { | 976 | if (!error) { |
977 | ip->i_di.di_size = size; | 977 | ip->i_di.di_size = size; |
978 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 978 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
979 | ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; | 979 | ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; |
980 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 980 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
981 | gfs2_dinode_out(ip, dibh->b_data); | 981 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip) | |||
1048 | ip->i_num.no_addr; | 1048 | ip->i_num.no_addr; |
1049 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1049 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
1050 | } | 1050 | } |
1051 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 1051 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1052 | ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; | 1052 | ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; |
1053 | 1053 | ||
1054 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1054 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
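Note: the string of get_seconds() conversions in this file (and in dir.c and eattr.c below) all follow one shape: assigning CURRENT_TIME_SEC stores a whole struct timespec, so tv_nsec is explicitly zeroed instead of keeping whatever stale value the old tv_sec-only assignment left behind. In effect:

    #include <linux/fs.h>
    #include <linux/time.h>

    /* CURRENT_TIME_SEC expands to roughly (struct timespec){ get_seconds(), 0 } */
    static void touch_times(struct inode *inode)
    {
            struct timespec now = { .tv_sec = get_seconds(), .tv_nsec = 0 };

            inode->i_mtime = inode->i_ctime = now;
    }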
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0fdcb7713cd9..c93ca8f361b5 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | |||
131 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | 131 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); |
132 | if (ip->i_di.di_size < offset + size) | 132 | if (ip->i_di.di_size < offset + size) |
133 | ip->i_di.di_size = offset + size; | 133 | ip->i_di.di_size = offset + size; |
134 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 134 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
135 | gfs2_dinode_out(ip, dibh->b_data); | 135 | gfs2_dinode_out(ip, dibh->b_data); |
136 | 136 | ||
137 | brelse(dibh); | 137 | brelse(dibh); |
@@ -229,7 +229,7 @@ out: | |||
229 | 229 | ||
230 | if (ip->i_di.di_size < offset + copied) | 230 | if (ip->i_di.di_size < offset + copied) |
231 | ip->i_di.di_size = offset + copied; | 231 | ip->i_di.di_size = offset + copied; |
232 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 232 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
233 | 233 | ||
234 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 234 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
235 | gfs2_dinode_out(ip, dibh->b_data); | 235 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b) | |||
1198 | */ | 1198 | */ |
1199 | 1199 | ||
1200 | static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | 1200 | static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, |
1201 | void *opaque, gfs2_filldir_t filldir, | 1201 | void *opaque, filldir_t filldir, |
1202 | const struct gfs2_dirent **darr, u32 entries, | 1202 | const struct gfs2_dirent **darr, u32 entries, |
1203 | int *copied) | 1203 | int *copied) |
1204 | { | 1204 | { |
1205 | const struct gfs2_dirent *dent, *dent_next; | 1205 | const struct gfs2_dirent *dent, *dent_next; |
1206 | struct gfs2_inum_host inum; | ||
1207 | u64 off, off_next; | 1206 | u64 off, off_next; |
1208 | unsigned int x, y; | 1207 | unsigned int x, y; |
1209 | int run = 0; | 1208 | int run = 0; |
@@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1240 | *offset = off; | 1239 | *offset = off; |
1241 | } | 1240 | } |
1242 | 1241 | ||
1243 | gfs2_inum_in(&inum, (char *)&dent->de_inum); | ||
1244 | |||
1245 | error = filldir(opaque, (const char *)(dent + 1), | 1242 | error = filldir(opaque, (const char *)(dent + 1), |
1246 | be16_to_cpu(dent->de_name_len), | 1243 | be16_to_cpu(dent->de_name_len), |
1247 | off, &inum, | 1244 | off, be64_to_cpu(dent->de_inum.no_addr), |
1248 | be16_to_cpu(dent->de_type)); | 1245 | be16_to_cpu(dent->de_type)); |
1249 | if (error) | 1246 | if (error) |
1250 | return 1; | 1247 | return 1; |
@@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1262 | } | 1259 | } |
1263 | 1260 | ||
1264 | static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | 1261 | static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, |
1265 | gfs2_filldir_t filldir, int *copied, | 1262 | filldir_t filldir, int *copied, unsigned *depth, |
1266 | unsigned *depth, u64 leaf_no) | 1263 | u64 leaf_no) |
1267 | { | 1264 | { |
1268 | struct gfs2_inode *ip = GFS2_I(inode); | 1265 | struct gfs2_inode *ip = GFS2_I(inode); |
1269 | struct buffer_head *bh; | 1266 | struct buffer_head *bh; |
@@ -1343,7 +1340,7 @@ out: | |||
1343 | */ | 1340 | */ |
1344 | 1341 | ||
1345 | static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | 1342 | static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, |
1346 | gfs2_filldir_t filldir) | 1343 | filldir_t filldir) |
1347 | { | 1344 | { |
1348 | struct gfs2_inode *dip = GFS2_I(inode); | 1345 | struct gfs2_inode *dip = GFS2_I(inode); |
1349 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 1346 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -1402,7 +1399,7 @@ out: | |||
1402 | } | 1399 | } |
1403 | 1400 | ||
1404 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 1401 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
1405 | gfs2_filldir_t filldir) | 1402 | filldir_t filldir) |
1406 | { | 1403 | { |
1407 | struct gfs2_inode *dip = GFS2_I(inode); | 1404 | struct gfs2_inode *dip = GFS2_I(inode); |
1408 | struct dirent_gather g; | 1405 | struct dirent_gather g; |
@@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
1568 | break; | 1565 | break; |
1569 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 1566 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
1570 | ip->i_di.di_entries++; | 1567 | ip->i_di.di_entries++; |
1571 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 1568 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1572 | gfs2_dinode_out(ip, bh->b_data); | 1569 | gfs2_dinode_out(ip, bh->b_data); |
1573 | brelse(bh); | 1570 | brelse(bh); |
1574 | error = 0; | 1571 | error = 0; |
@@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) | |||
1654 | gfs2_consist_inode(dip); | 1651 | gfs2_consist_inode(dip); |
1655 | gfs2_trans_add_bh(dip->i_gl, bh, 1); | 1652 | gfs2_trans_add_bh(dip->i_gl, bh, 1); |
1656 | dip->i_di.di_entries--; | 1653 | dip->i_di.di_entries--; |
1657 | dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); | 1654 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1658 | gfs2_dinode_out(dip, bh->b_data); | 1655 | gfs2_dinode_out(dip, bh->b_data); |
1659 | brelse(bh); | 1656 | brelse(bh); |
1660 | mark_inode_dirty(&dip->i_inode); | 1657 | mark_inode_dirty(&dip->i_inode); |
@@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | |||
1702 | gfs2_trans_add_bh(dip->i_gl, bh, 1); | 1699 | gfs2_trans_add_bh(dip->i_gl, bh, 1); |
1703 | } | 1700 | } |
1704 | 1701 | ||
1705 | dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); | 1702 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1706 | gfs2_dinode_out(dip, bh->b_data); | 1703 | gfs2_dinode_out(dip, bh->b_data); |
1707 | brelse(bh); | 1704 | brelse(bh); |
1708 | return 0; | 1705 | return 0; |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index b21b33668a5b..48fe89046bba 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -16,30 +16,13 @@ struct inode; | |||
16 | struct gfs2_inode; | 16 | struct gfs2_inode; |
17 | struct gfs2_inum; | 17 | struct gfs2_inum; |
18 | 18 | ||
19 | /** | ||
20 | * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read() | ||
21 | * @opaque: opaque data used by the function | ||
22 | * @name: the name of the directory entry | ||
23 | * @length: the length of the name | ||
24 | * @offset: the entry's offset in the directory | ||
25 | * @inum: the inode number the entry points to | ||
26 | * @type: the type of inode the entry points to | ||
27 | * | ||
28 | * Returns: 0 on success, 1 if buffer full | ||
29 | */ | ||
30 | |||
31 | typedef int (*gfs2_filldir_t) (void *opaque, | ||
32 | const char *name, unsigned int length, | ||
33 | u64 offset, | ||
34 | struct gfs2_inum_host *inum, unsigned int type); | ||
35 | |||
36 | int gfs2_dir_search(struct inode *dir, const struct qstr *filename, | 19 | int gfs2_dir_search(struct inode *dir, const struct qstr *filename, |
37 | struct gfs2_inum_host *inum, unsigned int *type); | 20 | struct gfs2_inum_host *inum, unsigned int *type); |
38 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 21 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
39 | const struct gfs2_inum_host *inum, unsigned int type); | 22 | const struct gfs2_inum_host *inum, unsigned int type); |
40 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); | 23 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); |
41 | int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, | 24 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
42 | gfs2_filldir_t filldir); | 25 | filldir_t filldir); |
43 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 26 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
44 | struct gfs2_inum_host *new_inum, unsigned int new_type); | 27 | struct gfs2_inum_host *new_inum, unsigned int new_type); |
45 | 28 | ||
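Note: retiring gfs2_filldir_t in favour of the VFS's own filldir_t also drops the per-entry gfs2_inum_in() unpacking: the callback takes a plain u64 inode number, which do_filldir_main() feeds with be64_to_cpu(dent->de_inum.no_addr). For reference, the VFS callback type of this kernel generation had (as assumed here) this shape:

    /* assumed shape of the contemporary VFS directory callback */
    typedef int (*filldir_t)(void *opaque, const char *name, int namelen,
                             loff_t offset, u64 ino, unsigned int d_type);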
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index ebebbdcd7057..0c83c7f4dda8 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
@@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
301 | 301 | ||
302 | error = gfs2_meta_inode_buffer(ip, &dibh); | 302 | error = gfs2_meta_inode_buffer(ip, &dibh); |
303 | if (!error) { | 303 | if (!error) { |
304 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 304 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
305 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 305 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
306 | gfs2_dinode_out(ip, dibh->b_data); | 306 | gfs2_dinode_out(ip, dibh->b_data); |
307 | brelse(dibh); | 307 | brelse(dibh); |
@@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
718 | (er->er_mode & S_IFMT)); | 718 | (er->er_mode & S_IFMT)); |
719 | ip->i_inode.i_mode = er->er_mode; | 719 | ip->i_inode.i_mode = er->er_mode; |
720 | } | 720 | } |
721 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 721 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
722 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 722 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
723 | gfs2_dinode_out(ip, dibh->b_data); | 723 | gfs2_dinode_out(ip, dibh->b_data); |
724 | brelse(dibh); | 724 | brelse(dibh); |
@@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, | |||
853 | (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); | 853 | (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); |
854 | ip->i_inode.i_mode = er->er_mode; | 854 | ip->i_inode.i_mode = er->er_mode; |
855 | } | 855 | } |
856 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 856 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
857 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 857 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
858 | gfs2_dinode_out(ip, dibh->b_data); | 858 | gfs2_dinode_out(ip, dibh->b_data); |
859 | brelse(dibh); | 859 | brelse(dibh); |
@@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) | |||
1134 | 1134 | ||
1135 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1135 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1136 | if (!error) { | 1136 | if (!error) { |
1137 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 1137 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1138 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1138 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
1139 | gfs2_dinode_out(ip, dibh->b_data); | 1139 | gfs2_dinode_out(ip, dibh->b_data); |
1140 | brelse(dibh); | 1140 | brelse(dibh); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 438146904b58..6618c1190252 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/gfs2_ondisk.h> | 19 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/lm_interface.h> | 21 | #include <linux/lm_interface.h> |
22 | #include <linux/wait.h> | ||
23 | #include <linux/rwsem.h> | ||
22 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
23 | 25 | ||
24 | #include "gfs2.h" | 26 | #include "gfs2.h" |
@@ -33,11 +35,6 @@ | |||
33 | #include "super.h" | 35 | #include "super.h" |
34 | #include "util.h" | 36 | #include "util.h" |
35 | 37 | ||
36 | struct greedy { | ||
37 | struct gfs2_holder gr_gh; | ||
38 | struct delayed_work gr_work; | ||
39 | }; | ||
40 | |||
41 | struct gfs2_gl_hash_bucket { | 38 | struct gfs2_gl_hash_bucket { |
42 | struct hlist_head hb_list; | 39 | struct hlist_head hb_list; |
43 | }; | 40 | }; |
@@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl); | |||
47 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | 44 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); |
48 | static int dump_glock(struct gfs2_glock *gl); | 45 | static int dump_glock(struct gfs2_glock *gl); |
49 | static int dump_inode(struct gfs2_inode *ip); | 46 | static int dump_inode(struct gfs2_inode *ip); |
47 | static void gfs2_glock_xmote_th(struct gfs2_holder *gh); | ||
48 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | ||
49 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | ||
50 | 50 | ||
51 | #define GFS2_GL_HASH_SHIFT 15 | 51 | #define GFS2_GL_HASH_SHIFT 15 |
52 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) | 52 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) |
@@ -213,30 +213,6 @@ out: | |||
213 | } | 213 | } |
214 | 214 | ||
215 | /** | 215 | /** |
216 | * queue_empty - check to see if a glock's queue is empty | ||
217 | * @gl: the glock | ||
218 | * @head: the head of the queue to check | ||
219 | * | ||
220 | * This function protects the list in the event that a process already | ||
221 | * has a holder on the list and is adding a second holder for itself. | ||
222 | * The glmutex lock is what generally prevents processes from working | ||
223 | * on the same glock at once, but the special case of adding a second | ||
224 | * holder for yourself ("recursive" locking) doesn't involve locking | ||
225 | * glmutex, making the spin lock necessary. | ||
226 | * | ||
227 | * Returns: 1 if the queue is empty | ||
228 | */ | ||
229 | |||
230 | static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head) | ||
231 | { | ||
232 | int empty; | ||
233 | spin_lock(&gl->gl_spin); | ||
234 | empty = list_empty(head); | ||
235 | spin_unlock(&gl->gl_spin); | ||
236 | return empty; | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * search_bucket() - Find struct gfs2_glock by lock number | 216 | * search_bucket() - Find struct gfs2_glock by lock number |
241 | * @bucket: the bucket to search | 217 | * @bucket: the bucket to search |
242 | * @name: The lock name | 218 | * @name: The lock name |
@@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | |||
395 | gh->gh_flags = flags; | 371 | gh->gh_flags = flags; |
396 | gh->gh_error = 0; | 372 | gh->gh_error = 0; |
397 | gh->gh_iflags = 0; | 373 | gh->gh_iflags = 0; |
398 | init_completion(&gh->gh_wait); | ||
399 | |||
400 | if (gh->gh_state == LM_ST_EXCLUSIVE) | ||
401 | gh->gh_flags |= GL_LOCAL_EXCL; | ||
402 | |||
403 | gfs2_glock_hold(gl); | 374 | gfs2_glock_hold(gl); |
404 | } | 375 | } |
405 | 376 | ||
@@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * | |||
417 | { | 388 | { |
418 | gh->gh_state = state; | 389 | gh->gh_state = state; |
419 | gh->gh_flags = flags; | 390 | gh->gh_flags = flags; |
420 | if (gh->gh_state == LM_ST_EXCLUSIVE) | ||
421 | gh->gh_flags |= GL_LOCAL_EXCL; | ||
422 | |||
423 | gh->gh_iflags &= 1 << HIF_ALLOCED; | 391 | gh->gh_iflags &= 1 << HIF_ALLOCED; |
424 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 392 | gh->gh_ip = (unsigned long)__builtin_return_address(0); |
425 | } | 393 | } |
@@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh) | |||
479 | kfree(gh); | 447 | kfree(gh); |
480 | } | 448 | } |
481 | 449 | ||
450 | static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) | ||
451 | { | ||
452 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { | ||
453 | gfs2_holder_put(gh); | ||
454 | return; | ||
455 | } | ||
456 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
457 | smp_mb(); | ||
458 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
459 | } | ||
460 | |||
461 | static int holder_wait(void *word) | ||
462 | { | ||
463 | schedule(); | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | static void wait_on_holder(struct gfs2_holder *gh) | ||
468 | { | ||
469 | might_sleep(); | ||
470 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); | ||
471 | } | ||
472 | |||
482 | /** | 473 | /** |
483 | * rq_mutex - process a mutex request in the queue | 474 | * rq_mutex - process a mutex request in the queue |
484 | * @gh: the glock holder | 475 | * @gh: the glock holder |
@@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh) | |||
493 | list_del_init(&gh->gh_list); | 484 | list_del_init(&gh->gh_list); |
494 | /* gh->gh_error never examined. */ | 485 | /* gh->gh_error never examined. */ |
495 | set_bit(GLF_LOCK, &gl->gl_flags); | 486 | set_bit(GLF_LOCK, &gl->gl_flags); |
496 | complete(&gh->gh_wait); | 487 | clear_bit(HIF_WAIT, &gh->gh_iflags); |
488 | smp_mb(); | ||
489 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
497 | 490 | ||
498 | return 1; | 491 | return 1; |
499 | } | 492 | } |
@@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh) | |||
511 | { | 504 | { |
512 | struct gfs2_glock *gl = gh->gh_gl; | 505 | struct gfs2_glock *gl = gh->gh_gl; |
513 | struct gfs2_sbd *sdp = gl->gl_sbd; | 506 | struct gfs2_sbd *sdp = gl->gl_sbd; |
514 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
515 | 507 | ||
516 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | 508 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { |
517 | if (list_empty(&gl->gl_holders)) { | 509 | if (list_empty(&gl->gl_holders)) { |
@@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh) | |||
526 | gfs2_reclaim_glock(sdp); | 518 | gfs2_reclaim_glock(sdp); |
527 | } | 519 | } |
528 | 520 | ||
529 | glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); | 521 | gfs2_glock_xmote_th(gh); |
530 | spin_lock(&gl->gl_spin); | 522 | spin_lock(&gl->gl_spin); |
531 | } | 523 | } |
532 | return 1; | 524 | return 1; |
@@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh) | |||
537 | set_bit(GLF_LOCK, &gl->gl_flags); | 529 | set_bit(GLF_LOCK, &gl->gl_flags); |
538 | } else { | 530 | } else { |
539 | struct gfs2_holder *next_gh; | 531 | struct gfs2_holder *next_gh; |
540 | if (gh->gh_flags & GL_LOCAL_EXCL) | 532 | if (gh->gh_state == LM_ST_EXCLUSIVE) |
541 | return 1; | 533 | return 1; |
542 | next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, | 534 | next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, |
543 | gh_list); | 535 | gh_list); |
544 | if (next_gh->gh_flags & GL_LOCAL_EXCL) | 536 | if (next_gh->gh_state == LM_ST_EXCLUSIVE) |
545 | return 1; | 537 | return 1; |
546 | } | 538 | } |
547 | 539 | ||
@@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh) | |||
549 | gh->gh_error = 0; | 541 | gh->gh_error = 0; |
550 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 542 | set_bit(HIF_HOLDER, &gh->gh_iflags); |
551 | 543 | ||
552 | complete(&gh->gh_wait); | 544 | gfs2_holder_dispose_or_wake(gh); |
553 | 545 | ||
554 | return 0; | 546 | return 0; |
555 | } | 547 | } |
@@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh) | |||
564 | static int rq_demote(struct gfs2_holder *gh) | 556 | static int rq_demote(struct gfs2_holder *gh) |
565 | { | 557 | { |
566 | struct gfs2_glock *gl = gh->gh_gl; | 558 | struct gfs2_glock *gl = gh->gh_gl; |
567 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
568 | 559 | ||
569 | if (!list_empty(&gl->gl_holders)) | 560 | if (!list_empty(&gl->gl_holders)) |
570 | return 1; | 561 | return 1; |
@@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh) | |||
573 | list_del_init(&gh->gh_list); | 564 | list_del_init(&gh->gh_list); |
574 | gh->gh_error = 0; | 565 | gh->gh_error = 0; |
575 | spin_unlock(&gl->gl_spin); | 566 | spin_unlock(&gl->gl_spin); |
576 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) | 567 | gfs2_holder_dispose_or_wake(gh); |
577 | gfs2_holder_put(gh); | ||
578 | else | ||
579 | complete(&gh->gh_wait); | ||
580 | spin_lock(&gl->gl_spin); | 568 | spin_lock(&gl->gl_spin); |
581 | } else { | 569 | } else { |
582 | gl->gl_req_gh = gh; | 570 | gl->gl_req_gh = gh; |
@@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh) | |||
585 | 573 | ||
586 | if (gh->gh_state == LM_ST_UNLOCKED || | 574 | if (gh->gh_state == LM_ST_UNLOCKED || |
587 | gl->gl_state != LM_ST_EXCLUSIVE) | 575 | gl->gl_state != LM_ST_EXCLUSIVE) |
588 | glops->go_drop_th(gl); | 576 | gfs2_glock_drop_th(gl); |
589 | else | 577 | else |
590 | glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); | 578 | gfs2_glock_xmote_th(gh); |
591 | 579 | ||
592 | spin_lock(&gl->gl_spin); | 580 | spin_lock(&gl->gl_spin); |
593 | } | 581 | } |
@@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh) | |||
596 | } | 584 | } |
597 | 585 | ||
598 | /** | 586 | /** |
599 | * rq_greedy - process a queued request to drop greedy status | ||
600 | * @gh: the glock holder | ||
601 | * | ||
602 | * Returns: 1 if the queue is blocked | ||
603 | */ | ||
604 | |||
605 | static int rq_greedy(struct gfs2_holder *gh) | ||
606 | { | ||
607 | struct gfs2_glock *gl = gh->gh_gl; | ||
608 | |||
609 | list_del_init(&gh->gh_list); | ||
610 | /* gh->gh_error never examined. */ | ||
611 | clear_bit(GLF_GREEDY, &gl->gl_flags); | ||
612 | spin_unlock(&gl->gl_spin); | ||
613 | |||
614 | gfs2_holder_uninit(gh); | ||
615 | kfree(container_of(gh, struct greedy, gr_gh)); | ||
616 | |||
617 | spin_lock(&gl->gl_spin); | ||
618 | |||
619 | return 0; | ||
620 | } | ||
621 | |||
622 | /** | ||
623 | * run_queue - process holder structures on a glock | 587 | * run_queue - process holder structures on a glock |
624 | * @gl: the glock | 588 | * @gl: the glock |
625 | * | 589 | * |
@@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl) | |||
649 | 613 | ||
650 | if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) | 614 | if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) |
651 | blocked = rq_demote(gh); | 615 | blocked = rq_demote(gh); |
652 | else if (test_bit(HIF_GREEDY, &gh->gh_iflags)) | ||
653 | blocked = rq_greedy(gh); | ||
654 | else | 616 | else |
655 | gfs2_assert_warn(gl->gl_sbd, 0); | 617 | gfs2_assert_warn(gl->gl_sbd, 0); |
656 | 618 | ||
@@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) | |||
684 | 646 | ||
685 | gfs2_holder_init(gl, 0, 0, &gh); | 647 | gfs2_holder_init(gl, 0, 0, &gh); |
686 | set_bit(HIF_MUTEX, &gh.gh_iflags); | 648 | set_bit(HIF_MUTEX, &gh.gh_iflags); |
649 | if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) | ||
650 | BUG(); | ||
687 | 651 | ||
688 | spin_lock(&gl->gl_spin); | 652 | spin_lock(&gl->gl_spin); |
689 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 653 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
@@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) | |||
691 | } else { | 655 | } else { |
692 | gl->gl_owner = current; | 656 | gl->gl_owner = current; |
693 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 657 | gl->gl_ip = (unsigned long)__builtin_return_address(0); |
694 | complete(&gh.gh_wait); | 658 | clear_bit(HIF_WAIT, &gh.gh_iflags); |
659 | smp_mb(); | ||
660 | wake_up_bit(&gh.gh_iflags, HIF_WAIT); | ||
695 | } | 661 | } |
696 | spin_unlock(&gl->gl_spin); | 662 | spin_unlock(&gl->gl_spin); |
697 | 663 | ||
698 | wait_for_completion(&gh.gh_wait); | 664 | wait_on_holder(&gh); |
699 | gfs2_holder_uninit(&gh); | 665 | gfs2_holder_uninit(&gh); |
700 | } | 666 | } |
701 | 667 | ||
@@ -774,6 +740,7 @@ restart: | |||
774 | return; | 740 | return; |
775 | set_bit(HIF_DEMOTE, &new_gh->gh_iflags); | 741 | set_bit(HIF_DEMOTE, &new_gh->gh_iflags); |
776 | set_bit(HIF_DEALLOC, &new_gh->gh_iflags); | 742 | set_bit(HIF_DEALLOC, &new_gh->gh_iflags); |
743 | set_bit(HIF_WAIT, &new_gh->gh_iflags); | ||
777 | 744 | ||
778 | goto restart; | 745 | goto restart; |
779 | } | 746 | } |
@@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
825 | int op_done = 1; | 792 | int op_done = 1; |
826 | 793 | ||
827 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 794 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
828 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 795 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
829 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); | 796 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); |
830 | 797 | ||
831 | state_change(gl, ret & LM_OUT_ST_MASK); | 798 | state_change(gl, ret & LM_OUT_ST_MASK); |
@@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
908 | 875 | ||
909 | gfs2_glock_put(gl); | 876 | gfs2_glock_put(gl); |
910 | 877 | ||
911 | if (gh) { | 878 | if (gh) |
912 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) | 879 | gfs2_holder_dispose_or_wake(gh); |
913 | gfs2_holder_put(gh); | ||
914 | else | ||
915 | complete(&gh->gh_wait); | ||
916 | } | ||
917 | } | 880 | } |
918 | 881 | ||
919 | /** | 882 | /** |
@@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
924 | * | 887 | * |
925 | */ | 888 | */ |
926 | 889 | ||
927 | void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) | 890 | void gfs2_glock_xmote_th(struct gfs2_holder *gh) |
928 | { | 891 | { |
892 | struct gfs2_glock *gl = gh->gh_gl; | ||
929 | struct gfs2_sbd *sdp = gl->gl_sbd; | 893 | struct gfs2_sbd *sdp = gl->gl_sbd; |
894 | int flags = gh->gh_flags; | ||
895 | unsigned state = gh->gh_state; | ||
930 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 896 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
931 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | | 897 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | |
932 | LM_FLAG_NOEXP | LM_FLAG_ANY | | 898 | LM_FLAG_NOEXP | LM_FLAG_ANY | |
933 | LM_FLAG_PRIORITY); | 899 | LM_FLAG_PRIORITY); |
934 | unsigned int lck_ret; | 900 | unsigned int lck_ret; |
935 | 901 | ||
902 | if (glops->go_xmote_th) | ||
903 | glops->go_xmote_th(gl); | ||
904 | |||
936 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 905 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
937 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 906 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
938 | gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); | 907 | gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); |
939 | gfs2_assert_warn(sdp, state != gl->gl_state); | 908 | gfs2_assert_warn(sdp, state != gl->gl_state); |
940 | 909 | ||
941 | if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) | ||
942 | glops->go_sync(gl); | ||
943 | |||
944 | gfs2_glock_hold(gl); | 910 | gfs2_glock_hold(gl); |
945 | gl->gl_req_bh = xmote_bh; | 911 | gl->gl_req_bh = xmote_bh; |
946 | 912 | ||
@@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
971 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 937 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
972 | struct gfs2_holder *gh = gl->gl_req_gh; | 938 | struct gfs2_holder *gh = gl->gl_req_gh; |
973 | 939 | ||
974 | clear_bit(GLF_PREFETCH, &gl->gl_flags); | ||
975 | |||
976 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 940 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
977 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 941 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
978 | gfs2_assert_warn(sdp, !ret); | 942 | gfs2_assert_warn(sdp, !ret); |
979 | 943 | ||
980 | state_change(gl, LM_ST_UNLOCKED); | 944 | state_change(gl, LM_ST_UNLOCKED); |
@@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
1001 | 965 | ||
1002 | gfs2_glock_put(gl); | 966 | gfs2_glock_put(gl); |
1003 | 967 | ||
1004 | if (gh) { | 968 | if (gh) |
1005 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) | 969 | gfs2_holder_dispose_or_wake(gh); |
1006 | gfs2_holder_put(gh); | ||
1007 | else | ||
1008 | complete(&gh->gh_wait); | ||
1009 | } | ||
1010 | } | 970 | } |
1011 | 971 | ||
1012 | /** | 972 | /** |
@@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
1015 | * | 975 | * |
1016 | */ | 976 | */ |
1017 | 977 | ||
1018 | void gfs2_glock_drop_th(struct gfs2_glock *gl) | 978 | static void gfs2_glock_drop_th(struct gfs2_glock *gl) |
1019 | { | 979 | { |
1020 | struct gfs2_sbd *sdp = gl->gl_sbd; | 980 | struct gfs2_sbd *sdp = gl->gl_sbd; |
1021 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 981 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1022 | unsigned int ret; | 982 | unsigned int ret; |
1023 | 983 | ||
984 | if (glops->go_drop_th) | ||
985 | glops->go_drop_th(gl); | ||
986 | |||
1024 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 987 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
1025 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 988 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
1026 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); | 989 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); |
1027 | 990 | ||
1028 | if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) | ||
1029 | glops->go_sync(gl); | ||
1030 | |||
1031 | gfs2_glock_hold(gl); | 991 | gfs2_glock_hold(gl); |
1032 | gl->gl_req_bh = drop_bh; | 992 | gl->gl_req_bh = drop_bh; |
1033 | 993 | ||
@@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) | |||
1107 | if (gh->gh_flags & LM_FLAG_PRIORITY) | 1067 | if (gh->gh_flags & LM_FLAG_PRIORITY) |
1108 | do_cancels(gh); | 1068 | do_cancels(gh); |
1109 | 1069 | ||
1110 | wait_for_completion(&gh->gh_wait); | 1070 | wait_on_holder(gh); |
1111 | |||
1112 | if (gh->gh_error) | 1071 | if (gh->gh_error) |
1113 | return gh->gh_error; | 1072 | return gh->gh_error; |
1114 | 1073 | ||
@@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh) | |||
1164 | struct gfs2_holder *existing; | 1123 | struct gfs2_holder *existing; |
1165 | 1124 | ||
1166 | BUG_ON(!gh->gh_owner); | 1125 | BUG_ON(!gh->gh_owner); |
1126 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | ||
1127 | BUG(); | ||
1167 | 1128 | ||
1168 | existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); | 1129 | existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); |
1169 | if (existing) { | 1130 | if (existing) { |
@@ -1227,8 +1188,6 @@ restart: | |||
1227 | } | 1188 | } |
1228 | } | 1189 | } |
1229 | 1190 | ||
1230 | clear_bit(GLF_PREFETCH, &gl->gl_flags); | ||
1231 | |||
1232 | return error; | 1191 | return error; |
1233 | } | 1192 | } |
1234 | 1193 | ||
@@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1321 | } | 1280 | } |
1322 | 1281 | ||
1323 | /** | 1282 | /** |
1324 | * gfs2_glock_prefetch - Try to prefetch a glock | ||
1325 | * @gl: the glock | ||
1326 | * @state: the state to prefetch in | ||
1327 | * @flags: flags passed to go_xmote_th() | ||
1328 | * | ||
1329 | */ | ||
1330 | |||
1331 | static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, | ||
1332 | int flags) | ||
1333 | { | ||
1334 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
1335 | |||
1336 | spin_lock(&gl->gl_spin); | ||
1337 | |||
1338 | if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) || | ||
1339 | !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) || | ||
1340 | !list_empty(&gl->gl_waiters3) || | ||
1341 | relaxed_state_ok(gl->gl_state, state, flags)) { | ||
1342 | spin_unlock(&gl->gl_spin); | ||
1343 | return; | ||
1344 | } | ||
1345 | |||
1346 | set_bit(GLF_PREFETCH, &gl->gl_flags); | ||
1347 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
1348 | spin_unlock(&gl->gl_spin); | ||
1349 | |||
1350 | glops->go_xmote_th(gl, state, flags); | ||
1351 | } | ||
1352 | |||
1353 | static void greedy_work(struct work_struct *work) | ||
1354 | { | ||
1355 | struct greedy *gr = container_of(work, struct greedy, gr_work.work); | ||
1356 | struct gfs2_holder *gh = &gr->gr_gh; | ||
1357 | struct gfs2_glock *gl = gh->gh_gl; | ||
1358 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
1359 | |||
1360 | clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); | ||
1361 | |||
1362 | if (glops->go_greedy) | ||
1363 | glops->go_greedy(gl); | ||
1364 | |||
1365 | spin_lock(&gl->gl_spin); | ||
1366 | |||
1367 | if (list_empty(&gl->gl_waiters2)) { | ||
1368 | clear_bit(GLF_GREEDY, &gl->gl_flags); | ||
1369 | spin_unlock(&gl->gl_spin); | ||
1370 | gfs2_holder_uninit(gh); | ||
1371 | kfree(gr); | ||
1372 | } else { | ||
1373 | gfs2_glock_hold(gl); | ||
1374 | list_add_tail(&gh->gh_list, &gl->gl_waiters2); | ||
1375 | run_queue(gl); | ||
1376 | spin_unlock(&gl->gl_spin); | ||
1377 | gfs2_glock_put(gl); | ||
1378 | } | ||
1379 | } | ||
1380 | |||
1381 | /** | ||
1382 | * gfs2_glock_be_greedy - | ||
1383 | * @gl: | ||
1384 | * @time: | ||
1385 | * | ||
1386 | * Returns: 0 if go_greedy will be called, 1 otherwise | ||
1387 | */ | ||
1388 | |||
1389 | int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time) | ||
1390 | { | ||
1391 | struct greedy *gr; | ||
1392 | struct gfs2_holder *gh; | ||
1393 | |||
1394 | if (!time || gl->gl_sbd->sd_args.ar_localcaching || | ||
1395 | test_and_set_bit(GLF_GREEDY, &gl->gl_flags)) | ||
1396 | return 1; | ||
1397 | |||
1398 | gr = kmalloc(sizeof(struct greedy), GFP_KERNEL); | ||
1399 | if (!gr) { | ||
1400 | clear_bit(GLF_GREEDY, &gl->gl_flags); | ||
1401 | return 1; | ||
1402 | } | ||
1403 | gh = &gr->gr_gh; | ||
1404 | |||
1405 | gfs2_holder_init(gl, 0, 0, gh); | ||
1406 | set_bit(HIF_GREEDY, &gh->gh_iflags); | ||
1407 | INIT_DELAYED_WORK(&gr->gr_work, greedy_work); | ||
1408 | |||
1409 | set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); | ||
1410 | schedule_delayed_work(&gr->gr_work, time); | ||
1411 | |||
1412 | return 0; | ||
1413 | } | ||
1414 | |||
1415 | /** | ||
1416 | * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it | 1283 | * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it |
1417 | * @gh: the holder structure | 1284 | * @gh: the holder structure |
1418 | * | 1285 | * |
@@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b) | |||
1470 | return 1; | 1337 | return 1; |
1471 | if (a->ln_number < b->ln_number) | 1338 | if (a->ln_number < b->ln_number) |
1472 | return -1; | 1339 | return -1; |
1473 | if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) | 1340 | BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); |
1474 | return 1; | ||
1475 | if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL)) | ||
1476 | return 1; | ||
1477 | return 0; | 1341 | return 0; |
1478 | } | 1342 | } |
1479 | 1343 | ||
@@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
1618 | } | 1482 | } |
1619 | 1483 | ||
1620 | /** | 1484 | /** |
1621 | * gfs2_glock_prefetch_num - prefetch a glock based on lock number | ||
1622 | * @sdp: the filesystem | ||
1623 | * @number: the lock number | ||
1624 | * @glops: the glock operations for the type of glock | ||
1625 | * @state: the state to acquire the glock in | ||
1626 | * @flags: modifier flags for the acquisition | ||
1627 | * | ||
1628 | * Returns: errno | ||
1629 | */ | ||
1630 | |||
1631 | void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, | ||
1632 | const struct gfs2_glock_operations *glops, | ||
1633 | unsigned int state, int flags) | ||
1634 | { | ||
1635 | struct gfs2_glock *gl; | ||
1636 | int error; | ||
1637 | |||
1638 | if (atomic_read(&sdp->sd_reclaim_count) < | ||
1639 | gfs2_tune_get(sdp, gt_reclaim_limit)) { | ||
1640 | error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); | ||
1641 | if (!error) { | ||
1642 | gfs2_glock_prefetch(gl, state, flags); | ||
1643 | gfs2_glock_put(gl); | ||
1644 | } | ||
1645 | } | ||
1646 | } | ||
1647 | |||
1648 | /** | ||
1649 | * gfs2_lvb_hold - attach a LVB from a glock | 1485 | * gfs2_lvb_hold - attach a LVB from a glock |
1650 | * @gl: The glock in question | 1486 | * @gl: The glock in question |
1651 | * | 1487 | * |
@@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1703 | if (!gl) | 1539 | if (!gl) |
1704 | return; | 1540 | return; |
1705 | 1541 | ||
1706 | if (gl->gl_ops->go_callback) | ||
1707 | gl->gl_ops->go_callback(gl, state); | ||
1708 | handle_callback(gl, state); | 1542 | handle_callback(gl, state); |
1709 | 1543 | ||
1710 | spin_lock(&gl->gl_spin); | 1544 | spin_lock(&gl->gl_spin); |
@@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
1746 | struct lm_async_cb *async = data; | 1580 | struct lm_async_cb *async = data; |
1747 | struct gfs2_glock *gl; | 1581 | struct gfs2_glock *gl; |
1748 | 1582 | ||
1583 | down_read(&gfs2_umount_flush_sem); | ||
1749 | gl = gfs2_glock_find(sdp, &async->lc_name); | 1584 | gl = gfs2_glock_find(sdp, &async->lc_name); |
1750 | if (gfs2_assert_warn(sdp, gl)) | 1585 | if (gfs2_assert_warn(sdp, gl)) |
1751 | return; | 1586 | return; |
1752 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) | 1587 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) |
1753 | gl->gl_req_bh(gl, async->lc_ret); | 1588 | gl->gl_req_bh(gl, async->lc_ret); |
1754 | gfs2_glock_put(gl); | 1589 | gfs2_glock_put(gl); |
1590 | up_read(&gfs2_umount_flush_sem); | ||
1755 | return; | 1591 | return; |
1756 | } | 1592 | } |
1757 | 1593 | ||
@@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
1781 | 1617 | ||
1782 | static int demote_ok(struct gfs2_glock *gl) | 1618 | static int demote_ok(struct gfs2_glock *gl) |
1783 | { | 1619 | { |
1784 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1785 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 1620 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1786 | int demote = 1; | 1621 | int demote = 1; |
1787 | 1622 | ||
1788 | if (test_bit(GLF_STICKY, &gl->gl_flags)) | 1623 | if (test_bit(GLF_STICKY, &gl->gl_flags)) |
1789 | demote = 0; | 1624 | demote = 0; |
1790 | else if (test_bit(GLF_PREFETCH, &gl->gl_flags)) | ||
1791 | demote = time_after_eq(jiffies, gl->gl_stamp + | ||
1792 | gfs2_tune_get(sdp, gt_prefetch_secs) * HZ); | ||
1793 | else if (glops->go_demote_ok) | 1625 | else if (glops->go_demote_ok) |
1794 | demote = glops->go_demote_ok(gl); | 1626 | demote = glops->go_demote_ok(gl); |
1795 | 1627 | ||
@@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | |||
1845 | atomic_inc(&sdp->sd_reclaimed); | 1677 | atomic_inc(&sdp->sd_reclaimed); |
1846 | 1678 | ||
1847 | if (gfs2_glmutex_trylock(gl)) { | 1679 | if (gfs2_glmutex_trylock(gl)) { |
1848 | if (queue_empty(gl, &gl->gl_holders) && | 1680 | if (list_empty(&gl->gl_holders) && |
1849 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1681 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
1850 | handle_callback(gl, LM_ST_UNLOCKED); | 1682 | handle_callback(gl, LM_ST_UNLOCKED); |
1851 | gfs2_glmutex_unlock(gl); | 1683 | gfs2_glmutex_unlock(gl); |
@@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl) | |||
1909 | return; | 1741 | return; |
1910 | 1742 | ||
1911 | if (gfs2_glmutex_trylock(gl)) { | 1743 | if (gfs2_glmutex_trylock(gl)) { |
1912 | if (queue_empty(gl, &gl->gl_holders) && | 1744 | if (list_empty(&gl->gl_holders) && |
1913 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1745 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
1914 | goto out_schedule; | 1746 | goto out_schedule; |
1915 | gfs2_glmutex_unlock(gl); | 1747 | gfs2_glmutex_unlock(gl); |
@@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
1958 | } | 1790 | } |
1959 | 1791 | ||
1960 | if (gfs2_glmutex_trylock(gl)) { | 1792 | if (gfs2_glmutex_trylock(gl)) { |
1961 | if (queue_empty(gl, &gl->gl_holders) && | 1793 | if (list_empty(&gl->gl_holders) && |
1962 | gl->gl_state != LM_ST_UNLOCKED) | 1794 | gl->gl_state != LM_ST_UNLOCKED) |
1963 | handle_callback(gl, LM_ST_UNLOCKED); | 1795 | handle_callback(gl, LM_ST_UNLOCKED); |
1964 | gfs2_glmutex_unlock(gl); | 1796 | gfs2_glmutex_unlock(gl); |
@@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
2000 | t = jiffies; | 1832 | t = jiffies; |
2001 | } | 1833 | } |
2002 | 1834 | ||
1835 | down_write(&gfs2_umount_flush_sem); | ||
2003 | invalidate_inodes(sdp->sd_vfs); | 1836 | invalidate_inodes(sdp->sd_vfs); |
1837 | up_write(&gfs2_umount_flush_sem); | ||
2004 | msleep(10); | 1838 | msleep(10); |
2005 | } | 1839 | } |
2006 | } | 1840 | } |
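The glock.c changes above replace the per-holder struct completion with a HIF_WAIT bit: the waking side clears the bit, issues smp_mb(), and calls wake_up_bit(), while waiters sleep in wait_on_holder() until the bit is clear; holders marked HIF_DEALLOC are instead freed in gfs2_holder_dispose_or_wake() without waking anyone. The same file gains gfs2_umount_flush_sem, taken for read around lock-module callbacks and for write around invalidate_inodes() at umount. The pthreads sketch below models only the clear/barrier/wake ordering of the HIF_WAIT pattern; it is illustrative userspace code, not the kernel's wait_on_bit machinery.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int hif_wait = 1; /* set up front, like test_and_set_bit(HIF_WAIT) */

/* Waiter side: models wait_on_bit(&gh->gh_iflags, HIF_WAIT, ...). */
static void wait_on_holder(void)
{
    pthread_mutex_lock(&lock);
    while (hif_wait)
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
}

/* Waker side: models clear_bit(); smp_mb(); wake_up_bit(). */
static void holder_wake(void)
{
    pthread_mutex_lock(&lock);
    hif_wait = 0;
    pthread_cond_broadcast(&cond);
    pthread_mutex_unlock(&lock);
}

static void *granter(void *arg)
{
    (void)arg;
    holder_wake(); /* e.g. rq_promote() granting the holder */
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, granter, NULL);
    wait_on_holder(); /* returns once HIF_WAIT has been cleared */
    pthread_join(t, NULL);
    puts("holder granted");
    return 0;
}

Trading the completion for a flag bit shrinks struct gfs2_holder (see the incore.h hunk below) and gives the HIF_DEALLOC case a single disposal point.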
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index fb39108fc05c..f50e40ceca43 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -20,7 +20,6 @@ | |||
20 | #define LM_FLAG_ANY 0x00000008 | 20 | #define LM_FLAG_ANY 0x00000008 |
21 | #define LM_FLAG_PRIORITY 0x00000010 */ | 21 | #define LM_FLAG_PRIORITY 0x00000010 */ |
22 | 22 | ||
23 | #define GL_LOCAL_EXCL 0x00000020 | ||
24 | #define GL_ASYNC 0x00000040 | 23 | #define GL_ASYNC 0x00000040 |
25 | #define GL_EXACT 0x00000080 | 24 | #define GL_EXACT 0x00000080 |
26 | #define GL_SKIP 0x00000100 | 25 | #define GL_SKIP 0x00000100 |
@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | |||
83 | void gfs2_holder_reinit(unsigned int state, unsigned flags, | 82 | void gfs2_holder_reinit(unsigned int state, unsigned flags, |
84 | struct gfs2_holder *gh); | 83 | struct gfs2_holder *gh); |
85 | void gfs2_holder_uninit(struct gfs2_holder *gh); | 84 | void gfs2_holder_uninit(struct gfs2_holder *gh); |
86 | |||
87 | void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags); | ||
88 | void gfs2_glock_drop_th(struct gfs2_glock *gl); | ||
89 | |||
90 | int gfs2_glock_nq(struct gfs2_holder *gh); | 85 | int gfs2_glock_nq(struct gfs2_holder *gh); |
91 | int gfs2_glock_poll(struct gfs2_holder *gh); | 86 | int gfs2_glock_poll(struct gfs2_holder *gh); |
92 | int gfs2_glock_wait(struct gfs2_holder *gh); | 87 | int gfs2_glock_wait(struct gfs2_holder *gh); |
93 | void gfs2_glock_dq(struct gfs2_holder *gh); | 88 | void gfs2_glock_dq(struct gfs2_holder *gh); |
94 | 89 | ||
95 | int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time); | ||
96 | |||
97 | void gfs2_glock_dq_uninit(struct gfs2_holder *gh); | 90 | void gfs2_glock_dq_uninit(struct gfs2_holder *gh); |
98 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | 91 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, |
99 | u64 number, const struct gfs2_glock_operations *glops, | 92 | u64 number, const struct gfs2_glock_operations *glops, |
@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | |||
103 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 96 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
104 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | 97 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); |
105 | 98 | ||
106 | void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, | ||
107 | const struct gfs2_glock_operations *glops, | ||
108 | unsigned int state, int flags); | ||
109 | |||
110 | /** | 99 | /** |
111 | * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock | 100 | * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock |
112 | * @gl: the glock | 101 | * @gl: the glock |
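With GL_LOCAL_EXCL removed from glock.h, local exclusivity is no longer a separate holder flag that gfs2_holder_init() had to set; rq_promote() and inode_go_lock() in the hunks above now derive it from the requested lock state itself. A tiny illustrative check, using the state names from the patch but otherwise invented:

#include <stdio.h>

enum lm_state { LM_ST_UNLOCKED, LM_ST_SHARED, LM_ST_EXCLUSIVE };

/* was: gh->gh_flags & GL_LOCAL_EXCL, set implicitly for EX requests */
static int blocks_other_local_holders(enum lm_state requested)
{
    return requested == LM_ST_EXCLUSIVE;
}

int main(void)
{
    printf("shared blocks others: %d\n",
           blocks_other_local_holders(LM_ST_SHARED));
    printf("exclusive blocks others: %d\n",
           blocks_other_local_holders(LM_ST_EXCLUSIVE));
    return 0;
}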
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index b068d10bcb6e..c4b0391b7aa2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) | |||
117 | 117 | ||
118 | static void meta_go_sync(struct gfs2_glock *gl) | 118 | static void meta_go_sync(struct gfs2_glock *gl) |
119 | { | 119 | { |
120 | if (gl->gl_state != LM_ST_EXCLUSIVE) | ||
121 | return; | ||
122 | |||
120 | if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { | 123 | if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { |
121 | gfs2_log_flush(gl->gl_sbd, gl); | 124 | gfs2_log_flush(gl->gl_sbd, gl); |
122 | gfs2_meta_sync(gl); | 125 | gfs2_meta_sync(gl); |
123 | gfs2_ail_empty_gl(gl); | 126 | gfs2_ail_empty_gl(gl); |
124 | } | 127 | } |
125 | |||
126 | } | 128 | } |
127 | 129 | ||
128 | /** | 130 | /** |
@@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
142 | } | 144 | } |
143 | 145 | ||
144 | /** | 146 | /** |
147 | * inode_go_sync - Sync the dirty data and/or metadata for an inode glock | ||
148 | * @gl: the glock protecting the inode | ||
149 | * | ||
150 | */ | ||
151 | |||
152 | static void inode_go_sync(struct gfs2_glock *gl) | ||
153 | { | ||
154 | struct gfs2_inode *ip = gl->gl_object; | ||
155 | |||
156 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | ||
157 | ip = NULL; | ||
158 | |||
159 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | ||
160 | gfs2_log_flush(gl->gl_sbd, gl); | ||
161 | if (ip) | ||
162 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
163 | gfs2_meta_sync(gl); | ||
164 | if (ip) { | ||
165 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
166 | int error = filemap_fdatawait(mapping); | ||
167 | if (error == -ENOSPC) | ||
168 | set_bit(AS_ENOSPC, &mapping->flags); | ||
169 | else if (error) | ||
170 | set_bit(AS_EIO, &mapping->flags); | ||
171 | } | ||
172 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
173 | gfs2_ail_empty_gl(gl); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | /** | ||
145 | * inode_go_xmote_th - promote/demote a glock | 178 | * inode_go_xmote_th - promote/demote a glock |
146 | * @gl: the glock | 179 | * @gl: the glock |
147 | * @state: the requested state | 180 | * @state: the requested state |
@@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
149 | * | 182 | * |
150 | */ | 183 | */ |
151 | 184 | ||
152 | static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | 185 | static void inode_go_xmote_th(struct gfs2_glock *gl) |
153 | int flags) | ||
154 | { | 186 | { |
155 | if (gl->gl_state != LM_ST_UNLOCKED) | 187 | if (gl->gl_state != LM_ST_UNLOCKED) |
156 | gfs2_pte_inval(gl); | 188 | gfs2_pte_inval(gl); |
157 | gfs2_glock_xmote_th(gl, state, flags); | 189 | if (gl->gl_state == LM_ST_EXCLUSIVE) |
190 | inode_go_sync(gl); | ||
158 | } | 191 | } |
159 | 192 | ||
160 | /** | 193 | /** |
@@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) | |||
189 | static void inode_go_drop_th(struct gfs2_glock *gl) | 222 | static void inode_go_drop_th(struct gfs2_glock *gl) |
190 | { | 223 | { |
191 | gfs2_pte_inval(gl); | 224 | gfs2_pte_inval(gl); |
192 | gfs2_glock_drop_th(gl); | 225 | if (gl->gl_state == LM_ST_EXCLUSIVE) |
193 | } | 226 | inode_go_sync(gl); |
194 | |||
195 | /** | ||
196 | * inode_go_sync - Sync the dirty data and/or metadata for an inode glock | ||
197 | * @gl: the glock protecting the inode | ||
198 | * | ||
199 | */ | ||
200 | |||
201 | static void inode_go_sync(struct gfs2_glock *gl) | ||
202 | { | ||
203 | struct gfs2_inode *ip = gl->gl_object; | ||
204 | |||
205 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | ||
206 | ip = NULL; | ||
207 | |||
208 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | ||
209 | gfs2_log_flush(gl->gl_sbd, gl); | ||
210 | if (ip) | ||
211 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
212 | gfs2_meta_sync(gl); | ||
213 | if (ip) { | ||
214 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
215 | int error = filemap_fdatawait(mapping); | ||
216 | if (error == -ENOSPC) | ||
217 | set_bit(AS_ENOSPC, &mapping->flags); | ||
218 | else if (error) | ||
219 | set_bit(AS_EIO, &mapping->flags); | ||
220 | } | ||
221 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
222 | gfs2_ail_empty_gl(gl); | ||
223 | } | ||
224 | } | 227 | } |
225 | 228 | ||
226 | /** | 229 | /** |
@@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
295 | 298 | ||
296 | if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && | 299 | if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && |
297 | (gl->gl_state == LM_ST_EXCLUSIVE) && | 300 | (gl->gl_state == LM_ST_EXCLUSIVE) && |
298 | (gh->gh_flags & GL_LOCAL_EXCL)) | 301 | (gh->gh_state == LM_ST_EXCLUSIVE)) |
299 | error = gfs2_truncatei_resume(ip); | 302 | error = gfs2_truncatei_resume(ip); |
300 | 303 | ||
301 | return error; | 304 | return error; |
@@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh) | |||
319 | } | 322 | } |
320 | 323 | ||
321 | /** | 324 | /** |
322 | * inode_greedy - | ||
323 | * @gl: the glock | ||
324 | * | ||
325 | */ | ||
326 | |||
327 | static void inode_greedy(struct gfs2_glock *gl) | ||
328 | { | ||
329 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
330 | struct gfs2_inode *ip = gl->gl_object; | ||
331 | unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum); | ||
332 | unsigned int max = gfs2_tune_get(sdp, gt_greedy_max); | ||
333 | unsigned int new_time; | ||
334 | |||
335 | spin_lock(&ip->i_spin); | ||
336 | |||
337 | if (time_after(ip->i_last_pfault + quantum, jiffies)) { | ||
338 | new_time = ip->i_greedy + quantum; | ||
339 | if (new_time > max) | ||
340 | new_time = max; | ||
341 | } else { | ||
342 | new_time = ip->i_greedy - quantum; | ||
343 | if (!new_time || new_time > max) | ||
344 | new_time = 1; | ||
345 | } | ||
346 | |||
347 | ip->i_greedy = new_time; | ||
348 | |||
349 | spin_unlock(&ip->i_spin); | ||
350 | |||
351 | iput(&ip->i_inode); | ||
352 | } | ||
353 | |||
354 | /** | ||
355 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | 325 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock |
356 | * @gl: the glock | 326 | * @gl: the glock |
357 | * | 327 | * |
@@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
398 | * | 368 | * |
399 | */ | 369 | */ |
400 | 370 | ||
401 | static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | 371 | static void trans_go_xmote_th(struct gfs2_glock *gl) |
402 | int flags) | ||
403 | { | 372 | { |
404 | struct gfs2_sbd *sdp = gl->gl_sbd; | 373 | struct gfs2_sbd *sdp = gl->gl_sbd; |
405 | 374 | ||
@@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | |||
408 | gfs2_meta_syncfs(sdp); | 377 | gfs2_meta_syncfs(sdp); |
409 | gfs2_log_shutdown(sdp); | 378 | gfs2_log_shutdown(sdp); |
410 | } | 379 | } |
411 | |||
412 | gfs2_glock_xmote_th(gl, state, flags); | ||
413 | } | 380 | } |
414 | 381 | ||
415 | /** | 382 | /** |
@@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl) | |||
461 | gfs2_meta_syncfs(sdp); | 428 | gfs2_meta_syncfs(sdp); |
462 | gfs2_log_shutdown(sdp); | 429 | gfs2_log_shutdown(sdp); |
463 | } | 430 | } |
464 | |||
465 | gfs2_glock_drop_th(gl); | ||
466 | } | 431 | } |
467 | 432 | ||
468 | /** | 433 | /** |
@@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) | |||
478 | } | 443 | } |
479 | 444 | ||
480 | const struct gfs2_glock_operations gfs2_meta_glops = { | 445 | const struct gfs2_glock_operations gfs2_meta_glops = { |
481 | .go_xmote_th = gfs2_glock_xmote_th, | 446 | .go_xmote_th = meta_go_sync, |
482 | .go_drop_th = gfs2_glock_drop_th, | 447 | .go_drop_th = meta_go_sync, |
483 | .go_type = LM_TYPE_META, | 448 | .go_type = LM_TYPE_META, |
484 | }; | 449 | }; |
485 | 450 | ||
@@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = { | |||
487 | .go_xmote_th = inode_go_xmote_th, | 452 | .go_xmote_th = inode_go_xmote_th, |
488 | .go_xmote_bh = inode_go_xmote_bh, | 453 | .go_xmote_bh = inode_go_xmote_bh, |
489 | .go_drop_th = inode_go_drop_th, | 454 | .go_drop_th = inode_go_drop_th, |
490 | .go_sync = inode_go_sync, | ||
491 | .go_inval = inode_go_inval, | 455 | .go_inval = inode_go_inval, |
492 | .go_demote_ok = inode_go_demote_ok, | 456 | .go_demote_ok = inode_go_demote_ok, |
493 | .go_lock = inode_go_lock, | 457 | .go_lock = inode_go_lock, |
494 | .go_unlock = inode_go_unlock, | 458 | .go_unlock = inode_go_unlock, |
495 | .go_greedy = inode_greedy, | ||
496 | .go_type = LM_TYPE_INODE, | 459 | .go_type = LM_TYPE_INODE, |
497 | }; | 460 | }; |
498 | 461 | ||
499 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 462 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
500 | .go_xmote_th = gfs2_glock_xmote_th, | ||
501 | .go_drop_th = gfs2_glock_drop_th, | ||
502 | .go_sync = meta_go_sync, | ||
503 | .go_inval = meta_go_inval, | 463 | .go_inval = meta_go_inval, |
504 | .go_demote_ok = rgrp_go_demote_ok, | 464 | .go_demote_ok = rgrp_go_demote_ok, |
505 | .go_lock = rgrp_go_lock, | 465 | .go_lock = rgrp_go_lock, |
@@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = { | |||
515 | }; | 475 | }; |
516 | 476 | ||
517 | const struct gfs2_glock_operations gfs2_iopen_glops = { | 477 | const struct gfs2_glock_operations gfs2_iopen_glops = { |
518 | .go_xmote_th = gfs2_glock_xmote_th, | ||
519 | .go_drop_th = gfs2_glock_drop_th, | ||
520 | .go_type = LM_TYPE_IOPEN, | 478 | .go_type = LM_TYPE_IOPEN, |
521 | }; | 479 | }; |
522 | 480 | ||
523 | const struct gfs2_glock_operations gfs2_flock_glops = { | 481 | const struct gfs2_glock_operations gfs2_flock_glops = { |
524 | .go_xmote_th = gfs2_glock_xmote_th, | ||
525 | .go_drop_th = gfs2_glock_drop_th, | ||
526 | .go_type = LM_TYPE_FLOCK, | 482 | .go_type = LM_TYPE_FLOCK, |
527 | }; | 483 | }; |
528 | 484 | ||
529 | const struct gfs2_glock_operations gfs2_nondisk_glops = { | 485 | const struct gfs2_glock_operations gfs2_nondisk_glops = { |
530 | .go_xmote_th = gfs2_glock_xmote_th, | ||
531 | .go_drop_th = gfs2_glock_drop_th, | ||
532 | .go_type = LM_TYPE_NONDISK, | 486 | .go_type = LM_TYPE_NONDISK, |
533 | }; | 487 | }; |
534 | 488 | ||
535 | const struct gfs2_glock_operations gfs2_quota_glops = { | 489 | const struct gfs2_glock_operations gfs2_quota_glops = { |
536 | .go_xmote_th = gfs2_glock_xmote_th, | ||
537 | .go_drop_th = gfs2_glock_drop_th, | ||
538 | .go_demote_ok = quota_go_demote_ok, | 490 | .go_demote_ok = quota_go_demote_ok, |
539 | .go_type = LM_TYPE_QUOTA, | 491 | .go_type = LM_TYPE_QUOTA, |
540 | }; | 492 | }; |
541 | 493 | ||
542 | const struct gfs2_glock_operations gfs2_journal_glops = { | 494 | const struct gfs2_glock_operations gfs2_journal_glops = { |
543 | .go_xmote_th = gfs2_glock_xmote_th, | ||
544 | .go_drop_th = gfs2_glock_drop_th, | ||
545 | .go_type = LM_TYPE_JOURNAL, | 495 | .go_type = LM_TYPE_JOURNAL, |
546 | }; | 496 | }; |
547 | 497 | ||
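In glops.c the go_xmote_th and go_drop_th hooks stop being mandatory wrappers that had to chain into gfs2_glock_xmote_th()/gfs2_glock_drop_th(): the core now owns the transition and calls a type's hook only if it is non-NULL, with the sync work (meta_go_sync, inode_go_sync) folded into those hooks. That is why gfs2_iopen_glops, gfs2_flock_glops and the other tables shrink to little more than a go_type. A self-contained sketch of that optional-ops shape, with invented names (glock_ops, glock_xmote and friends) rather than the kernel's types:

#include <stdio.h>

struct glock;

struct glock_ops {
    void (*go_xmote_th)(struct glock *gl); /* optional pre-promote hook */
    void (*go_drop_th)(struct glock *gl);  /* optional pre-drop hook */
};

struct glock {
    const struct glock_ops *ops;
    const char *name;
};

static void meta_sync(struct glock *gl)
{
    printf("sync %s\n", gl->name);
}

static const struct glock_ops meta_ops  = { meta_sync, meta_sync };
static const struct glock_ops iopen_ops = { 0 }; /* no hooks needed */

/* Core transition: call the hook only when the type provides one. */
static void glock_xmote(struct glock *gl)
{
    if (gl->ops->go_xmote_th)
        gl->ops->go_xmote_th(gl);
    printf("promote %s\n", gl->name);
}

int main(void)
{
    struct glock m = { &meta_ops, "meta" };
    struct glock i = { &iopen_ops, "iopen" };
    glock_xmote(&m);
    glock_xmote(&i);
    return 0;
}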
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 734421edae85..12c80fd28db5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -101,17 +101,14 @@ struct gfs2_bufdata { | |||
101 | }; | 101 | }; |
102 | 102 | ||
103 | struct gfs2_glock_operations { | 103 | struct gfs2_glock_operations { |
104 | void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); | 104 | void (*go_xmote_th) (struct gfs2_glock *gl); |
105 | void (*go_xmote_bh) (struct gfs2_glock *gl); | 105 | void (*go_xmote_bh) (struct gfs2_glock *gl); |
106 | void (*go_drop_th) (struct gfs2_glock *gl); | 106 | void (*go_drop_th) (struct gfs2_glock *gl); |
107 | void (*go_drop_bh) (struct gfs2_glock *gl); | 107 | void (*go_drop_bh) (struct gfs2_glock *gl); |
108 | void (*go_sync) (struct gfs2_glock *gl); | ||
109 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 108 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
110 | int (*go_demote_ok) (struct gfs2_glock *gl); | 109 | int (*go_demote_ok) (struct gfs2_glock *gl); |
111 | int (*go_lock) (struct gfs2_holder *gh); | 110 | int (*go_lock) (struct gfs2_holder *gh); |
112 | void (*go_unlock) (struct gfs2_holder *gh); | 111 | void (*go_unlock) (struct gfs2_holder *gh); |
113 | void (*go_callback) (struct gfs2_glock *gl, unsigned int state); | ||
114 | void (*go_greedy) (struct gfs2_glock *gl); | ||
115 | const int go_type; | 112 | const int go_type; |
116 | }; | 113 | }; |
117 | 114 | ||
@@ -120,7 +117,6 @@ enum { | |||
120 | HIF_MUTEX = 0, | 117 | HIF_MUTEX = 0, |
121 | HIF_PROMOTE = 1, | 118 | HIF_PROMOTE = 1, |
122 | HIF_DEMOTE = 2, | 119 | HIF_DEMOTE = 2, |
123 | HIF_GREEDY = 3, | ||
124 | 120 | ||
125 | /* States */ | 121 | /* States */ |
126 | HIF_ALLOCED = 4, | 122 | HIF_ALLOCED = 4, |
@@ -128,6 +124,7 @@ enum { | |||
128 | HIF_HOLDER = 6, | 124 | HIF_HOLDER = 6, |
129 | HIF_FIRST = 7, | 125 | HIF_FIRST = 7, |
130 | HIF_ABORTED = 9, | 126 | HIF_ABORTED = 9, |
127 | HIF_WAIT = 10, | ||
131 | }; | 128 | }; |
132 | 129 | ||
133 | struct gfs2_holder { | 130 | struct gfs2_holder { |
@@ -140,17 +137,14 @@ struct gfs2_holder { | |||
140 | 137 | ||
141 | int gh_error; | 138 | int gh_error; |
142 | unsigned long gh_iflags; | 139 | unsigned long gh_iflags; |
143 | struct completion gh_wait; | ||
144 | unsigned long gh_ip; | 140 | unsigned long gh_ip; |
145 | }; | 141 | }; |
146 | 142 | ||
147 | enum { | 143 | enum { |
148 | GLF_LOCK = 1, | 144 | GLF_LOCK = 1, |
149 | GLF_STICKY = 2, | 145 | GLF_STICKY = 2, |
150 | GLF_PREFETCH = 3, | ||
151 | GLF_DIRTY = 5, | 146 | GLF_DIRTY = 5, |
152 | GLF_SKIP_WAITERS2 = 6, | 147 | GLF_SKIP_WAITERS2 = 6, |
153 | GLF_GREEDY = 7, | ||
154 | }; | 148 | }; |
155 | 149 | ||
156 | struct gfs2_glock { | 150 | struct gfs2_glock { |
@@ -167,7 +161,7 @@ struct gfs2_glock { | |||
167 | unsigned long gl_ip; | 161 | unsigned long gl_ip; |
168 | struct list_head gl_holders; | 162 | struct list_head gl_holders; |
169 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 163 | struct list_head gl_waiters1; /* HIF_MUTEX */ |
170 | struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ | 164 | struct list_head gl_waiters2; /* HIF_DEMOTE */ |
171 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 165 | struct list_head gl_waiters3; /* HIF_PROMOTE */ |
172 | 166 | ||
173 | const struct gfs2_glock_operations *gl_ops; | 167 | const struct gfs2_glock_operations *gl_ops; |
@@ -236,7 +230,6 @@ struct gfs2_inode { | |||
236 | 230 | ||
237 | spinlock_t i_spin; | 231 | spinlock_t i_spin; |
238 | struct rw_semaphore i_rw_mutex; | 232 | struct rw_semaphore i_rw_mutex; |
239 | unsigned int i_greedy; | ||
240 | unsigned long i_last_pfault; | 233 | unsigned long i_last_pfault; |
241 | 234 | ||
242 | struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; | 235 | struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; |
@@ -418,17 +411,12 @@ struct gfs2_tune { | |||
418 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | 411 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ |
419 | unsigned int gt_new_files_jdata; | 412 | unsigned int gt_new_files_jdata; |
420 | unsigned int gt_new_files_directio; | 413 | unsigned int gt_new_files_directio; |
421 | unsigned int gt_max_atomic_write; /* Split big writes into this size */ | ||
422 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 414 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
423 | unsigned int gt_lockdump_size; | 415 | unsigned int gt_lockdump_size; |
424 | unsigned int gt_stall_secs; /* Detects trouble! */ | 416 | unsigned int gt_stall_secs; /* Detects trouble! */ |
425 | unsigned int gt_complain_secs; | 417 | unsigned int gt_complain_secs; |
426 | unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ | 418 | unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ |
427 | unsigned int gt_entries_per_readdir; | 419 | unsigned int gt_entries_per_readdir; |
428 | unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */ | ||
429 | unsigned int gt_greedy_default; | ||
430 | unsigned int gt_greedy_quantum; | ||
431 | unsigned int gt_greedy_max; | ||
432 | unsigned int gt_statfs_quantum; | 420 | unsigned int gt_statfs_quantum; |
433 | unsigned int gt_statfs_slow; | 421 | unsigned int gt_statfs_slow; |
434 | }; | 422 | }; |
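The incore.h hunks record the same rework on the data-structure side: the gh_wait completion leaves struct gfs2_holder, a HIF_WAIT bit joins gh_iflags, and the greedy/prefetch flags and tunables disappear. Holder state is just bits in one flags word driven by atomic bit ops, which is why replacing the completion costs a single bit. The helpers below are simplified, non-atomic userspace stand-ins for the kernel's test_and_set_bit()/clear_bit(), for illustration only.

#include <stdio.h>

enum { HIF_MUTEX = 0, HIF_PROMOTE = 1, HIF_DEMOTE = 2,
       HIF_ALLOCED = 4, HIF_DEALLOC = 5, HIF_HOLDER = 6,
       HIF_FIRST = 7, HIF_ABORTED = 9, HIF_WAIT = 10 };

static int test_and_set_bit(int nr, unsigned long *word)
{
    int old = (int)((*word >> nr) & 1);
    *word |= 1UL << nr;
    return old;
}

static void clear_bit(int nr, unsigned long *word)
{
    *word &= ~(1UL << nr);
}

int main(void)
{
    unsigned long gh_iflags = 0;

    if (test_and_set_bit(HIF_WAIT, &gh_iflags))
        puts("BUG: holder was already waiting"); /* mirrors the new BUG() checks */

    clear_bit(HIF_WAIT, &gh_iflags); /* grant: the waiter may now proceed */
    printf("flags now %#lx\n", gh_iflags);
    return 0;
}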
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d122074c45e1..0d6831a40565 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -287,10 +287,8 @@ out: | |||
287 | * | 287 | * |
288 | * Returns: errno | 288 | * Returns: errno |
289 | */ | 289 | */ |
290 | |||
291 | int gfs2_change_nlink(struct gfs2_inode *ip, int diff) | 290 | int gfs2_change_nlink(struct gfs2_inode *ip, int diff) |
292 | { | 291 | { |
293 | struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info; | ||
294 | struct buffer_head *dibh; | 292 | struct buffer_head *dibh; |
295 | u32 nlink; | 293 | u32 nlink; |
296 | int error; | 294 | int error; |
@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) | |||
315 | else | 313 | else |
316 | drop_nlink(&ip->i_inode); | 314 | drop_nlink(&ip->i_inode); |
317 | 315 | ||
318 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 316 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
319 | 317 | ||
320 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 318 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
321 | gfs2_dinode_out(ip, dibh->b_data); | 319 | gfs2_dinode_out(ip, dibh->b_data); |
322 | brelse(dibh); | 320 | brelse(dibh); |
323 | mark_inode_dirty(&ip->i_inode); | 321 | mark_inode_dirty(&ip->i_inode); |
324 | 322 | ||
325 | if (ip->i_inode.i_nlink == 0) { | 323 | if (ip->i_inode.i_nlink == 0) |
326 | struct gfs2_rgrpd *rgd; | ||
327 | struct gfs2_holder ri_gh, rg_gh; | ||
328 | |||
329 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
330 | if (error) | ||
331 | goto out; | ||
332 | error = -EIO; | ||
333 | rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); | ||
334 | if (!rgd) | ||
335 | goto out_norgrp; | ||
336 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); | ||
337 | if (error) | ||
338 | goto out_norgrp; | ||
339 | |||
340 | gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ | 324 | gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ |
341 | gfs2_glock_dq_uninit(&rg_gh); | 325 | |
342 | out_norgrp: | ||
343 | gfs2_glock_dq_uninit(&ri_gh); | ||
344 | } | ||
345 | out: | ||
346 | return error; | 326 | return error; |
347 | } | 327 | } |
348 | 328 | ||
349 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) | 329 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) |
350 | { | 330 | { |
351 | struct qstr qstr; | 331 | struct qstr qstr; |
332 | struct inode *inode; | ||
352 | gfs2_str2qstr(&qstr, name); | 333 | gfs2_str2qstr(&qstr, name); |
353 | return gfs2_lookupi(dip, &qstr, 1, NULL); | 334 | inode = gfs2_lookupi(dip, &qstr, 1, NULL); |
335 | /* gfs2_lookupi has inconsistent callers: vfs | ||
336 | * related routines expect NULL for no entry found, | ||
337 | * gfs2_lookup_simple callers expect ENOENT | ||
338 | * and do not check for NULL. | ||
339 | */ | ||
340 | if (inode == NULL) | ||
341 | return ERR_PTR(-ENOENT); | ||
342 | else | ||
343 | return inode; | ||
354 | } | 344 | } |
355 | 345 | ||
356 | 346 | ||
@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) | |||
361 | * @is_root: If 1, ignore the caller's permissions | 351 | * @is_root: If 1, ignore the caller's permissions |
362 | * @i_gh: An uninitialized holder for the new inode glock | 352 | * @i_gh: An uninitialized holder for the new inode glock |
363 | * | 353 | * |
364 | * There will always be a vnode (Linux VFS inode) for the d_gh inode unless | 354 | * This can be called via the VFS filldir function when NFS is doing |
365 | @is_root is true. | 355 | a readdirplus and the inode which it's intending to stat isn't |
356 | * already in cache. In this case we must not take the directory glock | ||
357 | * again, since the readdir call will have already taken that lock. | ||
366 | * | 358 | * |
367 | * Returns: errno | 359 | * Returns: errno |
368 | */ | 360 | */ |
@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
375 | struct gfs2_holder d_gh; | 367 | struct gfs2_holder d_gh; |
376 | struct gfs2_inum_host inum; | 368 | struct gfs2_inum_host inum; |
377 | unsigned int type; | 369 | unsigned int type; |
378 | int error = 0; | 370 | int error; |
379 | struct inode *inode = NULL; | 371 | struct inode *inode = NULL; |
372 | int unlock = 0; | ||
380 | 373 | ||
381 | if (!name->len || name->len > GFS2_FNAMESIZE) | 374 | if (!name->len || name->len > GFS2_FNAMESIZE) |
382 | return ERR_PTR(-ENAMETOOLONG); | 375 | return ERR_PTR(-ENAMETOOLONG); |
@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
388 | return dir; | 381 | return dir; |
389 | } | 382 | } |
390 | 383 | ||
391 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 384 | if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { |
392 | if (error) | 385 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
393 | return ERR_PTR(error); | 386 | if (error) |
387 | return ERR_PTR(error); | ||
388 | unlock = 1; | ||
389 | } | ||
394 | 390 | ||
395 | if (!is_root) { | 391 | if (!is_root) { |
396 | error = permission(dir, MAY_EXEC, NULL); | 392 | error = permission(dir, MAY_EXEC, NULL); |
@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
405 | inode = gfs2_inode_lookup(sb, &inum, type); | 401 | inode = gfs2_inode_lookup(sb, &inum, type); |
406 | 402 | ||
407 | out: | 403 | out: |
408 | gfs2_glock_dq_uninit(&d_gh); | 404 | if (unlock) |
405 | gfs2_glock_dq_uninit(&d_gh); | ||
409 | if (error == -ENOENT) | 406 | if (error == -ENOENT) |
410 | return NULL; | 407 | return NULL; |
411 | return inode; | 408 | return inode ? inode : ERR_PTR(error); |
412 | } | 409 | } |
413 | 410 | ||
414 | static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) | 411 | static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) |
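The inode.c changes above fix two related problems: gfs2_lookupi() no longer re-takes the directory glock when the caller already holds it (the NFS readdirplus case described in the new comment), and gfs2_lookup_simple() maps the NULL "no entry" result to ERR_PTR(-ENOENT) because its callers never check for NULL. gfs2_change_nlink() also sheds its resource-group locking here, presumably leaving that to its callers. A userspace sketch of the error-pointer normalization follows; ERR_PTR, IS_ERR and PTR_ERR are re-implemented minimally for illustration.

#include <errno.h>
#include <stdio.h>

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
    return (unsigned long)p >= (unsigned long)-4095;
}

/* Models gfs2_lookupi(): NULL means "no entry found". */
static void *lookupi(const char *name)
{
    (void)name;
    return NULL;
}

/* Models gfs2_lookup_simple(): callers expect an errno pointer, never NULL. */
static void *lookup_simple(const char *name)
{
    void *inode = lookupi(name);
    if (inode == NULL)
        return ERR_PTR(-ENOENT);
    return inode;
}

int main(void)
{
    void *inode = lookup_simple("missing");
    if (IS_ERR(inode))
        printf("lookup failed: %ld\n", PTR_ERR(inode));
    return 0;
}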
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c index effe4a337c1d..e30673dd37e0 100644 --- a/fs/gfs2/lm.c +++ b/fs/gfs2/lm.c | |||
@@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | |||
104 | vprintk(fmt, args); | 104 | vprintk(fmt, args); |
105 | va_end(args); | 105 | va_end(args); |
106 | 106 | ||
107 | fs_err(sdp, "about to withdraw from the cluster\n"); | 107 | fs_err(sdp, "about to withdraw this file system\n"); |
108 | BUG_ON(sdp->sd_args.ar_debug); | 108 | BUG_ON(sdp->sd_args.ar_debug); |
109 | 109 | ||
110 | |||
111 | fs_err(sdp, "waiting for outstanding I/O\n"); | ||
112 | |||
113 | /* FIXME: suspend dm device so oustanding bio's complete | ||
114 | and all further io requests fail */ | ||
115 | |||
116 | fs_err(sdp, "telling LM to withdraw\n"); | 110 | fs_err(sdp, "telling LM to withdraw\n"); |
117 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); | 111 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); |
118 | fs_err(sdp, "withdrawn\n"); | 112 | fs_err(sdp, "withdrawn\n"); |
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 33af707a4d3f..a87c7bf3c568 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
@@ -36,7 +36,7 @@ | |||
36 | 36 | ||
37 | #define GDLM_STRNAME_BYTES 24 | 37 | #define GDLM_STRNAME_BYTES 24 |
38 | #define GDLM_LVB_SIZE 32 | 38 | #define GDLM_LVB_SIZE 32 |
39 | #define GDLM_DROP_COUNT 50000 | 39 | #define GDLM_DROP_COUNT 200000 |
40 | #define GDLM_DROP_PERIOD 60 | 40 | #define GDLM_DROP_PERIOD 60 |
41 | #define GDLM_NAME_LEN 128 | 41 | #define GDLM_NAME_LEN 128 |
42 | 42 | ||
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index 2194b1d5b5ec..a0e7eda643ed 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c | |||
@@ -11,9 +11,6 @@ | |||
11 | 11 | ||
12 | #include "lock_dlm.h" | 12 | #include "lock_dlm.h" |
13 | 13 | ||
14 | extern int gdlm_drop_count; | ||
15 | extern int gdlm_drop_period; | ||
16 | |||
17 | extern struct lm_lockops gdlm_ops; | 14 | extern struct lm_lockops gdlm_ops; |
18 | 15 | ||
19 | static int __init init_lock_dlm(void) | 16 | static int __init init_lock_dlm(void) |
@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void) | |||
40 | return error; | 37 | return error; |
41 | } | 38 | } |
42 | 39 | ||
43 | gdlm_drop_count = GDLM_DROP_COUNT; | ||
44 | gdlm_drop_period = GDLM_DROP_PERIOD; | ||
45 | |||
46 | printk(KERN_INFO | 40 | printk(KERN_INFO |
47 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); | 41 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); |
48 | return 0; | 42 | return 0; |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index cdd1694e889b..1d8faa3da8af 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -9,8 +9,6 @@ | |||
9 | 9 | ||
10 | #include "lock_dlm.h" | 10 | #include "lock_dlm.h" |
11 | 11 | ||
12 | int gdlm_drop_count; | ||
13 | int gdlm_drop_period; | ||
14 | const struct lm_lockops gdlm_ops; | 12 | const struct lm_lockops gdlm_ops; |
15 | 13 | ||
16 | 14 | ||
@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, | |||
24 | if (!ls) | 22 | if (!ls) |
25 | return NULL; | 23 | return NULL; |
26 | 24 | ||
27 | ls->drop_locks_count = gdlm_drop_count; | 25 | ls->drop_locks_count = GDLM_DROP_COUNT; |
28 | ls->drop_locks_period = gdlm_drop_period; | 26 | ls->drop_locks_period = GDLM_DROP_PERIOD; |
29 | ls->fscb = cb; | 27 | ls->fscb = cb; |
30 | ls->sdp = sdp; | 28 | ls->sdp = sdp; |
31 | ls->fsflags = flags; | 29 | ls->fsflags = flags; |
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index 29ae06f94944..4746b884662d 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) | |||
116 | return sprintf(buf, "%d\n", ls->recover_jid_status); | 116 | return sprintf(buf, "%d\n", ls->recover_jid_status); |
117 | } | 117 | } |
118 | 118 | ||
119 | static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) | ||
120 | { | ||
121 | return sprintf(buf, "%d\n", ls->drop_locks_count); | ||
122 | } | ||
123 | |||
124 | static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) | ||
125 | { | ||
126 | ls->drop_locks_count = simple_strtol(buf, NULL, 0); | ||
127 | return len; | ||
128 | } | ||
129 | |||
119 | struct gdlm_attr { | 130 | struct gdlm_attr { |
120 | struct attribute attr; | 131 | struct attribute attr; |
121 | ssize_t (*show)(struct gdlm_ls *, char *); | 132 | ssize_t (*show)(struct gdlm_ls *, char *); |
@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); | |||
135 | GDLM_ATTR(recover, 0644, recover_show, recover_store); | 146 | GDLM_ATTR(recover, 0644, recover_show, recover_store); |
136 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 147 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
137 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 148 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
149 | GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); | ||
138 | 150 | ||
139 | static struct attribute *gdlm_attrs[] = { | 151 | static struct attribute *gdlm_attrs[] = { |
140 | &gdlm_attr_proto_name.attr, | 152 | &gdlm_attr_proto_name.attr, |
@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = { | |||
147 | &gdlm_attr_recover.attr, | 159 | &gdlm_attr_recover.attr, |
148 | &gdlm_attr_recover_done.attr, | 160 | &gdlm_attr_recover_done.attr, |
149 | &gdlm_attr_recover_status.attr, | 161 | &gdlm_attr_recover_status.attr, |
162 | &gdlm_attr_drop_count.attr, | ||
150 | NULL, | 163 | NULL, |
151 | }; | 164 | }; |
152 | 165 | ||
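The new drop_count attribute slots into the existing show/store plumbing via GDLM_ATTR. That macro's definition sits outside this diff; assuming it follows the usual sysfs idiom, it plausibly expands along these lines (an assumption, not quoted from the source):

        /* Assumed shape of GDLM_ATTR; its definition is not in this diff. */
        #define GDLM_ATTR(_name, _mode, _show, _store) \
        static struct gdlm_attr gdlm_attr_##_name = \
                __ATTR(_name, _mode, _show, _store)

With the attribute registered, the per-lockspace drop limit that used to be a module-wide global becomes tunable at runtime through sysfs, seeded from GDLM_DROP_COUNT as shown in mount.c above.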
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4d7f94d8c7bd..16bb4b4561ae 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
69 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); | 69 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); |
70 | struct gfs2_trans *tr; | 70 | struct gfs2_trans *tr; |
71 | 71 | ||
72 | if (!list_empty(&bd->bd_list_tr)) | 72 | gfs2_log_lock(sdp); |
73 | if (!list_empty(&bd->bd_list_tr)) { | ||
74 | gfs2_log_unlock(sdp); | ||
73 | return; | 75 | return; |
74 | 76 | } | |
75 | tr = current->journal_info; | 77 | tr = current->journal_info; |
76 | tr->tr_touched = 1; | 78 | tr->tr_touched = 1; |
77 | tr->tr_num_buf++; | 79 | tr->tr_num_buf++; |
78 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 80 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); |
81 | gfs2_log_unlock(sdp); | ||
79 | 82 | ||
80 | if (!list_empty(&le->le_list)) | 83 | if (!list_empty(&le->le_list)) |
81 | return; | 84 | return; |
@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
84 | 87 | ||
85 | gfs2_meta_check(sdp, bd->bd_bh); | 88 | gfs2_meta_check(sdp, bd->bd_bh); |
86 | gfs2_pin(sdp, bd->bd_bh); | 89 | gfs2_pin(sdp, bd->bd_bh); |
87 | |||
88 | gfs2_log_lock(sdp); | 90 | gfs2_log_lock(sdp); |
89 | sdp->sd_log_num_buf++; | 91 | sdp->sd_log_num_buf++; |
90 | list_add(&le->le_list, &sdp->sd_log_le_buf); | 92 | list_add(&le->le_list, &sdp->sd_log_le_buf); |
@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
98 | struct list_head *head = &tr->tr_list_buf; | 100 | struct list_head *head = &tr->tr_list_buf; |
99 | struct gfs2_bufdata *bd; | 101 | struct gfs2_bufdata *bd; |
100 | 102 | ||
103 | gfs2_log_lock(sdp); | ||
101 | while (!list_empty(head)) { | 104 | while (!list_empty(head)) { |
102 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | 105 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); |
103 | list_del_init(&bd->bd_list_tr); | 106 | list_del_init(&bd->bd_list_tr); |
104 | tr->tr_num_buf--; | 107 | tr->tr_num_buf--; |
105 | } | 108 | } |
109 | gfs2_log_unlock(sdp); | ||
106 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | 110 | gfs2_assert_warn(sdp, !tr->tr_num_buf); |
107 | } | 111 | } |
108 | 112 | ||
@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
462 | struct address_space *mapping = bd->bd_bh->b_page->mapping; | 466 | struct address_space *mapping = bd->bd_bh->b_page->mapping; |
463 | struct gfs2_inode *ip = GFS2_I(mapping->host); | 467 | struct gfs2_inode *ip = GFS2_I(mapping->host); |
464 | 468 | ||
469 | gfs2_log_lock(sdp); | ||
465 | tr->tr_touched = 1; | 470 | tr->tr_touched = 1; |
466 | if (list_empty(&bd->bd_list_tr) && | 471 | if (list_empty(&bd->bd_list_tr) && |
467 | (ip->i_di.di_flags & GFS2_DIF_JDATA)) { | 472 | (ip->i_di.di_flags & GFS2_DIF_JDATA)) { |
468 | tr->tr_num_buf++; | 473 | tr->tr_num_buf++; |
469 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 474 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); |
475 | gfs2_log_unlock(sdp); | ||
470 | gfs2_pin(sdp, bd->bd_bh); | 476 | gfs2_pin(sdp, bd->bd_bh); |
471 | tr->tr_num_buf_new++; | 477 | tr->tr_num_buf_new++; |
478 | } else { | ||
479 | gfs2_log_unlock(sdp); | ||
472 | } | 480 | } |
473 | gfs2_trans_add_gl(bd->bd_gl); | 481 | gfs2_trans_add_gl(bd->bd_gl); |
474 | gfs2_log_lock(sdp); | 482 | gfs2_log_lock(sdp); |
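Each lops.c hunk fixes the same race: bd_list_tr was tested with list_empty() outside the log lock, so two contexts could both see the list as empty and both link the same bufdata, corrupting the list. The essence of the fix in buf_lo_add(), condensed from the hunk above:

        gfs2_log_lock(sdp);                     /* test and insert atomically */
        if (!list_empty(&bd->bd_list_tr)) {     /* someone queued it already */
                gfs2_log_unlock(sdp);
                return;
        }
        tr->tr_touched = 1;
        tr->tr_num_buf++;
        list_add(&bd->bd_list_tr, &tr->tr_list_buf);
        gfs2_log_unlock(sdp);                   /* drop before pinning etc. */

buf_lo_incore_commit() and databuf_lo_add() get the matching treatment, so every walker and writer of tr_list_buf now runs under the same lock.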
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d8d69a72a10d..56e33590b656 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/pagevec.h> | 16 | #include <linux/pagevec.h> |
17 | #include <linux/mpage.h> | 17 | #include <linux/mpage.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/writeback.h> | ||
19 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/lm_interface.h> | 21 | #include <linux/lm_interface.h> |
21 | 22 | ||
@@ -157,6 +158,32 @@ out_ignore: | |||
157 | } | 158 | } |
158 | 159 | ||
159 | /** | 160 | /** |
161 | * gfs2_writepages - Write a bunch of dirty pages back to disk | ||
162 | * @mapping: The mapping to write | ||
163 | * @wbc: Write-back control | ||
164 | * | ||
165 | * For journaled files and/or ordered writes this just falls back to the | ||
166 | * kernel's default writepages path for now. We will probably want to change | ||
167 | * that eventually (i.e. when we look at allocate on flush). | ||
168 | * | ||
169 | * For the data=writeback case, though, we can already ignore buffer heads | ||
170 | * and write whole extents at once. This is a big reduction in the | ||
171 | * number of I/O requests we send and the bmap calls we make. | ||
172 | */ | ||
173 | static int gfs2_writepages(struct address_space *mapping, | ||
174 | struct writeback_control *wbc) | ||
175 | { | ||
176 | struct inode *inode = mapping->host; | ||
177 | struct gfs2_inode *ip = GFS2_I(inode); | ||
178 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
179 | |||
180 | if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) | ||
181 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | ||
182 | |||
183 | return generic_writepages(mapping, wbc); | ||
184 | } | ||
185 | |||
186 | /** | ||
160 | * stuffed_readpage - Fill in a Linux page with stuffed file data | 187 | * stuffed_readpage - Fill in a Linux page with stuffed file data |
161 | * @ip: the inode | 188 | * @ip: the inode |
162 | * @page: the page | 189 | * @page: the page |
@@ -256,7 +283,7 @@ out_unlock: | |||
256 | * the page lock and the glock) and return having done no I/O. It's | 283 | * the page lock and the glock) and return having done no I/O. It's |
257 | * obviously not something we'd want to do on too regular a basis. | 284 | * obviously not something we'd want to do on too regular a basis. |
258 | * Any I/O we ignore at this time will be done via readpage later. | 285 | * Any I/O we ignore at this time will be done via readpage later. |
259 | * 2. We have to handle stuffed files here too. | 286 | * 2. We don't handle stuffed files here; we let readpage do the honours. |
260 | * 3. mpage_readpages() does most of the heavy lifting in the common case. | 287 | * 3. mpage_readpages() does most of the heavy lifting in the common case. |
261 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. | 288 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. |
262 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as | 289 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as |
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
269 | struct gfs2_inode *ip = GFS2_I(inode); | 296 | struct gfs2_inode *ip = GFS2_I(inode); |
270 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 297 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
271 | struct gfs2_holder gh; | 298 | struct gfs2_holder gh; |
272 | unsigned page_idx; | 299 | int ret = 0; |
273 | int ret; | ||
274 | int do_unlock = 0; | 300 | int do_unlock = 0; |
275 | 301 | ||
276 | if (likely(file != &gfs2_internal_file_sentinel)) { | 302 | if (likely(file != &gfs2_internal_file_sentinel)) { |
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
289 | goto out_unlock; | 315 | goto out_unlock; |
290 | } | 316 | } |
291 | skip_lock: | 317 | skip_lock: |
292 | if (gfs2_is_stuffed(ip)) { | 318 | if (!gfs2_is_stuffed(ip)) |
293 | struct pagevec lru_pvec; | ||
294 | pagevec_init(&lru_pvec, 0); | ||
295 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | ||
296 | struct page *page = list_entry(pages->prev, struct page, lru); | ||
297 | prefetchw(&page->flags); | ||
298 | list_del(&page->lru); | ||
299 | if (!add_to_page_cache(page, mapping, | ||
300 | page->index, GFP_KERNEL)) { | ||
301 | ret = stuffed_readpage(ip, page); | ||
302 | unlock_page(page); | ||
303 | if (!pagevec_add(&lru_pvec, page)) | ||
304 | __pagevec_lru_add(&lru_pvec); | ||
305 | } else { | ||
306 | page_cache_release(page); | ||
307 | } | ||
308 | } | ||
309 | pagevec_lru_add(&lru_pvec); | ||
310 | ret = 0; | ||
311 | } else { | ||
312 | /* What we really want to do .... */ | ||
313 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); | 319 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); |
314 | } | ||
315 | 320 | ||
316 | if (do_unlock) { | 321 | if (do_unlock) { |
317 | gfs2_glock_dq_m(1, &gh); | 322 | gfs2_glock_dq_m(1, &gh); |
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page, | |||
356 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); | 361 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); |
357 | error = gfs2_glock_nq_atime(&ip->i_gh); | 362 | error = gfs2_glock_nq_atime(&ip->i_gh); |
358 | if (unlikely(error)) { | 363 | if (unlikely(error)) { |
359 | if (error == GLR_TRYFAILED) | 364 | if (error == GLR_TRYFAILED) { |
365 | unlock_page(page); | ||
360 | error = AOP_TRUNCATED_PAGE; | 366 | error = AOP_TRUNCATED_PAGE; |
367 | } | ||
361 | goto out_uninit; | 368 | goto out_uninit; |
362 | } | 369 | } |
363 | 370 | ||
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) | |||
594 | return; | 601 | return; |
595 | } | 602 | } |
596 | 603 | ||
604 | /** | ||
605 | * gfs2_ok_for_dio - check that dio is valid on this file | ||
606 | * @ip: The inode | ||
607 | * @rw: READ or WRITE | ||
608 | * @offset: The offset at which we are reading or writing | ||
609 | * | ||
610 | * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) | ||
611 | * 1 (to accept the i/o request) | ||
612 | */ | ||
613 | static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | ||
614 | { | ||
615 | /* | ||
616 | * Should we return an error here? I can't see that O_DIRECT for | ||
617 | * a journaled file makes any sense. For now we'll silently fall | ||
618 | * back to buffered I/O; we do the same for stuffed | ||
619 | * files, since they are (a) small and (b) unaligned. | ||
620 | */ | ||
621 | if (gfs2_is_jdata(ip)) | ||
622 | return 0; | ||
623 | |||
624 | if (gfs2_is_stuffed(ip)) | ||
625 | return 0; | ||
626 | |||
627 | if (offset > i_size_read(&ip->i_inode)) | ||
628 | return 0; | ||
629 | return 1; | ||
630 | } | ||
631 | |||
632 | |||
633 | |||
597 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | 634 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, |
598 | const struct iovec *iov, loff_t offset, | 635 | const struct iovec *iov, loff_t offset, |
599 | unsigned long nr_segs) | 636 | unsigned long nr_segs) |
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
604 | struct gfs2_holder gh; | 641 | struct gfs2_holder gh; |
605 | int rv; | 642 | int rv; |
606 | 643 | ||
607 | if (rw == READ) | ||
608 | mutex_lock(&inode->i_mutex); | ||
609 | /* | 644 | /* |
610 | * Shared lock, even if its a write, since we do no allocation | 645 | * Deferred lock, even if it's a write, since we do no allocation |
611 | * on this path. All we need change is atime. | 646 | * on this path. All we need change is atime, and this lock mode |
647 | * ensures that other nodes have flushed their buffered read caches | ||
648 | * (i.e. their page cache entries for this inode). We do not, | ||
649 | * unfortunately have the option of only flushing a range like | ||
650 | * the VFS does. | ||
612 | */ | 651 | */ |
613 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 652 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); |
614 | rv = gfs2_glock_nq_atime(&gh); | 653 | rv = gfs2_glock_nq_atime(&gh); |
615 | if (rv) | 654 | if (rv) |
616 | goto out; | 655 | return rv; |
617 | 656 | rv = gfs2_ok_for_dio(ip, rw, offset); | |
618 | if (offset > i_size_read(inode)) | 657 | if (rv != 1) |
619 | goto out; | 658 | goto out; /* dio not valid, fall back to buffered i/o */ |
620 | 659 | ||
621 | /* | 660 | rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, |
622 | * Should we return an error here? I can't see that O_DIRECT for | 661 | iov, offset, nr_segs, |
623 | * a journaled file makes any sense. For now we'll silently fall | 662 | gfs2_get_block_direct, NULL); |
624 | * back to buffered I/O, likewise we do the same for stuffed | ||
625 | * files since they are (a) small and (b) unaligned. | ||
626 | */ | ||
627 | if (gfs2_is_jdata(ip)) | ||
628 | goto out; | ||
629 | |||
630 | if (gfs2_is_stuffed(ip)) | ||
631 | goto out; | ||
632 | |||
633 | rv = blockdev_direct_IO_own_locking(rw, iocb, inode, | ||
634 | inode->i_sb->s_bdev, | ||
635 | iov, offset, nr_segs, | ||
636 | gfs2_get_block_direct, NULL); | ||
637 | out: | 663 | out: |
638 | gfs2_glock_dq_m(1, &gh); | 664 | gfs2_glock_dq_m(1, &gh); |
639 | gfs2_holder_uninit(&gh); | 665 | gfs2_holder_uninit(&gh); |
640 | if (rw == READ) | ||
641 | mutex_unlock(&inode->i_mutex); | ||
642 | |||
643 | return rv; | 666 | return rv; |
644 | } | 667 | } |
645 | 668 | ||
@@ -763,6 +786,7 @@ out: | |||
763 | 786 | ||
764 | const struct address_space_operations gfs2_file_aops = { | 787 | const struct address_space_operations gfs2_file_aops = { |
765 | .writepage = gfs2_writepage, | 788 | .writepage = gfs2_writepage, |
789 | .writepages = gfs2_writepages, | ||
766 | .readpage = gfs2_readpage, | 790 | .readpage = gfs2_readpage, |
767 | .readpages = gfs2_readpages, | 791 | .readpages = gfs2_readpages, |
768 | .sync_page = block_sync_page, | 792 | .sync_page = block_sync_page, |
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index d355899585d8..9187eb174b43 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c | |||
@@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
46 | struct gfs2_inum_host inum; | 46 | struct gfs2_inum_host inum; |
47 | unsigned int type; | 47 | unsigned int type; |
48 | int error; | 48 | int error; |
49 | int had_lock = 0; | ||
49 | 50 | ||
50 | if (inode && is_bad_inode(inode)) | 51 | if (inode && is_bad_inode(inode)) |
51 | goto invalid; | 52 | goto invalid; |
@@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
53 | if (sdp->sd_args.ar_localcaching) | 54 | if (sdp->sd_args.ar_localcaching) |
54 | goto valid; | 55 | goto valid; |
55 | 56 | ||
56 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 57 | had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); |
57 | if (error) | 58 | if (!had_lock) { |
58 | goto fail; | 59 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
60 | if (error) | ||
61 | goto fail; | ||
62 | } | ||
59 | 63 | ||
60 | error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); | 64 | error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); |
61 | switch (error) { | 65 | switch (error) { |
@@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
82 | } | 86 | } |
83 | 87 | ||
84 | valid_gunlock: | 88 | valid_gunlock: |
85 | gfs2_glock_dq_uninit(&d_gh); | 89 | if (!had_lock) |
90 | gfs2_glock_dq_uninit(&d_gh); | ||
86 | valid: | 91 | valid: |
87 | dput(parent); | 92 | dput(parent); |
88 | return 1; | 93 | return 1; |
89 | 94 | ||
90 | invalid_gunlock: | 95 | invalid_gunlock: |
91 | gfs2_glock_dq_uninit(&d_gh); | 96 | if (!had_lock) |
97 | gfs2_glock_dq_uninit(&d_gh); | ||
92 | invalid: | 98 | invalid: |
93 | if (inode && S_ISDIR(inode->i_mode)) { | 99 | if (inode && S_ISDIR(inode->i_mode)) { |
94 | if (have_submounts(dentry)) | 100 | if (have_submounts(dentry)) |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b4e7b8775315..4855e8cca622 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "glock.h" | 22 | #include "glock.h" |
23 | #include "glops.h" | 23 | #include "glops.h" |
24 | #include "inode.h" | 24 | #include "inode.h" |
25 | #include "ops_dentry.h" | ||
25 | #include "ops_export.h" | 26 | #include "ops_export.h" |
26 | #include "rgrp.h" | 27 | #include "rgrp.h" |
27 | #include "util.h" | 28 | #include "util.h" |
@@ -112,13 +113,12 @@ struct get_name_filldir { | |||
112 | char *name; | 113 | char *name; |
113 | }; | 114 | }; |
114 | 115 | ||
115 | static int get_name_filldir(void *opaque, const char *name, unsigned int length, | 116 | static int get_name_filldir(void *opaque, const char *name, int length, |
116 | u64 offset, struct gfs2_inum_host *inum, | 117 | loff_t offset, u64 inum, unsigned int type) |
117 | unsigned int type) | ||
118 | { | 118 | { |
119 | struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; | 119 | struct get_name_filldir *gnfd = opaque; |
120 | 120 | ||
121 | if (!gfs2_inum_equal(inum, &gnfd->inum)) | 121 | if (inum != gnfd->inum.no_addr) |
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | memcpy(gnfd->name, name, length); | 124 | memcpy(gnfd->name, name, length); |
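get_name_filldir now has the shape of the kernel's generic filldir_t callback instead of a GFS2-private signature, which is what allows gfs2_dir_read() (see ops_file.c below) to pass VFS filldir functions, including NFS's readdirplus filldir, straight through. For reference, the filldir_t of this kernel generation looks approximately like this (paraphrased from include/linux/fs.h of the era, so treat it as an approximation):

        typedef int (*filldir_t)(void *buf, const char *name, int namelen,
                                 loff_t offset, u64 ino, unsigned int d_type);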
@@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) | |||
189 | return ERR_PTR(-ENOMEM); | 189 | return ERR_PTR(-ENOMEM); |
190 | } | 190 | } |
191 | 191 | ||
192 | dentry->d_op = &gfs2_dops; | ||
192 | return dentry; | 193 | return dentry; |
193 | } | 194 | } |
194 | 195 | ||
@@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) | |||
215 | } | 216 | } |
216 | 217 | ||
217 | error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, | 218 | error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, |
218 | LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, | 219 | LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
219 | &i_gh); | ||
220 | if (error) | 220 | if (error) |
221 | return ERR_PTR(error); | 221 | return ERR_PTR(error); |
222 | 222 | ||
@@ -269,6 +269,7 @@ out_inode: | |||
269 | return ERR_PTR(-ENOMEM); | 269 | return ERR_PTR(-ENOMEM); |
270 | } | 270 | } |
271 | 271 | ||
272 | dentry->d_op = &gfs2_dops; | ||
272 | return dentry; | 273 | return dentry; |
273 | 274 | ||
274 | fail_rgd: | 275 | fail_rgd: |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index faa07e4b97d0..c996aa739a05 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -43,15 +43,6 @@ | |||
43 | #include "util.h" | 43 | #include "util.h" |
44 | #include "eaops.h" | 44 | #include "eaops.h" |
45 | 45 | ||
46 | /* For regular, non-NFS */ | ||
47 | struct filldir_reg { | ||
48 | struct gfs2_sbd *fdr_sbd; | ||
49 | int fdr_prefetch; | ||
50 | |||
51 | filldir_t fdr_filldir; | ||
52 | void *fdr_opaque; | ||
53 | }; | ||
54 | |||
55 | /* | 46 | /* |
56 | * Most fields left uninitialised to catch anybody who tries to | 47 | * Most fields left uninitialised to catch anybody who tries to |
57 | * use them. f_flags set to prevent file_accessed() from touching | 48 | * use them. f_flags set to prevent file_accessed() from touching |
@@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) | |||
128 | } | 119 | } |
129 | 120 | ||
130 | /** | 121 | /** |
131 | * filldir_func - Report a directory entry to the caller of gfs2_dir_read() | ||
132 | * @opaque: opaque data used by the function | ||
133 | * @name: the name of the directory entry | ||
134 | * @length: the length of the name | ||
135 | * @offset: the entry's offset in the directory | ||
136 | * @inum: the inode number the entry points to | ||
137 | * @type: the type of inode the entry points to | ||
138 | * | ||
139 | * Returns: 0 on success, 1 if buffer full | ||
140 | */ | ||
141 | |||
142 | static int filldir_func(void *opaque, const char *name, unsigned int length, | ||
143 | u64 offset, struct gfs2_inum_host *inum, | ||
144 | unsigned int type) | ||
145 | { | ||
146 | struct filldir_reg *fdr = (struct filldir_reg *)opaque; | ||
147 | struct gfs2_sbd *sdp = fdr->fdr_sbd; | ||
148 | int error; | ||
149 | |||
150 | error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, | ||
151 | inum->no_addr, type); | ||
152 | if (error) | ||
153 | return 1; | ||
154 | |||
155 | if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { | ||
156 | gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops, | ||
157 | LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); | ||
158 | gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops, | ||
159 | LM_ST_SHARED, LM_FLAG_TRY); | ||
160 | } | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * gfs2_readdir - Read directory entries from a directory | 122 | * gfs2_readdir - Read directory entries from a directory |
167 | * @file: The directory to read from | 123 | * @file: The directory to read from |
168 | * @dirent: Buffer for dirents | 124 | * @dirent: Buffer for dirents |
@@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
175 | { | 131 | { |
176 | struct inode *dir = file->f_mapping->host; | 132 | struct inode *dir = file->f_mapping->host; |
177 | struct gfs2_inode *dip = GFS2_I(dir); | 133 | struct gfs2_inode *dip = GFS2_I(dir); |
178 | struct filldir_reg fdr; | ||
179 | struct gfs2_holder d_gh; | 134 | struct gfs2_holder d_gh; |
180 | u64 offset = file->f_pos; | 135 | u64 offset = file->f_pos; |
181 | int error; | 136 | int error; |
182 | 137 | ||
183 | fdr.fdr_sbd = GFS2_SB(dir); | ||
184 | fdr.fdr_prefetch = 1; | ||
185 | fdr.fdr_filldir = filldir; | ||
186 | fdr.fdr_opaque = dirent; | ||
187 | |||
188 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 138 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); |
189 | error = gfs2_glock_nq_atime(&d_gh); | 139 | error = gfs2_glock_nq_atime(&d_gh); |
190 | if (error) { | 140 | if (error) { |
@@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
192 | return error; | 142 | return error; |
193 | } | 143 | } |
194 | 144 | ||
195 | error = gfs2_dir_read(dir, &offset, &fdr, filldir_func); | 145 | error = gfs2_dir_read(dir, &offset, dirent, filldir); |
196 | 146 | ||
197 | gfs2_glock_dq_uninit(&d_gh); | 147 | gfs2_glock_dq_uninit(&d_gh); |
198 | 148 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 636dda4c7d38..f40a84807d75 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
264 | struct gfs2_inode *dip = GFS2_I(dir); | 264 | struct gfs2_inode *dip = GFS2_I(dir); |
265 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 265 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
266 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 266 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
267 | struct gfs2_holder ghs[2]; | 267 | struct gfs2_holder ghs[3]; |
268 | struct gfs2_rgrpd *rgd; | ||
269 | struct gfs2_holder ri_gh; | ||
268 | int error; | 270 | int error; |
269 | 271 | ||
272 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
273 | if (error) | ||
274 | return error; | ||
275 | |||
270 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 276 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
271 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 277 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
272 | 278 | ||
273 | error = gfs2_glock_nq_m(2, ghs); | 279 | rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); |
280 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | ||
281 | |||
282 | |||
283 | error = gfs2_glock_nq_m(3, ghs); | ||
274 | if (error) | 284 | if (error) |
275 | goto out; | 285 | goto out; |
276 | 286 | ||
@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
291 | out_end_trans: | 301 | out_end_trans: |
292 | gfs2_trans_end(sdp); | 302 | gfs2_trans_end(sdp); |
293 | out_gunlock: | 303 | out_gunlock: |
294 | gfs2_glock_dq_m(2, ghs); | 304 | gfs2_glock_dq_m(3, ghs); |
295 | out: | 305 | out: |
296 | gfs2_holder_uninit(ghs); | 306 | gfs2_holder_uninit(ghs); |
297 | gfs2_holder_uninit(ghs + 1); | 307 | gfs2_holder_uninit(ghs + 1); |
308 | gfs2_holder_uninit(ghs + 2); | ||
309 | gfs2_glock_dq_uninit(&ri_gh); | ||
298 | return error; | 310 | return error; |
299 | } | 311 | } |
300 | 312 | ||
@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
449 | struct gfs2_inode *dip = GFS2_I(dir); | 461 | struct gfs2_inode *dip = GFS2_I(dir); |
450 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 462 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
451 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 463 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
452 | struct gfs2_holder ghs[2]; | 464 | struct gfs2_holder ghs[3]; |
465 | struct gfs2_rgrpd *rgd; | ||
466 | struct gfs2_holder ri_gh; | ||
453 | int error; | 467 | int error; |
454 | 468 | ||
469 | |||
470 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
471 | if (error) | ||
472 | return error; | ||
455 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 473 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
456 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 474 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
457 | 475 | ||
458 | error = gfs2_glock_nq_m(2, ghs); | 476 | rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); |
477 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | ||
478 | |||
479 | error = gfs2_glock_nq_m(3, ghs); | ||
459 | if (error) | 480 | if (error) |
460 | goto out; | 481 | goto out; |
461 | 482 | ||
@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
483 | gfs2_trans_end(sdp); | 504 | gfs2_trans_end(sdp); |
484 | 505 | ||
485 | out_gunlock: | 506 | out_gunlock: |
486 | gfs2_glock_dq_m(2, ghs); | 507 | gfs2_glock_dq_m(3, ghs); |
487 | out: | 508 | out: |
488 | gfs2_holder_uninit(ghs); | 509 | gfs2_holder_uninit(ghs); |
489 | gfs2_holder_uninit(ghs + 1); | 510 | gfs2_holder_uninit(ghs + 1); |
511 | gfs2_holder_uninit(ghs + 2); | ||
512 | gfs2_glock_dq_uninit(&ri_gh); | ||
490 | return error; | 513 | return error; |
491 | } | 514 | } |
492 | 515 | ||
@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
547 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 570 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
548 | struct gfs2_inode *nip = NULL; | 571 | struct gfs2_inode *nip = NULL; |
549 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 572 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
550 | struct gfs2_holder ghs[4], r_gh; | 573 | struct gfs2_holder ghs[5], r_gh; |
574 | struct gfs2_rgrpd *nrgd; | ||
551 | unsigned int num_gh; | 575 | unsigned int num_gh; |
552 | int dir_rename = 0; | 576 | int dir_rename = 0; |
553 | int alloc_required; | 577 | int alloc_required; |
@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
587 | if (nip) { | 611 | if (nip) { |
588 | gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); | 612 | gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); |
589 | num_gh++; | 613 | num_gh++; |
614 | /* Grab the resource group lock for unlink flag twiddling. | ||
615 | * This is the case of the target file already existing, | ||
616 | * so we unlink it before doing the rename. | ||
617 | */ | ||
618 | nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); | ||
619 | if (nrgd) | ||
620 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | ||
590 | } | 621 | } |
591 | 622 | ||
592 | error = gfs2_glock_nq_m(num_gh, ghs); | 623 | error = gfs2_glock_nq_m(num_gh, ghs); |
@@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
684 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 715 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
685 | al->al_rgd->rd_ri.ri_length + | 716 | al->al_rgd->rd_ri.ri_length + |
686 | 4 * RES_DINODE + 4 * RES_LEAF + | 717 | 4 * RES_DINODE + 4 * RES_LEAF + |
687 | RES_STATFS + RES_QUOTA, 0); | 718 | RES_STATFS + RES_QUOTA + 4, 0); |
688 | if (error) | 719 | if (error) |
689 | goto out_ipreserv; | 720 | goto out_ipreserv; |
690 | } else { | 721 | } else { |
691 | error = gfs2_trans_begin(sdp, 4 * RES_DINODE + | 722 | error = gfs2_trans_begin(sdp, 4 * RES_DINODE + |
692 | 5 * RES_LEAF, 0); | 723 | 5 * RES_LEAF + 4, 0); |
693 | if (error) | 724 | if (error) |
694 | goto out_gunlock; | 725 | goto out_gunlock; |
695 | } | 726 | } |
@@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
728 | error = gfs2_meta_inode_buffer(ip, &dibh); | 759 | error = gfs2_meta_inode_buffer(ip, &dibh); |
729 | if (error) | 760 | if (error) |
730 | goto out_end_trans; | 761 | goto out_end_trans; |
731 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 762 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
732 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 763 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
733 | gfs2_dinode_out(ip, dibh->b_data); | 764 | gfs2_dinode_out(ip, dibh->b_data); |
734 | brelse(dibh); | 765 | brelse(dibh); |
@@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1018 | } | 1049 | } |
1019 | 1050 | ||
1020 | generic_fillattr(inode, stat); | 1051 | generic_fillattr(inode, stat); |
1021 | if (unlock); | 1052 | if (unlock) |
1022 | gfs2_glock_dq_uninit(&gh); | 1053 | gfs2_glock_dq_uninit(&gh); |
1023 | 1054 | ||
1024 | return 0; | 1055 | return 0; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 7685b46f934b..47369d011214 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb) | |||
173 | struct gfs2_sbd *sdp = sb->s_fs_info; | 173 | struct gfs2_sbd *sdp = sb->s_fs_info; |
174 | int error; | 174 | int error; |
175 | 175 | ||
176 | if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
177 | return; | ||
178 | |||
176 | for (;;) { | 179 | for (;;) { |
177 | error = gfs2_freeze_fs(sdp); | 180 | error = gfs2_freeze_fs(sdp); |
178 | if (!error) | 181 | if (!error) |
@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode) | |||
426 | } | 429 | } |
427 | 430 | ||
428 | error = gfs2_dinode_dealloc(ip); | 431 | error = gfs2_dinode_dealloc(ip); |
432 | /* | ||
433 | * Must do this before unlock to avoid trying to write back | ||
434 | * potentially dirty data now that inode no longer exists | ||
435 | * on disk. | ||
436 | */ | ||
437 | truncate_inode_pages(&inode->i_data, 0); | ||
429 | 438 | ||
430 | out_unlock: | 439 | out_unlock: |
431 | gfs2_glock_dq(&ip->i_iopen_gh); | 440 | gfs2_glock_dq(&ip->i_iopen_gh); |
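Ordering is the point of the delete_inode change: once gfs2_dinode_dealloc() returns, the blocks backing any cached pages are free for reuse, so those pages must be thrown away before the glocks drop and writeback gets a chance to run. Condensed from the hunk above:

        error = gfs2_dinode_dealloc(ip);
        /* The inode no longer exists on disk; discard cached (possibly
         * dirty) pages now, or later writeback would hit freed blocks. */
        truncate_inode_pages(&inode->i_data, 0);
out_unlock:
        gfs2_glock_dq(&ip->i_iopen_gh);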
@@ -443,14 +452,12 @@ out: | |||
443 | 452 | ||
444 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 453 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
445 | { | 454 | { |
446 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
447 | struct gfs2_inode *ip; | 455 | struct gfs2_inode *ip; |
448 | 456 | ||
449 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); | 457 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); |
450 | if (ip) { | 458 | if (ip) { |
451 | ip->i_flags = 0; | 459 | ip->i_flags = 0; |
452 | ip->i_gl = NULL; | 460 | ip->i_gl = NULL; |
453 | ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); | ||
454 | ip->i_last_pfault = jiffies; | 461 | ip->i_last_pfault = jiffies; |
455 | } | 462 | } |
456 | return &ip->i_inode; | 463 | return &ip->i_inode; |
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 45a5f11fc39a..14b380fb0602 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c | |||
@@ -28,34 +28,13 @@ | |||
28 | #include "trans.h" | 28 | #include "trans.h" |
29 | #include "util.h" | 29 | #include "util.h" |
30 | 30 | ||
31 | static void pfault_be_greedy(struct gfs2_inode *ip) | ||
32 | { | ||
33 | unsigned int time; | ||
34 | |||
35 | spin_lock(&ip->i_spin); | ||
36 | time = ip->i_greedy; | ||
37 | ip->i_last_pfault = jiffies; | ||
38 | spin_unlock(&ip->i_spin); | ||
39 | |||
40 | igrab(&ip->i_inode); | ||
41 | if (gfs2_glock_be_greedy(ip->i_gl, time)) | ||
42 | iput(&ip->i_inode); | ||
43 | } | ||
44 | |||
45 | static struct page *gfs2_private_nopage(struct vm_area_struct *area, | 31 | static struct page *gfs2_private_nopage(struct vm_area_struct *area, |
46 | unsigned long address, int *type) | 32 | unsigned long address, int *type) |
47 | { | 33 | { |
48 | struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); | 34 | struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); |
49 | struct page *result; | ||
50 | 35 | ||
51 | set_bit(GIF_PAGED, &ip->i_flags); | 36 | set_bit(GIF_PAGED, &ip->i_flags); |
52 | 37 | return filemap_nopage(area, address, type); | |
53 | result = filemap_nopage(area, address, type); | ||
54 | |||
55 | if (result && result != NOPAGE_OOM) | ||
56 | pfault_be_greedy(ip); | ||
57 | |||
58 | return result; | ||
59 | } | 38 | } |
60 | 39 | ||
61 | static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) | 40 | static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) |
@@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, | |||
167 | set_page_dirty(result); | 146 | set_page_dirty(result); |
168 | } | 147 | } |
169 | 148 | ||
170 | pfault_be_greedy(ip); | ||
171 | out: | 149 | out: |
172 | gfs2_glock_dq_uninit(&i_gh); | 150 | gfs2_glock_dq_uninit(&i_gh); |
173 | 151 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 43a24f2e5905..70f424fcf1cd 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
71 | gt->gt_atime_quantum = 3600; | 71 | gt->gt_atime_quantum = 3600; |
72 | gt->gt_new_files_jdata = 0; | 72 | gt->gt_new_files_jdata = 0; |
73 | gt->gt_new_files_directio = 0; | 73 | gt->gt_new_files_directio = 0; |
74 | gt->gt_max_atomic_write = 4 << 20; | ||
75 | gt->gt_max_readahead = 1 << 18; | 74 | gt->gt_max_readahead = 1 << 18; |
76 | gt->gt_lockdump_size = 131072; | 75 | gt->gt_lockdump_size = 131072; |
77 | gt->gt_stall_secs = 600; | 76 | gt->gt_stall_secs = 600; |
78 | gt->gt_complain_secs = 10; | 77 | gt->gt_complain_secs = 10; |
79 | gt->gt_reclaim_limit = 5000; | 78 | gt->gt_reclaim_limit = 5000; |
80 | gt->gt_entries_per_readdir = 32; | 79 | gt->gt_entries_per_readdir = 32; |
81 | gt->gt_prefetch_secs = 10; | ||
82 | gt->gt_greedy_default = HZ / 10; | ||
83 | gt->gt_greedy_quantum = HZ / 40; | ||
84 | gt->gt_greedy_max = HZ / 4; | ||
85 | gt->gt_statfs_quantum = 30; | 80 | gt->gt_statfs_quantum = 30; |
86 | gt->gt_statfs_slow = 0; | 81 | gt->gt_statfs_slow = 0; |
87 | } | 82 | } |
@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
359 | mutex_lock(&sdp->sd_jindex_mutex); | 354 | mutex_lock(&sdp->sd_jindex_mutex); |
360 | 355 | ||
361 | for (;;) { | 356 | for (;;) { |
362 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, | 357 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); |
363 | GL_LOCAL_EXCL, ji_gh); | ||
364 | if (error) | 358 | if (error) |
365 | break; | 359 | break; |
366 | 360 | ||
@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
529 | struct gfs2_log_header_host head; | 523 | struct gfs2_log_header_host head; |
530 | int error; | 524 | int error; |
531 | 525 | ||
532 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, | 526 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); |
533 | GL_LOCAL_EXCL, &t_gh); | ||
534 | if (error) | 527 | if (error) |
535 | return error; | 528 | return error; |
536 | 529 | ||
@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | |||
583 | gfs2_quota_sync(sdp); | 576 | gfs2_quota_sync(sdp); |
584 | gfs2_statfs_sync(sdp); | 577 | gfs2_statfs_sync(sdp); |
585 | 578 | ||
586 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, | 579 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, |
587 | GL_LOCAL_EXCL | GL_NOCACHE, | 580 | &t_gh); |
588 | &t_gh); | ||
589 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | 581 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) |
590 | return error; | 582 | return error; |
591 | 583 | ||
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 983eaf1e06be..d01f9f0fda26 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0); | |||
436 | TUNE_ATTR(max_readahead, 0); | 436 | TUNE_ATTR(max_readahead, 0); |
437 | TUNE_ATTR(complain_secs, 0); | 437 | TUNE_ATTR(complain_secs, 0); |
438 | TUNE_ATTR(reclaim_limit, 0); | 438 | TUNE_ATTR(reclaim_limit, 0); |
439 | TUNE_ATTR(prefetch_secs, 0); | ||
440 | TUNE_ATTR(statfs_slow, 0); | 439 | TUNE_ATTR(statfs_slow, 0); |
441 | TUNE_ATTR(new_files_jdata, 0); | 440 | TUNE_ATTR(new_files_jdata, 0); |
442 | TUNE_ATTR(new_files_directio, 0); | 441 | TUNE_ATTR(new_files_directio, 0); |
443 | TUNE_ATTR(quota_simul_sync, 1); | 442 | TUNE_ATTR(quota_simul_sync, 1); |
444 | TUNE_ATTR(quota_cache_secs, 1); | 443 | TUNE_ATTR(quota_cache_secs, 1); |
445 | TUNE_ATTR(max_atomic_write, 1); | ||
446 | TUNE_ATTR(stall_secs, 1); | 444 | TUNE_ATTR(stall_secs, 1); |
447 | TUNE_ATTR(greedy_default, 1); | ||
448 | TUNE_ATTR(greedy_quantum, 1); | ||
449 | TUNE_ATTR(greedy_max, 1); | ||
450 | TUNE_ATTR(statfs_quantum, 1); | 445 | TUNE_ATTR(statfs_quantum, 1); |
451 | TUNE_ATTR_DAEMON(scand_secs, scand_process); | 446 | TUNE_ATTR_DAEMON(scand_secs, scand_process); |
452 | TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); | 447 | TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); |
@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = { | |||
465 | &tune_attr_max_readahead.attr, | 460 | &tune_attr_max_readahead.attr, |
466 | &tune_attr_complain_secs.attr, | 461 | &tune_attr_complain_secs.attr, |
467 | &tune_attr_reclaim_limit.attr, | 462 | &tune_attr_reclaim_limit.attr, |
468 | &tune_attr_prefetch_secs.attr, | ||
469 | &tune_attr_statfs_slow.attr, | 463 | &tune_attr_statfs_slow.attr, |
470 | &tune_attr_quota_simul_sync.attr, | 464 | &tune_attr_quota_simul_sync.attr, |
471 | &tune_attr_quota_cache_secs.attr, | 465 | &tune_attr_quota_cache_secs.attr, |
472 | &tune_attr_max_atomic_write.attr, | ||
473 | &tune_attr_stall_secs.attr, | 466 | &tune_attr_stall_secs.attr, |
474 | &tune_attr_greedy_default.attr, | ||
475 | &tune_attr_greedy_quantum.attr, | ||
476 | &tune_attr_greedy_max.attr, | ||
477 | &tune_attr_statfs_quantum.attr, | 467 | &tune_attr_statfs_quantum.attr, |
478 | &tune_attr_scand_secs.attr, | 468 | &tune_attr_scand_secs.attr, |
479 | &tune_attr_recoverd_secs.attr, | 469 | &tune_attr_recoverd_secs.attr, |