diff options
Diffstat (limited to 'fs')
80 files changed, 2508 insertions, 1714 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 8cd2417a14db..5e8e9d9ccb33 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -426,7 +426,6 @@ config OCFS2_FS | |||
426 | select CONFIGFS_FS | 426 | select CONFIGFS_FS |
427 | select JBD | 427 | select JBD |
428 | select CRC32 | 428 | select CRC32 |
429 | select INET | ||
430 | help | 429 | help |
431 | OCFS2 is a general purpose extent based shared disk cluster file | 430 | OCFS2 is a general purpose extent based shared disk cluster file |
432 | system with many similarities to ext3. It supports 64 bit inode | 431 | system with many similarities to ext3. It supports 64 bit inode |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index d04d2f7448d9..85e3850bf2c9 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -1,6 +1,8 @@ | |||
1 | Version 1.47 | 1 | Version 1.47 |
2 | ------------ | 2 | ------------ |
3 | Fix oops in list_del during mount caused by unaligned string. | 3 | Fix oops in list_del during mount caused by unaligned string. |
4 | Seek to SEEK_END forces check for update of file size for non-cached | ||
5 | files. | ||
4 | 6 | ||
5 | Version 1.46 | 7 | Version 1.46 |
6 | ------------ | 8 | ------------ |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 10c90294cd18..93ef09971d2f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -511,7 +511,15 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
511 | { | 511 | { |
512 | /* origin == SEEK_END => we must revalidate the cached file length */ | 512 | /* origin == SEEK_END => we must revalidate the cached file length */ |
513 | if (origin == SEEK_END) { | 513 | if (origin == SEEK_END) { |
514 | int retval = cifs_revalidate(file->f_path.dentry); | 514 | int retval; |
515 | |||
516 | /* some applications poll for the file length in this strange | ||
517 | way so we must seek to end on non-oplocked files by | ||
518 | setting the revalidate time to zero */ | ||
519 | if(file->f_path.dentry->d_inode) | ||
520 | CIFS_I(file->f_path.dentry->d_inode)->time = 0; | ||
521 | |||
522 | retval = cifs_revalidate(file->f_path.dentry); | ||
515 | if (retval < 0) | 523 | if (retval < 0) |
516 | return (loff_t)retval; | 524 | return (loff_t)retval; |
517 | } | 525 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8a49b2e77d37..e9dcf5ee29a2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1146,7 +1146,7 @@ static int cifs_writepages(struct address_space *mapping, | |||
1146 | pgoff_t end; | 1146 | pgoff_t end; |
1147 | pgoff_t index; | 1147 | pgoff_t index; |
1148 | int range_whole = 0; | 1148 | int range_whole = 0; |
1149 | struct kvec iov[32]; | 1149 | struct kvec * iov; |
1150 | int len; | 1150 | int len; |
1151 | int n_iov = 0; | 1151 | int n_iov = 0; |
1152 | pgoff_t next; | 1152 | pgoff_t next; |
@@ -1171,15 +1171,21 @@ static int cifs_writepages(struct address_space *mapping, | |||
1171 | if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) | 1171 | if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) |
1172 | if(cifs_sb->tcon->ses->server->secMode & | 1172 | if(cifs_sb->tcon->ses->server->secMode & |
1173 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 1173 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) |
1174 | if(!experimEnabled) | 1174 | if(!experimEnabled) |
1175 | return generic_writepages(mapping, wbc); | 1175 | return generic_writepages(mapping, wbc); |
1176 | 1176 | ||
1177 | iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL); | ||
1178 | if(iov == NULL) | ||
1179 | return generic_writepages(mapping, wbc); | ||
1180 | |||
1181 | |||
1177 | /* | 1182 | /* |
1178 | * BB: Is this meaningful for a non-block-device file system? | 1183 | * BB: Is this meaningful for a non-block-device file system? |
1179 | * If it is, we should test it again after we do I/O | 1184 | * If it is, we should test it again after we do I/O |
1180 | */ | 1185 | */ |
1181 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 1186 | if (wbc->nonblocking && bdi_write_congested(bdi)) { |
1182 | wbc->encountered_congestion = 1; | 1187 | wbc->encountered_congestion = 1; |
1188 | kfree(iov); | ||
1183 | return 0; | 1189 | return 0; |
1184 | } | 1190 | } |
1185 | 1191 | ||
@@ -1345,7 +1351,7 @@ retry: | |||
1345 | mapping->writeback_index = index; | 1351 | mapping->writeback_index = index; |
1346 | 1352 | ||
1347 | FreeXid(xid); | 1353 | FreeXid(xid); |
1348 | 1354 | kfree(iov); | |
1349 | return rc; | 1355 | return rc; |
1350 | } | 1356 | } |
1351 | 1357 | ||
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 99dfb5337e31..782940be550f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -156,9 +156,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, | |||
156 | tmp_inode->i_atime = cnvrtDosUnixTm( | 156 | tmp_inode->i_atime = cnvrtDosUnixTm( |
157 | le16_to_cpu(pfindData->LastAccessDate), | 157 | le16_to_cpu(pfindData->LastAccessDate), |
158 | le16_to_cpu(pfindData->LastAccessTime)); | 158 | le16_to_cpu(pfindData->LastAccessTime)); |
159 | tmp_inode->i_ctime = cnvrtDosUnixTm( | 159 | tmp_inode->i_ctime = cnvrtDosUnixTm( |
160 | le16_to_cpu(pfindData->LastWriteDate), | 160 | le16_to_cpu(pfindData->LastWriteDate), |
161 | le16_to_cpu(pfindData->LastWriteTime)); | 161 | le16_to_cpu(pfindData->LastWriteTime)); |
162 | AdjustForTZ(cifs_sb->tcon, tmp_inode); | 162 | AdjustForTZ(cifs_sb->tcon, tmp_inode); |
163 | attr = le16_to_cpu(pfindData->Attributes); | 163 | attr = le16_to_cpu(pfindData->Attributes); |
164 | allocation_size = le32_to_cpu(pfindData->AllocationSize); | 164 | allocation_size = le32_to_cpu(pfindData->AllocationSize); |
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index 7a1b2b961ec8..1b1daf63f062 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c | |||
@@ -196,7 +196,7 @@ dohash(char *out, char *in, char *key, int forw) | |||
196 | char c[28]; | 196 | char c[28]; |
197 | char d[28]; | 197 | char d[28]; |
198 | char *cd; | 198 | char *cd; |
199 | char ki[16][48]; | 199 | char (*ki)[48]; |
200 | char *pd1; | 200 | char *pd1; |
201 | char l[32], r[32]; | 201 | char l[32], r[32]; |
202 | char *rl; | 202 | char *rl; |
@@ -206,6 +206,12 @@ dohash(char *out, char *in, char *key, int forw) | |||
206 | if(pk1 == NULL) | 206 | if(pk1 == NULL) |
207 | return; | 207 | return; |
208 | 208 | ||
209 | ki = kmalloc(16*48, GFP_KERNEL); | ||
210 | if(ki == NULL) { | ||
211 | kfree(pk1); | ||
212 | return; | ||
213 | } | ||
214 | |||
209 | cd = pk1 + 56; | 215 | cd = pk1 + 56; |
210 | pd1= cd + 56; | 216 | pd1= cd + 56; |
211 | rl = pd1 + 64; | 217 | rl = pd1 + 64; |
@@ -243,6 +249,7 @@ dohash(char *out, char *in, char *key, int forw) | |||
243 | er = kmalloc(48+48+32+32+32, GFP_KERNEL); | 249 | er = kmalloc(48+48+32+32+32, GFP_KERNEL); |
244 | if(er == NULL) { | 250 | if(er == NULL) { |
245 | kfree(pk1); | 251 | kfree(pk1); |
252 | kfree(ki); | ||
246 | return; | 253 | return; |
247 | } | 254 | } |
248 | erk = er+48; | 255 | erk = er+48; |
@@ -290,6 +297,7 @@ dohash(char *out, char *in, char *key, int forw) | |||
290 | 297 | ||
291 | permute(out, rl, perm6, 64); | 298 | permute(out, rl, perm6, 64); |
292 | kfree(pk1); | 299 | kfree(pk1); |
300 | kfree(ki); | ||
293 | } | 301 | } |
294 | 302 | ||
295 | static void | 303 | static void |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2a7cb086e80c..d98be5e01328 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -162,14 +162,17 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size | |||
162 | int error; | 162 | int error; |
163 | 163 | ||
164 | if (!buffer->page) | 164 | if (!buffer->page) |
165 | buffer->page = (char *)get_zeroed_page(GFP_KERNEL); | 165 | buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0); |
166 | if (!buffer->page) | 166 | if (!buffer->page) |
167 | return -ENOMEM; | 167 | return -ENOMEM; |
168 | 168 | ||
169 | if (count > PAGE_SIZE) | 169 | if (count >= PAGE_SIZE) |
170 | count = PAGE_SIZE; | 170 | count = PAGE_SIZE - 1; |
171 | error = copy_from_user(buffer->page,buf,count); | 171 | error = copy_from_user(buffer->page,buf,count); |
172 | buffer->needs_read_fill = 1; | 172 | buffer->needs_read_fill = 1; |
173 | /* if buf is assumed to contain a string, terminate it by \0, | ||
174 | * so e.g. sscanf() can scan the string easily */ | ||
175 | buffer->page[count] = 0; | ||
173 | return error ? -EFAULT : count; | 176 | return error ? -EFAULT : count; |
174 | } | 177 | } |
175 | 178 | ||
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index b5654a284fef..6fa7b0d5c043 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig | |||
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager" | |||
3 | 3 | ||
4 | config DLM | 4 | config DLM |
5 | tristate "Distributed Lock Manager (DLM)" | 5 | tristate "Distributed Lock Manager (DLM)" |
6 | depends on IPV6 || IPV6=n | 6 | depends on SYSFS && (IPV6 || IPV6=n) |
7 | select CONFIGFS_FS | 7 | select CONFIGFS_FS |
8 | select IP_SCTP if DLM_SCTP | 8 | select IP_SCTP if DLM_SCTP |
9 | help | 9 | help |
10 | A general purpose distributed lock manager for kernel or userspace | 10 | A general purpose distributed lock manager for kernel or userspace |
11 | applications. | 11 | applications. |
12 | 12 | ||
13 | choice | 13 | choice |
14 | prompt "Select DLM communications protocol" | 14 | prompt "Select DLM communications protocol" |
15 | depends on DLM | 15 | depends on DLM |
16 | default DLM_TCP | 16 | default DLM_TCP |
17 | help | 17 | help |
18 | The DLM Can use TCP or SCTP for it's network communications. | 18 | The DLM Can use TCP or SCTP for it's network communications. |
19 | SCTP supports multi-homed operations whereas TCP doesn't. | 19 | SCTP supports multi-homed operations whereas TCP doesn't. |
20 | However, SCTP seems to have stability problems at the moment. | 20 | However, SCTP seems to have stability problems at the moment. |
21 | 21 | ||
22 | config DLM_TCP | 22 | config DLM_TCP |
23 | bool "TCP/IP" | 23 | bool "TCP/IP" |
@@ -31,8 +31,8 @@ config DLM_DEBUG | |||
31 | bool "DLM debugging" | 31 | bool "DLM debugging" |
32 | depends on DLM | 32 | depends on DLM |
33 | help | 33 | help |
34 | Under the debugfs mount point, the name of each lockspace will | 34 | Under the debugfs mount point, the name of each lockspace will |
35 | appear as a file in the "dlm" directory. The output is the | 35 | appear as a file in the "dlm" directory. The output is the |
36 | list of resource and locks the local node knows about. | 36 | list of resource and locks the local node knows about. |
37 | 37 | ||
38 | endmenu | 38 | endmenu |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 88553054bbfa..8665c88e5af2 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *); | |||
54 | static void drop_node(struct config_group *, struct config_item *); | 54 | static void drop_node(struct config_group *, struct config_item *); |
55 | static void release_node(struct config_item *); | 55 | static void release_node(struct config_item *); |
56 | 56 | ||
57 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, | ||
58 | char *buf); | ||
59 | static ssize_t store_cluster(struct config_item *i, | ||
60 | struct configfs_attribute *a, | ||
61 | const char *buf, size_t len); | ||
57 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | 62 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, |
58 | char *buf); | 63 | char *buf); |
59 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | 64 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, |
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); | |||
73 | static ssize_t node_weight_read(struct node *nd, char *buf); | 78 | static ssize_t node_weight_read(struct node *nd, char *buf); |
74 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); | 79 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); |
75 | 80 | ||
81 | struct cluster { | ||
82 | struct config_group group; | ||
83 | unsigned int cl_tcp_port; | ||
84 | unsigned int cl_buffer_size; | ||
85 | unsigned int cl_rsbtbl_size; | ||
86 | unsigned int cl_lkbtbl_size; | ||
87 | unsigned int cl_dirtbl_size; | ||
88 | unsigned int cl_recover_timer; | ||
89 | unsigned int cl_toss_secs; | ||
90 | unsigned int cl_scan_secs; | ||
91 | unsigned int cl_log_debug; | ||
92 | }; | ||
93 | |||
94 | enum { | ||
95 | CLUSTER_ATTR_TCP_PORT = 0, | ||
96 | CLUSTER_ATTR_BUFFER_SIZE, | ||
97 | CLUSTER_ATTR_RSBTBL_SIZE, | ||
98 | CLUSTER_ATTR_LKBTBL_SIZE, | ||
99 | CLUSTER_ATTR_DIRTBL_SIZE, | ||
100 | CLUSTER_ATTR_RECOVER_TIMER, | ||
101 | CLUSTER_ATTR_TOSS_SECS, | ||
102 | CLUSTER_ATTR_SCAN_SECS, | ||
103 | CLUSTER_ATTR_LOG_DEBUG, | ||
104 | }; | ||
105 | |||
106 | struct cluster_attribute { | ||
107 | struct configfs_attribute attr; | ||
108 | ssize_t (*show)(struct cluster *, char *); | ||
109 | ssize_t (*store)(struct cluster *, const char *, size_t); | ||
110 | }; | ||
111 | |||
112 | static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, | ||
113 | unsigned int *info_field, int check_zero, | ||
114 | const char *buf, size_t len) | ||
115 | { | ||
116 | unsigned int x; | ||
117 | |||
118 | if (!capable(CAP_SYS_ADMIN)) | ||
119 | return -EACCES; | ||
120 | |||
121 | x = simple_strtoul(buf, NULL, 0); | ||
122 | |||
123 | if (check_zero && !x) | ||
124 | return -EINVAL; | ||
125 | |||
126 | *cl_field = x; | ||
127 | *info_field = x; | ||
128 | |||
129 | return len; | ||
130 | } | ||
131 | |||
132 | #define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \ | ||
133 | .attr = { .ca_name = __stringify(_name), \ | ||
134 | .ca_mode = _mode, \ | ||
135 | .ca_owner = THIS_MODULE }, \ | ||
136 | .show = _read, \ | ||
137 | .store = _write, \ | ||
138 | } | ||
139 | |||
140 | #define CLUSTER_ATTR(name, check_zero) \ | ||
141 | static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ | ||
142 | { \ | ||
143 | return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ | ||
144 | check_zero, buf, len); \ | ||
145 | } \ | ||
146 | static ssize_t name##_read(struct cluster *cl, char *buf) \ | ||
147 | { \ | ||
148 | return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ | ||
149 | } \ | ||
150 | static struct cluster_attribute cluster_attr_##name = \ | ||
151 | __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) | ||
152 | |||
153 | CLUSTER_ATTR(tcp_port, 1); | ||
154 | CLUSTER_ATTR(buffer_size, 1); | ||
155 | CLUSTER_ATTR(rsbtbl_size, 1); | ||
156 | CLUSTER_ATTR(lkbtbl_size, 1); | ||
157 | CLUSTER_ATTR(dirtbl_size, 1); | ||
158 | CLUSTER_ATTR(recover_timer, 1); | ||
159 | CLUSTER_ATTR(toss_secs, 1); | ||
160 | CLUSTER_ATTR(scan_secs, 1); | ||
161 | CLUSTER_ATTR(log_debug, 0); | ||
162 | |||
163 | static struct configfs_attribute *cluster_attrs[] = { | ||
164 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, | ||
165 | [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, | ||
166 | [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, | ||
167 | [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr, | ||
168 | [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, | ||
169 | [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, | ||
170 | [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, | ||
171 | [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, | ||
172 | [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, | ||
173 | NULL, | ||
174 | }; | ||
175 | |||
76 | enum { | 176 | enum { |
77 | COMM_ATTR_NODEID = 0, | 177 | COMM_ATTR_NODEID = 0, |
78 | COMM_ATTR_LOCAL, | 178 | COMM_ATTR_LOCAL, |
@@ -152,10 +252,6 @@ struct clusters { | |||
152 | struct configfs_subsystem subsys; | 252 | struct configfs_subsystem subsys; |
153 | }; | 253 | }; |
154 | 254 | ||
155 | struct cluster { | ||
156 | struct config_group group; | ||
157 | }; | ||
158 | |||
159 | struct spaces { | 255 | struct spaces { |
160 | struct config_group ss_group; | 256 | struct config_group ss_group; |
161 | }; | 257 | }; |
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = { | |||
197 | 293 | ||
198 | static struct configfs_item_operations cluster_ops = { | 294 | static struct configfs_item_operations cluster_ops = { |
199 | .release = release_cluster, | 295 | .release = release_cluster, |
296 | .show_attribute = show_cluster, | ||
297 | .store_attribute = store_cluster, | ||
200 | }; | 298 | }; |
201 | 299 | ||
202 | static struct configfs_group_operations spaces_ops = { | 300 | static struct configfs_group_operations spaces_ops = { |
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = { | |||
237 | 335 | ||
238 | static struct config_item_type cluster_type = { | 336 | static struct config_item_type cluster_type = { |
239 | .ct_item_ops = &cluster_ops, | 337 | .ct_item_ops = &cluster_ops, |
338 | .ct_attrs = cluster_attrs, | ||
240 | .ct_owner = THIS_MODULE, | 339 | .ct_owner = THIS_MODULE, |
241 | }; | 340 | }; |
242 | 341 | ||
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g, | |||
317 | cl->group.default_groups[1] = &cms->cs_group; | 416 | cl->group.default_groups[1] = &cms->cs_group; |
318 | cl->group.default_groups[2] = NULL; | 417 | cl->group.default_groups[2] = NULL; |
319 | 418 | ||
419 | cl->cl_tcp_port = dlm_config.ci_tcp_port; | ||
420 | cl->cl_buffer_size = dlm_config.ci_buffer_size; | ||
421 | cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; | ||
422 | cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size; | ||
423 | cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; | ||
424 | cl->cl_recover_timer = dlm_config.ci_recover_timer; | ||
425 | cl->cl_toss_secs = dlm_config.ci_toss_secs; | ||
426 | cl->cl_scan_secs = dlm_config.ci_scan_secs; | ||
427 | cl->cl_log_debug = dlm_config.ci_log_debug; | ||
428 | |||
320 | space_list = &sps->ss_group; | 429 | space_list = &sps->ss_group; |
321 | comm_list = &cms->cs_group; | 430 | comm_list = &cms->cs_group; |
322 | return &cl->group; | 431 | return &cl->group; |
@@ -509,6 +618,25 @@ void dlm_config_exit(void) | |||
509 | * Functions for user space to read/write attributes | 618 | * Functions for user space to read/write attributes |
510 | */ | 619 | */ |
511 | 620 | ||
621 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, | ||
622 | char *buf) | ||
623 | { | ||
624 | struct cluster *cl = to_cluster(i); | ||
625 | struct cluster_attribute *cla = | ||
626 | container_of(a, struct cluster_attribute, attr); | ||
627 | return cla->show ? cla->show(cl, buf) : 0; | ||
628 | } | ||
629 | |||
630 | static ssize_t store_cluster(struct config_item *i, | ||
631 | struct configfs_attribute *a, | ||
632 | const char *buf, size_t len) | ||
633 | { | ||
634 | struct cluster *cl = to_cluster(i); | ||
635 | struct cluster_attribute *cla = | ||
636 | container_of(a, struct cluster_attribute, attr); | ||
637 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; | ||
638 | } | ||
639 | |||
512 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | 640 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, |
513 | char *buf) | 641 | char *buf) |
514 | { | 642 | { |
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) | |||
775 | #define DEFAULT_RECOVER_TIMER 5 | 903 | #define DEFAULT_RECOVER_TIMER 5 |
776 | #define DEFAULT_TOSS_SECS 10 | 904 | #define DEFAULT_TOSS_SECS 10 |
777 | #define DEFAULT_SCAN_SECS 5 | 905 | #define DEFAULT_SCAN_SECS 5 |
906 | #define DEFAULT_LOG_DEBUG 0 | ||
778 | 907 | ||
779 | struct dlm_config_info dlm_config = { | 908 | struct dlm_config_info dlm_config = { |
780 | .tcp_port = DEFAULT_TCP_PORT, | 909 | .ci_tcp_port = DEFAULT_TCP_PORT, |
781 | .buffer_size = DEFAULT_BUFFER_SIZE, | 910 | .ci_buffer_size = DEFAULT_BUFFER_SIZE, |
782 | .rsbtbl_size = DEFAULT_RSBTBL_SIZE, | 911 | .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, |
783 | .lkbtbl_size = DEFAULT_LKBTBL_SIZE, | 912 | .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE, |
784 | .dirtbl_size = DEFAULT_DIRTBL_SIZE, | 913 | .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, |
785 | .recover_timer = DEFAULT_RECOVER_TIMER, | 914 | .ci_recover_timer = DEFAULT_RECOVER_TIMER, |
786 | .toss_secs = DEFAULT_TOSS_SECS, | 915 | .ci_toss_secs = DEFAULT_TOSS_SECS, |
787 | .scan_secs = DEFAULT_SCAN_SECS | 916 | .ci_scan_secs = DEFAULT_SCAN_SECS, |
917 | .ci_log_debug = DEFAULT_LOG_DEBUG | ||
788 | }; | 918 | }; |
789 | 919 | ||
diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 9da7839958a9..1e978611a96e 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
@@ -17,14 +17,15 @@ | |||
17 | #define DLM_MAX_ADDR_COUNT 3 | 17 | #define DLM_MAX_ADDR_COUNT 3 |
18 | 18 | ||
19 | struct dlm_config_info { | 19 | struct dlm_config_info { |
20 | int tcp_port; | 20 | int ci_tcp_port; |
21 | int buffer_size; | 21 | int ci_buffer_size; |
22 | int rsbtbl_size; | 22 | int ci_rsbtbl_size; |
23 | int lkbtbl_size; | 23 | int ci_lkbtbl_size; |
24 | int dirtbl_size; | 24 | int ci_dirtbl_size; |
25 | int recover_timer; | 25 | int ci_recover_timer; |
26 | int toss_secs; | 26 | int ci_toss_secs; |
27 | int scan_secs; | 27 | int ci_scan_secs; |
28 | int ci_log_debug; | ||
28 | }; | 29 | }; |
29 | 30 | ||
30 | extern struct dlm_config_info dlm_config; | 31 | extern struct dlm_config_info dlm_config; |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 1ee8195e6fc0..61d93201e1b2 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | 42 | ||
43 | #include <linux/dlm.h> | 43 | #include <linux/dlm.h> |
44 | #include "config.h" | ||
44 | 45 | ||
45 | #define DLM_LOCKSPACE_LEN 64 | 46 | #define DLM_LOCKSPACE_LEN 64 |
46 | 47 | ||
@@ -69,12 +70,12 @@ struct dlm_mhandle; | |||
69 | #define log_error(ls, fmt, args...) \ | 70 | #define log_error(ls, fmt, args...) \ |
70 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) | 71 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) |
71 | 72 | ||
72 | #define DLM_LOG_DEBUG | 73 | #define log_debug(ls, fmt, args...) \ |
73 | #ifdef DLM_LOG_DEBUG | 74 | do { \ |
74 | #define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) | 75 | if (dlm_config.ci_log_debug) \ |
75 | #else | 76 | printk(KERN_DEBUG "dlm: %s: " fmt "\n", \ |
76 | #define log_debug(ls, fmt, args...) | 77 | (ls)->ls_name , ##args); \ |
77 | #endif | 78 | } while (0) |
78 | 79 | ||
79 | #define DLM_ASSERT(x, do) \ | 80 | #define DLM_ASSERT(x, do) \ |
80 | { \ | 81 | { \ |
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) | |||
309 | 310 | ||
310 | /* dlm_header is first element of all structs sent between nodes */ | 311 | /* dlm_header is first element of all structs sent between nodes */ |
311 | 312 | ||
312 | #define DLM_HEADER_MAJOR 0x00020000 | 313 | #define DLM_HEADER_MAJOR 0x00030000 |
313 | #define DLM_HEADER_MINOR 0x00000001 | 314 | #define DLM_HEADER_MINOR 0x00000000 |
314 | 315 | ||
315 | #define DLM_MSG 1 | 316 | #define DLM_MSG 1 |
316 | #define DLM_RCOM 2 | 317 | #define DLM_RCOM 2 |
@@ -386,6 +387,8 @@ struct dlm_rcom { | |||
386 | uint32_t rc_type; /* DLM_RCOM_ */ | 387 | uint32_t rc_type; /* DLM_RCOM_ */ |
387 | int rc_result; /* multi-purpose */ | 388 | int rc_result; /* multi-purpose */ |
388 | uint64_t rc_id; /* match reply with request */ | 389 | uint64_t rc_id; /* match reply with request */ |
390 | uint64_t rc_seq; /* sender's ls_recover_seq */ | ||
391 | uint64_t rc_seq_reply; /* remote ls_recover_seq */ | ||
389 | char rc_buf[0]; | 392 | char rc_buf[0]; |
390 | }; | 393 | }; |
391 | 394 | ||
@@ -523,6 +526,7 @@ struct dlm_user_proc { | |||
523 | spinlock_t asts_spin; | 526 | spinlock_t asts_spin; |
524 | struct list_head locks; | 527 | struct list_head locks; |
525 | spinlock_t locks_spin; | 528 | spinlock_t locks_spin; |
529 | struct list_head unlocking; | ||
526 | wait_queue_head_t wait; | 530 | wait_queue_head_t wait; |
527 | }; | 531 | }; |
528 | 532 | ||
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 30878defaeb6..e725005fafd0 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
754 | mutex_unlock(&ls->ls_waiters_mutex); | 754 | mutex_unlock(&ls->ls_waiters_mutex); |
755 | } | 755 | } |
756 | 756 | ||
757 | /* We clear the RESEND flag because we might be taking an lkb off the waiters | ||
758 | list as part of process_requestqueue (e.g. a lookup that has an optimized | ||
759 | request reply on the requestqueue) between dlm_recover_waiters_pre() which | ||
760 | set RESEND and dlm_recover_waiters_post() */ | ||
761 | |||
757 | static int _remove_from_waiters(struct dlm_lkb *lkb) | 762 | static int _remove_from_waiters(struct dlm_lkb *lkb) |
758 | { | 763 | { |
759 | int error = 0; | 764 | int error = 0; |
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb) | |||
764 | goto out; | 769 | goto out; |
765 | } | 770 | } |
766 | lkb->lkb_wait_type = 0; | 771 | lkb->lkb_wait_type = 0; |
772 | lkb->lkb_flags &= ~DLM_IFL_RESEND; | ||
767 | list_del(&lkb->lkb_wait_reply); | 773 | list_del(&lkb->lkb_wait_reply); |
768 | unhold_lkb(lkb); | 774 | unhold_lkb(lkb); |
769 | out: | 775 | out: |
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b) | |||
810 | list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, | 816 | list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, |
811 | res_hashchain) { | 817 | res_hashchain) { |
812 | if (!time_after_eq(jiffies, r->res_toss_time + | 818 | if (!time_after_eq(jiffies, r->res_toss_time + |
813 | dlm_config.toss_secs * HZ)) | 819 | dlm_config.ci_toss_secs * HZ)) |
814 | continue; | 820 | continue; |
815 | found = 1; | 821 | found = 1; |
816 | break; | 822 | break; |
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
2144 | if (lkb->lkb_astaddr) | 2150 | if (lkb->lkb_astaddr) |
2145 | ms->m_asts |= AST_COMP; | 2151 | ms->m_asts |= AST_COMP; |
2146 | 2152 | ||
2147 | if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) | 2153 | /* compare with switch in create_message; send_remove() doesn't |
2148 | memcpy(ms->m_extra, r->res_name, r->res_length); | 2154 | use send_args() */ |
2149 | 2155 | ||
2150 | else if (lkb->lkb_lvbptr) | 2156 | switch (ms->m_type) { |
2157 | case DLM_MSG_REQUEST: | ||
2158 | case DLM_MSG_LOOKUP: | ||
2159 | memcpy(ms->m_extra, r->res_name, r->res_length); | ||
2160 | break; | ||
2161 | case DLM_MSG_CONVERT: | ||
2162 | case DLM_MSG_UNLOCK: | ||
2163 | case DLM_MSG_REQUEST_REPLY: | ||
2164 | case DLM_MSG_CONVERT_REPLY: | ||
2165 | case DLM_MSG_GRANT: | ||
2166 | if (!lkb->lkb_lvbptr) | ||
2167 | break; | ||
2151 | memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); | 2168 | memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); |
2152 | 2169 | break; | |
2170 | } | ||
2153 | } | 2171 | } |
2154 | 2172 | ||
2155 | static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) | 2173 | static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) |
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
2418 | 2436 | ||
2419 | DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); | 2437 | DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); |
2420 | 2438 | ||
2421 | if (receive_lvb(ls, lkb, ms)) | 2439 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { |
2422 | return -ENOMEM; | 2440 | /* lkb was just created so there won't be an lvb yet */ |
2441 | lkb->lkb_lvbptr = allocate_lvb(ls); | ||
2442 | if (!lkb->lkb_lvbptr) | ||
2443 | return -ENOMEM; | ||
2444 | } | ||
2423 | 2445 | ||
2424 | return 0; | 2446 | return 0; |
2425 | } | 2447 | } |
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) | |||
3002 | { | 3024 | { |
3003 | struct dlm_message *ms = (struct dlm_message *) hd; | 3025 | struct dlm_message *ms = (struct dlm_message *) hd; |
3004 | struct dlm_ls *ls; | 3026 | struct dlm_ls *ls; |
3005 | int error; | 3027 | int error = 0; |
3006 | 3028 | ||
3007 | if (!recovery) | 3029 | if (!recovery) |
3008 | dlm_message_in(ms); | 3030 | dlm_message_in(ms); |
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) | |||
3119 | out: | 3141 | out: |
3120 | dlm_put_lockspace(ls); | 3142 | dlm_put_lockspace(ls); |
3121 | dlm_astd_wake(); | 3143 | dlm_astd_wake(); |
3122 | return 0; | 3144 | return error; |
3123 | } | 3145 | } |
3124 | 3146 | ||
3125 | 3147 | ||
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
3132 | if (middle_conversion(lkb)) { | 3154 | if (middle_conversion(lkb)) { |
3133 | hold_lkb(lkb); | 3155 | hold_lkb(lkb); |
3134 | ls->ls_stub_ms.m_result = -EINPROGRESS; | 3156 | ls->ls_stub_ms.m_result = -EINPROGRESS; |
3157 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | ||
3135 | _remove_from_waiters(lkb); | 3158 | _remove_from_waiters(lkb); |
3136 | _receive_convert_reply(lkb, &ls->ls_stub_ms); | 3159 | _receive_convert_reply(lkb, &ls->ls_stub_ms); |
3137 | 3160 | ||
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
3205 | case DLM_MSG_UNLOCK: | 3228 | case DLM_MSG_UNLOCK: |
3206 | hold_lkb(lkb); | 3229 | hold_lkb(lkb); |
3207 | ls->ls_stub_ms.m_result = -DLM_EUNLOCK; | 3230 | ls->ls_stub_ms.m_result = -DLM_EUNLOCK; |
3231 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | ||
3208 | _remove_from_waiters(lkb); | 3232 | _remove_from_waiters(lkb); |
3209 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); | 3233 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); |
3210 | dlm_put_lkb(lkb); | 3234 | dlm_put_lkb(lkb); |
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
3213 | case DLM_MSG_CANCEL: | 3237 | case DLM_MSG_CANCEL: |
3214 | hold_lkb(lkb); | 3238 | hold_lkb(lkb); |
3215 | ls->ls_stub_ms.m_result = -DLM_ECANCEL; | 3239 | ls->ls_stub_ms.m_result = -DLM_ECANCEL; |
3240 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | ||
3216 | _remove_from_waiters(lkb); | 3241 | _remove_from_waiters(lkb); |
3217 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); | 3242 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); |
3218 | dlm_put_lkb(lkb); | 3243 | dlm_put_lkb(lkb); |
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
3571 | lock_rsb(r); | 3596 | lock_rsb(r); |
3572 | 3597 | ||
3573 | switch (error) { | 3598 | switch (error) { |
3599 | case -EBADR: | ||
3600 | /* There's a chance the new master received our lock before | ||
3601 | dlm_recover_master_reply(), this wouldn't happen if we did | ||
3602 | a barrier between recover_masters and recover_locks. */ | ||
3603 | log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id, | ||
3604 | (unsigned long)r, r->res_name); | ||
3605 | dlm_send_rcom_lock(r, lkb); | ||
3606 | goto out; | ||
3574 | case -EEXIST: | 3607 | case -EEXIST: |
3575 | log_debug(ls, "master copy exists %x", lkb->lkb_id); | 3608 | log_debug(ls, "master copy exists %x", lkb->lkb_id); |
3576 | /* fall through */ | 3609 | /* fall through */ |
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
3585 | /* an ack for dlm_recover_locks() which waits for replies from | 3618 | /* an ack for dlm_recover_locks() which waits for replies from |
3586 | all the locks it sends to new masters */ | 3619 | all the locks it sends to new masters */ |
3587 | dlm_recovered_lock(r); | 3620 | dlm_recovered_lock(r); |
3588 | 3621 | out: | |
3589 | unlock_rsb(r); | 3622 | unlock_rsb(r); |
3590 | put_rsb(r); | 3623 | put_rsb(r); |
3591 | dlm_put_lkb(lkb); | 3624 | dlm_put_lkb(lkb); |
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, | |||
3610 | } | 3643 | } |
3611 | 3644 | ||
3612 | if (flags & DLM_LKF_VALBLK) { | 3645 | if (flags & DLM_LKF_VALBLK) { |
3613 | ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); | 3646 | ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); |
3614 | if (!ua->lksb.sb_lvbptr) { | 3647 | if (!ua->lksb.sb_lvbptr) { |
3615 | kfree(ua); | 3648 | kfree(ua); |
3616 | __put_lkb(ls, lkb); | 3649 | __put_lkb(ls, lkb); |
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
3679 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | 3712 | ua = (struct dlm_user_args *)lkb->lkb_astparam; |
3680 | 3713 | ||
3681 | if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { | 3714 | if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { |
3682 | ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); | 3715 | ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); |
3683 | if (!ua->lksb.sb_lvbptr) { | 3716 | if (!ua->lksb.sb_lvbptr) { |
3684 | error = -ENOMEM; | 3717 | error = -ENOMEM; |
3685 | goto out_put; | 3718 | goto out_put; |
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
3745 | goto out_put; | 3778 | goto out_put; |
3746 | 3779 | ||
3747 | spin_lock(&ua->proc->locks_spin); | 3780 | spin_lock(&ua->proc->locks_spin); |
3748 | list_del_init(&lkb->lkb_ownqueue); | 3781 | /* dlm_user_add_ast() may have already taken lkb off the proc list */ |
3782 | if (!list_empty(&lkb->lkb_ownqueue)) | ||
3783 | list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); | ||
3749 | spin_unlock(&ua->proc->locks_spin); | 3784 | spin_unlock(&ua->proc->locks_spin); |
3750 | |||
3751 | /* this removes the reference for the proc->locks list added by | ||
3752 | dlm_user_request */ | ||
3753 | unhold_lkb(lkb); | ||
3754 | out_put: | 3785 | out_put: |
3755 | dlm_put_lkb(lkb); | 3786 | dlm_put_lkb(lkb); |
3756 | out: | 3787 | out: |
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, | |||
3790 | /* this lkb was removed from the WAITING queue */ | 3821 | /* this lkb was removed from the WAITING queue */ |
3791 | if (lkb->lkb_grmode == DLM_LOCK_IV) { | 3822 | if (lkb->lkb_grmode == DLM_LOCK_IV) { |
3792 | spin_lock(&ua->proc->locks_spin); | 3823 | spin_lock(&ua->proc->locks_spin); |
3793 | list_del_init(&lkb->lkb_ownqueue); | 3824 | list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); |
3794 | spin_unlock(&ua->proc->locks_spin); | 3825 | spin_unlock(&ua->proc->locks_spin); |
3795 | unhold_lkb(lkb); | ||
3796 | } | 3826 | } |
3797 | out_put: | 3827 | out_put: |
3798 | dlm_put_lkb(lkb); | 3828 | dlm_put_lkb(lkb); |
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
3853 | mutex_lock(&ls->ls_clear_proc_locks); | 3883 | mutex_lock(&ls->ls_clear_proc_locks); |
3854 | 3884 | ||
3855 | list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { | 3885 | list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { |
3856 | if (lkb->lkb_ast_type) { | ||
3857 | list_del(&lkb->lkb_astqueue); | ||
3858 | unhold_lkb(lkb); | ||
3859 | } | ||
3860 | |||
3861 | list_del_init(&lkb->lkb_ownqueue); | 3886 | list_del_init(&lkb->lkb_ownqueue); |
3862 | 3887 | ||
3863 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { | 3888 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { |
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
3874 | 3899 | ||
3875 | dlm_put_lkb(lkb); | 3900 | dlm_put_lkb(lkb); |
3876 | } | 3901 | } |
3902 | |||
3903 | /* in-progress unlocks */ | ||
3904 | list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { | ||
3905 | list_del_init(&lkb->lkb_ownqueue); | ||
3906 | lkb->lkb_flags |= DLM_IFL_DEAD; | ||
3907 | dlm_put_lkb(lkb); | ||
3908 | } | ||
3909 | |||
3910 | list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { | ||
3911 | list_del(&lkb->lkb_astqueue); | ||
3912 | dlm_put_lkb(lkb); | ||
3913 | } | ||
3914 | |||
3877 | mutex_unlock(&ls->ls_clear_proc_locks); | 3915 | mutex_unlock(&ls->ls_clear_proc_locks); |
3878 | unlock_recovery(ls); | 3916 | unlock_recovery(ls); |
3879 | } | 3917 | } |
3918 | |||
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 59012b089e8d..f40817b53c6f 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -236,7 +236,7 @@ static int dlm_scand(void *data) | |||
236 | while (!kthread_should_stop()) { | 236 | while (!kthread_should_stop()) { |
237 | list_for_each_entry(ls, &lslist, ls_list) | 237 | list_for_each_entry(ls, &lslist, ls_list) |
238 | dlm_scan_rsbs(ls); | 238 | dlm_scan_rsbs(ls); |
239 | schedule_timeout_interruptible(dlm_config.scan_secs * HZ); | 239 | schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); |
240 | } | 240 | } |
241 | return 0; | 241 | return 0; |
242 | } | 242 | } |
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
422 | ls->ls_count = 0; | 422 | ls->ls_count = 0; |
423 | ls->ls_flags = 0; | 423 | ls->ls_flags = 0; |
424 | 424 | ||
425 | size = dlm_config.rsbtbl_size; | 425 | size = dlm_config.ci_rsbtbl_size; |
426 | ls->ls_rsbtbl_size = size; | 426 | ls->ls_rsbtbl_size = size; |
427 | 427 | ||
428 | ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); | 428 | ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); |
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
434 | rwlock_init(&ls->ls_rsbtbl[i].lock); | 434 | rwlock_init(&ls->ls_rsbtbl[i].lock); |
435 | } | 435 | } |
436 | 436 | ||
437 | size = dlm_config.lkbtbl_size; | 437 | size = dlm_config.ci_lkbtbl_size; |
438 | ls->ls_lkbtbl_size = size; | 438 | ls->ls_lkbtbl_size = size; |
439 | 439 | ||
440 | ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); | 440 | ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); |
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
446 | ls->ls_lkbtbl[i].counter = 1; | 446 | ls->ls_lkbtbl[i].counter = 1; |
447 | } | 447 | } |
448 | 448 | ||
449 | size = dlm_config.dirtbl_size; | 449 | size = dlm_config.ci_dirtbl_size; |
450 | ls->ls_dirtbl_size = size; | 450 | ls->ls_dirtbl_size = size; |
451 | 451 | ||
452 | ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); | 452 | ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); |
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
489 | mutex_init(&ls->ls_requestqueue_mutex); | 489 | mutex_init(&ls->ls_requestqueue_mutex); |
490 | mutex_init(&ls->ls_clear_proc_locks); | 490 | mutex_init(&ls->ls_clear_proc_locks); |
491 | 491 | ||
492 | ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL); | 492 | ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); |
493 | if (!ls->ls_recover_buf) | 493 | if (!ls->ls_recover_buf) |
494 | goto out_dirfree; | 494 | goto out_dirfree; |
495 | 495 | ||
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c index fe158d7a9285..dc83a9d979b5 100644 --- a/fs/dlm/lowcomms-sctp.c +++ b/fs/dlm/lowcomms-sctp.c | |||
@@ -72,6 +72,8 @@ struct nodeinfo { | |||
72 | struct list_head writequeue; /* outgoing writequeue_entries */ | 72 | struct list_head writequeue; /* outgoing writequeue_entries */ |
73 | spinlock_t writequeue_lock; | 73 | spinlock_t writequeue_lock; |
74 | int nodeid; | 74 | int nodeid; |
75 | struct work_struct swork; /* Send workqueue */ | ||
76 | struct work_struct lwork; /* Locking workqueue */ | ||
75 | }; | 77 | }; |
76 | 78 | ||
77 | static DEFINE_IDR(nodeinfo_idr); | 79 | static DEFINE_IDR(nodeinfo_idr); |
@@ -96,6 +98,7 @@ struct connection { | |||
96 | atomic_t waiting_requests; | 98 | atomic_t waiting_requests; |
97 | struct cbuf cb; | 99 | struct cbuf cb; |
98 | int eagain_flag; | 100 | int eagain_flag; |
101 | struct work_struct work; /* Send workqueue */ | ||
99 | }; | 102 | }; |
100 | 103 | ||
101 | /* An entry waiting to be sent */ | 104 | /* An entry waiting to be sent */ |
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n) | |||
137 | static LIST_HEAD(write_nodes); | 140 | static LIST_HEAD(write_nodes); |
138 | static DEFINE_SPINLOCK(write_nodes_lock); | 141 | static DEFINE_SPINLOCK(write_nodes_lock); |
139 | 142 | ||
143 | |||
140 | /* Maximum number of incoming messages to process before | 144 | /* Maximum number of incoming messages to process before |
141 | * doing a schedule() | 145 | * doing a schedule() |
142 | */ | 146 | */ |
143 | #define MAX_RX_MSG_COUNT 25 | 147 | #define MAX_RX_MSG_COUNT 25 |
144 | 148 | ||
145 | /* Manage daemons */ | 149 | /* Work queues */ |
146 | static struct task_struct *recv_task; | 150 | static struct workqueue_struct *recv_workqueue; |
147 | static struct task_struct *send_task; | 151 | static struct workqueue_struct *send_workqueue; |
148 | static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait); | 152 | static struct workqueue_struct *lock_workqueue; |
149 | 153 | ||
150 | /* The SCTP connection */ | 154 | /* The SCTP connection */ |
151 | static struct connection sctp_con; | 155 | static struct connection sctp_con; |
152 | 156 | ||
157 | static void process_send_sockets(struct work_struct *work); | ||
158 | static void process_recv_sockets(struct work_struct *work); | ||
159 | static void process_lock_request(struct work_struct *work); | ||
153 | 160 | ||
154 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | 161 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) |
155 | { | 162 | { |
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc) | |||
222 | spin_lock_init(&ni->lock); | 229 | spin_lock_init(&ni->lock); |
223 | INIT_LIST_HEAD(&ni->writequeue); | 230 | INIT_LIST_HEAD(&ni->writequeue); |
224 | spin_lock_init(&ni->writequeue_lock); | 231 | spin_lock_init(&ni->writequeue_lock); |
232 | INIT_WORK(&ni->lwork, process_lock_request); | ||
233 | INIT_WORK(&ni->swork, process_send_sockets); | ||
225 | ni->nodeid = nodeid; | 234 | ni->nodeid = nodeid; |
226 | 235 | ||
227 | if (nodeid > max_nodeid) | 236 | if (nodeid > max_nodeid) |
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc) | |||
249 | /* Data or notification available on socket */ | 258 | /* Data or notification available on socket: queue the receive work only when CF_READ_PENDING was previously clear (test_and_set_bit() returns the old bit value) */ |
250 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | 259 | static void lowcomms_data_ready(struct sock *sk, int count_unused) |
251 | { | 260 | { |
252 | atomic_inc(&sctp_con.waiting_requests); | ||
253 | if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) | 261 | if (!test_and_set_bit(CF_READ_PENDING, &sctp_con.flags)) |
254 | return; | 262 | queue_work(recv_workqueue, &sctp_con.work); |
255 | |||
256 | wake_up_interruptible(&lowcomms_recv_wait); | ||
257 | } | 263 | } |
258 | 264 | ||
259 | 265 | ||
@@ -361,10 +367,10 @@ static void init_failed(void) | |||
361 | spin_lock_bh(&write_nodes_lock); | 367 | spin_lock_bh(&write_nodes_lock); |
362 | list_add_tail(&ni->write_list, &write_nodes); | 368 | list_add_tail(&ni->write_list, &write_nodes); |
363 | spin_unlock_bh(&write_nodes_lock); | 369 | spin_unlock_bh(&write_nodes_lock); |
370 | queue_work(send_workqueue, &ni->swork); | ||
364 | } | 371 | } |
365 | } | 372 | } |
366 | } | 373 | } |
367 | wake_up_process(send_task); | ||
368 | } | 374 | } |
369 | 375 | ||
370 | /* Something happened to an association */ | 376 | /* Something happened to an association */ |
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf) | |||
446 | spin_lock_bh(&write_nodes_lock); | 452 | spin_lock_bh(&write_nodes_lock); |
447 | list_add_tail(&ni->write_list, &write_nodes); | 453 | list_add_tail(&ni->write_list, &write_nodes); |
448 | spin_unlock_bh(&write_nodes_lock); | 454 | spin_unlock_bh(&write_nodes_lock); |
455 | queue_work(send_workqueue, &ni->swork); | ||
449 | } | 456 | } |
450 | wake_up_process(send_task); | ||
451 | } | 457 | } |
452 | break; | 458 | break; |
453 | 459 | ||
@@ -580,8 +586,8 @@ static int receive_from_sock(void) | |||
580 | spin_lock_bh(&write_nodes_lock); | 586 | spin_lock_bh(&write_nodes_lock); |
581 | list_add_tail(&ni->write_list, &write_nodes); | 587 | list_add_tail(&ni->write_list, &write_nodes); |
582 | spin_unlock_bh(&write_nodes_lock); | 588 | spin_unlock_bh(&write_nodes_lock); |
589 | queue_work(send_workqueue, &ni->swork); | ||
583 | } | 590 | } |
584 | wake_up_process(send_task); | ||
585 | } | 591 | } |
586 | } | 592 | } |
587 | 593 | ||
@@ -590,6 +596,7 @@ static int receive_from_sock(void) | |||
590 | return 0; | 596 | return 0; |
591 | 597 | ||
592 | cbuf_add(&sctp_con.cb, ret); | 598 | cbuf_add(&sctp_con.cb, ret); |
599 | // PJC: TODO: Add to node's workqueue....can we ?? | ||
593 | ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), | 600 | ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid), |
594 | page_address(sctp_con.rx_page), | 601 | page_address(sctp_con.rx_page), |
595 | sctp_con.cb.base, sctp_con.cb.len, | 602 | sctp_con.cb.base, sctp_con.cb.len, |
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num) | |||
635 | 642 | ||
636 | if (result < 0) | 643 | if (result < 0) |
637 | log_print("Can't bind to port %d addr number %d", | 644 | log_print("Can't bind to port %d addr number %d", |
638 | dlm_config.tcp_port, num); | 645 | dlm_config.ci_tcp_port, num); |
639 | 646 | ||
640 | return result; | 647 | return result; |
641 | } | 648 | } |
@@ -711,7 +718,7 @@ static int init_sock(void) | |||
711 | /* Bind to all interfaces. */ | 718 | /* Bind to all interfaces. */ |
712 | for (i = 0; i < dlm_local_count; i++) { | 719 | for (i = 0; i < dlm_local_count; i++) { |
713 | memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); | 720 | memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); |
714 | make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len); | 721 | make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len); |
715 | 722 | ||
716 | result = add_bind_addr(&localaddr, addr_len, num); | 723 | result = add_bind_addr(&localaddr, addr_len, num); |
717 | if (result) | 724 | if (result) |
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg) | |||
820 | spin_lock_bh(&write_nodes_lock); | 827 | spin_lock_bh(&write_nodes_lock); |
821 | list_add_tail(&ni->write_list, &write_nodes); | 828 | list_add_tail(&ni->write_list, &write_nodes); |
822 | spin_unlock_bh(&write_nodes_lock); | 829 | spin_unlock_bh(&write_nodes_lock); |
823 | wake_up_process(send_task); | 830 | |
831 | queue_work(send_workqueue, &ni->swork); | ||
824 | } | 832 | } |
825 | return; | 833 | return; |
826 | 834 | ||
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid) | |||
863 | return; | 871 | return; |
864 | } | 872 | } |
865 | 873 | ||
866 | make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen); | 874 | make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen); |
867 | 875 | ||
868 | outmessage.msg_name = &rem_addr; | 876 | outmessage.msg_name = &rem_addr; |
869 | outmessage.msg_namelen = addrlen; | 877 | outmessage.msg_namelen = addrlen; |
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid) | |||
1088 | return 0; | 1096 | return 0; |
1089 | } | 1097 | } |
1090 | 1098 | ||
1091 | static int write_list_empty(void) | 1099 | // PJC: The work queue function for receiving. |
1100 | static void process_recv_sockets(struct work_struct *work) | ||
1092 | { | 1101 | { |
1093 | int status; | 1102 | if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) { |
1094 | 1103 | int ret; | |
1095 | spin_lock_bh(&write_nodes_lock); | ||
1096 | status = list_empty(&write_nodes); | ||
1097 | spin_unlock_bh(&write_nodes_lock); | ||
1098 | |||
1099 | return status; | ||
1100 | } | ||
1101 | |||
1102 | static int dlm_recvd(void *data) | ||
1103 | { | ||
1104 | DECLARE_WAITQUEUE(wait, current); | ||
1105 | |||
1106 | while (!kthread_should_stop()) { | ||
1107 | int count = 0; | 1104 | int count = 0; |
1108 | 1105 | ||
1109 | set_current_state(TASK_INTERRUPTIBLE); | 1106 | do { |
1110 | add_wait_queue(&lowcomms_recv_wait, &wait); | 1107 | ret = receive_from_sock(); |
1111 | if (!test_bit(CF_READ_PENDING, &sctp_con.flags)) | ||
1112 | cond_resched(); | ||
1113 | remove_wait_queue(&lowcomms_recv_wait, &wait); | ||
1114 | set_current_state(TASK_RUNNING); | ||
1115 | |||
1116 | if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) { | ||
1117 | int ret; | ||
1118 | |||
1119 | do { | ||
1120 | ret = receive_from_sock(); | ||
1121 | 1108 | ||
1122 | /* Don't starve out everyone else */ | 1109 | /* Don't starve out everyone else */ |
1123 | if (++count >= MAX_RX_MSG_COUNT) { | 1110 | if (++count >= MAX_RX_MSG_COUNT) { |
1124 | cond_resched(); | 1111 | cond_resched(); |
1125 | count = 0; | 1112 | count = 0; |
1126 | } | 1113 | } |
1127 | } while (!kthread_should_stop() && ret >=0); | 1114 | } while (!kthread_should_stop() && ret >=0); |
1128 | } | ||
1129 | cond_resched(); | ||
1130 | } | 1115 | } |
1131 | 1116 | cond_resched(); | |
1132 | return 0; | ||
1133 | } | 1117 | } |
1134 | 1118 | ||
1135 | static int dlm_sendd(void *data) | 1119 | // PJC: the work queue function for sending |
1120 | static void process_send_sockets(struct work_struct *work) | ||
1136 | { | 1121 | { |
1137 | DECLARE_WAITQUEUE(wait, current); | 1122 | if (sctp_con.eagain_flag) { |
1138 | 1123 | sctp_con.eagain_flag = 0; | |
1139 | add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); | 1124 | refill_write_queue(); |
1140 | |||
1141 | while (!kthread_should_stop()) { | ||
1142 | set_current_state(TASK_INTERRUPTIBLE); | ||
1143 | if (write_list_empty()) | ||
1144 | cond_resched(); | ||
1145 | set_current_state(TASK_RUNNING); | ||
1146 | |||
1147 | if (sctp_con.eagain_flag) { | ||
1148 | sctp_con.eagain_flag = 0; | ||
1149 | refill_write_queue(); | ||
1150 | } | ||
1151 | process_output_queue(); | ||
1152 | } | 1125 | } |
1126 | process_output_queue(); | ||
1127 | } | ||
1153 | 1128 | ||
1154 | remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait); | 1129 | // PJC: Process lock requests from a particular node. |
1155 | 1130 | // TODO: can we optimise this out on UP ?? | |
1156 | return 0; | 1131 | static void process_lock_request(struct work_struct *work) |
1132 | { | ||
1157 | } | 1133 | } |
1158 | 1134 | ||
1159 | static void daemons_stop(void) | 1135 | static void daemons_stop(void) |
1160 | { | 1136 | { |
1161 | kthread_stop(recv_task); | 1137 | destroy_workqueue(recv_workqueue); |
1162 | kthread_stop(send_task); | 1138 | destroy_workqueue(send_workqueue); |
1139 | destroy_workqueue(lock_workqueue); | ||
1163 | } | 1140 | } |
1164 | 1141 | ||
1165 | static int daemons_start(void) | 1142 | static int daemons_start(void) |
1166 | { | 1143 | { |
1167 | struct task_struct *p; | ||
1168 | int error; | 1144 | int error; |
1145 | recv_workqueue = create_workqueue("dlm_recv"); | ||
1146 | error = recv_workqueue ? 0 : -ENOMEM; /* create_workqueue() returns NULL, not an ERR_PTR, on failure */ | ||
1147 | if (error) { | ||
1148 | log_print("can't start dlm_recv %d", error); | ||
1149 | return error; | ||
1150 | } | ||
1169 | 1151 | ||
1170 | p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); | 1152 | send_workqueue = create_singlethread_workqueue("dlm_send"); |
1171 | error = IS_ERR(p); | 1153 | error = send_workqueue ? 0 : -ENOMEM; /* NULL on failure, so IS_ERR() would never fire */ |
1172 | if (error) { | 1154 | if (error) { |
1173 | log_print("can't start dlm_recvd %d", error); | 1155 | log_print("can't start dlm_send %d", error); |
1156 | destroy_workqueue(recv_workqueue); | ||
1174 | return error; | 1157 | return error; |
1175 | } | 1158 | } |
1176 | recv_task = p; | ||
1177 | 1159 | ||
1178 | p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); | 1160 | lock_workqueue = create_workqueue("dlm_rlock"); |
1179 | error = IS_ERR(p); | 1161 | error = lock_workqueue ? 0 : -ENOMEM; /* NULL on failure, so IS_ERR() would never fire */ |
1180 | if (error) { | 1162 | if (error) { |
1181 | log_print("can't start dlm_sendd %d", error); | 1163 | log_print("can't start dlm_rlock %d", error); |
1182 | kthread_stop(recv_task); | 1164 | destroy_workqueue(send_workqueue); |
1165 | destroy_workqueue(recv_workqueue); | ||
1183 | return error; | 1166 | return error; |
1184 | } | 1167 | } |
1185 | send_task = p; | ||
1186 | 1168 | ||
1187 | return 0; | 1169 | return 0; |
1188 | } | 1170 | } |
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void) | |||
1194 | { | 1176 | { |
1195 | int error; | 1177 | int error; |
1196 | 1178 | ||
1179 | INIT_WORK(&sctp_con.work, process_recv_sockets); | ||
1180 | |||
1197 | error = init_sock(); | 1181 | error = init_sock(); |
1198 | if (error) | 1182 | if (error) |
1199 | goto fail_sock; | 1183 | goto fail_sock; |
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void) | |||
1224 | for (i = 0; i < dlm_local_count; i++) | 1208 | for (i = 0; i < dlm_local_count; i++) |
1225 | kfree(dlm_local_addr[i]); | 1209 | kfree(dlm_local_addr[i]); |
1226 | } | 1210 | } |
1227 | |||
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c index 9be3a440c42a..f1efd17b2614 100644 --- a/fs/dlm/lowcomms-tcp.c +++ b/fs/dlm/lowcomms-tcp.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb) | |||
96 | struct connection { | 96 | struct connection { |
97 | struct socket *sock; /* NULL if not connected */ | 97 | struct socket *sock; /* NULL if not connected */ |
98 | uint32_t nodeid; /* So we know who we are in the list */ | 98 | uint32_t nodeid; /* So we know who we are in the list */ |
99 | struct rw_semaphore sock_sem; /* Stop connect races */ | 99 | struct mutex sock_mutex; |
100 | struct list_head read_list; /* On this list when ready for reading */ | ||
101 | struct list_head write_list; /* On this list when ready for writing */ | ||
102 | struct list_head state_list; /* On this list when ready to connect */ | ||
103 | unsigned long flags; /* bit 1,2 = We are on the read/write lists */ | 100 | unsigned long flags; /* bit 1,2 = We are on the read/write lists */ |
104 | #define CF_READ_PENDING 1 | 101 | #define CF_READ_PENDING 1 |
105 | #define CF_WRITE_PENDING 2 | 102 | #define CF_WRITE_PENDING 2 |
@@ -112,9 +109,10 @@ struct connection { | |||
112 | struct page *rx_page; | 109 | struct page *rx_page; |
113 | struct cbuf cb; | 110 | struct cbuf cb; |
114 | int retries; | 111 | int retries; |
115 | atomic_t waiting_requests; | ||
116 | #define MAX_CONNECT_RETRIES 3 | 112 | #define MAX_CONNECT_RETRIES 3 |
117 | struct connection *othercon; | 113 | struct connection *othercon; |
114 | struct work_struct rwork; /* Receive workqueue */ | ||
115 | struct work_struct swork; /* Send workqueue */ | ||
118 | }; | 116 | }; |
119 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) | 117 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) |
120 | 118 | ||
@@ -131,14 +129,9 @@ struct writequeue_entry { | |||
131 | 129 | ||
132 | static struct sockaddr_storage dlm_local_addr; | 130 | static struct sockaddr_storage dlm_local_addr; |
133 | 131 | ||
134 | /* Manage daemons */ | 132 | /* Work queues */ |
135 | static struct task_struct *recv_task; | 133 | static struct workqueue_struct *recv_workqueue; |
136 | static struct task_struct *send_task; | 134 | static struct workqueue_struct *send_workqueue; |
137 | |||
138 | static wait_queue_t lowcomms_send_waitq_head; | ||
139 | static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq); | ||
140 | static wait_queue_t lowcomms_recv_waitq_head; | ||
141 | static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq); | ||
142 | 135 | ||
143 | /* An array of pointers to connections, indexed by NODEID */ | 136 | /* An array of pointers to connections, indexed by NODEID */ |
144 | static struct connection **connections; | 137 | static struct connection **connections; |
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock); | |||
146 | static struct kmem_cache *con_cache; | 139 | static struct kmem_cache *con_cache; |
147 | static int conn_array_size; | 140 | static int conn_array_size; |
148 | 141 | ||
149 | /* List of sockets that have reads pending */ | 142 | static void process_recv_sockets(struct work_struct *work); |
150 | static LIST_HEAD(read_sockets); | 143 | static void process_send_sockets(struct work_struct *work); |
151 | static DEFINE_SPINLOCK(read_sockets_lock); | ||
152 | |||
153 | /* List of sockets which have writes pending */ | ||
154 | static LIST_HEAD(write_sockets); | ||
155 | static DEFINE_SPINLOCK(write_sockets_lock); | ||
156 | |||
157 | /* List of sockets which have connects pending */ | ||
158 | static LIST_HEAD(state_sockets); | ||
159 | static DEFINE_SPINLOCK(state_sockets_lock); | ||
160 | 144 | ||
161 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | 145 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
162 | { | 146 | { |
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) | |||
186 | goto finish; | 170 | goto finish; |
187 | 171 | ||
188 | con->nodeid = nodeid; | 172 | con->nodeid = nodeid; |
189 | init_rwsem(&con->sock_sem); | 173 | mutex_init(&con->sock_mutex); |
190 | INIT_LIST_HEAD(&con->writequeue); | 174 | INIT_LIST_HEAD(&con->writequeue); |
191 | spin_lock_init(&con->writequeue_lock); | 175 | spin_lock_init(&con->writequeue_lock); |
176 | INIT_WORK(&con->swork, process_send_sockets); | ||
177 | INIT_WORK(&con->rwork, process_recv_sockets); | ||
192 | 178 | ||
193 | connections[nodeid] = con; | 179 | connections[nodeid] = con; |
194 | } | 180 | } |
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused) | |||
203 | { | 189 | { |
204 | struct connection *con = sock2con(sk); | 190 | struct connection *con = sock2con(sk); |
205 | 191 | ||
206 | atomic_inc(&con->waiting_requests); | 192 | if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) |
207 | if (test_and_set_bit(CF_READ_PENDING, &con->flags)) | 193 | queue_work(recv_workqueue, &con->rwork); |
208 | return; | ||
209 | |||
210 | spin_lock_bh(&read_sockets_lock); | ||
211 | list_add_tail(&con->read_list, &read_sockets); | ||
212 | spin_unlock_bh(&read_sockets_lock); | ||
213 | |||
214 | wake_up_interruptible(&lowcomms_recv_waitq); | ||
215 | } | 194 | } |
216 | 195 | ||
217 | static void lowcomms_write_space(struct sock *sk) | 196 | static void lowcomms_write_space(struct sock *sk) |
218 | { | 197 | { |
219 | struct connection *con = sock2con(sk); | 198 | struct connection *con = sock2con(sk); |
220 | 199 | ||
221 | if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | 200 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) |
222 | return; | 201 | queue_work(send_workqueue, &con->swork); |
223 | |||
224 | spin_lock_bh(&write_sockets_lock); | ||
225 | list_add_tail(&con->write_list, &write_sockets); | ||
226 | spin_unlock_bh(&write_sockets_lock); | ||
227 | |||
228 | wake_up_interruptible(&lowcomms_send_waitq); | ||
229 | } | 202 | } |
230 | 203 | ||
231 | static inline void lowcomms_connect_sock(struct connection *con) | 204 | static inline void lowcomms_connect_sock(struct connection *con) |
232 | { | 205 | { |
233 | if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) | 206 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) |
234 | return; | 207 | queue_work(send_workqueue, &con->swork); |
235 | |||
236 | spin_lock_bh(&state_sockets_lock); | ||
237 | list_add_tail(&con->state_list, &state_sockets); | ||
238 | spin_unlock_bh(&state_sockets_lock); | ||
239 | |||
240 | wake_up_interruptible(&lowcomms_send_waitq); | ||
241 | } | 208 | } |
242 | 209 | ||
243 | static void lowcomms_state_change(struct sock *sk) | 210 | static void lowcomms_state_change(struct sock *sk) |
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, | |||
279 | /* Close a remote connection and tidy up */ | 246 | /* Close a remote connection and tidy up */ |
280 | static void close_connection(struct connection *con, bool and_other) | 247 | static void close_connection(struct connection *con, bool and_other) |
281 | { | 248 | { |
282 | down_write(&con->sock_sem); | 249 | mutex_lock(&con->sock_mutex); |
283 | 250 | ||
284 | if (con->sock) { | 251 | if (con->sock) { |
285 | sock_release(con->sock); | 252 | sock_release(con->sock); |
@@ -294,7 +261,7 @@ static void close_connection(struct connection *con, bool and_other) | |||
294 | con->rx_page = NULL; | 261 | con->rx_page = NULL; |
295 | } | 262 | } |
296 | con->retries = 0; | 263 | con->retries = 0; |
297 | up_write(&con->sock_sem); | 264 | mutex_unlock(&con->sock_mutex); |
298 | } | 265 | } |
299 | 266 | ||
300 | /* Data received from remote end */ | 267 | /* Data received from remote end */ |
@@ -308,10 +275,13 @@ static int receive_from_sock(struct connection *con) | |||
308 | int r; | 275 | int r; |
309 | int call_again_soon = 0; | 276 | int call_again_soon = 0; |
310 | 277 | ||
311 | down_read(&con->sock_sem); | 278 | mutex_lock(&con->sock_mutex); |
279 | |||
280 | if (con->sock == NULL) { | ||
281 | ret = -EAGAIN; | ||
282 | goto out_close; | ||
283 | } | ||
312 | 284 | ||
313 | if (con->sock == NULL) | ||
314 | goto out; | ||
315 | if (con->rx_page == NULL) { | 285 | if (con->rx_page == NULL) { |
316 | /* | 286 | /* |
317 | * This doesn't need to be atomic, but I think it should | 287 | * This doesn't need to be atomic, but I think it should |
@@ -359,6 +329,9 @@ static int receive_from_sock(struct connection *con) | |||
359 | 329 | ||
360 | if (ret <= 0) | 330 | if (ret <= 0) |
361 | goto out_close; | 331 | goto out_close; |
332 | if (ret == -EAGAIN) | ||
333 | goto out_resched; | ||
334 | |||
362 | if (ret == len) | 335 | if (ret == len) |
363 | call_again_soon = 1; | 336 | call_again_soon = 1; |
364 | cbuf_add(&con->cb, ret); | 337 | cbuf_add(&con->cb, ret); |
@@ -381,24 +354,26 @@ static int receive_from_sock(struct connection *con) | |||
381 | con->rx_page = NULL; | 354 | con->rx_page = NULL; |
382 | } | 355 | } |
383 | 356 | ||
384 | out: | ||
385 | if (call_again_soon) | 357 | if (call_again_soon) |
386 | goto out_resched; | 358 | goto out_resched; |
387 | up_read(&con->sock_sem); | 359 | mutex_unlock(&con->sock_mutex); |
388 | return 0; | 360 | return 0; |
389 | 361 | ||
390 | out_resched: | 362 | out_resched: |
391 | lowcomms_data_ready(con->sock->sk, 0); | 363 | if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) |
392 | up_read(&con->sock_sem); | 364 | queue_work(recv_workqueue, &con->rwork); |
393 | cond_resched(); | 365 | mutex_unlock(&con->sock_mutex); |
394 | return 0; | 366 | return -EAGAIN; |
395 | 367 | ||
396 | out_close: | 368 | out_close: |
397 | up_read(&con->sock_sem); | 369 | mutex_unlock(&con->sock_mutex); |
398 | if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { | 370 | if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { |
399 | close_connection(con, false); | 371 | close_connection(con, false); |
400 | /* Reconnect when there is something to send */ | 372 | /* Reconnect when there is something to send */ |
401 | } | 373 | } |
374 | /* Don't return success if we really got EOF */ | ||
375 | if (ret == 0) | ||
376 | ret = -EAGAIN; | ||
402 | 377 | ||
403 | return ret; | 378 | return ret; |
404 | } | 379 | } |
@@ -412,6 +387,7 @@ static int accept_from_sock(struct connection *con) | |||
412 | int len; | 387 | int len; |
413 | int nodeid; | 388 | int nodeid; |
414 | struct connection *newcon; | 389 | struct connection *newcon; |
390 | struct connection *addcon; | ||
415 | 391 | ||
416 | memset(&peeraddr, 0, sizeof(peeraddr)); | 392 | memset(&peeraddr, 0, sizeof(peeraddr)); |
417 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, | 393 | result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, |
@@ -419,7 +395,7 @@ static int accept_from_sock(struct connection *con) | |||
419 | if (result < 0) | 395 | if (result < 0) |
420 | return -ENOMEM; | 396 | return -ENOMEM; |
421 | 397 | ||
422 | down_read(&con->sock_sem); | 398 | mutex_lock_nested(&con->sock_mutex, 0); |
423 | 399 | ||
424 | result = -ENOTCONN; | 400 | result = -ENOTCONN; |
425 | if (con->sock == NULL) | 401 | if (con->sock == NULL) |
@@ -445,7 +421,7 @@ static int accept_from_sock(struct connection *con) | |||
445 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { | 421 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { |
446 | printk("dlm: connect from non cluster node\n"); | 422 | printk("dlm: connect from non cluster node\n"); |
447 | sock_release(newsock); | 423 | sock_release(newsock); |
448 | up_read(&con->sock_sem); | 424 | mutex_unlock(&con->sock_mutex); |
449 | return -1; | 425 | return -1; |
450 | } | 426 | } |
451 | 427 | ||
@@ -462,7 +438,7 @@ static int accept_from_sock(struct connection *con) | |||
462 | result = -ENOMEM; | 438 | result = -ENOMEM; |
463 | goto accept_err; | 439 | goto accept_err; |
464 | } | 440 | } |
465 | down_write(&newcon->sock_sem); | 441 | mutex_lock_nested(&newcon->sock_mutex, 1); |
466 | if (newcon->sock) { | 442 | if (newcon->sock) { |
467 | struct connection *othercon = newcon->othercon; | 443 | struct connection *othercon = newcon->othercon; |
468 | 444 | ||
@@ -470,41 +446,45 @@ static int accept_from_sock(struct connection *con) | |||
470 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); | 446 | othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); |
471 | if (!othercon) { | 447 | if (!othercon) { |
472 | printk("dlm: failed to allocate incoming socket\n"); | 448 | printk("dlm: failed to allocate incoming socket\n"); |
473 | up_write(&newcon->sock_sem); | 449 | mutex_unlock(&newcon->sock_mutex); |
474 | result = -ENOMEM; | 450 | result = -ENOMEM; |
475 | goto accept_err; | 451 | goto accept_err; |
476 | } | 452 | } |
477 | othercon->nodeid = nodeid; | 453 | othercon->nodeid = nodeid; |
478 | othercon->rx_action = receive_from_sock; | 454 | othercon->rx_action = receive_from_sock; |
479 | init_rwsem(&othercon->sock_sem); | 455 | mutex_init(&othercon->sock_mutex); |
456 | INIT_WORK(&othercon->swork, process_send_sockets); | ||
457 | INIT_WORK(&othercon->rwork, process_recv_sockets); | ||
480 | set_bit(CF_IS_OTHERCON, &othercon->flags); | 458 | set_bit(CF_IS_OTHERCON, &othercon->flags); |
481 | newcon->othercon = othercon; | 459 | newcon->othercon = othercon; |
482 | } | 460 | } |
483 | othercon->sock = newsock; | 461 | othercon->sock = newsock; |
484 | newsock->sk->sk_user_data = othercon; | 462 | newsock->sk->sk_user_data = othercon; |
485 | add_sock(newsock, othercon); | 463 | add_sock(newsock, othercon); |
464 | addcon = othercon; | ||
486 | } | 465 | } |
487 | else { | 466 | else { |
488 | newsock->sk->sk_user_data = newcon; | 467 | newsock->sk->sk_user_data = newcon; |
489 | newcon->rx_action = receive_from_sock; | 468 | newcon->rx_action = receive_from_sock; |
490 | add_sock(newsock, newcon); | 469 | add_sock(newsock, newcon); |
491 | 470 | addcon = newcon; | |
492 | } | 471 | } |
493 | 472 | ||
494 | up_write(&newcon->sock_sem); | 473 | mutex_unlock(&newcon->sock_mutex); |
495 | 474 | ||
496 | /* | 475 | /* |
497 | * Add it to the active queue in case we got data | 476 | * Add it to the active queue in case we got data |
498 | * beween processing the accept adding the socket | 477 | * beween processing the accept adding the socket |
499 | * to the read_sockets list | 478 | * to the read_sockets list |
500 | */ | 479 | */ |
501 | lowcomms_data_ready(newsock->sk, 0); | 480 | if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) |
502 | up_read(&con->sock_sem); | 481 | queue_work(recv_workqueue, &addcon->rwork); |
482 | mutex_unlock(&con->sock_mutex); | ||
503 | 483 | ||
504 | return 0; | 484 | return 0; |
505 | 485 | ||
506 | accept_err: | 486 | accept_err: |
507 | up_read(&con->sock_sem); | 487 | mutex_unlock(&con->sock_mutex); |
508 | sock_release(newsock); | 488 | sock_release(newsock); |
509 | 489 | ||
510 | if (result != -EAGAIN) | 490 | if (result != -EAGAIN) |
@@ -525,7 +505,7 @@ static void connect_to_sock(struct connection *con) | |||
525 | return; | 505 | return; |
526 | } | 506 | } |
527 | 507 | ||
528 | down_write(&con->sock_sem); | 508 | mutex_lock(&con->sock_mutex); |
529 | if (con->retries++ > MAX_CONNECT_RETRIES) | 509 | if (con->retries++ > MAX_CONNECT_RETRIES) |
530 | goto out; | 510 | goto out; |
531 | 511 | ||
@@ -548,7 +528,7 @@ static void connect_to_sock(struct connection *con) | |||
548 | sock->sk->sk_user_data = con; | 528 | sock->sk->sk_user_data = con; |
549 | con->rx_action = receive_from_sock; | 529 | con->rx_action = receive_from_sock; |
550 | 530 | ||
551 | make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); | 531 | make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); |
552 | 532 | ||
553 | add_sock(sock, con); | 533 | add_sock(sock, con); |
554 | 534 | ||
@@ -577,7 +557,7 @@ out_err: | |||
577 | result = 0; | 557 | result = 0; |
578 | } | 558 | } |
579 | out: | 559 | out: |
580 | up_write(&con->sock_sem); | 560 | mutex_unlock(&con->sock_mutex); |
581 | return; | 561 | return; |
582 | } | 562 | } |
583 | 563 | ||
@@ -616,10 +596,10 @@ static struct socket *create_listen_sock(struct connection *con, | |||
616 | con->sock = sock; | 596 | con->sock = sock; |
617 | 597 | ||
618 | /* Bind to our port */ | 598 | /* Bind to our port */ |
619 | make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); | 599 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); |
620 | result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); | 600 | result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); |
621 | if (result < 0) { | 601 | if (result < 0) { |
622 | printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); | 602 | printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port); |
623 | sock_release(sock); | 603 | sock_release(sock); |
624 | sock = NULL; | 604 | sock = NULL; |
625 | con->sock = NULL; | 605 | con->sock = NULL; |
@@ -638,7 +618,7 @@ static struct socket *create_listen_sock(struct connection *con, | |||
638 | 618 | ||
639 | result = sock->ops->listen(sock, 5); | 619 | result = sock->ops->listen(sock, 5); |
640 | if (result < 0) { | 620 | if (result < 0) { |
641 | printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); | 621 | printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port); |
642 | sock_release(sock); | 622 | sock_release(sock); |
643 | sock = NULL; | 623 | sock = NULL; |
644 | goto create_out; | 624 | goto create_out; |
@@ -709,6 +689,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, | |||
709 | if (!con) | 689 | if (!con) |
710 | return NULL; | 690 | return NULL; |
711 | 691 | ||
692 | spin_lock(&con->writequeue_lock); | ||
712 | e = list_entry(con->writequeue.prev, struct writequeue_entry, list); | 693 | e = list_entry(con->writequeue.prev, struct writequeue_entry, list); |
713 | if ((&e->list == &con->writequeue) || | 694 | if ((&e->list == &con->writequeue) || |
714 | (PAGE_CACHE_SIZE - e->end < len)) { | 695 | (PAGE_CACHE_SIZE - e->end < len)) { |
@@ -747,6 +728,7 @@ void dlm_lowcomms_commit_buffer(void *mh) | |||
747 | struct connection *con = e->con; | 728 | struct connection *con = e->con; |
748 | int users; | 729 | int users; |
749 | 730 | ||
731 | spin_lock(&con->writequeue_lock); | ||
750 | users = --e->users; | 732 | users = --e->users; |
751 | if (users) | 733 | if (users) |
752 | goto out; | 734 | goto out; |
@@ -754,12 +736,8 @@ void dlm_lowcomms_commit_buffer(void *mh) | |||
754 | kunmap(e->page); | 736 | kunmap(e->page); |
755 | spin_unlock(&con->writequeue_lock); | 737 | spin_unlock(&con->writequeue_lock); |
756 | 738 | ||
757 | if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { | 739 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { |
758 | spin_lock_bh(&write_sockets_lock); | 740 | queue_work(send_workqueue, &con->swork); |
759 | list_add_tail(&con->write_list, &write_sockets); | ||
760 | spin_unlock_bh(&write_sockets_lock); | ||
761 | |||
762 | wake_up_interruptible(&lowcomms_send_waitq); | ||
763 | } | 741 | } |
764 | return; | 742 | return; |
765 | 743 | ||
@@ -783,7 +761,7 @@ static void send_to_sock(struct connection *con) | |||
783 | struct writequeue_entry *e; | 761 | struct writequeue_entry *e; |
784 | int len, offset; | 762 | int len, offset; |
785 | 763 | ||
786 | down_read(&con->sock_sem); | 764 | mutex_lock(&con->sock_mutex); |
787 | if (con->sock == NULL) | 765 | if (con->sock == NULL) |
788 | goto out_connect; | 766 | goto out_connect; |
789 | 767 | ||
@@ -800,6 +778,7 @@ static void send_to_sock(struct connection *con) | |||
800 | offset = e->offset; | 778 | offset = e->offset; |
801 | BUG_ON(len == 0 && e->users == 0); | 779 | BUG_ON(len == 0 && e->users == 0); |
802 | spin_unlock(&con->writequeue_lock); | 780 | spin_unlock(&con->writequeue_lock); |
781 | kmap(e->page); | ||
803 | 782 | ||
804 | ret = 0; | 783 | ret = 0; |
805 | if (len) { | 784 | if (len) { |
@@ -828,18 +807,18 @@ static void send_to_sock(struct connection *con) | |||
828 | } | 807 | } |
829 | spin_unlock(&con->writequeue_lock); | 808 | spin_unlock(&con->writequeue_lock); |
830 | out: | 809 | out: |
831 | up_read(&con->sock_sem); | 810 | mutex_unlock(&con->sock_mutex); |
832 | return; | 811 | return; |
833 | 812 | ||
834 | send_error: | 813 | send_error: |
835 | up_read(&con->sock_sem); | 814 | mutex_unlock(&con->sock_mutex); |
836 | close_connection(con, false); | 815 | close_connection(con, false); |
837 | lowcomms_connect_sock(con); | 816 | lowcomms_connect_sock(con); |
838 | return; | 817 | return; |
839 | 818 | ||
840 | out_connect: | 819 | out_connect: |
841 | up_read(&con->sock_sem); | 820 | mutex_unlock(&con->sock_mutex); |
842 | lowcomms_connect_sock(con); | 821 | connect_to_sock(con); |
843 | return; | 822 | return; |
844 | } | 823 | } |
845 | 824 | ||
@@ -872,7 +851,6 @@ int dlm_lowcomms_close(int nodeid) | |||
872 | if (con) { | 851 | if (con) { |
873 | clean_one_writequeue(con); | 852 | clean_one_writequeue(con); |
874 | close_connection(con, true); | 853 | close_connection(con, true); |
875 | atomic_set(&con->waiting_requests, 0); | ||
876 | } | 854 | } |
877 | return 0; | 855 | return 0; |
878 | 856 | ||
@@ -880,102 +858,29 @@ out: | |||
880 | return -1; | 858 | return -1; |
881 | } | 859 | } |
882 | 860 | ||
883 | /* API send message call, may queue the request */ | ||
884 | /* N.B. This is the old interface - use the new one for new calls */ | ||
885 | int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation) | ||
886 | { | ||
887 | struct writequeue_entry *e; | ||
888 | char *b; | ||
889 | |||
890 | e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b); | ||
891 | if (e) { | ||
892 | memcpy(b, buf, len); | ||
893 | dlm_lowcomms_commit_buffer(e); | ||
894 | return 0; | ||
895 | } | ||
896 | return -ENOBUFS; | ||
897 | } | ||
898 | |||
899 | /* Look for activity on active sockets */ | 861 | /* Look for activity on active sockets */ |
900 | static void process_sockets(void) | 862 | static void process_recv_sockets(struct work_struct *work) |
901 | { | 863 | { |
902 | struct list_head *list; | 864 | struct connection *con = container_of(work, struct connection, rwork); |
903 | struct list_head *temp; | 865 | int err; |
904 | int count = 0; | ||
905 | |||
906 | spin_lock_bh(&read_sockets_lock); | ||
907 | list_for_each_safe(list, temp, &read_sockets) { | ||
908 | 866 | ||
909 | struct connection *con = | 867 | clear_bit(CF_READ_PENDING, &con->flags); |
910 | list_entry(list, struct connection, read_list); | 868 | do { |
911 | list_del(&con->read_list); | 869 | err = con->rx_action(con); |
912 | clear_bit(CF_READ_PENDING, &con->flags); | 870 | } while (!err); |
913 | |||
914 | spin_unlock_bh(&read_sockets_lock); | ||
915 | |||
916 | /* This can reach zero if we are processing requests | ||
917 | * as they come in. | ||
918 | */ | ||
919 | if (atomic_read(&con->waiting_requests) == 0) { | ||
920 | spin_lock_bh(&read_sockets_lock); | ||
921 | continue; | ||
922 | } | ||
923 | |||
924 | do { | ||
925 | con->rx_action(con); | ||
926 | |||
927 | /* Don't starve out everyone else */ | ||
928 | if (++count >= MAX_RX_MSG_COUNT) { | ||
929 | cond_resched(); | ||
930 | count = 0; | ||
931 | } | ||
932 | |||
933 | } while (!atomic_dec_and_test(&con->waiting_requests) && | ||
934 | !kthread_should_stop()); | ||
935 | |||
936 | spin_lock_bh(&read_sockets_lock); | ||
937 | } | ||
938 | spin_unlock_bh(&read_sockets_lock); | ||
939 | } | 871 | } |
940 | 872 | ||
941 | /* Try to send any messages that are pending | ||
942 | */ | ||
943 | static void process_output_queue(void) | ||
944 | { | ||
945 | struct list_head *list; | ||
946 | struct list_head *temp; | ||
947 | |||
948 | spin_lock_bh(&write_sockets_lock); | ||
949 | list_for_each_safe(list, temp, &write_sockets) { | ||
950 | struct connection *con = | ||
951 | list_entry(list, struct connection, write_list); | ||
952 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
953 | list_del(&con->write_list); | ||
954 | |||
955 | spin_unlock_bh(&write_sockets_lock); | ||
956 | send_to_sock(con); | ||
957 | spin_lock_bh(&write_sockets_lock); | ||
958 | } | ||
959 | spin_unlock_bh(&write_sockets_lock); | ||
960 | } | ||
961 | 873 | ||
962 | static void process_state_queue(void) | 874 | static void process_send_sockets(struct work_struct *work) |
963 | { | 875 | { |
964 | struct list_head *list; | 876 | struct connection *con = container_of(work, struct connection, swork); |
965 | struct list_head *temp; | ||
966 | |||
967 | spin_lock_bh(&state_sockets_lock); | ||
968 | list_for_each_safe(list, temp, &state_sockets) { | ||
969 | struct connection *con = | ||
970 | list_entry(list, struct connection, state_list); | ||
971 | list_del(&con->state_list); | ||
972 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
973 | spin_unlock_bh(&state_sockets_lock); | ||
974 | 877 | ||
878 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
975 | connect_to_sock(con); | 879 | connect_to_sock(con); |
976 | spin_lock_bh(&state_sockets_lock); | ||
977 | } | 880 | } |
978 | spin_unlock_bh(&state_sockets_lock); | 881 | |
882 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
883 | send_to_sock(con); | ||
979 | } | 884 | } |
980 | 885 | ||
981 | 886 | ||
@@ -992,109 +897,33 @@ static void clean_writequeues(void) | |||
992 | } | 897 | } |
993 | } | 898 | } |
994 | 899 | ||
995 | static int read_list_empty(void) | 900 | static void work_stop(void) |
996 | { | 901 | { |
997 | int status; | 902 | destroy_workqueue(recv_workqueue); |
998 | 903 | destroy_workqueue(send_workqueue); | |
999 | spin_lock_bh(&read_sockets_lock); | ||
1000 | status = list_empty(&read_sockets); | ||
1001 | spin_unlock_bh(&read_sockets_lock); | ||
1002 | |||
1003 | return status; | ||
1004 | } | ||
1005 | |||
1006 | /* DLM Transport comms receive daemon */ | ||
1007 | static int dlm_recvd(void *data) | ||
1008 | { | ||
1009 | init_waitqueue_entry(&lowcomms_recv_waitq_head, current); | ||
1010 | add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); | ||
1011 | |||
1012 | while (!kthread_should_stop()) { | ||
1013 | set_current_state(TASK_INTERRUPTIBLE); | ||
1014 | if (read_list_empty()) | ||
1015 | cond_resched(); | ||
1016 | set_current_state(TASK_RUNNING); | ||
1017 | |||
1018 | process_sockets(); | ||
1019 | } | ||
1020 | |||
1021 | return 0; | ||
1022 | } | 904 | } |
1023 | 905 | ||
1024 | static int write_and_state_lists_empty(void) | 906 | static int work_start(void) |
1025 | { | 907 | { |
1026 | int status; | ||
1027 | |||
1028 | spin_lock_bh(&write_sockets_lock); | ||
1029 | status = list_empty(&write_sockets); | ||
1030 | spin_unlock_bh(&write_sockets_lock); | ||
1031 | |||
1032 | spin_lock_bh(&state_sockets_lock); | ||
1033 | if (list_empty(&state_sockets) == 0) | ||
1034 | status = 0; | ||
1035 | spin_unlock_bh(&state_sockets_lock); | ||
1036 | |||
1037 | return status; | ||
1038 | } | ||
1039 | |||
1040 | /* DLM Transport send daemon */ | ||
1041 | static int dlm_sendd(void *data) | ||
1042 | { | ||
1043 | init_waitqueue_entry(&lowcomms_send_waitq_head, current); | ||
1044 | add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head); | ||
1045 | |||
1046 | while (!kthread_should_stop()) { | ||
1047 | set_current_state(TASK_INTERRUPTIBLE); | ||
1048 | if (write_and_state_lists_empty()) | ||
1049 | cond_resched(); | ||
1050 | set_current_state(TASK_RUNNING); | ||
1051 | |||
1052 | process_state_queue(); | ||
1053 | process_output_queue(); | ||
1054 | } | ||
1055 | |||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
1059 | static void daemons_stop(void) | ||
1060 | { | ||
1061 | kthread_stop(recv_task); | ||
1062 | kthread_stop(send_task); | ||
1063 | } | ||
1064 | |||
1065 | static int daemons_start(void) | ||
1066 | { | ||
1067 | struct task_struct *p; | ||
1068 | int error; | 908 | int error; |
1069 | 909 | recv_workqueue = create_workqueue("dlm_recv"); | |
1070 | p = kthread_run(dlm_recvd, NULL, "dlm_recvd"); | 910 | error = IS_ERR(recv_workqueue); |
1071 | error = IS_ERR(p); | ||
1072 | if (error) { | 911 | if (error) { |
1073 | log_print("can't start dlm_recvd %d", error); | 912 | log_print("can't start dlm_recv %d", error); |
1074 | return error; | 913 | return error; |
1075 | } | 914 | } |
1076 | recv_task = p; | ||
1077 | 915 | ||
1078 | p = kthread_run(dlm_sendd, NULL, "dlm_sendd"); | 916 | send_workqueue = create_singlethread_workqueue("dlm_send"); |
1079 | error = IS_ERR(p); | 917 | error = IS_ERR(send_workqueue); |
1080 | if (error) { | 918 | if (error) { |
1081 | log_print("can't start dlm_sendd %d", error); | 919 | log_print("can't start dlm_send %d", error); |
1082 | kthread_stop(recv_task); | 920 | destroy_workqueue(recv_workqueue); |
1083 | return error; | 921 | return error; |
1084 | } | 922 | } |
1085 | send_task = p; | ||
1086 | 923 | ||
1087 | return 0; | 924 | return 0; |
1088 | } | 925 | } |
1089 | 926 | ||
1090 | /* | ||
1091 | * Return the largest buffer size we can cope with. | ||
1092 | */ | ||
1093 | int lowcomms_max_buffer_size(void) | ||
1094 | { | ||
1095 | return PAGE_CACHE_SIZE; | ||
1096 | } | ||
1097 | |||
1098 | void dlm_lowcomms_stop(void) | 927 | void dlm_lowcomms_stop(void) |
1099 | { | 928 | { |
1100 | int i; | 929 | int i; |
@@ -1107,7 +936,7 @@ void dlm_lowcomms_stop(void) | |||
1107 | connections[i]->flags |= 0xFF; | 936 | connections[i]->flags |= 0xFF; |
1108 | } | 937 | } |
1109 | 938 | ||
1110 | daemons_stop(); | 939 | work_stop(); |
1111 | clean_writequeues(); | 940 | clean_writequeues(); |
1112 | 941 | ||
1113 | for (i = 0; i < conn_array_size; i++) { | 942 | for (i = 0; i < conn_array_size; i++) { |
@@ -1159,7 +988,7 @@ int dlm_lowcomms_start(void) | |||
1159 | if (error) | 988 | if (error) |
1160 | goto fail_unlisten; | 989 | goto fail_unlisten; |
1161 | 990 | ||
1162 | error = daemons_start(); | 991 | error = work_start(); |
1163 | if (error) | 992 | if (error) |
1164 | goto fail_unlisten; | 993 | goto fail_unlisten; |
1165 | 994 | ||
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index c9b1c3d535f4..a5126e0c68a6 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c | |||
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, | |||
82 | if (msglen < sizeof(struct dlm_header)) | 82 | if (msglen < sizeof(struct dlm_header)) |
83 | break; | 83 | break; |
84 | err = -E2BIG; | 84 | err = -E2BIG; |
85 | if (msglen > dlm_config.buffer_size) { | 85 | if (msglen > dlm_config.ci_buffer_size) { |
86 | log_print("message size %d from %d too big, buf len %d", | 86 | log_print("message size %d from %d too big, buf len %d", |
87 | msglen, nodeid, len); | 87 | msglen, nodeid, len); |
88 | break; | 88 | break; |
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, | |||
103 | 103 | ||
104 | if (msglen > sizeof(__tmp) && | 104 | if (msglen > sizeof(__tmp) && |
105 | msg == (struct dlm_header *) __tmp) { | 105 | msg == (struct dlm_header *) __tmp) { |
106 | msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); | 106 | msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); |
107 | if (msg == NULL) | 107 | if (msg == NULL) |
108 | return ret; | 108 | return ret; |
109 | } | 109 | } |
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 4cc31be9cd9d..6bfbd6153809 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, | |||
56 | 56 | ||
57 | rc->rc_type = type; | 57 | rc->rc_type = type; |
58 | 58 | ||
59 | spin_lock(&ls->ls_recover_lock); | ||
60 | rc->rc_seq = ls->ls_recover_seq; | ||
61 | spin_unlock(&ls->ls_recover_lock); | ||
62 | |||
59 | *mh_ret = mh; | 63 | *mh_ret = mh; |
60 | *rc_ret = rc; | 64 | *rc_ret = rc; |
61 | return 0; | 65 | return 0; |
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf) | |||
78 | rf->rf_lsflags = ls->ls_exflags; | 82 | rf->rf_lsflags = ls->ls_exflags; |
79 | } | 83 | } |
80 | 84 | ||
81 | static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid) | 85 | static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
82 | { | 86 | { |
87 | struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; | ||
88 | |||
89 | if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { | ||
90 | log_error(ls, "version mismatch: %x nodeid %d: %x", | ||
91 | DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, | ||
92 | rc->rc_header.h_version); | ||
93 | return -EINVAL; | ||
94 | } | ||
95 | |||
83 | if (rf->rf_lvblen != ls->ls_lvblen || | 96 | if (rf->rf_lvblen != ls->ls_lvblen || |
84 | rf->rf_lsflags != ls->ls_exflags) { | 97 | rf->rf_lsflags != ls->ls_exflags) { |
85 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", | 98 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", |
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | |||
125 | goto out; | 138 | goto out; |
126 | 139 | ||
127 | allow_sync_reply(ls, &rc->rc_id); | 140 | allow_sync_reply(ls, &rc->rc_id); |
128 | memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); | 141 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); |
129 | 142 | ||
130 | send_rcom(ls, mh, rc); | 143 | send_rcom(ls, mh, rc); |
131 | 144 | ||
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | |||
141 | log_debug(ls, "remote node %d not ready", nodeid); | 154 | log_debug(ls, "remote node %d not ready", nodeid); |
142 | rc->rc_result = 0; | 155 | rc->rc_result = 0; |
143 | } else | 156 | } else |
144 | error = check_config(ls, (struct rcom_config *) rc->rc_buf, | 157 | error = check_config(ls, rc, nodeid); |
145 | nodeid); | ||
146 | /* the caller looks at rc_result for the remote recovery status */ | 158 | /* the caller looks at rc_result for the remote recovery status */ |
147 | out: | 159 | out: |
148 | return error; | 160 | return error; |
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
159 | if (error) | 171 | if (error) |
160 | return; | 172 | return; |
161 | rc->rc_id = rc_in->rc_id; | 173 | rc->rc_id = rc_in->rc_id; |
174 | rc->rc_seq_reply = rc_in->rc_seq; | ||
162 | rc->rc_result = dlm_recover_status(ls); | 175 | rc->rc_result = dlm_recover_status(ls); |
163 | make_config(ls, (struct rcom_config *) rc->rc_buf); | 176 | make_config(ls, (struct rcom_config *) rc->rc_buf); |
164 | 177 | ||
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | |||
200 | if (nodeid == dlm_our_nodeid()) { | 213 | if (nodeid == dlm_our_nodeid()) { |
201 | dlm_copy_master_names(ls, last_name, last_len, | 214 | dlm_copy_master_names(ls, last_name, last_len, |
202 | ls->ls_recover_buf + len, | 215 | ls->ls_recover_buf + len, |
203 | dlm_config.buffer_size - len, nodeid); | 216 | dlm_config.ci_buffer_size - len, nodeid); |
204 | goto out; | 217 | goto out; |
205 | } | 218 | } |
206 | 219 | ||
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | |||
210 | memcpy(rc->rc_buf, last_name, last_len); | 223 | memcpy(rc->rc_buf, last_name, last_len); |
211 | 224 | ||
212 | allow_sync_reply(ls, &rc->rc_id); | 225 | allow_sync_reply(ls, &rc->rc_id); |
213 | memset(ls->ls_recover_buf, 0, dlm_config.buffer_size); | 226 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); |
214 | 227 | ||
215 | send_rcom(ls, mh, rc); | 228 | send_rcom(ls, mh, rc); |
216 | 229 | ||
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
224 | { | 237 | { |
225 | struct dlm_rcom *rc; | 238 | struct dlm_rcom *rc; |
226 | struct dlm_mhandle *mh; | 239 | struct dlm_mhandle *mh; |
227 | int error, inlen, outlen; | 240 | int error, inlen, outlen, nodeid; |
228 | int nodeid = rc_in->rc_header.h_nodeid; | ||
229 | uint32_t status = dlm_recover_status(ls); | ||
230 | |||
231 | /* | ||
232 | * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while | ||
233 | * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes). | ||
234 | * It could only happen in rare cases where we get a late NAMES | ||
235 | * message from a previous instance of recovery. | ||
236 | */ | ||
237 | |||
238 | if (!(status & DLM_RS_NODES)) { | ||
239 | log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid); | ||
240 | return; | ||
241 | } | ||
242 | 241 | ||
243 | nodeid = rc_in->rc_header.h_nodeid; | 242 | nodeid = rc_in->rc_header.h_nodeid; |
244 | inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); | 243 | inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); |
245 | outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); | 244 | outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); |
246 | 245 | ||
247 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); | 246 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); |
248 | if (error) | 247 | if (error) |
249 | return; | 248 | return; |
250 | rc->rc_id = rc_in->rc_id; | 249 | rc->rc_id = rc_in->rc_id; |
250 | rc->rc_seq_reply = rc_in->rc_seq; | ||
251 | 251 | ||
252 | dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, | 252 | dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, |
253 | nodeid); | 253 | nodeid); |
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
294 | ret_nodeid = error; | 294 | ret_nodeid = error; |
295 | rc->rc_result = ret_nodeid; | 295 | rc->rc_result = ret_nodeid; |
296 | rc->rc_id = rc_in->rc_id; | 296 | rc->rc_id = rc_in->rc_id; |
297 | rc->rc_seq_reply = rc_in->rc_seq; | ||
297 | 298 | ||
298 | send_rcom(ls, mh, rc); | 299 | send_rcom(ls, mh, rc); |
299 | } | 300 | } |
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
375 | 376 | ||
376 | memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); | 377 | memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock)); |
377 | rc->rc_id = rc_in->rc_id; | 378 | rc->rc_id = rc_in->rc_id; |
379 | rc->rc_seq_reply = rc_in->rc_seq; | ||
378 | 380 | ||
379 | send_rcom(ls, mh, rc); | 381 | send_rcom(ls, mh, rc); |
380 | } | 382 | } |
381 | 383 | ||
382 | static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | 384 | static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) |
383 | { | 385 | { |
384 | uint32_t status = dlm_recover_status(ls); | ||
385 | |||
386 | if (!(status & DLM_RS_DIR)) { | ||
387 | log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u", | ||
388 | rc_in->rc_header.h_nodeid); | ||
389 | return; | ||
390 | } | ||
391 | |||
392 | dlm_recover_process_copy(ls, rc_in); | 386 | dlm_recover_process_copy(ls, rc_in); |
393 | } | 387 | } |
394 | 388 | ||
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | |||
415 | 409 | ||
416 | rc->rc_type = DLM_RCOM_STATUS_REPLY; | 410 | rc->rc_type = DLM_RCOM_STATUS_REPLY; |
417 | rc->rc_id = rc_in->rc_id; | 411 | rc->rc_id = rc_in->rc_id; |
412 | rc->rc_seq_reply = rc_in->rc_seq; | ||
418 | rc->rc_result = -ESRCH; | 413 | rc->rc_result = -ESRCH; |
419 | 414 | ||
420 | rf = (struct rcom_config *) rc->rc_buf; | 415 | rf = (struct rcom_config *) rc->rc_buf; |
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | |||
426 | return 0; | 421 | return 0; |
427 | } | 422 | } |
428 | 423 | ||
424 | static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
425 | { | ||
426 | uint64_t seq; | ||
427 | int rv = 0; | ||
428 | |||
429 | switch (rc->rc_type) { | ||
430 | case DLM_RCOM_STATUS_REPLY: | ||
431 | case DLM_RCOM_NAMES_REPLY: | ||
432 | case DLM_RCOM_LOOKUP_REPLY: | ||
433 | case DLM_RCOM_LOCK_REPLY: | ||
434 | spin_lock(&ls->ls_recover_lock); | ||
435 | seq = ls->ls_recover_seq; | ||
436 | spin_unlock(&ls->ls_recover_lock); | ||
437 | if (rc->rc_seq_reply != seq) { | ||
438 | log_debug(ls, "ignoring old reply %x from %d " | ||
439 | "seq_reply %llx expect %llx", | ||
440 | rc->rc_type, rc->rc_header.h_nodeid, | ||
441 | (unsigned long long)rc->rc_seq_reply, | ||
442 | (unsigned long long)seq); | ||
443 | rv = 1; | ||
444 | } | ||
445 | } | ||
446 | return rv; | ||
447 | } | ||
448 | |||
429 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special | 449 | /* Called by dlm_recvd; corresponds to dlm_receive_message() but special |
430 | recovery-only comms are sent through here. */ | 450 | recovery-only comms are sent through here. */ |
431 | 451 | ||
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) | |||
449 | } | 469 | } |
450 | 470 | ||
451 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { | 471 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { |
452 | log_error(ls, "ignoring recovery message %x from %d", | 472 | log_debug(ls, "ignoring recovery message %x from %d", |
453 | rc->rc_type, nodeid); | 473 | rc->rc_type, nodeid); |
454 | goto out; | 474 | goto out; |
455 | } | 475 | } |
456 | 476 | ||
477 | if (is_old_reply(ls, rc)) | ||
478 | goto out; | ||
479 | |||
457 | if (nodeid != rc->rc_header.h_nodeid) { | 480 | if (nodeid != rc->rc_header.h_nodeid) { |
458 | log_error(ls, "bad rcom nodeid %d from %d", | 481 | log_error(ls, "bad rcom nodeid %d from %d", |
459 | rc->rc_header.h_nodeid, nodeid); | 482 | rc->rc_header.h_nodeid, nodeid); |
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index cf9f6831bab5..c2cc7694cd16 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -44,7 +44,7 @@ | |||
44 | static void dlm_wait_timer_fn(unsigned long data) | 44 | static void dlm_wait_timer_fn(unsigned long data) |
45 | { | 45 | { |
46 | struct dlm_ls *ls = (struct dlm_ls *) data; | 46 | struct dlm_ls *ls = (struct dlm_ls *) data; |
47 | mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ)); | 47 | mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ)); |
48 | wake_up(&ls->ls_wait_general); | 48 | wake_up(&ls->ls_wait_general); |
49 | } | 49 | } |
50 | 50 | ||
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) | |||
55 | init_timer(&ls->ls_timer); | 55 | init_timer(&ls->ls_timer); |
56 | ls->ls_timer.function = dlm_wait_timer_fn; | 56 | ls->ls_timer.function = dlm_wait_timer_fn; |
57 | ls->ls_timer.data = (long) ls; | 57 | ls->ls_timer.data = (long) ls; |
58 | ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ); | 58 | ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ); |
59 | add_timer(&ls->ls_timer); | 59 | add_timer(&ls->ls_timer); |
60 | 60 | ||
61 | wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); | 61 | wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); |
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
397 | 397 | ||
398 | if (dlm_no_directory(ls)) | 398 | if (dlm_no_directory(ls)) |
399 | count += recover_master_static(r); | 399 | count += recover_master_static(r); |
400 | else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) { | 400 | else if (!is_master(r) && |
401 | (dlm_is_removed(ls, r->res_nodeid) || | ||
402 | rsb_flag(r, RSB_NEW_MASTER))) { | ||
401 | recover_master(r); | 403 | recover_master(r); |
402 | count++; | 404 | count++; |
403 | } | 405 | } |
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 650536aa5139..3cb636d60249 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
77 | 77 | ||
78 | error = dlm_recover_members(ls, rv, &neg); | 78 | error = dlm_recover_members(ls, rv, &neg); |
79 | if (error) { | 79 | if (error) { |
80 | log_error(ls, "recover_members failed %d", error); | 80 | log_debug(ls, "recover_members failed %d", error); |
81 | goto fail; | 81 | goto fail; |
82 | } | 82 | } |
83 | start = jiffies; | 83 | start = jiffies; |
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
89 | 89 | ||
90 | error = dlm_recover_directory(ls); | 90 | error = dlm_recover_directory(ls); |
91 | if (error) { | 91 | if (error) { |
92 | log_error(ls, "recover_directory failed %d", error); | 92 | log_debug(ls, "recover_directory failed %d", error); |
93 | goto fail; | 93 | goto fail; |
94 | } | 94 | } |
95 | 95 | ||
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
99 | 99 | ||
100 | error = dlm_recover_directory_wait(ls); | 100 | error = dlm_recover_directory_wait(ls); |
101 | if (error) { | 101 | if (error) { |
102 | log_error(ls, "recover_directory_wait failed %d", error); | 102 | log_debug(ls, "recover_directory_wait failed %d", error); |
103 | goto fail; | 103 | goto fail; |
104 | } | 104 | } |
105 | 105 | ||
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
129 | 129 | ||
130 | error = dlm_recover_masters(ls); | 130 | error = dlm_recover_masters(ls); |
131 | if (error) { | 131 | if (error) { |
132 | log_error(ls, "recover_masters failed %d", error); | 132 | log_debug(ls, "recover_masters failed %d", error); |
133 | goto fail; | 133 | goto fail; |
134 | } | 134 | } |
135 | 135 | ||
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
139 | 139 | ||
140 | error = dlm_recover_locks(ls); | 140 | error = dlm_recover_locks(ls); |
141 | if (error) { | 141 | if (error) { |
142 | log_error(ls, "recover_locks failed %d", error); | 142 | log_debug(ls, "recover_locks failed %d", error); |
143 | goto fail; | 143 | goto fail; |
144 | } | 144 | } |
145 | 145 | ||
146 | error = dlm_recover_locks_wait(ls); | 146 | error = dlm_recover_locks_wait(ls); |
147 | if (error) { | 147 | if (error) { |
148 | log_error(ls, "recover_locks_wait failed %d", error); | 148 | log_debug(ls, "recover_locks_wait failed %d", error); |
149 | goto fail; | 149 | goto fail; |
150 | } | 150 | } |
151 | 151 | ||
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
166 | 166 | ||
167 | error = dlm_recover_locks_wait(ls); | 167 | error = dlm_recover_locks_wait(ls); |
168 | if (error) { | 168 | if (error) { |
169 | log_error(ls, "recover_locks_wait failed %d", error); | 169 | log_debug(ls, "recover_locks_wait failed %d", error); |
170 | goto fail; | 170 | goto fail; |
171 | } | 171 | } |
172 | } | 172 | } |
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
184 | dlm_set_recover_status(ls, DLM_RS_DONE); | 184 | dlm_set_recover_status(ls, DLM_RS_DONE); |
185 | error = dlm_recover_done_wait(ls); | 185 | error = dlm_recover_done_wait(ls); |
186 | if (error) { | 186 | if (error) { |
187 | log_error(ls, "recover_done_wait failed %d", error); | 187 | log_debug(ls, "recover_done_wait failed %d", error); |
188 | goto fail; | 188 | goto fail; |
189 | } | 189 | } |
190 | 190 | ||
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
192 | 192 | ||
193 | error = enable_locking(ls, rv->seq); | 193 | error = enable_locking(ls, rv->seq); |
194 | if (error) { | 194 | if (error) { |
195 | log_error(ls, "enable_locking failed %d", error); | 195 | log_debug(ls, "enable_locking failed %d", error); |
196 | goto fail; | 196 | goto fail; |
197 | } | 197 | } |
198 | 198 | ||
199 | error = dlm_process_requestqueue(ls); | 199 | error = dlm_process_requestqueue(ls); |
200 | if (error) { | 200 | if (error) { |
201 | log_error(ls, "process_requestqueue failed %d", error); | 201 | log_debug(ls, "process_requestqueue failed %d", error); |
202 | goto fail; | 202 | goto fail; |
203 | } | 203 | } |
204 | 204 | ||
205 | error = dlm_recover_waiters_post(ls); | 205 | error = dlm_recover_waiters_post(ls); |
206 | if (error) { | 206 | if (error) { |
207 | log_error(ls, "recover_waiters_post failed %d", error); | 207 | log_debug(ls, "recover_waiters_post failed %d", error); |
208 | goto fail; | 208 | goto fail; |
209 | } | 209 | } |
210 | 210 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index c37e93e4f2df..d378b7fe2a1e 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
180 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) | 180 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) |
181 | remove_ownqueue = 1; | 181 | remove_ownqueue = 1; |
182 | 182 | ||
183 | /* unlocks or cancels of waiting requests need to be removed from the | ||
184 | proc's unlocking list, again there must be a better way... */ | ||
185 | |||
186 | if (ua->lksb.sb_status == -DLM_EUNLOCK || | ||
187 | (ua->lksb.sb_status == -DLM_ECANCEL && | ||
188 | lkb->lkb_grmode == DLM_LOCK_IV)) | ||
189 | remove_ownqueue = 1; | ||
190 | |||
183 | /* We want to copy the lvb to userspace when the completion | 191 | /* We want to copy the lvb to userspace when the completion |
184 | ast is read if the status is 0, the lock has an lvb and | 192 | ast is read if the status is 0, the lock has an lvb and |
185 | lvb_ops says we should. We could probably have set_lvb_lock() | 193 | lvb_ops says we should. We could probably have set_lvb_lock() |
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file) | |||
523 | proc->lockspace = ls->ls_local_handle; | 531 | proc->lockspace = ls->ls_local_handle; |
524 | INIT_LIST_HEAD(&proc->asts); | 532 | INIT_LIST_HEAD(&proc->asts); |
525 | INIT_LIST_HEAD(&proc->locks); | 533 | INIT_LIST_HEAD(&proc->locks); |
534 | INIT_LIST_HEAD(&proc->unlocking); | ||
526 | spin_lock_init(&proc->asts_spin); | 535 | spin_lock_init(&proc->asts_spin); |
527 | spin_lock_init(&proc->locks_spin); | 536 | spin_lock_init(&proc->locks_spin); |
528 | init_waitqueue_head(&proc->wait); | 537 | init_waitqueue_head(&proc->wait); |
diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 767197db9944..963889cf6740 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c | |||
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc) | |||
134 | rc->rc_type = cpu_to_le32(rc->rc_type); | 134 | rc->rc_type = cpu_to_le32(rc->rc_type); |
135 | rc->rc_result = cpu_to_le32(rc->rc_result); | 135 | rc->rc_result = cpu_to_le32(rc->rc_result); |
136 | rc->rc_id = cpu_to_le64(rc->rc_id); | 136 | rc->rc_id = cpu_to_le64(rc->rc_id); |
137 | rc->rc_seq = cpu_to_le64(rc->rc_seq); | ||
138 | rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); | ||
137 | 139 | ||
138 | if (type == DLM_RCOM_LOCK) | 140 | if (type == DLM_RCOM_LOCK) |
139 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); | 141 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); |
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc) | |||
151 | rc->rc_type = le32_to_cpu(rc->rc_type); | 153 | rc->rc_type = le32_to_cpu(rc->rc_type); |
152 | rc->rc_result = le32_to_cpu(rc->rc_result); | 154 | rc->rc_result = le32_to_cpu(rc->rc_result); |
153 | rc->rc_id = le64_to_cpu(rc->rc_id); | 155 | rc->rc_id = le64_to_cpu(rc->rc_id); |
156 | rc->rc_seq = le64_to_cpu(rc->rc_seq); | ||
157 | rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); | ||
154 | 158 | ||
155 | if (rc->rc_type == DLM_RCOM_LOCK) | 159 | if (rc->rc_type == DLM_RCOM_LOCK) |
156 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); | 160 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 6a2ffa2db14f..de8e64c03f73 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -4,44 +4,43 @@ config GFS2_FS | |||
4 | select FS_POSIX_ACL | 4 | select FS_POSIX_ACL |
5 | select CRC32 | 5 | select CRC32 |
6 | help | 6 | help |
7 | A cluster filesystem. | 7 | A cluster filesystem. |
8 | 8 | ||
9 | Allows a cluster of computers to simultaneously use a block device | 9 | Allows a cluster of computers to simultaneously use a block device |
10 | that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads | 10 | that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads |
11 | and writes to the block device like a local filesystem, but also uses | 11 | and writes to the block device like a local filesystem, but also uses |
12 | a lock module to allow the computers coordinate their I/O so | 12 | a lock module to allow the computers coordinate their I/O so |
13 | filesystem consistency is maintained. One of the nifty features of | 13 | filesystem consistency is maintained. One of the nifty features of |
14 | GFS is perfect consistency -- changes made to the filesystem on one | 14 | GFS is perfect consistency -- changes made to the filesystem on one |
15 | machine show up immediately on all other machines in the cluster. | 15 | machine show up immediately on all other machines in the cluster. |
16 | 16 | ||
17 | To use the GFS2 filesystem, you will need to enable one or more of | 17 | To use the GFS2 filesystem, you will need to enable one or more of |
18 | the below locking modules. Documentation and utilities for GFS2 can | 18 | the below locking modules. Documentation and utilities for GFS2 can |
19 | be found here: http://sources.redhat.com/cluster | 19 | be found here: http://sources.redhat.com/cluster |
20 | 20 | ||
21 | config GFS2_FS_LOCKING_NOLOCK | 21 | config GFS2_FS_LOCKING_NOLOCK |
22 | tristate "GFS2 \"nolock\" locking module" | 22 | tristate "GFS2 \"nolock\" locking module" |
23 | depends on GFS2_FS | 23 | depends on GFS2_FS |
24 | help | 24 | help |
25 | Single node locking module for GFS2. | 25 | Single node locking module for GFS2. |
26 | 26 | ||
27 | Use this module if you want to use GFS2 on a single node without | 27 | Use this module if you want to use GFS2 on a single node without |
28 | its clustering features. You can still take advantage of the | 28 | its clustering features. You can still take advantage of the |
29 | large file support, and upgrade to running a full cluster later on | 29 | large file support, and upgrade to running a full cluster later on |
30 | if required. | 30 | if required. |
31 | 31 | ||
32 | If you will only be using GFS2 in cluster mode, you do not need this | 32 | If you will only be using GFS2 in cluster mode, you do not need this |
33 | module. | 33 | module. |
34 | 34 | ||
35 | config GFS2_FS_LOCKING_DLM | 35 | config GFS2_FS_LOCKING_DLM |
36 | tristate "GFS2 DLM locking module" | 36 | tristate "GFS2 DLM locking module" |
37 | depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n) | 37 | depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n) |
38 | select IP_SCTP if DLM_SCTP | 38 | select IP_SCTP if DLM_SCTP |
39 | select CONFIGFS_FS | 39 | select CONFIGFS_FS |
40 | select DLM | 40 | select DLM |
41 | help | 41 | help |
42 | Multiple node locking module for GFS2 | 42 | Multiple node locking module for GFS2 |
43 | |||
44 | Most users of GFS2 will require this module. It provides the locking | ||
45 | interface between GFS2 and the DLM, which is required to use GFS2 | ||
46 | in a cluster environment. | ||
47 | 43 | ||
44 | Most users of GFS2 will require this module. It provides the locking | ||
45 | interface between GFS2 and the DLM, which is required to use GFS2 | ||
46 | in a cluster environment. | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8240c1ff94f4..113f6c9110c7 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -773,7 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
773 | gfs2_free_data(ip, bstart, blen); | 773 | gfs2_free_data(ip, bstart, blen); |
774 | } | 774 | } |
775 | 775 | ||
776 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 776 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
777 | 777 | ||
778 | gfs2_dinode_out(ip, dibh->b_data); | 778 | gfs2_dinode_out(ip, dibh->b_data); |
779 | 779 | ||
@@ -848,7 +848,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) | |||
848 | } | 848 | } |
849 | 849 | ||
850 | ip->i_di.di_size = size; | 850 | ip->i_di.di_size = size; |
851 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 851 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
852 | 852 | ||
853 | error = gfs2_meta_inode_buffer(ip, &dibh); | 853 | error = gfs2_meta_inode_buffer(ip, &dibh); |
854 | if (error) | 854 | if (error) |
@@ -963,7 +963,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
963 | 963 | ||
964 | if (gfs2_is_stuffed(ip)) { | 964 | if (gfs2_is_stuffed(ip)) { |
965 | ip->i_di.di_size = size; | 965 | ip->i_di.di_size = size; |
966 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 966 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
967 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 967 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
968 | gfs2_dinode_out(ip, dibh->b_data); | 968 | gfs2_dinode_out(ip, dibh->b_data); |
969 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); | 969 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); |
@@ -975,7 +975,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
975 | 975 | ||
976 | if (!error) { | 976 | if (!error) { |
977 | ip->i_di.di_size = size; | 977 | ip->i_di.di_size = size; |
978 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 978 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
979 | ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; | 979 | ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; |
980 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 980 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
981 | gfs2_dinode_out(ip, dibh->b_data); | 981 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -1048,7 +1048,7 @@ static int trunc_end(struct gfs2_inode *ip) | |||
1048 | ip->i_num.no_addr; | 1048 | ip->i_num.no_addr; |
1049 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1049 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
1050 | } | 1050 | } |
1051 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 1051 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1052 | ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; | 1052 | ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; |
1053 | 1053 | ||
1054 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1054 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0fdcb7713cd9..c93ca8f361b5 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | |||
131 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | 131 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); |
132 | if (ip->i_di.di_size < offset + size) | 132 | if (ip->i_di.di_size < offset + size) |
133 | ip->i_di.di_size = offset + size; | 133 | ip->i_di.di_size = offset + size; |
134 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 134 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
135 | gfs2_dinode_out(ip, dibh->b_data); | 135 | gfs2_dinode_out(ip, dibh->b_data); |
136 | 136 | ||
137 | brelse(dibh); | 137 | brelse(dibh); |
@@ -229,7 +229,7 @@ out: | |||
229 | 229 | ||
230 | if (ip->i_di.di_size < offset + copied) | 230 | if (ip->i_di.di_size < offset + copied) |
231 | ip->i_di.di_size = offset + copied; | 231 | ip->i_di.di_size = offset + copied; |
232 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 232 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
233 | 233 | ||
234 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 234 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
235 | gfs2_dinode_out(ip, dibh->b_data); | 235 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -1198,12 +1198,11 @@ static int compare_dents(const void *a, const void *b) | |||
1198 | */ | 1198 | */ |
1199 | 1199 | ||
1200 | static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | 1200 | static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, |
1201 | void *opaque, gfs2_filldir_t filldir, | 1201 | void *opaque, filldir_t filldir, |
1202 | const struct gfs2_dirent **darr, u32 entries, | 1202 | const struct gfs2_dirent **darr, u32 entries, |
1203 | int *copied) | 1203 | int *copied) |
1204 | { | 1204 | { |
1205 | const struct gfs2_dirent *dent, *dent_next; | 1205 | const struct gfs2_dirent *dent, *dent_next; |
1206 | struct gfs2_inum_host inum; | ||
1207 | u64 off, off_next; | 1206 | u64 off, off_next; |
1208 | unsigned int x, y; | 1207 | unsigned int x, y; |
1209 | int run = 0; | 1208 | int run = 0; |
@@ -1240,11 +1239,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1240 | *offset = off; | 1239 | *offset = off; |
1241 | } | 1240 | } |
1242 | 1241 | ||
1243 | gfs2_inum_in(&inum, (char *)&dent->de_inum); | ||
1244 | |||
1245 | error = filldir(opaque, (const char *)(dent + 1), | 1242 | error = filldir(opaque, (const char *)(dent + 1), |
1246 | be16_to_cpu(dent->de_name_len), | 1243 | be16_to_cpu(dent->de_name_len), |
1247 | off, &inum, | 1244 | off, be64_to_cpu(dent->de_inum.no_addr), |
1248 | be16_to_cpu(dent->de_type)); | 1245 | be16_to_cpu(dent->de_type)); |
1249 | if (error) | 1246 | if (error) |
1250 | return 1; | 1247 | return 1; |
@@ -1262,8 +1259,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1262 | } | 1259 | } |
1263 | 1260 | ||
1264 | static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | 1261 | static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, |
1265 | gfs2_filldir_t filldir, int *copied, | 1262 | filldir_t filldir, int *copied, unsigned *depth, |
1266 | unsigned *depth, u64 leaf_no) | 1263 | u64 leaf_no) |
1267 | { | 1264 | { |
1268 | struct gfs2_inode *ip = GFS2_I(inode); | 1265 | struct gfs2_inode *ip = GFS2_I(inode); |
1269 | struct buffer_head *bh; | 1266 | struct buffer_head *bh; |
@@ -1343,7 +1340,7 @@ out: | |||
1343 | */ | 1340 | */ |
1344 | 1341 | ||
1345 | static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | 1342 | static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, |
1346 | gfs2_filldir_t filldir) | 1343 | filldir_t filldir) |
1347 | { | 1344 | { |
1348 | struct gfs2_inode *dip = GFS2_I(inode); | 1345 | struct gfs2_inode *dip = GFS2_I(inode); |
1349 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 1346 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -1402,7 +1399,7 @@ out: | |||
1402 | } | 1399 | } |
1403 | 1400 | ||
1404 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 1401 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
1405 | gfs2_filldir_t filldir) | 1402 | filldir_t filldir) |
1406 | { | 1403 | { |
1407 | struct gfs2_inode *dip = GFS2_I(inode); | 1404 | struct gfs2_inode *dip = GFS2_I(inode); |
1408 | struct dirent_gather g; | 1405 | struct dirent_gather g; |
@@ -1568,7 +1565,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
1568 | break; | 1565 | break; |
1569 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 1566 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
1570 | ip->i_di.di_entries++; | 1567 | ip->i_di.di_entries++; |
1571 | ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); | 1568 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1572 | gfs2_dinode_out(ip, bh->b_data); | 1569 | gfs2_dinode_out(ip, bh->b_data); |
1573 | brelse(bh); | 1570 | brelse(bh); |
1574 | error = 0; | 1571 | error = 0; |
@@ -1654,7 +1651,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) | |||
1654 | gfs2_consist_inode(dip); | 1651 | gfs2_consist_inode(dip); |
1655 | gfs2_trans_add_bh(dip->i_gl, bh, 1); | 1652 | gfs2_trans_add_bh(dip->i_gl, bh, 1); |
1656 | dip->i_di.di_entries--; | 1653 | dip->i_di.di_entries--; |
1657 | dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); | 1654 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1658 | gfs2_dinode_out(dip, bh->b_data); | 1655 | gfs2_dinode_out(dip, bh->b_data); |
1659 | brelse(bh); | 1656 | brelse(bh); |
1660 | mark_inode_dirty(&dip->i_inode); | 1657 | mark_inode_dirty(&dip->i_inode); |
@@ -1702,7 +1699,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | |||
1702 | gfs2_trans_add_bh(dip->i_gl, bh, 1); | 1699 | gfs2_trans_add_bh(dip->i_gl, bh, 1); |
1703 | } | 1700 | } |
1704 | 1701 | ||
1705 | dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); | 1702 | dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1706 | gfs2_dinode_out(dip, bh->b_data); | 1703 | gfs2_dinode_out(dip, bh->b_data); |
1707 | brelse(bh); | 1704 | brelse(bh); |
1708 | return 0; | 1705 | return 0; |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index b21b33668a5b..48fe89046bba 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -16,30 +16,13 @@ struct inode; | |||
16 | struct gfs2_inode; | 16 | struct gfs2_inode; |
17 | struct gfs2_inum; | 17 | struct gfs2_inum; |
18 | 18 | ||
19 | /** | ||
20 | * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read() | ||
21 | * @opaque: opaque data used by the function | ||
22 | * @name: the name of the directory entry | ||
23 | * @length: the length of the name | ||
24 | * @offset: the entry's offset in the directory | ||
25 | * @inum: the inode number the entry points to | ||
26 | * @type: the type of inode the entry points to | ||
27 | * | ||
28 | * Returns: 0 on success, 1 if buffer full | ||
29 | */ | ||
30 | |||
31 | typedef int (*gfs2_filldir_t) (void *opaque, | ||
32 | const char *name, unsigned int length, | ||
33 | u64 offset, | ||
34 | struct gfs2_inum_host *inum, unsigned int type); | ||
35 | |||
36 | int gfs2_dir_search(struct inode *dir, const struct qstr *filename, | 19 | int gfs2_dir_search(struct inode *dir, const struct qstr *filename, |
37 | struct gfs2_inum_host *inum, unsigned int *type); | 20 | struct gfs2_inum_host *inum, unsigned int *type); |
38 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 21 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
39 | const struct gfs2_inum_host *inum, unsigned int type); | 22 | const struct gfs2_inum_host *inum, unsigned int type); |
40 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); | 23 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); |
41 | int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, | 24 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
42 | gfs2_filldir_t filldir); | 25 | filldir_t filldir); |
43 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 26 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
44 | struct gfs2_inum_host *new_inum, unsigned int new_type); | 27 | struct gfs2_inum_host *new_inum, unsigned int new_type); |
45 | 28 | ||
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index ebebbdcd7057..0c83c7f4dda8 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
@@ -301,7 +301,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
301 | 301 | ||
302 | error = gfs2_meta_inode_buffer(ip, &dibh); | 302 | error = gfs2_meta_inode_buffer(ip, &dibh); |
303 | if (!error) { | 303 | if (!error) { |
304 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 304 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
305 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 305 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
306 | gfs2_dinode_out(ip, dibh->b_data); | 306 | gfs2_dinode_out(ip, dibh->b_data); |
307 | brelse(dibh); | 307 | brelse(dibh); |
@@ -718,7 +718,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
718 | (er->er_mode & S_IFMT)); | 718 | (er->er_mode & S_IFMT)); |
719 | ip->i_inode.i_mode = er->er_mode; | 719 | ip->i_inode.i_mode = er->er_mode; |
720 | } | 720 | } |
721 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 721 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
722 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 722 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
723 | gfs2_dinode_out(ip, dibh->b_data); | 723 | gfs2_dinode_out(ip, dibh->b_data); |
724 | brelse(dibh); | 724 | brelse(dibh); |
@@ -853,7 +853,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, | |||
853 | (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); | 853 | (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); |
854 | ip->i_inode.i_mode = er->er_mode; | 854 | ip->i_inode.i_mode = er->er_mode; |
855 | } | 855 | } |
856 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 856 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
857 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 857 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
858 | gfs2_dinode_out(ip, dibh->b_data); | 858 | gfs2_dinode_out(ip, dibh->b_data); |
859 | brelse(dibh); | 859 | brelse(dibh); |
@@ -1134,7 +1134,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) | |||
1134 | 1134 | ||
1135 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1135 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1136 | if (!error) { | 1136 | if (!error) { |
1137 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 1137 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
1138 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1138 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
1139 | gfs2_dinode_out(ip, dibh->b_data); | 1139 | gfs2_dinode_out(ip, dibh->b_data); |
1140 | brelse(dibh); | 1140 | brelse(dibh); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 438146904b58..6618c1190252 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/gfs2_ondisk.h> | 19 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/lm_interface.h> | 21 | #include <linux/lm_interface.h> |
22 | #include <linux/wait.h> | ||
23 | #include <linux/rwsem.h> | ||
22 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
23 | 25 | ||
24 | #include "gfs2.h" | 26 | #include "gfs2.h" |
@@ -33,11 +35,6 @@ | |||
33 | #include "super.h" | 35 | #include "super.h" |
34 | #include "util.h" | 36 | #include "util.h" |
35 | 37 | ||
36 | struct greedy { | ||
37 | struct gfs2_holder gr_gh; | ||
38 | struct delayed_work gr_work; | ||
39 | }; | ||
40 | |||
41 | struct gfs2_gl_hash_bucket { | 38 | struct gfs2_gl_hash_bucket { |
42 | struct hlist_head hb_list; | 39 | struct hlist_head hb_list; |
43 | }; | 40 | }; |
@@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl); | |||
47 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | 44 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); |
48 | static int dump_glock(struct gfs2_glock *gl); | 45 | static int dump_glock(struct gfs2_glock *gl); |
49 | static int dump_inode(struct gfs2_inode *ip); | 46 | static int dump_inode(struct gfs2_inode *ip); |
47 | static void gfs2_glock_xmote_th(struct gfs2_holder *gh); | ||
48 | static void gfs2_glock_drop_th(struct gfs2_glock *gl); | ||
49 | static DECLARE_RWSEM(gfs2_umount_flush_sem); | ||
50 | 50 | ||
51 | #define GFS2_GL_HASH_SHIFT 15 | 51 | #define GFS2_GL_HASH_SHIFT 15 |
52 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) | 52 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) |
@@ -213,30 +213,6 @@ out: | |||
213 | } | 213 | } |
214 | 214 | ||
215 | /** | 215 | /** |
216 | * queue_empty - check to see if a glock's queue is empty | ||
217 | * @gl: the glock | ||
218 | * @head: the head of the queue to check | ||
219 | * | ||
220 | * This function protects the list in the event that a process already | ||
221 | * has a holder on the list and is adding a second holder for itself. | ||
222 | * The glmutex lock is what generally prevents processes from working | ||
223 | * on the same glock at once, but the special case of adding a second | ||
224 | * holder for yourself ("recursive" locking) doesn't involve locking | ||
225 | * glmutex, making the spin lock necessary. | ||
226 | * | ||
227 | * Returns: 1 if the queue is empty | ||
228 | */ | ||
229 | |||
230 | static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head) | ||
231 | { | ||
232 | int empty; | ||
233 | spin_lock(&gl->gl_spin); | ||
234 | empty = list_empty(head); | ||
235 | spin_unlock(&gl->gl_spin); | ||
236 | return empty; | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * search_bucket() - Find struct gfs2_glock by lock number | 216 | * search_bucket() - Find struct gfs2_glock by lock number |
241 | * @bucket: the bucket to search | 217 | * @bucket: the bucket to search |
242 | * @name: The lock name | 218 | * @name: The lock name |
@@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | |||
395 | gh->gh_flags = flags; | 371 | gh->gh_flags = flags; |
396 | gh->gh_error = 0; | 372 | gh->gh_error = 0; |
397 | gh->gh_iflags = 0; | 373 | gh->gh_iflags = 0; |
398 | init_completion(&gh->gh_wait); | ||
399 | |||
400 | if (gh->gh_state == LM_ST_EXCLUSIVE) | ||
401 | gh->gh_flags |= GL_LOCAL_EXCL; | ||
402 | |||
403 | gfs2_glock_hold(gl); | 374 | gfs2_glock_hold(gl); |
404 | } | 375 | } |
405 | 376 | ||
@@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * | |||
417 | { | 388 | { |
418 | gh->gh_state = state; | 389 | gh->gh_state = state; |
419 | gh->gh_flags = flags; | 390 | gh->gh_flags = flags; |
420 | if (gh->gh_state == LM_ST_EXCLUSIVE) | ||
421 | gh->gh_flags |= GL_LOCAL_EXCL; | ||
422 | |||
423 | gh->gh_iflags &= 1 << HIF_ALLOCED; | 391 | gh->gh_iflags &= 1 << HIF_ALLOCED; |
424 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 392 | gh->gh_ip = (unsigned long)__builtin_return_address(0); |
425 | } | 393 | } |
@@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh) | |||
479 | kfree(gh); | 447 | kfree(gh); |
480 | } | 448 | } |
481 | 449 | ||
450 | static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) | ||
451 | { | ||
452 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { | ||
453 | gfs2_holder_put(gh); | ||
454 | return; | ||
455 | } | ||
456 | clear_bit(HIF_WAIT, &gh->gh_iflags); | ||
457 | smp_mb(); | ||
458 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
459 | } | ||
460 | |||
461 | static int holder_wait(void *word) | ||
462 | { | ||
463 | schedule(); | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | static void wait_on_holder(struct gfs2_holder *gh) | ||
468 | { | ||
469 | might_sleep(); | ||
470 | wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); | ||
471 | } | ||
472 | |||
482 | /** | 473 | /** |
483 | * rq_mutex - process a mutex request in the queue | 474 | * rq_mutex - process a mutex request in the queue |
484 | * @gh: the glock holder | 475 | * @gh: the glock holder |
@@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh) | |||
493 | list_del_init(&gh->gh_list); | 484 | list_del_init(&gh->gh_list); |
494 | /* gh->gh_error never examined. */ | 485 | /* gh->gh_error never examined. */ |
495 | set_bit(GLF_LOCK, &gl->gl_flags); | 486 | set_bit(GLF_LOCK, &gl->gl_flags); |
496 | complete(&gh->gh_wait); | 487 | clear_bit(HIF_WAIT, &gh->gh_iflags); |
488 | smp_mb(); | ||
489 | wake_up_bit(&gh->gh_iflags, HIF_WAIT); | ||
497 | 490 | ||
498 | return 1; | 491 | return 1; |
499 | } | 492 | } |
@@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh) | |||
511 | { | 504 | { |
512 | struct gfs2_glock *gl = gh->gh_gl; | 505 | struct gfs2_glock *gl = gh->gh_gl; |
513 | struct gfs2_sbd *sdp = gl->gl_sbd; | 506 | struct gfs2_sbd *sdp = gl->gl_sbd; |
514 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
515 | 507 | ||
516 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | 508 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { |
517 | if (list_empty(&gl->gl_holders)) { | 509 | if (list_empty(&gl->gl_holders)) { |
@@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh) | |||
526 | gfs2_reclaim_glock(sdp); | 518 | gfs2_reclaim_glock(sdp); |
527 | } | 519 | } |
528 | 520 | ||
529 | glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); | 521 | gfs2_glock_xmote_th(gh); |
530 | spin_lock(&gl->gl_spin); | 522 | spin_lock(&gl->gl_spin); |
531 | } | 523 | } |
532 | return 1; | 524 | return 1; |
@@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh) | |||
537 | set_bit(GLF_LOCK, &gl->gl_flags); | 529 | set_bit(GLF_LOCK, &gl->gl_flags); |
538 | } else { | 530 | } else { |
539 | struct gfs2_holder *next_gh; | 531 | struct gfs2_holder *next_gh; |
540 | if (gh->gh_flags & GL_LOCAL_EXCL) | 532 | if (gh->gh_state == LM_ST_EXCLUSIVE) |
541 | return 1; | 533 | return 1; |
542 | next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, | 534 | next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, |
543 | gh_list); | 535 | gh_list); |
544 | if (next_gh->gh_flags & GL_LOCAL_EXCL) | 536 | if (next_gh->gh_state == LM_ST_EXCLUSIVE) |
545 | return 1; | 537 | return 1; |
546 | } | 538 | } |
547 | 539 | ||
@@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh) | |||
549 | gh->gh_error = 0; | 541 | gh->gh_error = 0; |
550 | set_bit(HIF_HOLDER, &gh->gh_iflags); | 542 | set_bit(HIF_HOLDER, &gh->gh_iflags); |
551 | 543 | ||
552 | complete(&gh->gh_wait); | 544 | gfs2_holder_dispose_or_wake(gh); |
553 | 545 | ||
554 | return 0; | 546 | return 0; |
555 | } | 547 | } |
@@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh) | |||
564 | static int rq_demote(struct gfs2_holder *gh) | 556 | static int rq_demote(struct gfs2_holder *gh) |
565 | { | 557 | { |
566 | struct gfs2_glock *gl = gh->gh_gl; | 558 | struct gfs2_glock *gl = gh->gh_gl; |
567 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
568 | 559 | ||
569 | if (!list_empty(&gl->gl_holders)) | 560 | if (!list_empty(&gl->gl_holders)) |
570 | return 1; | 561 | return 1; |
@@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh) | |||
573 | list_del_init(&gh->gh_list); | 564 | list_del_init(&gh->gh_list); |
574 | gh->gh_error = 0; | 565 | gh->gh_error = 0; |
575 | spin_unlock(&gl->gl_spin); | 566 | spin_unlock(&gl->gl_spin); |
576 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) | 567 | gfs2_holder_dispose_or_wake(gh); |
577 | gfs2_holder_put(gh); | ||
578 | else | ||
579 | complete(&gh->gh_wait); | ||
580 | spin_lock(&gl->gl_spin); | 568 | spin_lock(&gl->gl_spin); |
581 | } else { | 569 | } else { |
582 | gl->gl_req_gh = gh; | 570 | gl->gl_req_gh = gh; |
@@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh) | |||
585 | 573 | ||
586 | if (gh->gh_state == LM_ST_UNLOCKED || | 574 | if (gh->gh_state == LM_ST_UNLOCKED || |
587 | gl->gl_state != LM_ST_EXCLUSIVE) | 575 | gl->gl_state != LM_ST_EXCLUSIVE) |
588 | glops->go_drop_th(gl); | 576 | gfs2_glock_drop_th(gl); |
589 | else | 577 | else |
590 | glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); | 578 | gfs2_glock_xmote_th(gh); |
591 | 579 | ||
592 | spin_lock(&gl->gl_spin); | 580 | spin_lock(&gl->gl_spin); |
593 | } | 581 | } |
@@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh) | |||
596 | } | 584 | } |
597 | 585 | ||
598 | /** | 586 | /** |
599 | * rq_greedy - process a queued request to drop greedy status | ||
600 | * @gh: the glock holder | ||
601 | * | ||
602 | * Returns: 1 if the queue is blocked | ||
603 | */ | ||
604 | |||
605 | static int rq_greedy(struct gfs2_holder *gh) | ||
606 | { | ||
607 | struct gfs2_glock *gl = gh->gh_gl; | ||
608 | |||
609 | list_del_init(&gh->gh_list); | ||
610 | /* gh->gh_error never examined. */ | ||
611 | clear_bit(GLF_GREEDY, &gl->gl_flags); | ||
612 | spin_unlock(&gl->gl_spin); | ||
613 | |||
614 | gfs2_holder_uninit(gh); | ||
615 | kfree(container_of(gh, struct greedy, gr_gh)); | ||
616 | |||
617 | spin_lock(&gl->gl_spin); | ||
618 | |||
619 | return 0; | ||
620 | } | ||
621 | |||
622 | /** | ||
623 | * run_queue - process holder structures on a glock | 587 | * run_queue - process holder structures on a glock |
624 | * @gl: the glock | 588 | * @gl: the glock |
625 | * | 589 | * |
@@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl) | |||
649 | 613 | ||
650 | if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) | 614 | if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) |
651 | blocked = rq_demote(gh); | 615 | blocked = rq_demote(gh); |
652 | else if (test_bit(HIF_GREEDY, &gh->gh_iflags)) | ||
653 | blocked = rq_greedy(gh); | ||
654 | else | 616 | else |
655 | gfs2_assert_warn(gl->gl_sbd, 0); | 617 | gfs2_assert_warn(gl->gl_sbd, 0); |
656 | 618 | ||
@@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) | |||
684 | 646 | ||
685 | gfs2_holder_init(gl, 0, 0, &gh); | 647 | gfs2_holder_init(gl, 0, 0, &gh); |
686 | set_bit(HIF_MUTEX, &gh.gh_iflags); | 648 | set_bit(HIF_MUTEX, &gh.gh_iflags); |
649 | if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) | ||
650 | BUG(); | ||
687 | 651 | ||
688 | spin_lock(&gl->gl_spin); | 652 | spin_lock(&gl->gl_spin); |
689 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 653 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
@@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) | |||
691 | } else { | 655 | } else { |
692 | gl->gl_owner = current; | 656 | gl->gl_owner = current; |
693 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 657 | gl->gl_ip = (unsigned long)__builtin_return_address(0); |
694 | complete(&gh.gh_wait); | 658 | clear_bit(HIF_WAIT, &gh.gh_iflags); |
659 | smp_mb(); | ||
660 | wake_up_bit(&gh.gh_iflags, HIF_WAIT); | ||
695 | } | 661 | } |
696 | spin_unlock(&gl->gl_spin); | 662 | spin_unlock(&gl->gl_spin); |
697 | 663 | ||
698 | wait_for_completion(&gh.gh_wait); | 664 | wait_on_holder(&gh); |
699 | gfs2_holder_uninit(&gh); | 665 | gfs2_holder_uninit(&gh); |
700 | } | 666 | } |
701 | 667 | ||
@@ -774,6 +740,7 @@ restart: | |||
774 | return; | 740 | return; |
775 | set_bit(HIF_DEMOTE, &new_gh->gh_iflags); | 741 | set_bit(HIF_DEMOTE, &new_gh->gh_iflags); |
776 | set_bit(HIF_DEALLOC, &new_gh->gh_iflags); | 742 | set_bit(HIF_DEALLOC, &new_gh->gh_iflags); |
743 | set_bit(HIF_WAIT, &new_gh->gh_iflags); | ||
777 | 744 | ||
778 | goto restart; | 745 | goto restart; |
779 | } | 746 | } |
@@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
825 | int op_done = 1; | 792 | int op_done = 1; |
826 | 793 | ||
827 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 794 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
828 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 795 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
829 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); | 796 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); |
830 | 797 | ||
831 | state_change(gl, ret & LM_OUT_ST_MASK); | 798 | state_change(gl, ret & LM_OUT_ST_MASK); |
@@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
908 | 875 | ||
909 | gfs2_glock_put(gl); | 876 | gfs2_glock_put(gl); |
910 | 877 | ||
911 | if (gh) { | 878 | if (gh) |
912 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) | 879 | gfs2_holder_dispose_or_wake(gh); |
913 | gfs2_holder_put(gh); | ||
914 | else | ||
915 | complete(&gh->gh_wait); | ||
916 | } | ||
917 | } | 880 | } |
918 | 881 | ||
919 | /** | 882 | /** |
@@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
924 | * | 887 | * |
925 | */ | 888 | */ |
926 | 889 | ||
927 | void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) | 890 | void gfs2_glock_xmote_th(struct gfs2_holder *gh) |
928 | { | 891 | { |
892 | struct gfs2_glock *gl = gh->gh_gl; | ||
929 | struct gfs2_sbd *sdp = gl->gl_sbd; | 893 | struct gfs2_sbd *sdp = gl->gl_sbd; |
894 | int flags = gh->gh_flags; | ||
895 | unsigned state = gh->gh_state; | ||
930 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 896 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
931 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | | 897 | int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | |
932 | LM_FLAG_NOEXP | LM_FLAG_ANY | | 898 | LM_FLAG_NOEXP | LM_FLAG_ANY | |
933 | LM_FLAG_PRIORITY); | 899 | LM_FLAG_PRIORITY); |
934 | unsigned int lck_ret; | 900 | unsigned int lck_ret; |
935 | 901 | ||
902 | if (glops->go_xmote_th) | ||
903 | glops->go_xmote_th(gl); | ||
904 | |||
936 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 905 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
937 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 906 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
938 | gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); | 907 | gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); |
939 | gfs2_assert_warn(sdp, state != gl->gl_state); | 908 | gfs2_assert_warn(sdp, state != gl->gl_state); |
940 | 909 | ||
941 | if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) | ||
942 | glops->go_sync(gl); | ||
943 | |||
944 | gfs2_glock_hold(gl); | 910 | gfs2_glock_hold(gl); |
945 | gl->gl_req_bh = xmote_bh; | 911 | gl->gl_req_bh = xmote_bh; |
946 | 912 | ||
@@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
971 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 937 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
972 | struct gfs2_holder *gh = gl->gl_req_gh; | 938 | struct gfs2_holder *gh = gl->gl_req_gh; |
973 | 939 | ||
974 | clear_bit(GLF_PREFETCH, &gl->gl_flags); | ||
975 | |||
976 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 940 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
977 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 941 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
978 | gfs2_assert_warn(sdp, !ret); | 942 | gfs2_assert_warn(sdp, !ret); |
979 | 943 | ||
980 | state_change(gl, LM_ST_UNLOCKED); | 944 | state_change(gl, LM_ST_UNLOCKED); |
@@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
1001 | 965 | ||
1002 | gfs2_glock_put(gl); | 966 | gfs2_glock_put(gl); |
1003 | 967 | ||
1004 | if (gh) { | 968 | if (gh) |
1005 | if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) | 969 | gfs2_holder_dispose_or_wake(gh); |
1006 | gfs2_holder_put(gh); | ||
1007 | else | ||
1008 | complete(&gh->gh_wait); | ||
1009 | } | ||
1010 | } | 970 | } |
1011 | 971 | ||
1012 | /** | 972 | /** |
@@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | |||
1015 | * | 975 | * |
1016 | */ | 976 | */ |
1017 | 977 | ||
1018 | void gfs2_glock_drop_th(struct gfs2_glock *gl) | 978 | static void gfs2_glock_drop_th(struct gfs2_glock *gl) |
1019 | { | 979 | { |
1020 | struct gfs2_sbd *sdp = gl->gl_sbd; | 980 | struct gfs2_sbd *sdp = gl->gl_sbd; |
1021 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 981 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1022 | unsigned int ret; | 982 | unsigned int ret; |
1023 | 983 | ||
984 | if (glops->go_drop_th) | ||
985 | glops->go_drop_th(gl); | ||
986 | |||
1024 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 987 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
1025 | gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); | 988 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
1026 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); | 989 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); |
1027 | 990 | ||
1028 | if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) | ||
1029 | glops->go_sync(gl); | ||
1030 | |||
1031 | gfs2_glock_hold(gl); | 991 | gfs2_glock_hold(gl); |
1032 | gl->gl_req_bh = drop_bh; | 992 | gl->gl_req_bh = drop_bh; |
1033 | 993 | ||
@@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) | |||
1107 | if (gh->gh_flags & LM_FLAG_PRIORITY) | 1067 | if (gh->gh_flags & LM_FLAG_PRIORITY) |
1108 | do_cancels(gh); | 1068 | do_cancels(gh); |
1109 | 1069 | ||
1110 | wait_for_completion(&gh->gh_wait); | 1070 | wait_on_holder(gh); |
1111 | |||
1112 | if (gh->gh_error) | 1071 | if (gh->gh_error) |
1113 | return gh->gh_error; | 1072 | return gh->gh_error; |
1114 | 1073 | ||
@@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh) | |||
1164 | struct gfs2_holder *existing; | 1123 | struct gfs2_holder *existing; |
1165 | 1124 | ||
1166 | BUG_ON(!gh->gh_owner); | 1125 | BUG_ON(!gh->gh_owner); |
1126 | if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) | ||
1127 | BUG(); | ||
1167 | 1128 | ||
1168 | existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); | 1129 | existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); |
1169 | if (existing) { | 1130 | if (existing) { |
@@ -1227,8 +1188,6 @@ restart: | |||
1227 | } | 1188 | } |
1228 | } | 1189 | } |
1229 | 1190 | ||
1230 | clear_bit(GLF_PREFETCH, &gl->gl_flags); | ||
1231 | |||
1232 | return error; | 1191 | return error; |
1233 | } | 1192 | } |
1234 | 1193 | ||
@@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1321 | } | 1280 | } |
1322 | 1281 | ||
1323 | /** | 1282 | /** |
1324 | * gfs2_glock_prefetch - Try to prefetch a glock | ||
1325 | * @gl: the glock | ||
1326 | * @state: the state to prefetch in | ||
1327 | * @flags: flags passed to go_xmote_th() | ||
1328 | * | ||
1329 | */ | ||
1330 | |||
1331 | static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, | ||
1332 | int flags) | ||
1333 | { | ||
1334 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
1335 | |||
1336 | spin_lock(&gl->gl_spin); | ||
1337 | |||
1338 | if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) || | ||
1339 | !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) || | ||
1340 | !list_empty(&gl->gl_waiters3) || | ||
1341 | relaxed_state_ok(gl->gl_state, state, flags)) { | ||
1342 | spin_unlock(&gl->gl_spin); | ||
1343 | return; | ||
1344 | } | ||
1345 | |||
1346 | set_bit(GLF_PREFETCH, &gl->gl_flags); | ||
1347 | set_bit(GLF_LOCK, &gl->gl_flags); | ||
1348 | spin_unlock(&gl->gl_spin); | ||
1349 | |||
1350 | glops->go_xmote_th(gl, state, flags); | ||
1351 | } | ||
1352 | |||
1353 | static void greedy_work(struct work_struct *work) | ||
1354 | { | ||
1355 | struct greedy *gr = container_of(work, struct greedy, gr_work.work); | ||
1356 | struct gfs2_holder *gh = &gr->gr_gh; | ||
1357 | struct gfs2_glock *gl = gh->gh_gl; | ||
1358 | const struct gfs2_glock_operations *glops = gl->gl_ops; | ||
1359 | |||
1360 | clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); | ||
1361 | |||
1362 | if (glops->go_greedy) | ||
1363 | glops->go_greedy(gl); | ||
1364 | |||
1365 | spin_lock(&gl->gl_spin); | ||
1366 | |||
1367 | if (list_empty(&gl->gl_waiters2)) { | ||
1368 | clear_bit(GLF_GREEDY, &gl->gl_flags); | ||
1369 | spin_unlock(&gl->gl_spin); | ||
1370 | gfs2_holder_uninit(gh); | ||
1371 | kfree(gr); | ||
1372 | } else { | ||
1373 | gfs2_glock_hold(gl); | ||
1374 | list_add_tail(&gh->gh_list, &gl->gl_waiters2); | ||
1375 | run_queue(gl); | ||
1376 | spin_unlock(&gl->gl_spin); | ||
1377 | gfs2_glock_put(gl); | ||
1378 | } | ||
1379 | } | ||
1380 | |||
1381 | /** | ||
1382 | * gfs2_glock_be_greedy - | ||
1383 | * @gl: | ||
1384 | * @time: | ||
1385 | * | ||
1386 | * Returns: 0 if go_greedy will be called, 1 otherwise | ||
1387 | */ | ||
1388 | |||
1389 | int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time) | ||
1390 | { | ||
1391 | struct greedy *gr; | ||
1392 | struct gfs2_holder *gh; | ||
1393 | |||
1394 | if (!time || gl->gl_sbd->sd_args.ar_localcaching || | ||
1395 | test_and_set_bit(GLF_GREEDY, &gl->gl_flags)) | ||
1396 | return 1; | ||
1397 | |||
1398 | gr = kmalloc(sizeof(struct greedy), GFP_KERNEL); | ||
1399 | if (!gr) { | ||
1400 | clear_bit(GLF_GREEDY, &gl->gl_flags); | ||
1401 | return 1; | ||
1402 | } | ||
1403 | gh = &gr->gr_gh; | ||
1404 | |||
1405 | gfs2_holder_init(gl, 0, 0, gh); | ||
1406 | set_bit(HIF_GREEDY, &gh->gh_iflags); | ||
1407 | INIT_DELAYED_WORK(&gr->gr_work, greedy_work); | ||
1408 | |||
1409 | set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); | ||
1410 | schedule_delayed_work(&gr->gr_work, time); | ||
1411 | |||
1412 | return 0; | ||
1413 | } | ||
1414 | |||
1415 | /** | ||
1416 | * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it | 1283 | * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it |
1417 | * @gh: the holder structure | 1284 | * @gh: the holder structure |
1418 | * | 1285 | * |
@@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b) | |||
1470 | return 1; | 1337 | return 1; |
1471 | if (a->ln_number < b->ln_number) | 1338 | if (a->ln_number < b->ln_number) |
1472 | return -1; | 1339 | return -1; |
1473 | if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) | 1340 | BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); |
1474 | return 1; | ||
1475 | if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL)) | ||
1476 | return 1; | ||
1477 | return 0; | 1341 | return 0; |
1478 | } | 1342 | } |
1479 | 1343 | ||
@@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
1618 | } | 1482 | } |
1619 | 1483 | ||
1620 | /** | 1484 | /** |
1621 | * gfs2_glock_prefetch_num - prefetch a glock based on lock number | ||
1622 | * @sdp: the filesystem | ||
1623 | * @number: the lock number | ||
1624 | * @glops: the glock operations for the type of glock | ||
1625 | * @state: the state to acquire the glock in | ||
1626 | * @flags: modifier flags for the aquisition | ||
1627 | * | ||
1628 | * Returns: errno | ||
1629 | */ | ||
1630 | |||
1631 | void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, | ||
1632 | const struct gfs2_glock_operations *glops, | ||
1633 | unsigned int state, int flags) | ||
1634 | { | ||
1635 | struct gfs2_glock *gl; | ||
1636 | int error; | ||
1637 | |||
1638 | if (atomic_read(&sdp->sd_reclaim_count) < | ||
1639 | gfs2_tune_get(sdp, gt_reclaim_limit)) { | ||
1640 | error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); | ||
1641 | if (!error) { | ||
1642 | gfs2_glock_prefetch(gl, state, flags); | ||
1643 | gfs2_glock_put(gl); | ||
1644 | } | ||
1645 | } | ||
1646 | } | ||
1647 | |||
1648 | /** | ||
1649 | * gfs2_lvb_hold - attach a LVB from a glock | 1485 | * gfs2_lvb_hold - attach a LVB from a glock |
1650 | * @gl: The glock in question | 1486 | * @gl: The glock in question |
1651 | * | 1487 | * |
@@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1703 | if (!gl) | 1539 | if (!gl) |
1704 | return; | 1540 | return; |
1705 | 1541 | ||
1706 | if (gl->gl_ops->go_callback) | ||
1707 | gl->gl_ops->go_callback(gl, state); | ||
1708 | handle_callback(gl, state); | 1542 | handle_callback(gl, state); |
1709 | 1543 | ||
1710 | spin_lock(&gl->gl_spin); | 1544 | spin_lock(&gl->gl_spin); |
@@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
1746 | struct lm_async_cb *async = data; | 1580 | struct lm_async_cb *async = data; |
1747 | struct gfs2_glock *gl; | 1581 | struct gfs2_glock *gl; |
1748 | 1582 | ||
1583 | down_read(&gfs2_umount_flush_sem); | ||
1749 | gl = gfs2_glock_find(sdp, &async->lc_name); | 1584 | gl = gfs2_glock_find(sdp, &async->lc_name); |
1750 | if (gfs2_assert_warn(sdp, gl)) | 1585 | if (gfs2_assert_warn(sdp, gl)) |
1751 | return; | 1586 | return; |
1752 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) | 1587 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) |
1753 | gl->gl_req_bh(gl, async->lc_ret); | 1588 | gl->gl_req_bh(gl, async->lc_ret); |
1754 | gfs2_glock_put(gl); | 1589 | gfs2_glock_put(gl); |
1590 | up_read(&gfs2_umount_flush_sem); | ||
1755 | return; | 1591 | return; |
1756 | } | 1592 | } |
1757 | 1593 | ||
@@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
1781 | 1617 | ||
1782 | static int demote_ok(struct gfs2_glock *gl) | 1618 | static int demote_ok(struct gfs2_glock *gl) |
1783 | { | 1619 | { |
1784 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1785 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 1620 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1786 | int demote = 1; | 1621 | int demote = 1; |
1787 | 1622 | ||
1788 | if (test_bit(GLF_STICKY, &gl->gl_flags)) | 1623 | if (test_bit(GLF_STICKY, &gl->gl_flags)) |
1789 | demote = 0; | 1624 | demote = 0; |
1790 | else if (test_bit(GLF_PREFETCH, &gl->gl_flags)) | ||
1791 | demote = time_after_eq(jiffies, gl->gl_stamp + | ||
1792 | gfs2_tune_get(sdp, gt_prefetch_secs) * HZ); | ||
1793 | else if (glops->go_demote_ok) | 1625 | else if (glops->go_demote_ok) |
1794 | demote = glops->go_demote_ok(gl); | 1626 | demote = glops->go_demote_ok(gl); |
1795 | 1627 | ||
@@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) | |||
1845 | atomic_inc(&sdp->sd_reclaimed); | 1677 | atomic_inc(&sdp->sd_reclaimed); |
1846 | 1678 | ||
1847 | if (gfs2_glmutex_trylock(gl)) { | 1679 | if (gfs2_glmutex_trylock(gl)) { |
1848 | if (queue_empty(gl, &gl->gl_holders) && | 1680 | if (list_empty(&gl->gl_holders) && |
1849 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1681 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
1850 | handle_callback(gl, LM_ST_UNLOCKED); | 1682 | handle_callback(gl, LM_ST_UNLOCKED); |
1851 | gfs2_glmutex_unlock(gl); | 1683 | gfs2_glmutex_unlock(gl); |
@@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl) | |||
1909 | return; | 1741 | return; |
1910 | 1742 | ||
1911 | if (gfs2_glmutex_trylock(gl)) { | 1743 | if (gfs2_glmutex_trylock(gl)) { |
1912 | if (queue_empty(gl, &gl->gl_holders) && | 1744 | if (list_empty(&gl->gl_holders) && |
1913 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) | 1745 | gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) |
1914 | goto out_schedule; | 1746 | goto out_schedule; |
1915 | gfs2_glmutex_unlock(gl); | 1747 | gfs2_glmutex_unlock(gl); |
@@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
1958 | } | 1790 | } |
1959 | 1791 | ||
1960 | if (gfs2_glmutex_trylock(gl)) { | 1792 | if (gfs2_glmutex_trylock(gl)) { |
1961 | if (queue_empty(gl, &gl->gl_holders) && | 1793 | if (list_empty(&gl->gl_holders) && |
1962 | gl->gl_state != LM_ST_UNLOCKED) | 1794 | gl->gl_state != LM_ST_UNLOCKED) |
1963 | handle_callback(gl, LM_ST_UNLOCKED); | 1795 | handle_callback(gl, LM_ST_UNLOCKED); |
1964 | gfs2_glmutex_unlock(gl); | 1796 | gfs2_glmutex_unlock(gl); |
@@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) | |||
2000 | t = jiffies; | 1832 | t = jiffies; |
2001 | } | 1833 | } |
2002 | 1834 | ||
1835 | down_write(&gfs2_umount_flush_sem); | ||
2003 | invalidate_inodes(sdp->sd_vfs); | 1836 | invalidate_inodes(sdp->sd_vfs); |
1837 | up_write(&gfs2_umount_flush_sem); | ||
2004 | msleep(10); | 1838 | msleep(10); |
2005 | } | 1839 | } |
2006 | } | 1840 | } |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index fb39108fc05c..f50e40ceca43 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -20,7 +20,6 @@ | |||
20 | #define LM_FLAG_ANY 0x00000008 | 20 | #define LM_FLAG_ANY 0x00000008 |
21 | #define LM_FLAG_PRIORITY 0x00000010 */ | 21 | #define LM_FLAG_PRIORITY 0x00000010 */ |
22 | 22 | ||
23 | #define GL_LOCAL_EXCL 0x00000020 | ||
24 | #define GL_ASYNC 0x00000040 | 23 | #define GL_ASYNC 0x00000040 |
25 | #define GL_EXACT 0x00000080 | 24 | #define GL_EXACT 0x00000080 |
26 | #define GL_SKIP 0x00000100 | 25 | #define GL_SKIP 0x00000100 |
@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | |||
83 | void gfs2_holder_reinit(unsigned int state, unsigned flags, | 82 | void gfs2_holder_reinit(unsigned int state, unsigned flags, |
84 | struct gfs2_holder *gh); | 83 | struct gfs2_holder *gh); |
85 | void gfs2_holder_uninit(struct gfs2_holder *gh); | 84 | void gfs2_holder_uninit(struct gfs2_holder *gh); |
86 | |||
87 | void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags); | ||
88 | void gfs2_glock_drop_th(struct gfs2_glock *gl); | ||
89 | |||
90 | int gfs2_glock_nq(struct gfs2_holder *gh); | 85 | int gfs2_glock_nq(struct gfs2_holder *gh); |
91 | int gfs2_glock_poll(struct gfs2_holder *gh); | 86 | int gfs2_glock_poll(struct gfs2_holder *gh); |
92 | int gfs2_glock_wait(struct gfs2_holder *gh); | 87 | int gfs2_glock_wait(struct gfs2_holder *gh); |
93 | void gfs2_glock_dq(struct gfs2_holder *gh); | 88 | void gfs2_glock_dq(struct gfs2_holder *gh); |
94 | 89 | ||
95 | int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time); | ||
96 | |||
97 | void gfs2_glock_dq_uninit(struct gfs2_holder *gh); | 90 | void gfs2_glock_dq_uninit(struct gfs2_holder *gh); |
98 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | 91 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, |
99 | u64 number, const struct gfs2_glock_operations *glops, | 92 | u64 number, const struct gfs2_glock_operations *glops, |
@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | |||
103 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 96 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
104 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | 97 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); |
105 | 98 | ||
106 | void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, | ||
107 | const struct gfs2_glock_operations *glops, | ||
108 | unsigned int state, int flags); | ||
109 | |||
110 | /** | 99 | /** |
111 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 100 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock |
112 | * @gl: the glock | 101 | * @gl: the glock |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index b068d10bcb6e..c4b0391b7aa2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -117,12 +117,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) | |||
117 | 117 | ||
118 | static void meta_go_sync(struct gfs2_glock *gl) | 118 | static void meta_go_sync(struct gfs2_glock *gl) |
119 | { | 119 | { |
120 | if (gl->gl_state != LM_ST_EXCLUSIVE) | ||
121 | return; | ||
122 | |||
120 | if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { | 123 | if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { |
121 | gfs2_log_flush(gl->gl_sbd, gl); | 124 | gfs2_log_flush(gl->gl_sbd, gl); |
122 | gfs2_meta_sync(gl); | 125 | gfs2_meta_sync(gl); |
123 | gfs2_ail_empty_gl(gl); | 126 | gfs2_ail_empty_gl(gl); |
124 | } | 127 | } |
125 | |||
126 | } | 128 | } |
127 | 129 | ||
128 | /** | 130 | /** |
@@ -142,6 +144,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
142 | } | 144 | } |
143 | 145 | ||
144 | /** | 146 | /** |
147 | * inode_go_sync - Sync the dirty data and/or metadata for an inode glock | ||
148 | * @gl: the glock protecting the inode | ||
149 | * | ||
150 | */ | ||
151 | |||
152 | static void inode_go_sync(struct gfs2_glock *gl) | ||
153 | { | ||
154 | struct gfs2_inode *ip = gl->gl_object; | ||
155 | |||
156 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | ||
157 | ip = NULL; | ||
158 | |||
159 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | ||
160 | gfs2_log_flush(gl->gl_sbd, gl); | ||
161 | if (ip) | ||
162 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
163 | gfs2_meta_sync(gl); | ||
164 | if (ip) { | ||
165 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
166 | int error = filemap_fdatawait(mapping); | ||
167 | if (error == -ENOSPC) | ||
168 | set_bit(AS_ENOSPC, &mapping->flags); | ||
169 | else if (error) | ||
170 | set_bit(AS_EIO, &mapping->flags); | ||
171 | } | ||
172 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
173 | gfs2_ail_empty_gl(gl); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | /** | ||
145 | * inode_go_xmote_th - promote/demote a glock | 178 | * inode_go_xmote_th - promote/demote a glock |
146 | * @gl: the glock | 179 | * @gl: the glock |
147 | * @state: the requested state | 180 | * @state: the requested state |
@@ -149,12 +182,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
149 | * | 182 | * |
150 | */ | 183 | */ |
151 | 184 | ||
152 | static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | 185 | static void inode_go_xmote_th(struct gfs2_glock *gl) |
153 | int flags) | ||
154 | { | 186 | { |
155 | if (gl->gl_state != LM_ST_UNLOCKED) | 187 | if (gl->gl_state != LM_ST_UNLOCKED) |
156 | gfs2_pte_inval(gl); | 188 | gfs2_pte_inval(gl); |
157 | gfs2_glock_xmote_th(gl, state, flags); | 189 | if (gl->gl_state == LM_ST_EXCLUSIVE) |
190 | inode_go_sync(gl); | ||
158 | } | 191 | } |
159 | 192 | ||
160 | /** | 193 | /** |
@@ -189,38 +222,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) | |||
189 | static void inode_go_drop_th(struct gfs2_glock *gl) | 222 | static void inode_go_drop_th(struct gfs2_glock *gl) |
190 | { | 223 | { |
191 | gfs2_pte_inval(gl); | 224 | gfs2_pte_inval(gl); |
192 | gfs2_glock_drop_th(gl); | 225 | if (gl->gl_state == LM_ST_EXCLUSIVE) |
193 | } | 226 | inode_go_sync(gl); |
194 | |||
195 | /** | ||
196 | * inode_go_sync - Sync the dirty data and/or metadata for an inode glock | ||
197 | * @gl: the glock protecting the inode | ||
198 | * | ||
199 | */ | ||
200 | |||
201 | static void inode_go_sync(struct gfs2_glock *gl) | ||
202 | { | ||
203 | struct gfs2_inode *ip = gl->gl_object; | ||
204 | |||
205 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | ||
206 | ip = NULL; | ||
207 | |||
208 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | ||
209 | gfs2_log_flush(gl->gl_sbd, gl); | ||
210 | if (ip) | ||
211 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
212 | gfs2_meta_sync(gl); | ||
213 | if (ip) { | ||
214 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
215 | int error = filemap_fdatawait(mapping); | ||
216 | if (error == -ENOSPC) | ||
217 | set_bit(AS_ENOSPC, &mapping->flags); | ||
218 | else if (error) | ||
219 | set_bit(AS_EIO, &mapping->flags); | ||
220 | } | ||
221 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
222 | gfs2_ail_empty_gl(gl); | ||
223 | } | ||
224 | } | 227 | } |
225 | 228 | ||
226 | /** | 229 | /** |
@@ -295,7 +298,7 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
295 | 298 | ||
296 | if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && | 299 | if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && |
297 | (gl->gl_state == LM_ST_EXCLUSIVE) && | 300 | (gl->gl_state == LM_ST_EXCLUSIVE) && |
298 | (gh->gh_flags & GL_LOCAL_EXCL)) | 301 | (gh->gh_state == LM_ST_EXCLUSIVE)) |
299 | error = gfs2_truncatei_resume(ip); | 302 | error = gfs2_truncatei_resume(ip); |
300 | 303 | ||
301 | return error; | 304 | return error; |
@@ -319,39 +322,6 @@ static void inode_go_unlock(struct gfs2_holder *gh) | |||
319 | } | 322 | } |
320 | 323 | ||
321 | /** | 324 | /** |
322 | * inode_greedy - | ||
323 | * @gl: the glock | ||
324 | * | ||
325 | */ | ||
326 | |||
327 | static void inode_greedy(struct gfs2_glock *gl) | ||
328 | { | ||
329 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
330 | struct gfs2_inode *ip = gl->gl_object; | ||
331 | unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum); | ||
332 | unsigned int max = gfs2_tune_get(sdp, gt_greedy_max); | ||
333 | unsigned int new_time; | ||
334 | |||
335 | spin_lock(&ip->i_spin); | ||
336 | |||
337 | if (time_after(ip->i_last_pfault + quantum, jiffies)) { | ||
338 | new_time = ip->i_greedy + quantum; | ||
339 | if (new_time > max) | ||
340 | new_time = max; | ||
341 | } else { | ||
342 | new_time = ip->i_greedy - quantum; | ||
343 | if (!new_time || new_time > max) | ||
344 | new_time = 1; | ||
345 | } | ||
346 | |||
347 | ip->i_greedy = new_time; | ||
348 | |||
349 | spin_unlock(&ip->i_spin); | ||
350 | |||
351 | iput(&ip->i_inode); | ||
352 | } | ||
353 | |||
354 | /** | ||
355 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | 325 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock |
356 | * @gl: the glock | 326 | * @gl: the glock |
357 | * | 327 | * |
@@ -398,8 +368,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
398 | * | 368 | * |
399 | */ | 369 | */ |
400 | 370 | ||
401 | static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | 371 | static void trans_go_xmote_th(struct gfs2_glock *gl) |
402 | int flags) | ||
403 | { | 372 | { |
404 | struct gfs2_sbd *sdp = gl->gl_sbd; | 373 | struct gfs2_sbd *sdp = gl->gl_sbd; |
405 | 374 | ||
@@ -408,8 +377,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, | |||
408 | gfs2_meta_syncfs(sdp); | 377 | gfs2_meta_syncfs(sdp); |
409 | gfs2_log_shutdown(sdp); | 378 | gfs2_log_shutdown(sdp); |
410 | } | 379 | } |
411 | |||
412 | gfs2_glock_xmote_th(gl, state, flags); | ||
413 | } | 380 | } |
414 | 381 | ||
415 | /** | 382 | /** |
@@ -461,8 +428,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl) | |||
461 | gfs2_meta_syncfs(sdp); | 428 | gfs2_meta_syncfs(sdp); |
462 | gfs2_log_shutdown(sdp); | 429 | gfs2_log_shutdown(sdp); |
463 | } | 430 | } |
464 | |||
465 | gfs2_glock_drop_th(gl); | ||
466 | } | 431 | } |
467 | 432 | ||
468 | /** | 433 | /** |
@@ -478,8 +443,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) | |||
478 | } | 443 | } |
479 | 444 | ||
480 | const struct gfs2_glock_operations gfs2_meta_glops = { | 445 | const struct gfs2_glock_operations gfs2_meta_glops = { |
481 | .go_xmote_th = gfs2_glock_xmote_th, | 446 | .go_xmote_th = meta_go_sync, |
482 | .go_drop_th = gfs2_glock_drop_th, | 447 | .go_drop_th = meta_go_sync, |
483 | .go_type = LM_TYPE_META, | 448 | .go_type = LM_TYPE_META, |
484 | }; | 449 | }; |
485 | 450 | ||
@@ -487,19 +452,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = { | |||
487 | .go_xmote_th = inode_go_xmote_th, | 452 | .go_xmote_th = inode_go_xmote_th, |
488 | .go_xmote_bh = inode_go_xmote_bh, | 453 | .go_xmote_bh = inode_go_xmote_bh, |
489 | .go_drop_th = inode_go_drop_th, | 454 | .go_drop_th = inode_go_drop_th, |
490 | .go_sync = inode_go_sync, | ||
491 | .go_inval = inode_go_inval, | 455 | .go_inval = inode_go_inval, |
492 | .go_demote_ok = inode_go_demote_ok, | 456 | .go_demote_ok = inode_go_demote_ok, |
493 | .go_lock = inode_go_lock, | 457 | .go_lock = inode_go_lock, |
494 | .go_unlock = inode_go_unlock, | 458 | .go_unlock = inode_go_unlock, |
495 | .go_greedy = inode_greedy, | ||
496 | .go_type = LM_TYPE_INODE, | 459 | .go_type = LM_TYPE_INODE, |
497 | }; | 460 | }; |
498 | 461 | ||
499 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 462 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
500 | .go_xmote_th = gfs2_glock_xmote_th, | ||
501 | .go_drop_th = gfs2_glock_drop_th, | ||
502 | .go_sync = meta_go_sync, | ||
503 | .go_inval = meta_go_inval, | 463 | .go_inval = meta_go_inval, |
504 | .go_demote_ok = rgrp_go_demote_ok, | 464 | .go_demote_ok = rgrp_go_demote_ok, |
505 | .go_lock = rgrp_go_lock, | 465 | .go_lock = rgrp_go_lock, |
@@ -515,33 +475,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = { | |||
515 | }; | 475 | }; |
516 | 476 | ||
517 | const struct gfs2_glock_operations gfs2_iopen_glops = { | 477 | const struct gfs2_glock_operations gfs2_iopen_glops = { |
518 | .go_xmote_th = gfs2_glock_xmote_th, | ||
519 | .go_drop_th = gfs2_glock_drop_th, | ||
520 | .go_type = LM_TYPE_IOPEN, | 478 | .go_type = LM_TYPE_IOPEN, |
521 | }; | 479 | }; |
522 | 480 | ||
523 | const struct gfs2_glock_operations gfs2_flock_glops = { | 481 | const struct gfs2_glock_operations gfs2_flock_glops = { |
524 | .go_xmote_th = gfs2_glock_xmote_th, | ||
525 | .go_drop_th = gfs2_glock_drop_th, | ||
526 | .go_type = LM_TYPE_FLOCK, | 482 | .go_type = LM_TYPE_FLOCK, |
527 | }; | 483 | }; |
528 | 484 | ||
529 | const struct gfs2_glock_operations gfs2_nondisk_glops = { | 485 | const struct gfs2_glock_operations gfs2_nondisk_glops = { |
530 | .go_xmote_th = gfs2_glock_xmote_th, | ||
531 | .go_drop_th = gfs2_glock_drop_th, | ||
532 | .go_type = LM_TYPE_NONDISK, | 486 | .go_type = LM_TYPE_NONDISK, |
533 | }; | 487 | }; |
534 | 488 | ||
535 | const struct gfs2_glock_operations gfs2_quota_glops = { | 489 | const struct gfs2_glock_operations gfs2_quota_glops = { |
536 | .go_xmote_th = gfs2_glock_xmote_th, | ||
537 | .go_drop_th = gfs2_glock_drop_th, | ||
538 | .go_demote_ok = quota_go_demote_ok, | 490 | .go_demote_ok = quota_go_demote_ok, |
539 | .go_type = LM_TYPE_QUOTA, | 491 | .go_type = LM_TYPE_QUOTA, |
540 | }; | 492 | }; |
541 | 493 | ||
542 | const struct gfs2_glock_operations gfs2_journal_glops = { | 494 | const struct gfs2_glock_operations gfs2_journal_glops = { |
543 | .go_xmote_th = gfs2_glock_xmote_th, | ||
544 | .go_drop_th = gfs2_glock_drop_th, | ||
545 | .go_type = LM_TYPE_JOURNAL, | 495 | .go_type = LM_TYPE_JOURNAL, |
546 | }; | 496 | }; |
547 | 497 | ||
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 734421edae85..12c80fd28db5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -101,17 +101,14 @@ struct gfs2_bufdata { | |||
101 | }; | 101 | }; |
102 | 102 | ||
103 | struct gfs2_glock_operations { | 103 | struct gfs2_glock_operations { |
104 | void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags); | 104 | void (*go_xmote_th) (struct gfs2_glock *gl); |
105 | void (*go_xmote_bh) (struct gfs2_glock *gl); | 105 | void (*go_xmote_bh) (struct gfs2_glock *gl); |
106 | void (*go_drop_th) (struct gfs2_glock *gl); | 106 | void (*go_drop_th) (struct gfs2_glock *gl); |
107 | void (*go_drop_bh) (struct gfs2_glock *gl); | 107 | void (*go_drop_bh) (struct gfs2_glock *gl); |
108 | void (*go_sync) (struct gfs2_glock *gl); | ||
109 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 108 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
110 | int (*go_demote_ok) (struct gfs2_glock *gl); | 109 | int (*go_demote_ok) (struct gfs2_glock *gl); |
111 | int (*go_lock) (struct gfs2_holder *gh); | 110 | int (*go_lock) (struct gfs2_holder *gh); |
112 | void (*go_unlock) (struct gfs2_holder *gh); | 111 | void (*go_unlock) (struct gfs2_holder *gh); |
113 | void (*go_callback) (struct gfs2_glock *gl, unsigned int state); | ||
114 | void (*go_greedy) (struct gfs2_glock *gl); | ||
115 | const int go_type; | 112 | const int go_type; |
116 | }; | 113 | }; |
117 | 114 | ||
@@ -120,7 +117,6 @@ enum { | |||
120 | HIF_MUTEX = 0, | 117 | HIF_MUTEX = 0, |
121 | HIF_PROMOTE = 1, | 118 | HIF_PROMOTE = 1, |
122 | HIF_DEMOTE = 2, | 119 | HIF_DEMOTE = 2, |
123 | HIF_GREEDY = 3, | ||
124 | 120 | ||
125 | /* States */ | 121 | /* States */ |
126 | HIF_ALLOCED = 4, | 122 | HIF_ALLOCED = 4, |
@@ -128,6 +124,7 @@ enum { | |||
128 | HIF_HOLDER = 6, | 124 | HIF_HOLDER = 6, |
129 | HIF_FIRST = 7, | 125 | HIF_FIRST = 7, |
130 | HIF_ABORTED = 9, | 126 | HIF_ABORTED = 9, |
127 | HIF_WAIT = 10, | ||
131 | }; | 128 | }; |
132 | 129 | ||
133 | struct gfs2_holder { | 130 | struct gfs2_holder { |
@@ -140,17 +137,14 @@ struct gfs2_holder { | |||
140 | 137 | ||
141 | int gh_error; | 138 | int gh_error; |
142 | unsigned long gh_iflags; | 139 | unsigned long gh_iflags; |
143 | struct completion gh_wait; | ||
144 | unsigned long gh_ip; | 140 | unsigned long gh_ip; |
145 | }; | 141 | }; |
146 | 142 | ||
147 | enum { | 143 | enum { |
148 | GLF_LOCK = 1, | 144 | GLF_LOCK = 1, |
149 | GLF_STICKY = 2, | 145 | GLF_STICKY = 2, |
150 | GLF_PREFETCH = 3, | ||
151 | GLF_DIRTY = 5, | 146 | GLF_DIRTY = 5, |
152 | GLF_SKIP_WAITERS2 = 6, | 147 | GLF_SKIP_WAITERS2 = 6, |
153 | GLF_GREEDY = 7, | ||
154 | }; | 148 | }; |
155 | 149 | ||
156 | struct gfs2_glock { | 150 | struct gfs2_glock { |
@@ -167,7 +161,7 @@ struct gfs2_glock { | |||
167 | unsigned long gl_ip; | 161 | unsigned long gl_ip; |
168 | struct list_head gl_holders; | 162 | struct list_head gl_holders; |
169 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 163 | struct list_head gl_waiters1; /* HIF_MUTEX */ |
170 | struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ | 164 | struct list_head gl_waiters2; /* HIF_DEMOTE */ |
171 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 165 | struct list_head gl_waiters3; /* HIF_PROMOTE */ |
172 | 166 | ||
173 | const struct gfs2_glock_operations *gl_ops; | 167 | const struct gfs2_glock_operations *gl_ops; |
@@ -236,7 +230,6 @@ struct gfs2_inode { | |||
236 | 230 | ||
237 | spinlock_t i_spin; | 231 | spinlock_t i_spin; |
238 | struct rw_semaphore i_rw_mutex; | 232 | struct rw_semaphore i_rw_mutex; |
239 | unsigned int i_greedy; | ||
240 | unsigned long i_last_pfault; | 233 | unsigned long i_last_pfault; |
241 | 234 | ||
242 | struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; | 235 | struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; |
@@ -418,17 +411,12 @@ struct gfs2_tune { | |||
418 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | 411 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ |
419 | unsigned int gt_new_files_jdata; | 412 | unsigned int gt_new_files_jdata; |
420 | unsigned int gt_new_files_directio; | 413 | unsigned int gt_new_files_directio; |
421 | unsigned int gt_max_atomic_write; /* Split big writes into this size */ | ||
422 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 414 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
423 | unsigned int gt_lockdump_size; | 415 | unsigned int gt_lockdump_size; |
424 | unsigned int gt_stall_secs; /* Detects trouble! */ | 416 | unsigned int gt_stall_secs; /* Detects trouble! */ |
425 | unsigned int gt_complain_secs; | 417 | unsigned int gt_complain_secs; |
426 | unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ | 418 | unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ |
427 | unsigned int gt_entries_per_readdir; | 419 | unsigned int gt_entries_per_readdir; |
428 | unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */ | ||
429 | unsigned int gt_greedy_default; | ||
430 | unsigned int gt_greedy_quantum; | ||
431 | unsigned int gt_greedy_max; | ||
432 | unsigned int gt_statfs_quantum; | 420 | unsigned int gt_statfs_quantum; |
433 | unsigned int gt_statfs_slow; | 421 | unsigned int gt_statfs_slow; |
434 | }; | 422 | }; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d122074c45e1..0d6831a40565 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -287,10 +287,8 @@ out: | |||
287 | * | 287 | * |
288 | * Returns: errno | 288 | * Returns: errno |
289 | */ | 289 | */ |
290 | |||
291 | int gfs2_change_nlink(struct gfs2_inode *ip, int diff) | 290 | int gfs2_change_nlink(struct gfs2_inode *ip, int diff) |
292 | { | 291 | { |
293 | struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info; | ||
294 | struct buffer_head *dibh; | 292 | struct buffer_head *dibh; |
295 | u32 nlink; | 293 | u32 nlink; |
296 | int error; | 294 | int error; |
@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) | |||
315 | else | 313 | else |
316 | drop_nlink(&ip->i_inode); | 314 | drop_nlink(&ip->i_inode); |
317 | 315 | ||
318 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 316 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
319 | 317 | ||
320 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 318 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
321 | gfs2_dinode_out(ip, dibh->b_data); | 319 | gfs2_dinode_out(ip, dibh->b_data); |
322 | brelse(dibh); | 320 | brelse(dibh); |
323 | mark_inode_dirty(&ip->i_inode); | 321 | mark_inode_dirty(&ip->i_inode); |
324 | 322 | ||
325 | if (ip->i_inode.i_nlink == 0) { | 323 | if (ip->i_inode.i_nlink == 0) |
326 | struct gfs2_rgrpd *rgd; | ||
327 | struct gfs2_holder ri_gh, rg_gh; | ||
328 | |||
329 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
330 | if (error) | ||
331 | goto out; | ||
332 | error = -EIO; | ||
333 | rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); | ||
334 | if (!rgd) | ||
335 | goto out_norgrp; | ||
336 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); | ||
337 | if (error) | ||
338 | goto out_norgrp; | ||
339 | |||
340 | gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ | 324 | gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ |
341 | gfs2_glock_dq_uninit(&rg_gh); | 325 | |
342 | out_norgrp: | ||
343 | gfs2_glock_dq_uninit(&ri_gh); | ||
344 | } | ||
345 | out: | ||
346 | return error; | 326 | return error; |
347 | } | 327 | } |
348 | 328 | ||
349 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) | 329 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) |
350 | { | 330 | { |
351 | struct qstr qstr; | 331 | struct qstr qstr; |
332 | struct inode *inode; | ||
352 | gfs2_str2qstr(&qstr, name); | 333 | gfs2_str2qstr(&qstr, name); |
353 | return gfs2_lookupi(dip, &qstr, 1, NULL); | 334 | inode = gfs2_lookupi(dip, &qstr, 1, NULL); |
335 | /* gfs2_lookupi has inconsistent callers: vfs | ||
336 | * related routines expect NULL for no entry found, | ||
337 | * gfs2_lookup_simple callers expect ENOENT | ||
338 | * and do not check for NULL. | ||
339 | */ | ||
340 | if (inode == NULL) | ||
341 | return ERR_PTR(-ENOENT); | ||
342 | else | ||
343 | return inode; | ||
354 | } | 344 | } |
355 | 345 | ||
356 | 346 | ||
@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) | |||
361 | * @is_root: If 1, ignore the caller's permissions | 351 | * @is_root: If 1, ignore the caller's permissions |
362 | * @i_gh: An uninitialized holder for the new inode glock | 352 | * @i_gh: An uninitialized holder for the new inode glock |
363 | * | 353 | * |
364 | * There will always be a vnode (Linux VFS inode) for the d_gh inode unless | 354 | * This can be called via the VFS filldir function when NFS is doing |
365 | * @is_root is true. | 355 | * a readdirplus and the inode which its intending to stat isn't |
356 | * already in cache. In this case we must not take the directory glock | ||
357 | * again, since the readdir call will have already taken that lock. | ||
366 | * | 358 | * |
367 | * Returns: errno | 359 | * Returns: errno |
368 | */ | 360 | */ |
@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
375 | struct gfs2_holder d_gh; | 367 | struct gfs2_holder d_gh; |
376 | struct gfs2_inum_host inum; | 368 | struct gfs2_inum_host inum; |
377 | unsigned int type; | 369 | unsigned int type; |
378 | int error = 0; | 370 | int error; |
379 | struct inode *inode = NULL; | 371 | struct inode *inode = NULL; |
372 | int unlock = 0; | ||
380 | 373 | ||
381 | if (!name->len || name->len > GFS2_FNAMESIZE) | 374 | if (!name->len || name->len > GFS2_FNAMESIZE) |
382 | return ERR_PTR(-ENAMETOOLONG); | 375 | return ERR_PTR(-ENAMETOOLONG); |
@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
388 | return dir; | 381 | return dir; |
389 | } | 382 | } |
390 | 383 | ||
391 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 384 | if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { |
392 | if (error) | 385 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
393 | return ERR_PTR(error); | 386 | if (error) |
387 | return ERR_PTR(error); | ||
388 | unlock = 1; | ||
389 | } | ||
394 | 390 | ||
395 | if (!is_root) { | 391 | if (!is_root) { |
396 | error = permission(dir, MAY_EXEC, NULL); | 392 | error = permission(dir, MAY_EXEC, NULL); |
@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
405 | inode = gfs2_inode_lookup(sb, &inum, type); | 401 | inode = gfs2_inode_lookup(sb, &inum, type); |
406 | 402 | ||
407 | out: | 403 | out: |
408 | gfs2_glock_dq_uninit(&d_gh); | 404 | if (unlock) |
405 | gfs2_glock_dq_uninit(&d_gh); | ||
409 | if (error == -ENOENT) | 406 | if (error == -ENOENT) |
410 | return NULL; | 407 | return NULL; |
411 | return inode; | 408 | return inode ? inode : ERR_PTR(error); |
412 | } | 409 | } |
413 | 410 | ||
414 | static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) | 411 | static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) |
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c index effe4a337c1d..e30673dd37e0 100644 --- a/fs/gfs2/lm.c +++ b/fs/gfs2/lm.c | |||
@@ -104,15 +104,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | |||
104 | vprintk(fmt, args); | 104 | vprintk(fmt, args); |
105 | va_end(args); | 105 | va_end(args); |
106 | 106 | ||
107 | fs_err(sdp, "about to withdraw from the cluster\n"); | 107 | fs_err(sdp, "about to withdraw this file system\n"); |
108 | BUG_ON(sdp->sd_args.ar_debug); | 108 | BUG_ON(sdp->sd_args.ar_debug); |
109 | 109 | ||
110 | |||
111 | fs_err(sdp, "waiting for outstanding I/O\n"); | ||
112 | |||
113 | /* FIXME: suspend dm device so oustanding bio's complete | ||
114 | and all further io requests fail */ | ||
115 | |||
116 | fs_err(sdp, "telling LM to withdraw\n"); | 110 | fs_err(sdp, "telling LM to withdraw\n"); |
117 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); | 111 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); |
118 | fs_err(sdp, "withdrawn\n"); | 112 | fs_err(sdp, "withdrawn\n"); |
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 33af707a4d3f..a87c7bf3c568 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
@@ -36,7 +36,7 @@ | |||
36 | 36 | ||
37 | #define GDLM_STRNAME_BYTES 24 | 37 | #define GDLM_STRNAME_BYTES 24 |
38 | #define GDLM_LVB_SIZE 32 | 38 | #define GDLM_LVB_SIZE 32 |
39 | #define GDLM_DROP_COUNT 50000 | 39 | #define GDLM_DROP_COUNT 200000 |
40 | #define GDLM_DROP_PERIOD 60 | 40 | #define GDLM_DROP_PERIOD 60 |
41 | #define GDLM_NAME_LEN 128 | 41 | #define GDLM_NAME_LEN 128 |
42 | 42 | ||
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index 2194b1d5b5ec..a0e7eda643ed 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c | |||
@@ -11,9 +11,6 @@ | |||
11 | 11 | ||
12 | #include "lock_dlm.h" | 12 | #include "lock_dlm.h" |
13 | 13 | ||
14 | extern int gdlm_drop_count; | ||
15 | extern int gdlm_drop_period; | ||
16 | |||
17 | extern struct lm_lockops gdlm_ops; | 14 | extern struct lm_lockops gdlm_ops; |
18 | 15 | ||
19 | static int __init init_lock_dlm(void) | 16 | static int __init init_lock_dlm(void) |
@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void) | |||
40 | return error; | 37 | return error; |
41 | } | 38 | } |
42 | 39 | ||
43 | gdlm_drop_count = GDLM_DROP_COUNT; | ||
44 | gdlm_drop_period = GDLM_DROP_PERIOD; | ||
45 | |||
46 | printk(KERN_INFO | 40 | printk(KERN_INFO |
47 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); | 41 | "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); |
48 | return 0; | 42 | return 0; |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index cdd1694e889b..1d8faa3da8af 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -9,8 +9,6 @@ | |||
9 | 9 | ||
10 | #include "lock_dlm.h" | 10 | #include "lock_dlm.h" |
11 | 11 | ||
12 | int gdlm_drop_count; | ||
13 | int gdlm_drop_period; | ||
14 | const struct lm_lockops gdlm_ops; | 12 | const struct lm_lockops gdlm_ops; |
15 | 13 | ||
16 | 14 | ||
@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp, | |||
24 | if (!ls) | 22 | if (!ls) |
25 | return NULL; | 23 | return NULL; |
26 | 24 | ||
27 | ls->drop_locks_count = gdlm_drop_count; | 25 | ls->drop_locks_count = GDLM_DROP_COUNT; |
28 | ls->drop_locks_period = gdlm_drop_period; | 26 | ls->drop_locks_period = GDLM_DROP_PERIOD; |
29 | ls->fscb = cb; | 27 | ls->fscb = cb; |
30 | ls->sdp = sdp; | 28 | ls->sdp = sdp; |
31 | ls->fsflags = flags; | 29 | ls->fsflags = flags; |
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index 29ae06f94944..4746b884662d 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf) | |||
116 | return sprintf(buf, "%d\n", ls->recover_jid_status); | 116 | return sprintf(buf, "%d\n", ls->recover_jid_status); |
117 | } | 117 | } |
118 | 118 | ||
119 | static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf) | ||
120 | { | ||
121 | return sprintf(buf, "%d\n", ls->drop_locks_count); | ||
122 | } | ||
123 | |||
124 | static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len) | ||
125 | { | ||
126 | ls->drop_locks_count = simple_strtol(buf, NULL, 0); | ||
127 | return len; | ||
128 | } | ||
129 | |||
119 | struct gdlm_attr { | 130 | struct gdlm_attr { |
120 | struct attribute attr; | 131 | struct attribute attr; |
121 | ssize_t (*show)(struct gdlm_ls *, char *); | 132 | ssize_t (*show)(struct gdlm_ls *, char *); |
@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL); | |||
135 | GDLM_ATTR(recover, 0644, recover_show, recover_store); | 146 | GDLM_ATTR(recover, 0644, recover_show, recover_store); |
136 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 147 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
137 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 148 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
149 | GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store); | ||
138 | 150 | ||
139 | static struct attribute *gdlm_attrs[] = { | 151 | static struct attribute *gdlm_attrs[] = { |
140 | &gdlm_attr_proto_name.attr, | 152 | &gdlm_attr_proto_name.attr, |
@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = { | |||
147 | &gdlm_attr_recover.attr, | 159 | &gdlm_attr_recover.attr, |
148 | &gdlm_attr_recover_done.attr, | 160 | &gdlm_attr_recover_done.attr, |
149 | &gdlm_attr_recover_status.attr, | 161 | &gdlm_attr_recover_status.attr, |
162 | &gdlm_attr_drop_count.attr, | ||
150 | NULL, | 163 | NULL, |
151 | }; | 164 | }; |
152 | 165 | ||
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4d7f94d8c7bd..16bb4b4561ae 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
69 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); | 69 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); |
70 | struct gfs2_trans *tr; | 70 | struct gfs2_trans *tr; |
71 | 71 | ||
72 | if (!list_empty(&bd->bd_list_tr)) | 72 | gfs2_log_lock(sdp); |
73 | if (!list_empty(&bd->bd_list_tr)) { | ||
74 | gfs2_log_unlock(sdp); | ||
73 | return; | 75 | return; |
74 | 76 | } | |
75 | tr = current->journal_info; | 77 | tr = current->journal_info; |
76 | tr->tr_touched = 1; | 78 | tr->tr_touched = 1; |
77 | tr->tr_num_buf++; | 79 | tr->tr_num_buf++; |
78 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 80 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); |
81 | gfs2_log_unlock(sdp); | ||
79 | 82 | ||
80 | if (!list_empty(&le->le_list)) | 83 | if (!list_empty(&le->le_list)) |
81 | return; | 84 | return; |
@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
84 | 87 | ||
85 | gfs2_meta_check(sdp, bd->bd_bh); | 88 | gfs2_meta_check(sdp, bd->bd_bh); |
86 | gfs2_pin(sdp, bd->bd_bh); | 89 | gfs2_pin(sdp, bd->bd_bh); |
87 | |||
88 | gfs2_log_lock(sdp); | 90 | gfs2_log_lock(sdp); |
89 | sdp->sd_log_num_buf++; | 91 | sdp->sd_log_num_buf++; |
90 | list_add(&le->le_list, &sdp->sd_log_le_buf); | 92 | list_add(&le->le_list, &sdp->sd_log_le_buf); |
@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
98 | struct list_head *head = &tr->tr_list_buf; | 100 | struct list_head *head = &tr->tr_list_buf; |
99 | struct gfs2_bufdata *bd; | 101 | struct gfs2_bufdata *bd; |
100 | 102 | ||
103 | gfs2_log_lock(sdp); | ||
101 | while (!list_empty(head)) { | 104 | while (!list_empty(head)) { |
102 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | 105 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); |
103 | list_del_init(&bd->bd_list_tr); | 106 | list_del_init(&bd->bd_list_tr); |
104 | tr->tr_num_buf--; | 107 | tr->tr_num_buf--; |
105 | } | 108 | } |
109 | gfs2_log_unlock(sdp); | ||
106 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | 110 | gfs2_assert_warn(sdp, !tr->tr_num_buf); |
107 | } | 111 | } |
108 | 112 | ||
@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
462 | struct address_space *mapping = bd->bd_bh->b_page->mapping; | 466 | struct address_space *mapping = bd->bd_bh->b_page->mapping; |
463 | struct gfs2_inode *ip = GFS2_I(mapping->host); | 467 | struct gfs2_inode *ip = GFS2_I(mapping->host); |
464 | 468 | ||
469 | gfs2_log_lock(sdp); | ||
465 | tr->tr_touched = 1; | 470 | tr->tr_touched = 1; |
466 | if (list_empty(&bd->bd_list_tr) && | 471 | if (list_empty(&bd->bd_list_tr) && |
467 | (ip->i_di.di_flags & GFS2_DIF_JDATA)) { | 472 | (ip->i_di.di_flags & GFS2_DIF_JDATA)) { |
468 | tr->tr_num_buf++; | 473 | tr->tr_num_buf++; |
469 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 474 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); |
475 | gfs2_log_unlock(sdp); | ||
470 | gfs2_pin(sdp, bd->bd_bh); | 476 | gfs2_pin(sdp, bd->bd_bh); |
471 | tr->tr_num_buf_new++; | 477 | tr->tr_num_buf_new++; |
478 | } else { | ||
479 | gfs2_log_unlock(sdp); | ||
472 | } | 480 | } |
473 | gfs2_trans_add_gl(bd->bd_gl); | 481 | gfs2_trans_add_gl(bd->bd_gl); |
474 | gfs2_log_lock(sdp); | 482 | gfs2_log_lock(sdp); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d8d69a72a10d..56e33590b656 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/pagevec.h> | 16 | #include <linux/pagevec.h> |
17 | #include <linux/mpage.h> | 17 | #include <linux/mpage.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/writeback.h> | ||
19 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/lm_interface.h> | 21 | #include <linux/lm_interface.h> |
21 | 22 | ||
@@ -157,6 +158,32 @@ out_ignore: | |||
157 | } | 158 | } |
158 | 159 | ||
159 | /** | 160 | /** |
161 | * gfs2_writepages - Write a bunch of dirty pages back to disk | ||
162 | * @mapping: The mapping to write | ||
163 | * @wbc: Write-back control | ||
164 | * | ||
165 | * For journaled files and/or ordered writes this just falls back to the | ||
166 | * kernel's default writepages path for now. We will probably want to change | ||
167 | * that eventually (i.e. when we look at allocate on flush). | ||
168 | * | ||
169 | * For the data=writeback case though we can already ignore buffer heads | ||
170 | * and write whole extents at once. This is a big reduction in the | ||
171 | * number of I/O requests we send and the bmap calls we make in this case. | ||
172 | */ | ||
173 | static int gfs2_writepages(struct address_space *mapping, | ||
174 | struct writeback_control *wbc) | ||
175 | { | ||
176 | struct inode *inode = mapping->host; | ||
177 | struct gfs2_inode *ip = GFS2_I(inode); | ||
178 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
179 | |||
180 | if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) | ||
181 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | ||
182 | |||
183 | return generic_writepages(mapping, wbc); | ||
184 | } | ||
185 | |||
186 | /** | ||
160 | * stuffed_readpage - Fill in a Linux page with stuffed file data | 187 | * stuffed_readpage - Fill in a Linux page with stuffed file data |
161 | * @ip: the inode | 188 | * @ip: the inode |
162 | * @page: the page | 189 | * @page: the page |
@@ -256,7 +283,7 @@ out_unlock: | |||
256 | * the page lock and the glock) and return having done no I/O. Its | 283 | * the page lock and the glock) and return having done no I/O. Its |
257 | * obviously not something we'd want to do on too regular a basis. | 284 | * obviously not something we'd want to do on too regular a basis. |
258 | * Any I/O we ignore at this time will be done via readpage later. | 285 | * Any I/O we ignore at this time will be done via readpage later. |
259 | * 2. We have to handle stuffed files here too. | 286 | * 2. We don't handle stuffed files here we let readpage do the honours. |
260 | * 3. mpage_readpages() does most of the heavy lifting in the common case. | 287 | * 3. mpage_readpages() does most of the heavy lifting in the common case. |
261 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. | 288 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. |
262 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as | 289 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as |
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
269 | struct gfs2_inode *ip = GFS2_I(inode); | 296 | struct gfs2_inode *ip = GFS2_I(inode); |
270 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 297 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
271 | struct gfs2_holder gh; | 298 | struct gfs2_holder gh; |
272 | unsigned page_idx; | 299 | int ret = 0; |
273 | int ret; | ||
274 | int do_unlock = 0; | 300 | int do_unlock = 0; |
275 | 301 | ||
276 | if (likely(file != &gfs2_internal_file_sentinel)) { | 302 | if (likely(file != &gfs2_internal_file_sentinel)) { |
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
289 | goto out_unlock; | 315 | goto out_unlock; |
290 | } | 316 | } |
291 | skip_lock: | 317 | skip_lock: |
292 | if (gfs2_is_stuffed(ip)) { | 318 | if (!gfs2_is_stuffed(ip)) |
293 | struct pagevec lru_pvec; | ||
294 | pagevec_init(&lru_pvec, 0); | ||
295 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | ||
296 | struct page *page = list_entry(pages->prev, struct page, lru); | ||
297 | prefetchw(&page->flags); | ||
298 | list_del(&page->lru); | ||
299 | if (!add_to_page_cache(page, mapping, | ||
300 | page->index, GFP_KERNEL)) { | ||
301 | ret = stuffed_readpage(ip, page); | ||
302 | unlock_page(page); | ||
303 | if (!pagevec_add(&lru_pvec, page)) | ||
304 | __pagevec_lru_add(&lru_pvec); | ||
305 | } else { | ||
306 | page_cache_release(page); | ||
307 | } | ||
308 | } | ||
309 | pagevec_lru_add(&lru_pvec); | ||
310 | ret = 0; | ||
311 | } else { | ||
312 | /* What we really want to do .... */ | ||
313 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); | 319 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); |
314 | } | ||
315 | 320 | ||
316 | if (do_unlock) { | 321 | if (do_unlock) { |
317 | gfs2_glock_dq_m(1, &gh); | 322 | gfs2_glock_dq_m(1, &gh); |
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page, | |||
356 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); | 361 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); |
357 | error = gfs2_glock_nq_atime(&ip->i_gh); | 362 | error = gfs2_glock_nq_atime(&ip->i_gh); |
358 | if (unlikely(error)) { | 363 | if (unlikely(error)) { |
359 | if (error == GLR_TRYFAILED) | 364 | if (error == GLR_TRYFAILED) { |
365 | unlock_page(page); | ||
360 | error = AOP_TRUNCATED_PAGE; | 366 | error = AOP_TRUNCATED_PAGE; |
367 | } | ||
361 | goto out_uninit; | 368 | goto out_uninit; |
362 | } | 369 | } |
363 | 370 | ||
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) | |||
594 | return; | 601 | return; |
595 | } | 602 | } |
596 | 603 | ||
604 | /** | ||
605 | * gfs2_ok_for_dio - check that dio is valid on this file | ||
606 | * @ip: The inode | ||
607 | * @rw: READ or WRITE | ||
608 | * @offset: The offset at which we are reading or writing | ||
609 | * | ||
610 | * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) | ||
611 | * 1 (to accept the i/o request) | ||
612 | */ | ||
613 | static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | ||
614 | { | ||
615 | /* | ||
616 | * Should we return an error here? I can't see that O_DIRECT for | ||
617 | * a journaled file makes any sense. For now we'll silently fall | ||
618 | * back to buffered I/O, likewise we do the same for stuffed | ||
619 | * files since they are (a) small and (b) unaligned. | ||
620 | */ | ||
621 | if (gfs2_is_jdata(ip)) | ||
622 | return 0; | ||
623 | |||
624 | if (gfs2_is_stuffed(ip)) | ||
625 | return 0; | ||
626 | |||
627 | if (offset > i_size_read(&ip->i_inode)) | ||
628 | return 0; | ||
629 | return 1; | ||
630 | } | ||
631 | |||
632 | |||
633 | |||
597 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | 634 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, |
598 | const struct iovec *iov, loff_t offset, | 635 | const struct iovec *iov, loff_t offset, |
599 | unsigned long nr_segs) | 636 | unsigned long nr_segs) |
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
604 | struct gfs2_holder gh; | 641 | struct gfs2_holder gh; |
605 | int rv; | 642 | int rv; |
606 | 643 | ||
607 | if (rw == READ) | ||
608 | mutex_lock(&inode->i_mutex); | ||
609 | /* | 644 | /* |
610 | * Shared lock, even if its a write, since we do no allocation | 645 | * Deferred lock, even if its a write, since we do no allocation |
611 | * on this path. All we need change is atime. | 646 | * on this path. All we need change is atime, and this lock mode |
647 | * ensures that other nodes have flushed their buffered read caches | ||
648 | * (i.e. their page cache entries for this inode). We do not, | ||
649 | * unfortunately have the option of only flushing a range like | ||
650 | * the VFS does. | ||
612 | */ | 651 | */ |
613 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 652 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); |
614 | rv = gfs2_glock_nq_atime(&gh); | 653 | rv = gfs2_glock_nq_atime(&gh); |
615 | if (rv) | 654 | if (rv) |
616 | goto out; | 655 | return rv; |
617 | 656 | rv = gfs2_ok_for_dio(ip, rw, offset); | |
618 | if (offset > i_size_read(inode)) | 657 | if (rv != 1) |
619 | goto out; | 658 | goto out; /* dio not valid, fall back to buffered i/o */ |
620 | 659 | ||
621 | /* | 660 | rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, |
622 | * Should we return an error here? I can't see that O_DIRECT for | 661 | iov, offset, nr_segs, |
623 | * a journaled file makes any sense. For now we'll silently fall | 662 | gfs2_get_block_direct, NULL); |
624 | * back to buffered I/O, likewise we do the same for stuffed | ||
625 | * files since they are (a) small and (b) unaligned. | ||
626 | */ | ||
627 | if (gfs2_is_jdata(ip)) | ||
628 | goto out; | ||
629 | |||
630 | if (gfs2_is_stuffed(ip)) | ||
631 | goto out; | ||
632 | |||
633 | rv = blockdev_direct_IO_own_locking(rw, iocb, inode, | ||
634 | inode->i_sb->s_bdev, | ||
635 | iov, offset, nr_segs, | ||
636 | gfs2_get_block_direct, NULL); | ||
637 | out: | 663 | out: |
638 | gfs2_glock_dq_m(1, &gh); | 664 | gfs2_glock_dq_m(1, &gh); |
639 | gfs2_holder_uninit(&gh); | 665 | gfs2_holder_uninit(&gh); |
640 | if (rw == READ) | ||
641 | mutex_unlock(&inode->i_mutex); | ||
642 | |||
643 | return rv; | 666 | return rv; |
644 | } | 667 | } |
645 | 668 | ||
@@ -763,6 +786,7 @@ out: | |||
763 | 786 | ||
764 | const struct address_space_operations gfs2_file_aops = { | 787 | const struct address_space_operations gfs2_file_aops = { |
765 | .writepage = gfs2_writepage, | 788 | .writepage = gfs2_writepage, |
789 | .writepages = gfs2_writepages, | ||
766 | .readpage = gfs2_readpage, | 790 | .readpage = gfs2_readpage, |
767 | .readpages = gfs2_readpages, | 791 | .readpages = gfs2_readpages, |
768 | .sync_page = block_sync_page, | 792 | .sync_page = block_sync_page, |
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index d355899585d8..9187eb174b43 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c | |||
@@ -46,6 +46,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
46 | struct gfs2_inum_host inum; | 46 | struct gfs2_inum_host inum; |
47 | unsigned int type; | 47 | unsigned int type; |
48 | int error; | 48 | int error; |
49 | int had_lock=0; | ||
49 | 50 | ||
50 | if (inode && is_bad_inode(inode)) | 51 | if (inode && is_bad_inode(inode)) |
51 | goto invalid; | 52 | goto invalid; |
@@ -53,9 +54,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
53 | if (sdp->sd_args.ar_localcaching) | 54 | if (sdp->sd_args.ar_localcaching) |
54 | goto valid; | 55 | goto valid; |
55 | 56 | ||
56 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 57 | had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); |
57 | if (error) | 58 | if (!had_lock) { |
58 | goto fail; | 59 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
60 | if (error) | ||
61 | goto fail; | ||
62 | } | ||
59 | 63 | ||
60 | error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); | 64 | error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); |
61 | switch (error) { | 65 | switch (error) { |
@@ -82,13 +86,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
82 | } | 86 | } |
83 | 87 | ||
84 | valid_gunlock: | 88 | valid_gunlock: |
85 | gfs2_glock_dq_uninit(&d_gh); | 89 | if (!had_lock) |
90 | gfs2_glock_dq_uninit(&d_gh); | ||
86 | valid: | 91 | valid: |
87 | dput(parent); | 92 | dput(parent); |
88 | return 1; | 93 | return 1; |
89 | 94 | ||
90 | invalid_gunlock: | 95 | invalid_gunlock: |
91 | gfs2_glock_dq_uninit(&d_gh); | 96 | if (!had_lock) |
97 | gfs2_glock_dq_uninit(&d_gh); | ||
92 | invalid: | 98 | invalid: |
93 | if (inode && S_ISDIR(inode->i_mode)) { | 99 | if (inode && S_ISDIR(inode->i_mode)) { |
94 | if (have_submounts(dentry)) | 100 | if (have_submounts(dentry)) |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b4e7b8775315..4855e8cca622 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "glock.h" | 22 | #include "glock.h" |
23 | #include "glops.h" | 23 | #include "glops.h" |
24 | #include "inode.h" | 24 | #include "inode.h" |
25 | #include "ops_dentry.h" | ||
25 | #include "ops_export.h" | 26 | #include "ops_export.h" |
26 | #include "rgrp.h" | 27 | #include "rgrp.h" |
27 | #include "util.h" | 28 | #include "util.h" |
@@ -112,13 +113,12 @@ struct get_name_filldir { | |||
112 | char *name; | 113 | char *name; |
113 | }; | 114 | }; |
114 | 115 | ||
115 | static int get_name_filldir(void *opaque, const char *name, unsigned int length, | 116 | static int get_name_filldir(void *opaque, const char *name, int length, |
116 | u64 offset, struct gfs2_inum_host *inum, | 117 | loff_t offset, u64 inum, unsigned int type) |
117 | unsigned int type) | ||
118 | { | 118 | { |
119 | struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; | 119 | struct get_name_filldir *gnfd = opaque; |
120 | 120 | ||
121 | if (!gfs2_inum_equal(inum, &gnfd->inum)) | 121 | if (inum != gnfd->inum.no_addr) |
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | memcpy(gnfd->name, name, length); | 124 | memcpy(gnfd->name, name, length); |
@@ -189,6 +189,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) | |||
189 | return ERR_PTR(-ENOMEM); | 189 | return ERR_PTR(-ENOMEM); |
190 | } | 190 | } |
191 | 191 | ||
192 | dentry->d_op = &gfs2_dops; | ||
192 | return dentry; | 193 | return dentry; |
193 | } | 194 | } |
194 | 195 | ||
@@ -215,8 +216,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) | |||
215 | } | 216 | } |
216 | 217 | ||
217 | error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, | 218 | error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, |
218 | LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, | 219 | LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
219 | &i_gh); | ||
220 | if (error) | 220 | if (error) |
221 | return ERR_PTR(error); | 221 | return ERR_PTR(error); |
222 | 222 | ||
@@ -269,6 +269,7 @@ out_inode: | |||
269 | return ERR_PTR(-ENOMEM); | 269 | return ERR_PTR(-ENOMEM); |
270 | } | 270 | } |
271 | 271 | ||
272 | dentry->d_op = &gfs2_dops; | ||
272 | return dentry; | 273 | return dentry; |
273 | 274 | ||
274 | fail_rgd: | 275 | fail_rgd: |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index faa07e4b97d0..c996aa739a05 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -43,15 +43,6 @@ | |||
43 | #include "util.h" | 43 | #include "util.h" |
44 | #include "eaops.h" | 44 | #include "eaops.h" |
45 | 45 | ||
46 | /* For regular, non-NFS */ | ||
47 | struct filldir_reg { | ||
48 | struct gfs2_sbd *fdr_sbd; | ||
49 | int fdr_prefetch; | ||
50 | |||
51 | filldir_t fdr_filldir; | ||
52 | void *fdr_opaque; | ||
53 | }; | ||
54 | |||
55 | /* | 46 | /* |
56 | * Most fields left uninitialised to catch anybody who tries to | 47 | * Most fields left uninitialised to catch anybody who tries to |
57 | * use them. f_flags set to prevent file_accessed() from touching | 48 | * use them. f_flags set to prevent file_accessed() from touching |
@@ -128,41 +119,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) | |||
128 | } | 119 | } |
129 | 120 | ||
130 | /** | 121 | /** |
131 | * filldir_func - Report a directory entry to the caller of gfs2_dir_read() | ||
132 | * @opaque: opaque data used by the function | ||
133 | * @name: the name of the directory entry | ||
134 | * @length: the length of the name | ||
135 | * @offset: the entry's offset in the directory | ||
136 | * @inum: the inode number the entry points to | ||
137 | * @type: the type of inode the entry points to | ||
138 | * | ||
139 | * Returns: 0 on success, 1 if buffer full | ||
140 | */ | ||
141 | |||
142 | static int filldir_func(void *opaque, const char *name, unsigned int length, | ||
143 | u64 offset, struct gfs2_inum_host *inum, | ||
144 | unsigned int type) | ||
145 | { | ||
146 | struct filldir_reg *fdr = (struct filldir_reg *)opaque; | ||
147 | struct gfs2_sbd *sdp = fdr->fdr_sbd; | ||
148 | int error; | ||
149 | |||
150 | error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, | ||
151 | inum->no_addr, type); | ||
152 | if (error) | ||
153 | return 1; | ||
154 | |||
155 | if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { | ||
156 | gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops, | ||
157 | LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); | ||
158 | gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops, | ||
159 | LM_ST_SHARED, LM_FLAG_TRY); | ||
160 | } | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * gfs2_readdir - Read directory entries from a directory | 122 | * gfs2_readdir - Read directory entries from a directory |
167 | * @file: The directory to read from | 123 | * @file: The directory to read from |
168 | * @dirent: Buffer for dirents | 124 | * @dirent: Buffer for dirents |
@@ -175,16 +131,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
175 | { | 131 | { |
176 | struct inode *dir = file->f_mapping->host; | 132 | struct inode *dir = file->f_mapping->host; |
177 | struct gfs2_inode *dip = GFS2_I(dir); | 133 | struct gfs2_inode *dip = GFS2_I(dir); |
178 | struct filldir_reg fdr; | ||
179 | struct gfs2_holder d_gh; | 134 | struct gfs2_holder d_gh; |
180 | u64 offset = file->f_pos; | 135 | u64 offset = file->f_pos; |
181 | int error; | 136 | int error; |
182 | 137 | ||
183 | fdr.fdr_sbd = GFS2_SB(dir); | ||
184 | fdr.fdr_prefetch = 1; | ||
185 | fdr.fdr_filldir = filldir; | ||
186 | fdr.fdr_opaque = dirent; | ||
187 | |||
188 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 138 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); |
189 | error = gfs2_glock_nq_atime(&d_gh); | 139 | error = gfs2_glock_nq_atime(&d_gh); |
190 | if (error) { | 140 | if (error) { |
@@ -192,7 +142,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
192 | return error; | 142 | return error; |
193 | } | 143 | } |
194 | 144 | ||
195 | error = gfs2_dir_read(dir, &offset, &fdr, filldir_func); | 145 | error = gfs2_dir_read(dir, &offset, dirent, filldir); |
196 | 146 | ||
197 | gfs2_glock_dq_uninit(&d_gh); | 147 | gfs2_glock_dq_uninit(&d_gh); |
198 | 148 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 636dda4c7d38..f40a84807d75 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -264,13 +264,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
264 | struct gfs2_inode *dip = GFS2_I(dir); | 264 | struct gfs2_inode *dip = GFS2_I(dir); |
265 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 265 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
266 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 266 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
267 | struct gfs2_holder ghs[2]; | 267 | struct gfs2_holder ghs[3]; |
268 | struct gfs2_rgrpd *rgd; | ||
269 | struct gfs2_holder ri_gh; | ||
268 | int error; | 270 | int error; |
269 | 271 | ||
272 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
273 | if (error) | ||
274 | return error; | ||
275 | |||
270 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 276 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
271 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 277 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
272 | 278 | ||
273 | error = gfs2_glock_nq_m(2, ghs); | 279 | rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); |
280 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | ||
281 | |||
282 | |||
283 | error = gfs2_glock_nq_m(3, ghs); | ||
274 | if (error) | 284 | if (error) |
275 | goto out; | 285 | goto out; |
276 | 286 | ||
@@ -291,10 +301,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
291 | out_end_trans: | 301 | out_end_trans: |
292 | gfs2_trans_end(sdp); | 302 | gfs2_trans_end(sdp); |
293 | out_gunlock: | 303 | out_gunlock: |
294 | gfs2_glock_dq_m(2, ghs); | 304 | gfs2_glock_dq_m(3, ghs); |
295 | out: | 305 | out: |
296 | gfs2_holder_uninit(ghs); | 306 | gfs2_holder_uninit(ghs); |
297 | gfs2_holder_uninit(ghs + 1); | 307 | gfs2_holder_uninit(ghs + 1); |
308 | gfs2_holder_uninit(ghs + 2); | ||
309 | gfs2_glock_dq_uninit(&ri_gh); | ||
298 | return error; | 310 | return error; |
299 | } | 311 | } |
300 | 312 | ||
@@ -449,13 +461,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
449 | struct gfs2_inode *dip = GFS2_I(dir); | 461 | struct gfs2_inode *dip = GFS2_I(dir); |
450 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 462 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
451 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 463 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
452 | struct gfs2_holder ghs[2]; | 464 | struct gfs2_holder ghs[3]; |
465 | struct gfs2_rgrpd *rgd; | ||
466 | struct gfs2_holder ri_gh; | ||
453 | int error; | 467 | int error; |
454 | 468 | ||
469 | |||
470 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
471 | if (error) | ||
472 | return error; | ||
455 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 473 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
456 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 474 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
457 | 475 | ||
458 | error = gfs2_glock_nq_m(2, ghs); | 476 | rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); |
477 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | ||
478 | |||
479 | error = gfs2_glock_nq_m(3, ghs); | ||
459 | if (error) | 480 | if (error) |
460 | goto out; | 481 | goto out; |
461 | 482 | ||
@@ -483,10 +504,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
483 | gfs2_trans_end(sdp); | 504 | gfs2_trans_end(sdp); |
484 | 505 | ||
485 | out_gunlock: | 506 | out_gunlock: |
486 | gfs2_glock_dq_m(2, ghs); | 507 | gfs2_glock_dq_m(3, ghs); |
487 | out: | 508 | out: |
488 | gfs2_holder_uninit(ghs); | 509 | gfs2_holder_uninit(ghs); |
489 | gfs2_holder_uninit(ghs + 1); | 510 | gfs2_holder_uninit(ghs + 1); |
511 | gfs2_holder_uninit(ghs + 2); | ||
512 | gfs2_glock_dq_uninit(&ri_gh); | ||
490 | return error; | 513 | return error; |
491 | } | 514 | } |
492 | 515 | ||
@@ -547,7 +570,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
547 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 570 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
548 | struct gfs2_inode *nip = NULL; | 571 | struct gfs2_inode *nip = NULL; |
549 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 572 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
550 | struct gfs2_holder ghs[4], r_gh; | 573 | struct gfs2_holder ghs[5], r_gh; |
574 | struct gfs2_rgrpd *nrgd; | ||
551 | unsigned int num_gh; | 575 | unsigned int num_gh; |
552 | int dir_rename = 0; | 576 | int dir_rename = 0; |
553 | int alloc_required; | 577 | int alloc_required; |
@@ -587,6 +611,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
587 | if (nip) { | 611 | if (nip) { |
588 | gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); | 612 | gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); |
589 | num_gh++; | 613 | num_gh++; |
614 | /* grab the resource lock for unlink flag twiddling | ||
615 | * this is the case of the target file already existing | ||
616 | * so we unlink before doing the rename | ||
617 | */ | ||
618 | nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); | ||
619 | if (nrgd) | ||
620 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | ||
590 | } | 621 | } |
591 | 622 | ||
592 | error = gfs2_glock_nq_m(num_gh, ghs); | 623 | error = gfs2_glock_nq_m(num_gh, ghs); |
@@ -684,12 +715,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
684 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 715 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
685 | al->al_rgd->rd_ri.ri_length + | 716 | al->al_rgd->rd_ri.ri_length + |
686 | 4 * RES_DINODE + 4 * RES_LEAF + | 717 | 4 * RES_DINODE + 4 * RES_LEAF + |
687 | RES_STATFS + RES_QUOTA, 0); | 718 | RES_STATFS + RES_QUOTA + 4, 0); |
688 | if (error) | 719 | if (error) |
689 | goto out_ipreserv; | 720 | goto out_ipreserv; |
690 | } else { | 721 | } else { |
691 | error = gfs2_trans_begin(sdp, 4 * RES_DINODE + | 722 | error = gfs2_trans_begin(sdp, 4 * RES_DINODE + |
692 | 5 * RES_LEAF, 0); | 723 | 5 * RES_LEAF + 4, 0); |
693 | if (error) | 724 | if (error) |
694 | goto out_gunlock; | 725 | goto out_gunlock; |
695 | } | 726 | } |
@@ -728,7 +759,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
728 | error = gfs2_meta_inode_buffer(ip, &dibh); | 759 | error = gfs2_meta_inode_buffer(ip, &dibh); |
729 | if (error) | 760 | if (error) |
730 | goto out_end_trans; | 761 | goto out_end_trans; |
731 | ip->i_inode.i_ctime.tv_sec = get_seconds(); | 762 | ip->i_inode.i_ctime = CURRENT_TIME_SEC; |
732 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 763 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
733 | gfs2_dinode_out(ip, dibh->b_data); | 764 | gfs2_dinode_out(ip, dibh->b_data); |
734 | brelse(dibh); | 765 | brelse(dibh); |
@@ -1018,7 +1049,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1018 | } | 1049 | } |
1019 | 1050 | ||
1020 | generic_fillattr(inode, stat); | 1051 | generic_fillattr(inode, stat); |
1021 | if (unlock); | 1052 | if (unlock) |
1022 | gfs2_glock_dq_uninit(&gh); | 1053 | gfs2_glock_dq_uninit(&gh); |
1023 | 1054 | ||
1024 | return 0; | 1055 | return 0; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 7685b46f934b..47369d011214 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb) | |||
173 | struct gfs2_sbd *sdp = sb->s_fs_info; | 173 | struct gfs2_sbd *sdp = sb->s_fs_info; |
174 | int error; | 174 | int error; |
175 | 175 | ||
176 | if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
177 | return; | ||
178 | |||
176 | for (;;) { | 179 | for (;;) { |
177 | error = gfs2_freeze_fs(sdp); | 180 | error = gfs2_freeze_fs(sdp); |
178 | if (!error) | 181 | if (!error) |
@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode) | |||
426 | } | 429 | } |
427 | 430 | ||
428 | error = gfs2_dinode_dealloc(ip); | 431 | error = gfs2_dinode_dealloc(ip); |
432 | /* | ||
433 | * Must do this before unlock to avoid trying to write back | ||
434 | * potentially dirty data now that inode no longer exists | ||
435 | * on disk. | ||
436 | */ | ||
437 | truncate_inode_pages(&inode->i_data, 0); | ||
429 | 438 | ||
430 | out_unlock: | 439 | out_unlock: |
431 | gfs2_glock_dq(&ip->i_iopen_gh); | 440 | gfs2_glock_dq(&ip->i_iopen_gh); |
@@ -443,14 +452,12 @@ out: | |||
443 | 452 | ||
444 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 453 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
445 | { | 454 | { |
446 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
447 | struct gfs2_inode *ip; | 455 | struct gfs2_inode *ip; |
448 | 456 | ||
449 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); | 457 | ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); |
450 | if (ip) { | 458 | if (ip) { |
451 | ip->i_flags = 0; | 459 | ip->i_flags = 0; |
452 | ip->i_gl = NULL; | 460 | ip->i_gl = NULL; |
453 | ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); | ||
454 | ip->i_last_pfault = jiffies; | 461 | ip->i_last_pfault = jiffies; |
455 | } | 462 | } |
456 | return &ip->i_inode; | 463 | return &ip->i_inode; |
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 45a5f11fc39a..14b380fb0602 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c | |||
@@ -28,34 +28,13 @@ | |||
28 | #include "trans.h" | 28 | #include "trans.h" |
29 | #include "util.h" | 29 | #include "util.h" |
30 | 30 | ||
31 | static void pfault_be_greedy(struct gfs2_inode *ip) | ||
32 | { | ||
33 | unsigned int time; | ||
34 | |||
35 | spin_lock(&ip->i_spin); | ||
36 | time = ip->i_greedy; | ||
37 | ip->i_last_pfault = jiffies; | ||
38 | spin_unlock(&ip->i_spin); | ||
39 | |||
40 | igrab(&ip->i_inode); | ||
41 | if (gfs2_glock_be_greedy(ip->i_gl, time)) | ||
42 | iput(&ip->i_inode); | ||
43 | } | ||
44 | |||
45 | static struct page *gfs2_private_nopage(struct vm_area_struct *area, | 31 | static struct page *gfs2_private_nopage(struct vm_area_struct *area, |
46 | unsigned long address, int *type) | 32 | unsigned long address, int *type) |
47 | { | 33 | { |
48 | struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); | 34 | struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); |
49 | struct page *result; | ||
50 | 35 | ||
51 | set_bit(GIF_PAGED, &ip->i_flags); | 36 | set_bit(GIF_PAGED, &ip->i_flags); |
52 | 37 | return filemap_nopage(area, address, type); | |
53 | result = filemap_nopage(area, address, type); | ||
54 | |||
55 | if (result && result != NOPAGE_OOM) | ||
56 | pfault_be_greedy(ip); | ||
57 | |||
58 | return result; | ||
59 | } | 38 | } |
60 | 39 | ||
61 | static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) | 40 | static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) |
@@ -167,7 +146,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, | |||
167 | set_page_dirty(result); | 146 | set_page_dirty(result); |
168 | } | 147 | } |
169 | 148 | ||
170 | pfault_be_greedy(ip); | ||
171 | out: | 149 | out: |
172 | gfs2_glock_dq_uninit(&i_gh); | 150 | gfs2_glock_dq_uninit(&i_gh); |
173 | 151 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 43a24f2e5905..70f424fcf1cd 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
71 | gt->gt_atime_quantum = 3600; | 71 | gt->gt_atime_quantum = 3600; |
72 | gt->gt_new_files_jdata = 0; | 72 | gt->gt_new_files_jdata = 0; |
73 | gt->gt_new_files_directio = 0; | 73 | gt->gt_new_files_directio = 0; |
74 | gt->gt_max_atomic_write = 4 << 20; | ||
75 | gt->gt_max_readahead = 1 << 18; | 74 | gt->gt_max_readahead = 1 << 18; |
76 | gt->gt_lockdump_size = 131072; | 75 | gt->gt_lockdump_size = 131072; |
77 | gt->gt_stall_secs = 600; | 76 | gt->gt_stall_secs = 600; |
78 | gt->gt_complain_secs = 10; | 77 | gt->gt_complain_secs = 10; |
79 | gt->gt_reclaim_limit = 5000; | 78 | gt->gt_reclaim_limit = 5000; |
80 | gt->gt_entries_per_readdir = 32; | 79 | gt->gt_entries_per_readdir = 32; |
81 | gt->gt_prefetch_secs = 10; | ||
82 | gt->gt_greedy_default = HZ / 10; | ||
83 | gt->gt_greedy_quantum = HZ / 40; | ||
84 | gt->gt_greedy_max = HZ / 4; | ||
85 | gt->gt_statfs_quantum = 30; | 80 | gt->gt_statfs_quantum = 30; |
86 | gt->gt_statfs_slow = 0; | 81 | gt->gt_statfs_slow = 0; |
87 | } | 82 | } |
@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
359 | mutex_lock(&sdp->sd_jindex_mutex); | 354 | mutex_lock(&sdp->sd_jindex_mutex); |
360 | 355 | ||
361 | for (;;) { | 356 | for (;;) { |
362 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, | 357 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); |
363 | GL_LOCAL_EXCL, ji_gh); | ||
364 | if (error) | 358 | if (error) |
365 | break; | 359 | break; |
366 | 360 | ||
@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
529 | struct gfs2_log_header_host head; | 523 | struct gfs2_log_header_host head; |
530 | int error; | 524 | int error; |
531 | 525 | ||
532 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, | 526 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); |
533 | GL_LOCAL_EXCL, &t_gh); | ||
534 | if (error) | 527 | if (error) |
535 | return error; | 528 | return error; |
536 | 529 | ||
@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | |||
583 | gfs2_quota_sync(sdp); | 576 | gfs2_quota_sync(sdp); |
584 | gfs2_statfs_sync(sdp); | 577 | gfs2_statfs_sync(sdp); |
585 | 578 | ||
586 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, | 579 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, |
587 | GL_LOCAL_EXCL | GL_NOCACHE, | 580 | &t_gh); |
588 | &t_gh); | ||
589 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | 581 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) |
590 | return error; | 582 | return error; |
591 | 583 | ||
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 983eaf1e06be..d01f9f0fda26 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0); | |||
436 | TUNE_ATTR(max_readahead, 0); | 436 | TUNE_ATTR(max_readahead, 0); |
437 | TUNE_ATTR(complain_secs, 0); | 437 | TUNE_ATTR(complain_secs, 0); |
438 | TUNE_ATTR(reclaim_limit, 0); | 438 | TUNE_ATTR(reclaim_limit, 0); |
439 | TUNE_ATTR(prefetch_secs, 0); | ||
440 | TUNE_ATTR(statfs_slow, 0); | 439 | TUNE_ATTR(statfs_slow, 0); |
441 | TUNE_ATTR(new_files_jdata, 0); | 440 | TUNE_ATTR(new_files_jdata, 0); |
442 | TUNE_ATTR(new_files_directio, 0); | 441 | TUNE_ATTR(new_files_directio, 0); |
443 | TUNE_ATTR(quota_simul_sync, 1); | 442 | TUNE_ATTR(quota_simul_sync, 1); |
444 | TUNE_ATTR(quota_cache_secs, 1); | 443 | TUNE_ATTR(quota_cache_secs, 1); |
445 | TUNE_ATTR(max_atomic_write, 1); | ||
446 | TUNE_ATTR(stall_secs, 1); | 444 | TUNE_ATTR(stall_secs, 1); |
447 | TUNE_ATTR(greedy_default, 1); | ||
448 | TUNE_ATTR(greedy_quantum, 1); | ||
449 | TUNE_ATTR(greedy_max, 1); | ||
450 | TUNE_ATTR(statfs_quantum, 1); | 445 | TUNE_ATTR(statfs_quantum, 1); |
451 | TUNE_ATTR_DAEMON(scand_secs, scand_process); | 446 | TUNE_ATTR_DAEMON(scand_secs, scand_process); |
452 | TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); | 447 | TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); |
@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = { | |||
465 | &tune_attr_max_readahead.attr, | 460 | &tune_attr_max_readahead.attr, |
466 | &tune_attr_complain_secs.attr, | 461 | &tune_attr_complain_secs.attr, |
467 | &tune_attr_reclaim_limit.attr, | 462 | &tune_attr_reclaim_limit.attr, |
468 | &tune_attr_prefetch_secs.attr, | ||
469 | &tune_attr_statfs_slow.attr, | 463 | &tune_attr_statfs_slow.attr, |
470 | &tune_attr_quota_simul_sync.attr, | 464 | &tune_attr_quota_simul_sync.attr, |
471 | &tune_attr_quota_cache_secs.attr, | 465 | &tune_attr_quota_cache_secs.attr, |
472 | &tune_attr_max_atomic_write.attr, | ||
473 | &tune_attr_stall_secs.attr, | 466 | &tune_attr_stall_secs.attr, |
474 | &tune_attr_greedy_default.attr, | ||
475 | &tune_attr_greedy_quantum.attr, | ||
476 | &tune_attr_greedy_max.attr, | ||
477 | &tune_attr_statfs_quantum.attr, | 467 | &tune_attr_statfs_quantum.attr, |
478 | &tune_attr_scand_secs.attr, | 468 | &tune_attr_scand_secs.attr, |
479 | &tune_attr_recoverd_secs.attr, | 469 | &tune_attr_recoverd_secs.attr, |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index f5719117edfe..e285022f006c 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -182,9 +182,9 @@ int jfs_get_block(struct inode *ip, sector_t lblock, | |||
182 | * Take appropriate lock on inode | 182 | * Take appropriate lock on inode |
183 | */ | 183 | */ |
184 | if (create) | 184 | if (create) |
185 | IWRITE_LOCK(ip); | 185 | IWRITE_LOCK(ip, RDWRLOCK_NORMAL); |
186 | else | 186 | else |
187 | IREAD_LOCK(ip); | 187 | IREAD_LOCK(ip, RDWRLOCK_NORMAL); |
188 | 188 | ||
189 | if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && | 189 | if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && |
190 | (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && | 190 | (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && |
@@ -359,7 +359,7 @@ void jfs_truncate(struct inode *ip) | |||
359 | 359 | ||
360 | nobh_truncate_page(ip->i_mapping, ip->i_size); | 360 | nobh_truncate_page(ip->i_mapping, ip->i_size); |
361 | 361 | ||
362 | IWRITE_LOCK(ip); | 362 | IWRITE_LOCK(ip, RDWRLOCK_NORMAL); |
363 | jfs_truncate_nolock(ip, ip->i_size); | 363 | jfs_truncate_nolock(ip, ip->i_size); |
364 | IWRITE_UNLOCK(ip); | 364 | IWRITE_UNLOCK(ip); |
365 | } | 365 | } |
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index ddffbbd4d955..7378798f0b21 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h | |||
@@ -39,10 +39,6 @@ extern void jfs_proc_clean(void); | |||
39 | /* | 39 | /* |
40 | * assert with traditional printf/panic | 40 | * assert with traditional printf/panic |
41 | */ | 41 | */ |
42 | #ifdef CONFIG_KERNEL_ASSERTS | ||
43 | /* kgdb stuff */ | ||
44 | #define assert(p) KERNEL_ASSERT(#p, p) | ||
45 | #else | ||
46 | #define assert(p) do { \ | 42 | #define assert(p) do { \ |
47 | if (!(p)) { \ | 43 | if (!(p)) { \ |
48 | printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \ | 44 | printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \ |
@@ -50,7 +46,6 @@ extern void jfs_proc_clean(void); | |||
50 | BUG(); \ | 46 | BUG(); \ |
51 | } \ | 47 | } \ |
52 | } while (0) | 48 | } while (0) |
53 | #endif | ||
54 | 49 | ||
55 | /* | 50 | /* |
56 | * debug ON | 51 | * debug ON |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 23546c8fd48b..82b0544bd76d 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -337,7 +337,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
337 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 337 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
338 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 338 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; |
339 | 339 | ||
340 | IREAD_LOCK(ipbmap); | 340 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
341 | 341 | ||
342 | /* block to be freed better be within the mapsize. */ | 342 | /* block to be freed better be within the mapsize. */ |
343 | if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { | 343 | if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { |
@@ -733,7 +733,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
733 | * allocation group size, try to allocate anywhere. | 733 | * allocation group size, try to allocate anywhere. |
734 | */ | 734 | */ |
735 | if (l2nb > bmp->db_agl2size) { | 735 | if (l2nb > bmp->db_agl2size) { |
736 | IWRITE_LOCK(ipbmap); | 736 | IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); |
737 | 737 | ||
738 | rc = dbAllocAny(bmp, nblocks, l2nb, results); | 738 | rc = dbAllocAny(bmp, nblocks, l2nb, results); |
739 | 739 | ||
@@ -774,7 +774,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
774 | * the hint using a tiered strategy. | 774 | * the hint using a tiered strategy. |
775 | */ | 775 | */ |
776 | if (nblocks <= BPERDMAP) { | 776 | if (nblocks <= BPERDMAP) { |
777 | IREAD_LOCK(ipbmap); | 777 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
778 | 778 | ||
779 | /* get the buffer for the dmap containing the hint. | 779 | /* get the buffer for the dmap containing the hint. |
780 | */ | 780 | */ |
@@ -844,7 +844,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
844 | /* try to satisfy the allocation request with blocks within | 844 | /* try to satisfy the allocation request with blocks within |
845 | * the same allocation group as the hint. | 845 | * the same allocation group as the hint. |
846 | */ | 846 | */ |
847 | IWRITE_LOCK(ipbmap); | 847 | IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); |
848 | if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) | 848 | if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) |
849 | goto write_unlock; | 849 | goto write_unlock; |
850 | 850 | ||
@@ -856,7 +856,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
856 | * Let dbNextAG recommend a preferred allocation group | 856 | * Let dbNextAG recommend a preferred allocation group |
857 | */ | 857 | */ |
858 | agno = dbNextAG(ipbmap); | 858 | agno = dbNextAG(ipbmap); |
859 | IWRITE_LOCK(ipbmap); | 859 | IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); |
860 | 860 | ||
861 | /* Try to allocate within this allocation group. if that fails, try to | 861 | /* Try to allocate within this allocation group. if that fails, try to |
862 | * allocate anywhere in the map. | 862 | * allocate anywhere in the map. |
@@ -900,7 +900,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) | |||
900 | s64 lblkno; | 900 | s64 lblkno; |
901 | struct metapage *mp; | 901 | struct metapage *mp; |
902 | 902 | ||
903 | IREAD_LOCK(ipbmap); | 903 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
904 | 904 | ||
905 | /* | 905 | /* |
906 | * validate extent request: | 906 | * validate extent request: |
@@ -1050,7 +1050,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | |||
1050 | */ | 1050 | */ |
1051 | extblkno = lastblkno + 1; | 1051 | extblkno = lastblkno + 1; |
1052 | 1052 | ||
1053 | IREAD_LOCK(ipbmap); | 1053 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
1054 | 1054 | ||
1055 | /* better be within the file system */ | 1055 | /* better be within the file system */ |
1056 | bmp = sbi->bmap; | 1056 | bmp = sbi->bmap; |
@@ -3116,7 +3116,7 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) | |||
3116 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | 3116 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; |
3117 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; | 3117 | struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; |
3118 | 3118 | ||
3119 | IREAD_LOCK(ipbmap); | 3119 | IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); |
3120 | 3120 | ||
3121 | /* block to be allocated better be within the mapsize. */ | 3121 | /* block to be allocated better be within the mapsize. */ |
3122 | ASSERT(nblocks <= bmp->db_mapsize - blkno); | 3122 | ASSERT(nblocks <= bmp->db_mapsize - blkno); |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 53f63b47a6d3..aa5124b643b1 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -331,7 +331,7 @@ int diRead(struct inode *ip) | |||
331 | 331 | ||
332 | /* read the iag */ | 332 | /* read the iag */ |
333 | imap = JFS_IP(ipimap)->i_imap; | 333 | imap = JFS_IP(ipimap)->i_imap; |
334 | IREAD_LOCK(ipimap); | 334 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
335 | rc = diIAGRead(imap, iagno, &mp); | 335 | rc = diIAGRead(imap, iagno, &mp); |
336 | IREAD_UNLOCK(ipimap); | 336 | IREAD_UNLOCK(ipimap); |
337 | if (rc) { | 337 | if (rc) { |
@@ -920,7 +920,7 @@ int diFree(struct inode *ip) | |||
920 | /* Obtain read lock in imap inode. Don't release it until we have | 920 | /* Obtain read lock in imap inode. Don't release it until we have |
921 | * read all of the IAG's that we are going to. | 921 | * read all of the IAG's that we are going to. |
922 | */ | 922 | */ |
923 | IREAD_LOCK(ipimap); | 923 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
924 | 924 | ||
925 | /* read the iag. | 925 | /* read the iag. |
926 | */ | 926 | */ |
@@ -1415,7 +1415,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1415 | AG_LOCK(imap, agno); | 1415 | AG_LOCK(imap, agno); |
1416 | 1416 | ||
1417 | /* Get read lock on imap inode */ | 1417 | /* Get read lock on imap inode */ |
1418 | IREAD_LOCK(ipimap); | 1418 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
1419 | 1419 | ||
1420 | /* get the iag number and read the iag */ | 1420 | /* get the iag number and read the iag */ |
1421 | iagno = INOTOIAG(inum); | 1421 | iagno = INOTOIAG(inum); |
@@ -1808,7 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1808 | return -ENOSPC; | 1808 | return -ENOSPC; |
1809 | 1809 | ||
1810 | /* obtain read lock on imap inode */ | 1810 | /* obtain read lock on imap inode */ |
1811 | IREAD_LOCK(imap->im_ipimap); | 1811 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); |
1812 | 1812 | ||
1813 | /* read the iag at the head of the list. | 1813 | /* read the iag at the head of the list. |
1814 | */ | 1814 | */ |
@@ -1946,7 +1946,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
1946 | } else { | 1946 | } else { |
1947 | /* read the iag. | 1947 | /* read the iag. |
1948 | */ | 1948 | */ |
1949 | IREAD_LOCK(imap->im_ipimap); | 1949 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); |
1950 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 1950 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
1951 | IREAD_UNLOCK(imap->im_ipimap); | 1951 | IREAD_UNLOCK(imap->im_ipimap); |
1952 | jfs_error(ip->i_sb, "diAllocExt: error reading iag"); | 1952 | jfs_error(ip->i_sb, "diAllocExt: error reading iag"); |
@@ -2509,7 +2509,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2509 | */ | 2509 | */ |
2510 | 2510 | ||
2511 | /* acquire inode map lock */ | 2511 | /* acquire inode map lock */ |
2512 | IWRITE_LOCK(ipimap); | 2512 | IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); |
2513 | 2513 | ||
2514 | if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { | 2514 | if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { |
2515 | IWRITE_UNLOCK(ipimap); | 2515 | IWRITE_UNLOCK(ipimap); |
@@ -2648,7 +2648,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2648 | } | 2648 | } |
2649 | 2649 | ||
2650 | /* obtain read lock on map */ | 2650 | /* obtain read lock on map */ |
2651 | IREAD_LOCK(ipimap); | 2651 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
2652 | 2652 | ||
2653 | /* read the iag */ | 2653 | /* read the iag */ |
2654 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 2654 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
@@ -2779,7 +2779,7 @@ diUpdatePMap(struct inode *ipimap, | |||
2779 | return -EIO; | 2779 | return -EIO; |
2780 | } | 2780 | } |
2781 | /* read the iag */ | 2781 | /* read the iag */ |
2782 | IREAD_LOCK(ipimap); | 2782 | IREAD_LOCK(ipimap, RDWRLOCK_IMAP); |
2783 | rc = diIAGRead(imap, iagno, &mp); | 2783 | rc = diIAGRead(imap, iagno, &mp); |
2784 | IREAD_UNLOCK(ipimap); | 2784 | IREAD_UNLOCK(ipimap); |
2785 | if (rc) | 2785 | if (rc) |
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 94005584445a..8f453eff3c83 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h | |||
@@ -109,9 +109,11 @@ struct jfs_inode_info { | |||
109 | 109 | ||
110 | #define JFS_ACL_NOT_CACHED ((void *)-1) | 110 | #define JFS_ACL_NOT_CACHED ((void *)-1) |
111 | 111 | ||
112 | #define IREAD_LOCK(ip) down_read(&JFS_IP(ip)->rdwrlock) | 112 | #define IREAD_LOCK(ip, subclass) \ |
113 | down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) | ||
113 | #define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) | 114 | #define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) |
114 | #define IWRITE_LOCK(ip) down_write(&JFS_IP(ip)->rdwrlock) | 115 | #define IWRITE_LOCK(ip, subclass) \ |
116 | down_write_nested(&JFS_IP(ip)->rdwrlock, subclass) | ||
115 | #define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock) | 117 | #define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock) |
116 | 118 | ||
117 | /* | 119 | /* |
@@ -127,6 +129,29 @@ enum cflags { | |||
127 | COMMIT_Synclist, /* metadata pages on group commit synclist */ | 129 | COMMIT_Synclist, /* metadata pages on group commit synclist */ |
128 | }; | 130 | }; |
129 | 131 | ||
132 | /* | ||
133 | * commit_mutex nesting subclasses: | ||
134 | */ | ||
135 | enum commit_mutex_class | ||
136 | { | ||
137 | COMMIT_MUTEX_PARENT, | ||
138 | COMMIT_MUTEX_CHILD, | ||
139 | COMMIT_MUTEX_SECOND_PARENT, /* Renaming */ | ||
140 | COMMIT_MUTEX_VICTIM /* Inode being unlinked due to rename */ | ||
141 | }; | ||
142 | |||
143 | /* | ||
144 | * rdwrlock subclasses: | ||
145 | * The dmap inode may be locked while a normal inode or the imap inode are | ||
146 | * locked. | ||
147 | */ | ||
148 | enum rdwrlock_class | ||
149 | { | ||
150 | RDWRLOCK_NORMAL, | ||
151 | RDWRLOCK_IMAP, | ||
152 | RDWRLOCK_DMAP | ||
153 | }; | ||
154 | |||
130 | #define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) | 155 | #define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) |
131 | #define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) | 156 | #define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) |
132 | #define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) | 157 | #define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) |
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index 7d78e83d7c40..df48ece4b7a3 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h | |||
@@ -42,7 +42,7 @@ do { \ | |||
42 | if (cond) \ | 42 | if (cond) \ |
43 | break; \ | 43 | break; \ |
44 | unlock_cmd; \ | 44 | unlock_cmd; \ |
45 | schedule(); \ | 45 | io_schedule(); \ |
46 | lock_cmd; \ | 46 | lock_cmd; \ |
47 | } \ | 47 | } \ |
48 | current->state = TASK_RUNNING; \ | 48 | current->state = TASK_RUNNING; \ |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index ceaf03b94935..58deae007507 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
@@ -56,7 +56,7 @@ static inline void __lock_metapage(struct metapage *mp) | |||
56 | set_current_state(TASK_UNINTERRUPTIBLE); | 56 | set_current_state(TASK_UNINTERRUPTIBLE); |
57 | if (metapage_locked(mp)) { | 57 | if (metapage_locked(mp)) { |
58 | unlock_page(mp->page); | 58 | unlock_page(mp->page); |
59 | schedule(); | 59 | io_schedule(); |
60 | lock_page(mp->page); | 60 | lock_page(mp->page); |
61 | } | 61 | } |
62 | } while (trylock_metapage(mp)); | 62 | } while (trylock_metapage(mp)); |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index d558e51b0df8..6988a1082f58 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -135,7 +135,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) | |||
135 | add_wait_queue(event, &wait); | 135 | add_wait_queue(event, &wait); |
136 | set_current_state(TASK_UNINTERRUPTIBLE); | 136 | set_current_state(TASK_UNINTERRUPTIBLE); |
137 | TXN_UNLOCK(); | 137 | TXN_UNLOCK(); |
138 | schedule(); | 138 | io_schedule(); |
139 | current->state = TASK_RUNNING; | 139 | current->state = TASK_RUNNING; |
140 | remove_wait_queue(event, &wait); | 140 | remove_wait_queue(event, &wait); |
141 | } | 141 | } |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index e98eb03e5310..acc97c46d8a4 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
@@ -757,6 +757,11 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
757 | nsplit = 0; | 757 | nsplit = 0; |
758 | 758 | ||
759 | /* push (bn, index) of the parent page/entry */ | 759 | /* push (bn, index) of the parent page/entry */ |
760 | if (BT_STACK_FULL(btstack)) { | ||
761 | jfs_error(ip->i_sb, "stack overrun in xtSearch!"); | ||
762 | XT_PUTPAGE(mp); | ||
763 | return -EIO; | ||
764 | } | ||
760 | BT_PUSH(btstack, bn, index); | 765 | BT_PUSH(btstack, bn, index); |
761 | 766 | ||
762 | /* get the child page block number */ | 767 | /* get the child page block number */ |
@@ -3915,6 +3920,11 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
3915 | */ | 3920 | */ |
3916 | getChild: | 3921 | getChild: |
3917 | /* save current parent entry for the child page */ | 3922 | /* save current parent entry for the child page */ |
3923 | if (BT_STACK_FULL(&btstack)) { | ||
3924 | jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); | ||
3925 | XT_PUTPAGE(mp); | ||
3926 | return -EIO; | ||
3927 | } | ||
3918 | BT_PUSH(&btstack, bn, index); | 3928 | BT_PUSH(&btstack, bn, index); |
3919 | 3929 | ||
3920 | /* get child page */ | 3930 | /* get child page */ |
@@ -4112,6 +4122,11 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
4112 | */ | 4122 | */ |
4113 | getChild: | 4123 | getChild: |
4114 | /* save current parent entry for the child page */ | 4124 | /* save current parent entry for the child page */ |
4125 | if (BT_STACK_FULL(&btstack)) { | ||
4126 | jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); | ||
4127 | XT_PUTPAGE(mp); | ||
4128 | return -EIO; | ||
4129 | } | ||
4115 | BT_PUSH(&btstack, bn, index); | 4130 | BT_PUSH(&btstack, bn, index); |
4116 | 4131 | ||
4117 | /* get child page */ | 4132 | /* get child page */ |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index a6a8c16c872c..7ab47561b68d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -104,8 +104,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, | |||
104 | 104 | ||
105 | tid = txBegin(dip->i_sb, 0); | 105 | tid = txBegin(dip->i_sb, 0); |
106 | 106 | ||
107 | mutex_lock(&JFS_IP(dip)->commit_mutex); | 107 | mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); |
108 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 108 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
109 | 109 | ||
110 | rc = jfs_init_acl(tid, ip, dip); | 110 | rc = jfs_init_acl(tid, ip, dip); |
111 | if (rc) | 111 | if (rc) |
@@ -238,8 +238,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) | |||
238 | 238 | ||
239 | tid = txBegin(dip->i_sb, 0); | 239 | tid = txBegin(dip->i_sb, 0); |
240 | 240 | ||
241 | mutex_lock(&JFS_IP(dip)->commit_mutex); | 241 | mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); |
242 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 242 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
243 | 243 | ||
244 | rc = jfs_init_acl(tid, ip, dip); | 244 | rc = jfs_init_acl(tid, ip, dip); |
245 | if (rc) | 245 | if (rc) |
@@ -365,8 +365,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | |||
365 | 365 | ||
366 | tid = txBegin(dip->i_sb, 0); | 366 | tid = txBegin(dip->i_sb, 0); |
367 | 367 | ||
368 | mutex_lock(&JFS_IP(dip)->commit_mutex); | 368 | mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); |
369 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 369 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
370 | 370 | ||
371 | iplist[0] = dip; | 371 | iplist[0] = dip; |
372 | iplist[1] = ip; | 372 | iplist[1] = ip; |
@@ -483,12 +483,12 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
483 | if ((rc = get_UCSname(&dname, dentry))) | 483 | if ((rc = get_UCSname(&dname, dentry))) |
484 | goto out; | 484 | goto out; |
485 | 485 | ||
486 | IWRITE_LOCK(ip); | 486 | IWRITE_LOCK(ip, RDWRLOCK_NORMAL); |
487 | 487 | ||
488 | tid = txBegin(dip->i_sb, 0); | 488 | tid = txBegin(dip->i_sb, 0); |
489 | 489 | ||
490 | mutex_lock(&JFS_IP(dip)->commit_mutex); | 490 | mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); |
491 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 491 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
492 | 492 | ||
493 | iplist[0] = dip; | 493 | iplist[0] = dip; |
494 | iplist[1] = ip; | 494 | iplist[1] = ip; |
@@ -802,8 +802,8 @@ static int jfs_link(struct dentry *old_dentry, | |||
802 | 802 | ||
803 | tid = txBegin(ip->i_sb, 0); | 803 | tid = txBegin(ip->i_sb, 0); |
804 | 804 | ||
805 | mutex_lock(&JFS_IP(dir)->commit_mutex); | 805 | mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); |
806 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 806 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
807 | 807 | ||
808 | /* | 808 | /* |
809 | * scan parent directory for entry/freespace | 809 | * scan parent directory for entry/freespace |
@@ -913,8 +913,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
913 | 913 | ||
914 | tid = txBegin(dip->i_sb, 0); | 914 | tid = txBegin(dip->i_sb, 0); |
915 | 915 | ||
916 | mutex_lock(&JFS_IP(dip)->commit_mutex); | 916 | mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); |
917 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 917 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
918 | 918 | ||
919 | rc = jfs_init_security(tid, ip, dip); | 919 | rc = jfs_init_security(tid, ip, dip); |
920 | if (rc) | 920 | if (rc) |
@@ -1127,7 +1127,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1127 | goto out3; | 1127 | goto out3; |
1128 | } | 1128 | } |
1129 | } else if (new_ip) { | 1129 | } else if (new_ip) { |
1130 | IWRITE_LOCK(new_ip); | 1130 | IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); |
1131 | /* Init inode for quota operations. */ | 1131 | /* Init inode for quota operations. */ |
1132 | DQUOT_INIT(new_ip); | 1132 | DQUOT_INIT(new_ip); |
1133 | } | 1133 | } |
@@ -1137,13 +1137,21 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1137 | */ | 1137 | */ |
1138 | tid = txBegin(new_dir->i_sb, 0); | 1138 | tid = txBegin(new_dir->i_sb, 0); |
1139 | 1139 | ||
1140 | mutex_lock(&JFS_IP(new_dir)->commit_mutex); | 1140 | /* |
1141 | mutex_lock(&JFS_IP(old_ip)->commit_mutex); | 1141 | * How do we know the locking is safe from deadlocks? |
1142 | * The vfs does the hard part for us. Any time we are taking nested | ||
1143 | * commit_mutexes, the vfs already has i_mutex held on the parent. | ||
1144 | * Here, the vfs has already taken i_mutex on both old_dir and new_dir. | ||
1145 | */ | ||
1146 | mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex, COMMIT_MUTEX_PARENT); | ||
1147 | mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD); | ||
1142 | if (old_dir != new_dir) | 1148 | if (old_dir != new_dir) |
1143 | mutex_lock(&JFS_IP(old_dir)->commit_mutex); | 1149 | mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex, |
1150 | COMMIT_MUTEX_SECOND_PARENT); | ||
1144 | 1151 | ||
1145 | if (new_ip) { | 1152 | if (new_ip) { |
1146 | mutex_lock(&JFS_IP(new_ip)->commit_mutex); | 1153 | mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex, |
1154 | COMMIT_MUTEX_VICTIM); | ||
1147 | /* | 1155 | /* |
1148 | * Change existing directory entry to new inode number | 1156 | * Change existing directory entry to new inode number |
1149 | */ | 1157 | */ |
@@ -1357,8 +1365,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, | |||
1357 | 1365 | ||
1358 | tid = txBegin(dir->i_sb, 0); | 1366 | tid = txBegin(dir->i_sb, 0); |
1359 | 1367 | ||
1360 | mutex_lock(&JFS_IP(dir)->commit_mutex); | 1368 | mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); |
1361 | mutex_lock(&JFS_IP(ip)->commit_mutex); | 1369 | mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); |
1362 | 1370 | ||
1363 | rc = jfs_init_acl(tid, ip, dir); | 1371 | rc = jfs_init_acl(tid, ip, dir); |
1364 | if (rc) | 1372 | if (rc) |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 277ca67a2ad6..5a9779bb9236 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -184,10 +184,9 @@ static void o2hb_disarm_write_timeout(struct o2hb_region *reg) | |||
184 | flush_scheduled_work(); | 184 | flush_scheduled_work(); |
185 | } | 185 | } |
186 | 186 | ||
187 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc, | 187 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) |
188 | unsigned int num_ios) | ||
189 | { | 188 | { |
190 | atomic_set(&wc->wc_num_reqs, num_ios); | 189 | atomic_set(&wc->wc_num_reqs, 1); |
191 | init_completion(&wc->wc_io_complete); | 190 | init_completion(&wc->wc_io_complete); |
192 | wc->wc_error = 0; | 191 | wc->wc_error = 0; |
193 | } | 192 | } |
@@ -212,6 +211,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg, | |||
212 | struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping; | 211 | struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping; |
213 | 212 | ||
214 | blk_run_address_space(mapping); | 213 | blk_run_address_space(mapping); |
214 | o2hb_bio_wait_dec(wc, 1); | ||
215 | 215 | ||
216 | wait_for_completion(&wc->wc_io_complete); | 216 | wait_for_completion(&wc->wc_io_complete); |
217 | } | 217 | } |
@@ -231,6 +231,7 @@ static int o2hb_bio_end_io(struct bio *bio, | |||
231 | return 1; | 231 | return 1; |
232 | 232 | ||
233 | o2hb_bio_wait_dec(wc, 1); | 233 | o2hb_bio_wait_dec(wc, 1); |
234 | bio_put(bio); | ||
234 | return 0; | 235 | return 0; |
235 | } | 236 | } |
236 | 237 | ||
@@ -238,23 +239,22 @@ static int o2hb_bio_end_io(struct bio *bio, | |||
238 | * start_slot. */ | 239 | * start_slot. */ |
239 | static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, | 240 | static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, |
240 | struct o2hb_bio_wait_ctxt *wc, | 241 | struct o2hb_bio_wait_ctxt *wc, |
241 | unsigned int start_slot, | 242 | unsigned int *current_slot, |
242 | unsigned int num_slots) | 243 | unsigned int max_slots) |
243 | { | 244 | { |
244 | int i, nr_vecs, len, first_page, last_page; | 245 | int len, current_page; |
245 | unsigned int vec_len, vec_start; | 246 | unsigned int vec_len, vec_start; |
246 | unsigned int bits = reg->hr_block_bits; | 247 | unsigned int bits = reg->hr_block_bits; |
247 | unsigned int spp = reg->hr_slots_per_page; | 248 | unsigned int spp = reg->hr_slots_per_page; |
249 | unsigned int cs = *current_slot; | ||
248 | struct bio *bio; | 250 | struct bio *bio; |
249 | struct page *page; | 251 | struct page *page; |
250 | 252 | ||
251 | nr_vecs = (num_slots + spp - 1) / spp; | ||
252 | |||
253 | /* Testing has shown this allocation to take long enough under | 253 | /* Testing has shown this allocation to take long enough under |
254 | * GFP_KERNEL that the local node can get fenced. It would be | 254 | * GFP_KERNEL that the local node can get fenced. It would be |
255 | * nicest if we could pre-allocate these bios and avoid this | 255 | * nicest if we could pre-allocate these bios and avoid this |
256 | * all together. */ | 256 | * all together. */ |
257 | bio = bio_alloc(GFP_ATOMIC, nr_vecs); | 257 | bio = bio_alloc(GFP_ATOMIC, 16); |
258 | if (!bio) { | 258 | if (!bio) { |
259 | mlog(ML_ERROR, "Could not alloc slots BIO!\n"); | 259 | mlog(ML_ERROR, "Could not alloc slots BIO!\n"); |
260 | bio = ERR_PTR(-ENOMEM); | 260 | bio = ERR_PTR(-ENOMEM); |
@@ -262,137 +262,53 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, | |||
262 | } | 262 | } |
263 | 263 | ||
264 | /* Must put everything in 512 byte sectors for the bio... */ | 264 | /* Must put everything in 512 byte sectors for the bio... */ |
265 | bio->bi_sector = (reg->hr_start_block + start_slot) << (bits - 9); | 265 | bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9); |
266 | bio->bi_bdev = reg->hr_bdev; | 266 | bio->bi_bdev = reg->hr_bdev; |
267 | bio->bi_private = wc; | 267 | bio->bi_private = wc; |
268 | bio->bi_end_io = o2hb_bio_end_io; | 268 | bio->bi_end_io = o2hb_bio_end_io; |
269 | 269 | ||
270 | first_page = start_slot / spp; | 270 | vec_start = (cs << bits) % PAGE_CACHE_SIZE; |
271 | last_page = first_page + nr_vecs; | 271 | while(cs < max_slots) { |
272 | vec_start = (start_slot << bits) % PAGE_CACHE_SIZE; | 272 | current_page = cs / spp; |
273 | for(i = first_page; i < last_page; i++) { | 273 | page = reg->hr_slot_data[current_page]; |
274 | page = reg->hr_slot_data[i]; | ||
275 | 274 | ||
276 | vec_len = PAGE_CACHE_SIZE; | 275 | vec_len = min(PAGE_CACHE_SIZE, |
277 | /* last page might be short */ | 276 | (max_slots-cs) * (PAGE_CACHE_SIZE/spp) ); |
278 | if (((i + 1) * spp) > (start_slot + num_slots)) | ||
279 | vec_len = ((num_slots + start_slot) % spp) << bits; | ||
280 | vec_len -= vec_start; | ||
281 | 277 | ||
282 | mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", | 278 | mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", |
283 | i, vec_len, vec_start); | 279 | current_page, vec_len, vec_start); |
284 | 280 | ||
285 | len = bio_add_page(bio, page, vec_len, vec_start); | 281 | len = bio_add_page(bio, page, vec_len, vec_start); |
286 | if (len != vec_len) { | 282 | if (len != vec_len) break; |
287 | bio_put(bio); | ||
288 | bio = ERR_PTR(-EIO); | ||
289 | |||
290 | mlog(ML_ERROR, "Error adding page to bio i = %d, " | ||
291 | "vec_len = %u, len = %d\n, start = %u\n", | ||
292 | i, vec_len, len, vec_start); | ||
293 | goto bail; | ||
294 | } | ||
295 | 283 | ||
284 | cs += vec_len / (PAGE_CACHE_SIZE/spp); | ||
296 | vec_start = 0; | 285 | vec_start = 0; |
297 | } | 286 | } |
298 | 287 | ||
299 | bail: | 288 | bail: |
289 | *current_slot = cs; | ||
300 | return bio; | 290 | return bio; |
301 | } | 291 | } |
302 | 292 | ||
303 | /* | ||
304 | * Compute the maximum number of sectors the bdev can handle in one bio, | ||
305 | * as a power of two. | ||
306 | * | ||
307 | * Stolen from oracleasm, thanks Joel! | ||
308 | */ | ||
309 | static int compute_max_sectors(struct block_device *bdev) | ||
310 | { | ||
311 | int max_pages, max_sectors, pow_two_sectors; | ||
312 | |||
313 | struct request_queue *q; | ||
314 | |||
315 | q = bdev_get_queue(bdev); | ||
316 | max_pages = q->max_sectors >> (PAGE_SHIFT - 9); | ||
317 | if (max_pages > BIO_MAX_PAGES) | ||
318 | max_pages = BIO_MAX_PAGES; | ||
319 | if (max_pages > q->max_phys_segments) | ||
320 | max_pages = q->max_phys_segments; | ||
321 | if (max_pages > q->max_hw_segments) | ||
322 | max_pages = q->max_hw_segments; | ||
323 | max_pages--; /* Handle I/Os that straddle a page */ | ||
324 | |||
325 | if (max_pages) { | ||
326 | max_sectors = max_pages << (PAGE_SHIFT - 9); | ||
327 | } else { | ||
328 | /* If BIO contains 1 or less than 1 page. */ | ||
329 | max_sectors = q->max_sectors; | ||
330 | } | ||
331 | /* Why is fls() 1-based???? */ | ||
332 | pow_two_sectors = 1 << (fls(max_sectors) - 1); | ||
333 | |||
334 | return pow_two_sectors; | ||
335 | } | ||
336 | |||
337 | static inline void o2hb_compute_request_limits(struct o2hb_region *reg, | ||
338 | unsigned int num_slots, | ||
339 | unsigned int *num_bios, | ||
340 | unsigned int *slots_per_bio) | ||
341 | { | ||
342 | unsigned int max_sectors, io_sectors; | ||
343 | |||
344 | max_sectors = compute_max_sectors(reg->hr_bdev); | ||
345 | |||
346 | io_sectors = num_slots << (reg->hr_block_bits - 9); | ||
347 | |||
348 | *num_bios = (io_sectors + max_sectors - 1) / max_sectors; | ||
349 | *slots_per_bio = max_sectors >> (reg->hr_block_bits - 9); | ||
350 | |||
351 | mlog(ML_HB_BIO, "My io size is %u sectors for %u slots. This " | ||
352 | "device can handle %u sectors of I/O\n", io_sectors, num_slots, | ||
353 | max_sectors); | ||
354 | mlog(ML_HB_BIO, "Will need %u bios holding %u slots each\n", | ||
355 | *num_bios, *slots_per_bio); | ||
356 | } | ||
357 | |||
358 | static int o2hb_read_slots(struct o2hb_region *reg, | 293 | static int o2hb_read_slots(struct o2hb_region *reg, |
359 | unsigned int max_slots) | 294 | unsigned int max_slots) |
360 | { | 295 | { |
361 | unsigned int num_bios, slots_per_bio, start_slot, num_slots; | 296 | unsigned int current_slot=0; |
362 | int i, status; | 297 | int status; |
363 | struct o2hb_bio_wait_ctxt wc; | 298 | struct o2hb_bio_wait_ctxt wc; |
364 | struct bio **bios; | ||
365 | struct bio *bio; | 299 | struct bio *bio; |
366 | 300 | ||
367 | o2hb_compute_request_limits(reg, max_slots, &num_bios, &slots_per_bio); | 301 | o2hb_bio_wait_init(&wc); |
368 | 302 | ||
369 | bios = kcalloc(num_bios, sizeof(struct bio *), GFP_KERNEL); | 303 | while(current_slot < max_slots) { |
370 | if (!bios) { | 304 | bio = o2hb_setup_one_bio(reg, &wc, &current_slot, max_slots); |
371 | status = -ENOMEM; | ||
372 | mlog_errno(status); | ||
373 | return status; | ||
374 | } | ||
375 | |||
376 | o2hb_bio_wait_init(&wc, num_bios); | ||
377 | |||
378 | num_slots = slots_per_bio; | ||
379 | for(i = 0; i < num_bios; i++) { | ||
380 | start_slot = i * slots_per_bio; | ||
381 | |||
382 | /* adjust num_slots at last bio */ | ||
383 | if (max_slots < (start_slot + num_slots)) | ||
384 | num_slots = max_slots - start_slot; | ||
385 | |||
386 | bio = o2hb_setup_one_bio(reg, &wc, start_slot, num_slots); | ||
387 | if (IS_ERR(bio)) { | 305 | if (IS_ERR(bio)) { |
388 | o2hb_bio_wait_dec(&wc, num_bios - i); | ||
389 | |||
390 | status = PTR_ERR(bio); | 306 | status = PTR_ERR(bio); |
391 | mlog_errno(status); | 307 | mlog_errno(status); |
392 | goto bail_and_wait; | 308 | goto bail_and_wait; |
393 | } | 309 | } |
394 | bios[i] = bio; | ||
395 | 310 | ||
311 | atomic_inc(&wc.wc_num_reqs); | ||
396 | submit_bio(READ, bio); | 312 | submit_bio(READ, bio); |
397 | } | 313 | } |
398 | 314 | ||
@@ -403,38 +319,30 @@ bail_and_wait: | |||
403 | if (wc.wc_error && !status) | 319 | if (wc.wc_error && !status) |
404 | status = wc.wc_error; | 320 | status = wc.wc_error; |
405 | 321 | ||
406 | if (bios) { | ||
407 | for(i = 0; i < num_bios; i++) | ||
408 | if (bios[i]) | ||
409 | bio_put(bios[i]); | ||
410 | kfree(bios); | ||
411 | } | ||
412 | |||
413 | return status; | 322 | return status; |
414 | } | 323 | } |
415 | 324 | ||
416 | static int o2hb_issue_node_write(struct o2hb_region *reg, | 325 | static int o2hb_issue_node_write(struct o2hb_region *reg, |
417 | struct bio **write_bio, | ||
418 | struct o2hb_bio_wait_ctxt *write_wc) | 326 | struct o2hb_bio_wait_ctxt *write_wc) |
419 | { | 327 | { |
420 | int status; | 328 | int status; |
421 | unsigned int slot; | 329 | unsigned int slot; |
422 | struct bio *bio; | 330 | struct bio *bio; |
423 | 331 | ||
424 | o2hb_bio_wait_init(write_wc, 1); | 332 | o2hb_bio_wait_init(write_wc); |
425 | 333 | ||
426 | slot = o2nm_this_node(); | 334 | slot = o2nm_this_node(); |
427 | 335 | ||
428 | bio = o2hb_setup_one_bio(reg, write_wc, slot, 1); | 336 | bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1); |
429 | if (IS_ERR(bio)) { | 337 | if (IS_ERR(bio)) { |
430 | status = PTR_ERR(bio); | 338 | status = PTR_ERR(bio); |
431 | mlog_errno(status); | 339 | mlog_errno(status); |
432 | goto bail; | 340 | goto bail; |
433 | } | 341 | } |
434 | 342 | ||
343 | atomic_inc(&write_wc->wc_num_reqs); | ||
435 | submit_bio(WRITE, bio); | 344 | submit_bio(WRITE, bio); |
436 | 345 | ||
437 | *write_bio = bio; | ||
438 | status = 0; | 346 | status = 0; |
439 | bail: | 347 | bail: |
440 | return status; | 348 | return status; |
@@ -826,7 +734,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
826 | { | 734 | { |
827 | int i, ret, highest_node, change = 0; | 735 | int i, ret, highest_node, change = 0; |
828 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 736 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
829 | struct bio *write_bio; | ||
830 | struct o2hb_bio_wait_ctxt write_wc; | 737 | struct o2hb_bio_wait_ctxt write_wc; |
831 | 738 | ||
832 | ret = o2nm_configured_node_map(configured_nodes, | 739 | ret = o2nm_configured_node_map(configured_nodes, |
@@ -864,7 +771,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
864 | 771 | ||
865 | /* And fire off the write. Note that we don't wait on this I/O | 772 | /* And fire off the write. Note that we don't wait on this I/O |
866 | * until later. */ | 773 | * until later. */ |
867 | ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); | 774 | ret = o2hb_issue_node_write(reg, &write_wc); |
868 | if (ret < 0) { | 775 | if (ret < 0) { |
869 | mlog_errno(ret); | 776 | mlog_errno(ret); |
870 | return ret; | 777 | return ret; |
@@ -882,7 +789,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
882 | * people we find in our steady state have seen us. | 789 | * people we find in our steady state have seen us. |
883 | */ | 790 | */ |
884 | o2hb_wait_on_io(reg, &write_wc); | 791 | o2hb_wait_on_io(reg, &write_wc); |
885 | bio_put(write_bio); | ||
886 | if (write_wc.wc_error) { | 792 | if (write_wc.wc_error) { |
887 | /* Do not re-arm the write timeout on I/O error - we | 793 | /* Do not re-arm the write timeout on I/O error - we |
888 | * can't be sure that the new block ever made it to | 794 | * can't be sure that the new block ever made it to |
@@ -943,7 +849,6 @@ static int o2hb_thread(void *data) | |||
943 | { | 849 | { |
944 | int i, ret; | 850 | int i, ret; |
945 | struct o2hb_region *reg = data; | 851 | struct o2hb_region *reg = data; |
946 | struct bio *write_bio; | ||
947 | struct o2hb_bio_wait_ctxt write_wc; | 852 | struct o2hb_bio_wait_ctxt write_wc; |
948 | struct timeval before_hb, after_hb; | 853 | struct timeval before_hb, after_hb; |
949 | unsigned int elapsed_msec; | 854 | unsigned int elapsed_msec; |
@@ -993,10 +898,9 @@ static int o2hb_thread(void *data) | |||
993 | * | 898 | * |
994 | * XXX: Should we skip this on unclean_stop? */ | 899 | * XXX: Should we skip this on unclean_stop? */ |
995 | o2hb_prepare_block(reg, 0); | 900 | o2hb_prepare_block(reg, 0); |
996 | ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); | 901 | ret = o2hb_issue_node_write(reg, &write_wc); |
997 | if (ret == 0) { | 902 | if (ret == 0) { |
998 | o2hb_wait_on_io(reg, &write_wc); | 903 | o2hb_wait_on_io(reg, &write_wc); |
999 | bio_put(write_bio); | ||
1000 | } else { | 904 | } else { |
1001 | mlog_errno(ret); | 905 | mlog_errno(ret); |
1002 | } | 906 | } |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ae4ff4a6636b..1718215fc018 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -556,6 +556,8 @@ static void o2net_register_callbacks(struct sock *sk, | |||
556 | sk->sk_data_ready = o2net_data_ready; | 556 | sk->sk_data_ready = o2net_data_ready; |
557 | sk->sk_state_change = o2net_state_change; | 557 | sk->sk_state_change = o2net_state_change; |
558 | 558 | ||
559 | mutex_init(&sc->sc_send_lock); | ||
560 | |||
559 | write_unlock_bh(&sk->sk_callback_lock); | 561 | write_unlock_bh(&sk->sk_callback_lock); |
560 | } | 562 | } |
561 | 563 | ||
@@ -688,6 +690,7 @@ static void o2net_handler_put(struct o2net_msg_handler *nmh) | |||
688 | * be given to the handler if their payload is longer than the max. */ | 690 | * be given to the handler if their payload is longer than the max. */ |
689 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | 691 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, |
690 | o2net_msg_handler_func *func, void *data, | 692 | o2net_msg_handler_func *func, void *data, |
693 | o2net_post_msg_handler_func *post_func, | ||
691 | struct list_head *unreg_list) | 694 | struct list_head *unreg_list) |
692 | { | 695 | { |
693 | struct o2net_msg_handler *nmh = NULL; | 696 | struct o2net_msg_handler *nmh = NULL; |
@@ -722,6 +725,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | |||
722 | 725 | ||
723 | nmh->nh_func = func; | 726 | nmh->nh_func = func; |
724 | nmh->nh_func_data = data; | 727 | nmh->nh_func_data = data; |
728 | nmh->nh_post_func = post_func; | ||
725 | nmh->nh_msg_type = msg_type; | 729 | nmh->nh_msg_type = msg_type; |
726 | nmh->nh_max_len = max_len; | 730 | nmh->nh_max_len = max_len; |
727 | nmh->nh_key = key; | 731 | nmh->nh_key = key; |
@@ -856,10 +860,12 @@ static void o2net_sendpage(struct o2net_sock_container *sc, | |||
856 | ssize_t ret; | 860 | ssize_t ret; |
857 | 861 | ||
858 | 862 | ||
863 | mutex_lock(&sc->sc_send_lock); | ||
859 | ret = sc->sc_sock->ops->sendpage(sc->sc_sock, | 864 | ret = sc->sc_sock->ops->sendpage(sc->sc_sock, |
860 | virt_to_page(kmalloced_virt), | 865 | virt_to_page(kmalloced_virt), |
861 | (long)kmalloced_virt & ~PAGE_MASK, | 866 | (long)kmalloced_virt & ~PAGE_MASK, |
862 | size, MSG_DONTWAIT); | 867 | size, MSG_DONTWAIT); |
868 | mutex_unlock(&sc->sc_send_lock); | ||
863 | if (ret != size) { | 869 | if (ret != size) { |
864 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT | 870 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT |
865 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); | 871 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); |
@@ -974,8 +980,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
974 | 980 | ||
975 | /* finally, convert the message header to network byte-order | 981 | /* finally, convert the message header to network byte-order |
976 | * and send */ | 982 | * and send */ |
983 | mutex_lock(&sc->sc_send_lock); | ||
977 | ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, | 984 | ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, |
978 | sizeof(struct o2net_msg) + caller_bytes); | 985 | sizeof(struct o2net_msg) + caller_bytes); |
986 | mutex_unlock(&sc->sc_send_lock); | ||
979 | msglog(msg, "sending returned %d\n", ret); | 987 | msglog(msg, "sending returned %d\n", ret); |
980 | if (ret < 0) { | 988 | if (ret < 0) { |
981 | mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); | 989 | mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); |
@@ -1049,6 +1057,7 @@ static int o2net_process_message(struct o2net_sock_container *sc, | |||
1049 | int ret = 0, handler_status; | 1057 | int ret = 0, handler_status; |
1050 | enum o2net_system_error syserr; | 1058 | enum o2net_system_error syserr; |
1051 | struct o2net_msg_handler *nmh = NULL; | 1059 | struct o2net_msg_handler *nmh = NULL; |
1060 | void *ret_data = NULL; | ||
1052 | 1061 | ||
1053 | msglog(hdr, "processing message\n"); | 1062 | msglog(hdr, "processing message\n"); |
1054 | 1063 | ||
@@ -1101,17 +1110,26 @@ static int o2net_process_message(struct o2net_sock_container *sc, | |||
1101 | sc->sc_msg_type = be16_to_cpu(hdr->msg_type); | 1110 | sc->sc_msg_type = be16_to_cpu(hdr->msg_type); |
1102 | handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + | 1111 | handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + |
1103 | be16_to_cpu(hdr->data_len), | 1112 | be16_to_cpu(hdr->data_len), |
1104 | nmh->nh_func_data); | 1113 | nmh->nh_func_data, &ret_data); |
1105 | do_gettimeofday(&sc->sc_tv_func_stop); | 1114 | do_gettimeofday(&sc->sc_tv_func_stop); |
1106 | 1115 | ||
1107 | out_respond: | 1116 | out_respond: |
1108 | /* this destroys the hdr, so don't use it after this */ | 1117 | /* this destroys the hdr, so don't use it after this */ |
1118 | mutex_lock(&sc->sc_send_lock); | ||
1109 | ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, | 1119 | ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, |
1110 | handler_status); | 1120 | handler_status); |
1121 | mutex_unlock(&sc->sc_send_lock); | ||
1111 | hdr = NULL; | 1122 | hdr = NULL; |
1112 | mlog(0, "sending handler status %d, syserr %d returned %d\n", | 1123 | mlog(0, "sending handler status %d, syserr %d returned %d\n", |
1113 | handler_status, syserr, ret); | 1124 | handler_status, syserr, ret); |
1114 | 1125 | ||
1126 | if (nmh) { | ||
1127 | BUG_ON(ret_data != NULL && nmh->nh_post_func == NULL); | ||
1128 | if (nmh->nh_post_func) | ||
1129 | (nmh->nh_post_func)(handler_status, nmh->nh_func_data, | ||
1130 | ret_data); | ||
1131 | } | ||
1132 | |||
1115 | out: | 1133 | out: |
1116 | if (nmh) | 1134 | if (nmh) |
1117 | o2net_handler_put(nmh); | 1135 | o2net_handler_put(nmh); |
@@ -1795,13 +1813,13 @@ out: | |||
1795 | ready(sk, bytes); | 1813 | ready(sk, bytes); |
1796 | } | 1814 | } |
1797 | 1815 | ||
1798 | static int o2net_open_listening_sock(__be16 port) | 1816 | static int o2net_open_listening_sock(__be32 addr, __be16 port) |
1799 | { | 1817 | { |
1800 | struct socket *sock = NULL; | 1818 | struct socket *sock = NULL; |
1801 | int ret; | 1819 | int ret; |
1802 | struct sockaddr_in sin = { | 1820 | struct sockaddr_in sin = { |
1803 | .sin_family = PF_INET, | 1821 | .sin_family = PF_INET, |
1804 | .sin_addr = { .s_addr = (__force u32)htonl(INADDR_ANY) }, | 1822 | .sin_addr = { .s_addr = (__force u32)addr }, |
1805 | .sin_port = (__force u16)port, | 1823 | .sin_port = (__force u16)port, |
1806 | }; | 1824 | }; |
1807 | 1825 | ||
@@ -1824,15 +1842,15 @@ static int o2net_open_listening_sock(__be16 port) | |||
1824 | sock->sk->sk_reuse = 1; | 1842 | sock->sk->sk_reuse = 1; |
1825 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); | 1843 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); |
1826 | if (ret < 0) { | 1844 | if (ret < 0) { |
1827 | mlog(ML_ERROR, "unable to bind socket to port %d, ret=%d\n", | 1845 | mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, " |
1828 | ntohs(port), ret); | 1846 | "ret=%d\n", NIPQUAD(addr), ntohs(port), ret); |
1829 | goto out; | 1847 | goto out; |
1830 | } | 1848 | } |
1831 | 1849 | ||
1832 | ret = sock->ops->listen(sock, 64); | 1850 | ret = sock->ops->listen(sock, 64); |
1833 | if (ret < 0) { | 1851 | if (ret < 0) { |
1834 | mlog(ML_ERROR, "unable to listen on port %d, ret=%d\n", | 1852 | mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n", |
1835 | ntohs(port), ret); | 1853 | NIPQUAD(addr), ntohs(port), ret); |
1836 | } | 1854 | } |
1837 | 1855 | ||
1838 | out: | 1856 | out: |
@@ -1865,7 +1883,8 @@ int o2net_start_listening(struct o2nm_node *node) | |||
1865 | return -ENOMEM; /* ? */ | 1883 | return -ENOMEM; /* ? */ |
1866 | } | 1884 | } |
1867 | 1885 | ||
1868 | ret = o2net_open_listening_sock(node->nd_ipv4_port); | 1886 | ret = o2net_open_listening_sock(node->nd_ipv4_address, |
1887 | node->nd_ipv4_port); | ||
1869 | if (ret) { | 1888 | if (ret) { |
1870 | destroy_workqueue(o2net_wq); | 1889 | destroy_workqueue(o2net_wq); |
1871 | o2net_wq = NULL; | 1890 | o2net_wq = NULL; |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index 21a4e43df836..da880fc215f0 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -50,7 +50,10 @@ struct o2net_msg | |||
50 | __u8 buf[0]; | 50 | __u8 buf[0]; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data); | 53 | typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data, |
54 | void **ret_data); | ||
55 | typedef void (o2net_post_msg_handler_func)(int status, void *data, | ||
56 | void *ret_data); | ||
54 | 57 | ||
55 | #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) | 58 | #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) |
56 | 59 | ||
@@ -99,6 +102,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *vec, | |||
99 | 102 | ||
100 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | 103 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, |
101 | o2net_msg_handler_func *func, void *data, | 104 | o2net_msg_handler_func *func, void *data, |
105 | o2net_post_msg_handler_func *post_func, | ||
102 | struct list_head *unreg_list); | 106 | struct list_head *unreg_list); |
103 | void o2net_unregister_handler_list(struct list_head *list); | 107 | void o2net_unregister_handler_list(struct list_head *list); |
104 | 108 | ||
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index b700dc9624d1..4dae5df5e467 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -38,6 +38,12 @@ | |||
38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
40 | * | 40 | * |
41 | * New in version 7: | ||
42 | * - DLM join domain includes the live nodemap | ||
43 | * | ||
44 | * New in version 6: | ||
45 | * - DLM lockres remote refcount fixes. | ||
46 | * | ||
41 | * New in version 5: | 47 | * New in version 5: |
42 | * - Network timeout checking protocol | 48 | * - Network timeout checking protocol |
43 | * | 49 | * |
@@ -51,7 +57,7 @@ | |||
51 | * - full 64 bit i_size in the metadata lock lvbs | 57 | * - full 64 bit i_size in the metadata lock lvbs |
52 | * - introduction of "rw" lock and pushing meta/data locking down | 58 | * - introduction of "rw" lock and pushing meta/data locking down |
53 | */ | 59 | */ |
54 | #define O2NET_PROTOCOL_VERSION 5ULL | 60 | #define O2NET_PROTOCOL_VERSION 7ULL |
55 | struct o2net_handshake { | 61 | struct o2net_handshake { |
56 | __be64 protocol_version; | 62 | __be64 protocol_version; |
57 | __be64 connector_id; | 63 | __be64 connector_id; |
@@ -149,6 +155,8 @@ struct o2net_sock_container { | |||
149 | struct timeval sc_tv_func_stop; | 155 | struct timeval sc_tv_func_stop; |
150 | u32 sc_msg_key; | 156 | u32 sc_msg_key; |
151 | u16 sc_msg_type; | 157 | u16 sc_msg_type; |
158 | |||
159 | struct mutex sc_send_lock; | ||
152 | }; | 160 | }; |
153 | 161 | ||
154 | struct o2net_msg_handler { | 162 | struct o2net_msg_handler { |
@@ -158,6 +166,8 @@ struct o2net_msg_handler { | |||
158 | u32 nh_key; | 166 | u32 nh_key; |
159 | o2net_msg_handler_func *nh_func; | 167 | o2net_msg_handler_func *nh_func; |
160 | o2net_msg_handler_func *nh_func_data; | 168 | o2net_msg_handler_func *nh_func_data; |
169 | o2net_post_msg_handler_func | ||
170 | *nh_post_func; | ||
161 | struct kref nh_kref; | 171 | struct kref nh_kref; |
162 | struct list_head nh_unregister_item; | 172 | struct list_head nh_unregister_item; |
163 | }; | 173 | }; |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 681046d51393..241cad342a48 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -263,7 +263,8 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
263 | 263 | ||
264 | 264 | ||
265 | 265 | ||
266 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | 266 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, |
267 | void **ret_data) | ||
267 | { | 268 | { |
268 | int ret; | 269 | int ret; |
269 | unsigned int locklen; | 270 | unsigned int locklen; |
@@ -311,8 +312,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
311 | past->type != DLM_BAST) { | 312 | past->type != DLM_BAST) { |
312 | mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" | 313 | mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" |
313 | "name=%.*s\n", past->type, | 314 | "name=%.*s\n", past->type, |
314 | dlm_get_lock_cookie_node(cookie), | 315 | dlm_get_lock_cookie_node(be64_to_cpu(cookie)), |
315 | dlm_get_lock_cookie_seq(cookie), | 316 | dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), |
316 | locklen, name); | 317 | locklen, name); |
317 | ret = DLM_IVLOCKID; | 318 | ret = DLM_IVLOCKID; |
318 | goto leave; | 319 | goto leave; |
@@ -323,8 +324,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
323 | mlog(0, "got %sast for unknown lockres! " | 324 | mlog(0, "got %sast for unknown lockres! " |
324 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", | 325 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", |
325 | past->type == DLM_AST ? "" : "b", | 326 | past->type == DLM_AST ? "" : "b", |
326 | dlm_get_lock_cookie_node(cookie), | 327 | dlm_get_lock_cookie_node(be64_to_cpu(cookie)), |
327 | dlm_get_lock_cookie_seq(cookie), | 328 | dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), |
328 | locklen, name, locklen); | 329 | locklen, name, locklen); |
329 | ret = DLM_IVLOCKID; | 330 | ret = DLM_IVLOCKID; |
330 | goto leave; | 331 | goto leave; |
@@ -369,7 +370,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
369 | 370 | ||
370 | mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " | 371 | mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " |
371 | "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", | 372 | "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", |
372 | dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie), | 373 | dlm_get_lock_cookie_node(be64_to_cpu(cookie)), |
374 | dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), | ||
373 | locklen, name, locklen); | 375 | locklen, name, locklen); |
374 | 376 | ||
375 | ret = DLM_NORMAL; | 377 | ret = DLM_NORMAL; |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 6b6ff76538c5..e90b92f9ece1 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -180,6 +180,11 @@ struct dlm_assert_master_priv | |||
180 | unsigned ignore_higher:1; | 180 | unsigned ignore_higher:1; |
181 | }; | 181 | }; |
182 | 182 | ||
183 | struct dlm_deref_lockres_priv | ||
184 | { | ||
185 | struct dlm_lock_resource *deref_res; | ||
186 | u8 deref_node; | ||
187 | }; | ||
183 | 188 | ||
184 | struct dlm_work_item | 189 | struct dlm_work_item |
185 | { | 190 | { |
@@ -191,6 +196,7 @@ struct dlm_work_item | |||
191 | struct dlm_request_all_locks_priv ral; | 196 | struct dlm_request_all_locks_priv ral; |
192 | struct dlm_mig_lockres_priv ml; | 197 | struct dlm_mig_lockres_priv ml; |
193 | struct dlm_assert_master_priv am; | 198 | struct dlm_assert_master_priv am; |
199 | struct dlm_deref_lockres_priv dl; | ||
194 | } u; | 200 | } u; |
195 | }; | 201 | }; |
196 | 202 | ||
@@ -222,6 +228,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, | |||
222 | #define DLM_LOCK_RES_DIRTY 0x00000008 | 228 | #define DLM_LOCK_RES_DIRTY 0x00000008 |
223 | #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 | 229 | #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 |
224 | #define DLM_LOCK_RES_MIGRATING 0x00000020 | 230 | #define DLM_LOCK_RES_MIGRATING 0x00000020 |
231 | #define DLM_LOCK_RES_DROPPING_REF 0x00000040 | ||
232 | #define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 | ||
233 | #define DLM_LOCK_RES_SETREF_INPROG 0x00002000 | ||
225 | 234 | ||
226 | /* max milliseconds to wait to sync up a network failure with a node death */ | 235 | /* max milliseconds to wait to sync up a network failure with a node death */ |
227 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) | 236 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) |
@@ -265,6 +274,8 @@ struct dlm_lock_resource | |||
265 | u8 owner; //node which owns the lock resource, or unknown | 274 | u8 owner; //node which owns the lock resource, or unknown |
266 | u16 state; | 275 | u16 state; |
267 | char lvb[DLM_LVB_LEN]; | 276 | char lvb[DLM_LVB_LEN]; |
277 | unsigned int inflight_locks; | ||
278 | unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
268 | }; | 279 | }; |
269 | 280 | ||
270 | struct dlm_migratable_lock | 281 | struct dlm_migratable_lock |
@@ -367,7 +378,7 @@ enum { | |||
367 | DLM_CONVERT_LOCK_MSG, /* 504 */ | 378 | DLM_CONVERT_LOCK_MSG, /* 504 */ |
368 | DLM_PROXY_AST_MSG, /* 505 */ | 379 | DLM_PROXY_AST_MSG, /* 505 */ |
369 | DLM_UNLOCK_LOCK_MSG, /* 506 */ | 380 | DLM_UNLOCK_LOCK_MSG, /* 506 */ |
370 | DLM_UNUSED_MSG2, /* 507 */ | 381 | DLM_DEREF_LOCKRES_MSG, /* 507 */ |
371 | DLM_MIGRATE_REQUEST_MSG, /* 508 */ | 382 | DLM_MIGRATE_REQUEST_MSG, /* 508 */ |
372 | DLM_MIG_LOCKRES_MSG, /* 509 */ | 383 | DLM_MIG_LOCKRES_MSG, /* 509 */ |
373 | DLM_QUERY_JOIN_MSG, /* 510 */ | 384 | DLM_QUERY_JOIN_MSG, /* 510 */ |
@@ -417,6 +428,9 @@ struct dlm_master_request | |||
417 | u8 name[O2NM_MAX_NAME_LEN]; | 428 | u8 name[O2NM_MAX_NAME_LEN]; |
418 | }; | 429 | }; |
419 | 430 | ||
431 | #define DLM_ASSERT_RESPONSE_REASSERT 0x00000001 | ||
432 | #define DLM_ASSERT_RESPONSE_MASTERY_REF 0x00000002 | ||
433 | |||
420 | #define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001 | 434 | #define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001 |
421 | #define DLM_ASSERT_MASTER_REQUERY 0x00000002 | 435 | #define DLM_ASSERT_MASTER_REQUERY 0x00000002 |
422 | #define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004 | 436 | #define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004 |
@@ -430,6 +444,8 @@ struct dlm_assert_master | |||
430 | u8 name[O2NM_MAX_NAME_LEN]; | 444 | u8 name[O2NM_MAX_NAME_LEN]; |
431 | }; | 445 | }; |
432 | 446 | ||
447 | #define DLM_MIGRATE_RESPONSE_MASTERY_REF 0x00000001 | ||
448 | |||
433 | struct dlm_migrate_request | 449 | struct dlm_migrate_request |
434 | { | 450 | { |
435 | u8 master; | 451 | u8 master; |
@@ -609,12 +625,16 @@ struct dlm_begin_reco | |||
609 | }; | 625 | }; |
610 | 626 | ||
611 | 627 | ||
628 | #define BITS_PER_BYTE 8 | ||
629 | #define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE) | ||
630 | |||
612 | struct dlm_query_join_request | 631 | struct dlm_query_join_request |
613 | { | 632 | { |
614 | u8 node_idx; | 633 | u8 node_idx; |
615 | u8 pad1[2]; | 634 | u8 pad1[2]; |
616 | u8 name_len; | 635 | u8 name_len; |
617 | u8 domain[O2NM_MAX_NAME_LEN]; | 636 | u8 domain[O2NM_MAX_NAME_LEN]; |
637 | u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)]; | ||
618 | }; | 638 | }; |
619 | 639 | ||
620 | struct dlm_assert_joined | 640 | struct dlm_assert_joined |
@@ -648,6 +668,16 @@ struct dlm_finalize_reco | |||
648 | __be32 pad2; | 668 | __be32 pad2; |
649 | }; | 669 | }; |
650 | 670 | ||
671 | struct dlm_deref_lockres | ||
672 | { | ||
673 | u32 pad1; | ||
674 | u16 pad2; | ||
675 | u8 node_idx; | ||
676 | u8 namelen; | ||
677 | |||
678 | u8 name[O2NM_MAX_NAME_LEN]; | ||
679 | }; | ||
680 | |||
651 | static inline enum dlm_status | 681 | static inline enum dlm_status |
652 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) | 682 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) |
653 | { | 683 | { |
@@ -688,16 +718,20 @@ void dlm_lock_put(struct dlm_lock *lock); | |||
688 | void dlm_lock_attach_lockres(struct dlm_lock *lock, | 718 | void dlm_lock_attach_lockres(struct dlm_lock *lock, |
689 | struct dlm_lock_resource *res); | 719 | struct dlm_lock_resource *res); |
690 | 720 | ||
691 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data); | 721 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
692 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data); | 722 | void **ret_data); |
693 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data); | 723 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
724 | void **ret_data); | ||
725 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | ||
726 | void **ret_data); | ||
694 | 727 | ||
695 | void dlm_revert_pending_convert(struct dlm_lock_resource *res, | 728 | void dlm_revert_pending_convert(struct dlm_lock_resource *res, |
696 | struct dlm_lock *lock); | 729 | struct dlm_lock *lock); |
697 | void dlm_revert_pending_lock(struct dlm_lock_resource *res, | 730 | void dlm_revert_pending_lock(struct dlm_lock_resource *res, |
698 | struct dlm_lock *lock); | 731 | struct dlm_lock *lock); |
699 | 732 | ||
700 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data); | 733 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
734 | void **ret_data); | ||
701 | void dlm_commit_pending_cancel(struct dlm_lock_resource *res, | 735 | void dlm_commit_pending_cancel(struct dlm_lock_resource *res, |
702 | struct dlm_lock *lock); | 736 | struct dlm_lock *lock); |
703 | void dlm_commit_pending_unlock(struct dlm_lock_resource *res, | 737 | void dlm_commit_pending_unlock(struct dlm_lock_resource *res, |
@@ -721,8 +755,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
721 | struct dlm_lock_resource *res); | 755 | struct dlm_lock_resource *res); |
722 | void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | 756 | void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, |
723 | struct dlm_lock_resource *res); | 757 | struct dlm_lock_resource *res); |
724 | void dlm_purge_lockres(struct dlm_ctxt *dlm, | ||
725 | struct dlm_lock_resource *lockres); | ||
726 | static inline void dlm_lockres_get(struct dlm_lock_resource *res) | 758 | static inline void dlm_lockres_get(struct dlm_lock_resource *res) |
727 | { | 759 | { |
728 | /* This is called on every lookup, so it might be worth | 760 | /* This is called on every lookup, so it might be worth |
@@ -733,6 +765,10 @@ void dlm_lockres_put(struct dlm_lock_resource *res); | |||
733 | void __dlm_unhash_lockres(struct dlm_lock_resource *res); | 765 | void __dlm_unhash_lockres(struct dlm_lock_resource *res); |
734 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, | 766 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, |
735 | struct dlm_lock_resource *res); | 767 | struct dlm_lock_resource *res); |
768 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, | ||
769 | const char *name, | ||
770 | unsigned int len, | ||
771 | unsigned int hash); | ||
736 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | 772 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, |
737 | const char *name, | 773 | const char *name, |
738 | unsigned int len, | 774 | unsigned int len, |
@@ -753,6 +789,47 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
753 | const char *name, | 789 | const char *name, |
754 | unsigned int namelen); | 790 | unsigned int namelen); |
755 | 791 | ||
792 | #define dlm_lockres_set_refmap_bit(bit,res) \ | ||
793 | __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__) | ||
794 | #define dlm_lockres_clear_refmap_bit(bit,res) \ | ||
795 | __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__) | ||
796 | |||
797 | static inline void __dlm_lockres_set_refmap_bit(int bit, | ||
798 | struct dlm_lock_resource *res, | ||
799 | const char *file, | ||
800 | int line) | ||
801 | { | ||
802 | //printk("%s:%d:%.*s: setting bit %d\n", file, line, | ||
803 | // res->lockname.len, res->lockname.name, bit); | ||
804 | set_bit(bit, res->refmap); | ||
805 | } | ||
806 | |||
807 | static inline void __dlm_lockres_clear_refmap_bit(int bit, | ||
808 | struct dlm_lock_resource *res, | ||
809 | const char *file, | ||
810 | int line) | ||
811 | { | ||
812 | //printk("%s:%d:%.*s: clearing bit %d\n", file, line, | ||
813 | // res->lockname.len, res->lockname.name, bit); | ||
814 | clear_bit(bit, res->refmap); | ||
815 | } | ||
816 | |||
817 | void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | ||
818 | struct dlm_lock_resource *res, | ||
819 | const char *file, | ||
820 | int line); | ||
821 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
822 | struct dlm_lock_resource *res, | ||
823 | int new_lockres, | ||
824 | const char *file, | ||
825 | int line); | ||
826 | #define dlm_lockres_drop_inflight_ref(d,r) \ | ||
827 | __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__) | ||
828 | #define dlm_lockres_grab_inflight_ref(d,r) \ | ||
829 | __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__) | ||
830 | #define dlm_lockres_grab_inflight_ref_new(d,r) \ | ||
831 | __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__) | ||
832 | |||
756 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 833 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
757 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 834 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
758 | void dlm_do_local_ast(struct dlm_ctxt *dlm, | 835 | void dlm_do_local_ast(struct dlm_ctxt *dlm, |
@@ -801,10 +878,7 @@ int dlm_heartbeat_init(struct dlm_ctxt *dlm); | |||
801 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); | 878 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); |
802 | void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); | 879 | void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); |
803 | 880 | ||
804 | int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); | 881 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); |
805 | int dlm_migrate_lockres(struct dlm_ctxt *dlm, | ||
806 | struct dlm_lock_resource *res, | ||
807 | u8 target); | ||
808 | int dlm_finish_migration(struct dlm_ctxt *dlm, | 882 | int dlm_finish_migration(struct dlm_ctxt *dlm, |
809 | struct dlm_lock_resource *res, | 883 | struct dlm_lock_resource *res, |
810 | u8 old_master); | 884 | u8 old_master); |
@@ -812,15 +886,27 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, | |||
812 | struct dlm_lock_resource *res); | 886 | struct dlm_lock_resource *res); |
813 | void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res); | 887 | void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res); |
814 | 888 | ||
815 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data); | 889 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, |
816 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data); | 890 | void **ret_data); |
817 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data); | 891 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, |
818 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data); | 892 | void **ret_data); |
819 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data); | 893 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data); |
820 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data); | 894 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
821 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data); | 895 | void **ret_data); |
822 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data); | 896 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, |
823 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); | 897 | void **ret_data); |
898 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | ||
899 | void **ret_data); | ||
900 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, | ||
901 | void **ret_data); | ||
902 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data, | ||
903 | void **ret_data); | ||
904 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, | ||
905 | void **ret_data); | ||
906 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | ||
907 | void **ret_data); | ||
908 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, | ||
909 | void **ret_data); | ||
824 | int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 910 | int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, |
825 | u8 nodenum, u8 *real_master); | 911 | u8 nodenum, u8 *real_master); |
826 | 912 | ||
@@ -856,10 +942,12 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
856 | int dlm_init_mle_cache(void); | 942 | int dlm_init_mle_cache(void); |
857 | void dlm_destroy_mle_cache(void); | 943 | void dlm_destroy_mle_cache(void); |
858 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); | 944 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); |
945 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | ||
946 | struct dlm_lock_resource *res); | ||
859 | void dlm_clean_master_list(struct dlm_ctxt *dlm, | 947 | void dlm_clean_master_list(struct dlm_ctxt *dlm, |
860 | u8 dead_node); | 948 | u8 dead_node); |
861 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 949 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
862 | 950 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); | |
863 | int __dlm_lockres_unused(struct dlm_lock_resource *res); | 951 | int __dlm_lockres_unused(struct dlm_lock_resource *res); |
864 | 952 | ||
865 | static inline const char * dlm_lock_mode_name(int mode) | 953 | static inline const char * dlm_lock_mode_name(int mode) |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index c764dc8e40a2..ecb4d997221e 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -286,8 +286,8 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, | |||
286 | __dlm_print_one_lock_resource(res); | 286 | __dlm_print_one_lock_resource(res); |
287 | mlog(ML_ERROR, "converting a remote lock that is already " | 287 | mlog(ML_ERROR, "converting a remote lock that is already " |
288 | "converting! (cookie=%u:%llu, conv=%d)\n", | 288 | "converting! (cookie=%u:%llu, conv=%d)\n", |
289 | dlm_get_lock_cookie_node(lock->ml.cookie), | 289 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
290 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 290 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
291 | lock->ml.convert_type); | 291 | lock->ml.convert_type); |
292 | status = DLM_DENIED; | 292 | status = DLM_DENIED; |
293 | goto bail; | 293 | goto bail; |
@@ -418,7 +418,8 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, | |||
418 | * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, | 418 | * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, |
419 | * status from __dlmconvert_master | 419 | * status from __dlmconvert_master |
420 | */ | 420 | */ |
421 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | 421 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
422 | void **ret_data) | ||
422 | { | 423 | { |
423 | struct dlm_ctxt *dlm = data; | 424 | struct dlm_ctxt *dlm = data; |
424 | struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; | 425 | struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; |
@@ -428,7 +429,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
428 | struct dlm_lockstatus *lksb; | 429 | struct dlm_lockstatus *lksb; |
429 | enum dlm_status status = DLM_NORMAL; | 430 | enum dlm_status status = DLM_NORMAL; |
430 | u32 flags; | 431 | u32 flags; |
431 | int call_ast = 0, kick_thread = 0, ast_reserved = 0; | 432 | int call_ast = 0, kick_thread = 0, ast_reserved = 0, wake = 0; |
432 | 433 | ||
433 | if (!dlm_grab(dlm)) { | 434 | if (!dlm_grab(dlm)) { |
434 | dlm_error(DLM_REJECTED); | 435 | dlm_error(DLM_REJECTED); |
@@ -479,25 +480,14 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
479 | } | 480 | } |
480 | lock = NULL; | 481 | lock = NULL; |
481 | } | 482 | } |
482 | if (!lock) { | ||
483 | __dlm_print_one_lock_resource(res); | ||
484 | list_for_each(iter, &res->granted) { | ||
485 | lock = list_entry(iter, struct dlm_lock, list); | ||
486 | if (lock->ml.node == cnv->node_idx) { | ||
487 | mlog(ML_ERROR, "There is something here " | ||
488 | "for node %u, lock->ml.cookie=%llu, " | ||
489 | "cnv->cookie=%llu\n", cnv->node_idx, | ||
490 | (unsigned long long)lock->ml.cookie, | ||
491 | (unsigned long long)cnv->cookie); | ||
492 | break; | ||
493 | } | ||
494 | } | ||
495 | lock = NULL; | ||
496 | } | ||
497 | spin_unlock(&res->spinlock); | 483 | spin_unlock(&res->spinlock); |
498 | if (!lock) { | 484 | if (!lock) { |
499 | status = DLM_IVLOCKID; | 485 | status = DLM_IVLOCKID; |
500 | dlm_error(status); | 486 | mlog(ML_ERROR, "did not find lock to convert on grant queue! " |
487 | "cookie=%u:%llu\n", | ||
488 | dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)), | ||
489 | dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie))); | ||
490 | __dlm_print_one_lock_resource(res); | ||
501 | goto leave; | 491 | goto leave; |
502 | } | 492 | } |
503 | 493 | ||
@@ -524,8 +514,11 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
524 | cnv->requested_type, | 514 | cnv->requested_type, |
525 | &call_ast, &kick_thread); | 515 | &call_ast, &kick_thread); |
526 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 516 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
517 | wake = 1; | ||
527 | } | 518 | } |
528 | spin_unlock(&res->spinlock); | 519 | spin_unlock(&res->spinlock); |
520 | if (wake) | ||
521 | wake_up(&res->wq); | ||
529 | 522 | ||
530 | if (status != DLM_NORMAL) { | 523 | if (status != DLM_NORMAL) { |
531 | if (status != DLM_NOTQUEUED) | 524 | if (status != DLM_NOTQUEUED) |
@@ -534,12 +527,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
534 | } | 527 | } |
535 | 528 | ||
536 | leave: | 529 | leave: |
537 | if (!lock) | 530 | if (lock) |
538 | mlog(ML_ERROR, "did not find lock to convert on grant queue! " | ||
539 | "cookie=%u:%llu\n", | ||
540 | dlm_get_lock_cookie_node(cnv->cookie), | ||
541 | dlm_get_lock_cookie_seq(cnv->cookie)); | ||
542 | else | ||
543 | dlm_lock_put(lock); | 531 | dlm_lock_put(lock); |
544 | 532 | ||
545 | /* either queue the ast or release it, if reserved */ | 533 | /* either queue the ast or release it, if reserved */ |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 3f6c8d88f7af..64239b37e5d4 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -53,6 +53,23 @@ void dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
53 | spin_unlock(&res->spinlock); | 53 | spin_unlock(&res->spinlock); |
54 | } | 54 | } |
55 | 55 | ||
56 | static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | ||
57 | { | ||
58 | int bit; | ||
59 | assert_spin_locked(&res->spinlock); | ||
60 | |||
61 | mlog(ML_NOTICE, " refmap nodes: [ "); | ||
62 | bit = 0; | ||
63 | while (1) { | ||
64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | ||
65 | if (bit >= O2NM_MAX_NODES) | ||
66 | break; | ||
67 | printk("%u ", bit); | ||
68 | bit++; | ||
69 | } | ||
70 | printk("], inflight=%u\n", res->inflight_locks); | ||
71 | } | ||
72 | |||
56 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 73 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
57 | { | 74 | { |
58 | struct list_head *iter2; | 75 | struct list_head *iter2; |
@@ -65,6 +82,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
65 | res->owner, res->state); | 82 | res->owner, res->state); |
66 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", | 83 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", |
67 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); | 84 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); |
85 | dlm_print_lockres_refmap(res); | ||
68 | mlog(ML_NOTICE, " granted queue: \n"); | 86 | mlog(ML_NOTICE, " granted queue: \n"); |
69 | list_for_each(iter2, &res->granted) { | 87 | list_for_each(iter2, &res->granted) { |
70 | lock = list_entry(iter2, struct dlm_lock, list); | 88 | lock = list_entry(iter2, struct dlm_lock, list); |
@@ -72,8 +90,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
72 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | 90 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " |
73 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | 91 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", |
74 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | 92 | lock->ml.type, lock->ml.convert_type, lock->ml.node, |
75 | dlm_get_lock_cookie_node(lock->ml.cookie), | 93 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
76 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 94 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
77 | list_empty(&lock->ast_list) ? 'y' : 'n', | 95 | list_empty(&lock->ast_list) ? 'y' : 'n', |
78 | lock->ast_pending ? 'y' : 'n', | 96 | lock->ast_pending ? 'y' : 'n', |
79 | list_empty(&lock->bast_list) ? 'y' : 'n', | 97 | list_empty(&lock->bast_list) ? 'y' : 'n', |
@@ -87,8 +105,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
87 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | 105 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " |
88 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | 106 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", |
89 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | 107 | lock->ml.type, lock->ml.convert_type, lock->ml.node, |
90 | dlm_get_lock_cookie_node(lock->ml.cookie), | 108 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
91 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 109 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
92 | list_empty(&lock->ast_list) ? 'y' : 'n', | 110 | list_empty(&lock->ast_list) ? 'y' : 'n', |
93 | lock->ast_pending ? 'y' : 'n', | 111 | lock->ast_pending ? 'y' : 'n', |
94 | list_empty(&lock->bast_list) ? 'y' : 'n', | 112 | list_empty(&lock->bast_list) ? 'y' : 'n', |
@@ -102,8 +120,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
102 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | 120 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " |
103 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | 121 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", |
104 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | 122 | lock->ml.type, lock->ml.convert_type, lock->ml.node, |
105 | dlm_get_lock_cookie_node(lock->ml.cookie), | 123 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
106 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 124 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
107 | list_empty(&lock->ast_list) ? 'y' : 'n', | 125 | list_empty(&lock->ast_list) ? 'y' : 'n', |
108 | lock->ast_pending ? 'y' : 'n', | 126 | lock->ast_pending ? 'y' : 'n', |
109 | list_empty(&lock->bast_list) ? 'y' : 'n', | 127 | list_empty(&lock->bast_list) ? 'y' : 'n', |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index f0b25f2dd205..6087c4749fee 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -48,6 +48,36 @@ | |||
48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) | 48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) |
49 | #include "cluster/masklog.h" | 49 | #include "cluster/masklog.h" |
50 | 50 | ||
51 | /* | ||
52 | * ocfs2 node maps are array of long int, which limits to send them freely | ||
53 | * across the wire due to endianness issues. To workaround this, we convert | ||
54 | * long ints to byte arrays. Following 3 routines are helper functions to | ||
55 | * set/test/copy bits within those array of bytes | ||
56 | */ | ||
57 | static inline void byte_set_bit(u8 nr, u8 map[]) | ||
58 | { | ||
59 | map[nr >> 3] |= (1UL << (nr & 7)); | ||
60 | } | ||
61 | |||
62 | static inline int byte_test_bit(u8 nr, u8 map[]) | ||
63 | { | ||
64 | return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0; | ||
65 | } | ||
66 | |||
67 | static inline void byte_copymap(u8 dmap[], unsigned long smap[], | ||
68 | unsigned int sz) | ||
69 | { | ||
70 | unsigned int nn; | ||
71 | |||
72 | if (!sz) | ||
73 | return; | ||
74 | |||
75 | memset(dmap, 0, ((sz + 7) >> 3)); | ||
76 | for (nn = 0 ; nn < sz; nn++) | ||
77 | if (test_bit(nn, smap)) | ||
78 | byte_set_bit(nn, dmap); | ||
79 | } | ||
80 | |||
51 | static void dlm_free_pagevec(void **vec, int pages) | 81 | static void dlm_free_pagevec(void **vec, int pages) |
52 | { | 82 | { |
53 | while (pages--) | 83 | while (pages--) |
@@ -95,10 +125,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
95 | 125 | ||
96 | #define DLM_DOMAIN_BACKOFF_MS 200 | 126 | #define DLM_DOMAIN_BACKOFF_MS 200 |
97 | 127 | ||
98 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data); | 128 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, |
99 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data); | 129 | void **ret_data); |
100 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data); | 130 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, |
101 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data); | 131 | void **ret_data); |
132 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | ||
133 | void **ret_data); | ||
134 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | ||
135 | void **ret_data); | ||
102 | 136 | ||
103 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); | 137 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); |
104 | 138 | ||
@@ -125,10 +159,10 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm, | |||
125 | hlist_add_head(&res->hash_node, bucket); | 159 | hlist_add_head(&res->hash_node, bucket); |
126 | } | 160 | } |
127 | 161 | ||
128 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | 162 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, |
129 | const char *name, | 163 | const char *name, |
130 | unsigned int len, | 164 | unsigned int len, |
131 | unsigned int hash) | 165 | unsigned int hash) |
132 | { | 166 | { |
133 | struct hlist_head *bucket; | 167 | struct hlist_head *bucket; |
134 | struct hlist_node *list; | 168 | struct hlist_node *list; |
@@ -154,6 +188,37 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | |||
154 | return NULL; | 188 | return NULL; |
155 | } | 189 | } |
156 | 190 | ||
191 | /* intended to be called by functions which do not care about lock | ||
192 | * resources which are being purged (most net _handler functions). | ||
193 | * this will return NULL for any lock resource which is found but | ||
194 | * currently in the process of dropping its mastery reference. | ||
195 | * use __dlm_lookup_lockres_full when you need the lock resource | ||
196 | * regardless (e.g. dlm_get_lock_resource) */ | ||
197 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | ||
198 | const char *name, | ||
199 | unsigned int len, | ||
200 | unsigned int hash) | ||
201 | { | ||
202 | struct dlm_lock_resource *res = NULL; | ||
203 | |||
204 | mlog_entry("%.*s\n", len, name); | ||
205 | |||
206 | assert_spin_locked(&dlm->spinlock); | ||
207 | |||
208 | res = __dlm_lookup_lockres_full(dlm, name, len, hash); | ||
209 | if (res) { | ||
210 | spin_lock(&res->spinlock); | ||
211 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | ||
212 | spin_unlock(&res->spinlock); | ||
213 | dlm_lockres_put(res); | ||
214 | return NULL; | ||
215 | } | ||
216 | spin_unlock(&res->spinlock); | ||
217 | } | ||
218 | |||
219 | return res; | ||
220 | } | ||
221 | |||
157 | struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, | 222 | struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, |
158 | const char *name, | 223 | const char *name, |
159 | unsigned int len) | 224 | unsigned int len) |
@@ -330,43 +395,60 @@ static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) | |||
330 | wake_up(&dlm_domain_events); | 395 | wake_up(&dlm_domain_events); |
331 | } | 396 | } |
332 | 397 | ||
333 | static void dlm_migrate_all_locks(struct dlm_ctxt *dlm) | 398 | static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) |
334 | { | 399 | { |
335 | int i; | 400 | int i, num, n, ret = 0; |
336 | struct dlm_lock_resource *res; | 401 | struct dlm_lock_resource *res; |
402 | struct hlist_node *iter; | ||
403 | struct hlist_head *bucket; | ||
404 | int dropped; | ||
337 | 405 | ||
338 | mlog(0, "Migrating locks from domain %s\n", dlm->name); | 406 | mlog(0, "Migrating locks from domain %s\n", dlm->name); |
339 | restart: | 407 | |
408 | num = 0; | ||
340 | spin_lock(&dlm->spinlock); | 409 | spin_lock(&dlm->spinlock); |
341 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 410 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
342 | while (!hlist_empty(dlm_lockres_hash(dlm, i))) { | 411 | redo_bucket: |
343 | res = hlist_entry(dlm_lockres_hash(dlm, i)->first, | 412 | n = 0; |
344 | struct dlm_lock_resource, hash_node); | 413 | bucket = dlm_lockres_hash(dlm, i); |
345 | /* need reference when manually grabbing lockres */ | 414 | iter = bucket->first; |
415 | while (iter) { | ||
416 | n++; | ||
417 | res = hlist_entry(iter, struct dlm_lock_resource, | ||
418 | hash_node); | ||
346 | dlm_lockres_get(res); | 419 | dlm_lockres_get(res); |
347 | /* this should unhash the lockres | 420 | /* migrate, if necessary. this will drop the dlm |
348 | * and exit with dlm->spinlock */ | 421 | * spinlock and retake it if it does migration. */ |
349 | mlog(0, "purging res=%p\n", res); | 422 | dropped = dlm_empty_lockres(dlm, res); |
350 | if (dlm_lockres_is_dirty(dlm, res)) { | 423 | |
351 | /* HACK! this should absolutely go. | 424 | spin_lock(&res->spinlock); |
352 | * need to figure out why some empty | 425 | __dlm_lockres_calc_usage(dlm, res); |
353 | * lockreses are still marked dirty */ | 426 | iter = res->hash_node.next; |
354 | mlog(ML_ERROR, "lockres %.*s dirty!\n", | 427 | spin_unlock(&res->spinlock); |
355 | res->lockname.len, res->lockname.name); | 428 | |
356 | |||
357 | spin_unlock(&dlm->spinlock); | ||
358 | dlm_kick_thread(dlm, res); | ||
359 | wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); | ||
360 | dlm_lockres_put(res); | ||
361 | goto restart; | ||
362 | } | ||
363 | dlm_purge_lockres(dlm, res); | ||
364 | dlm_lockres_put(res); | 429 | dlm_lockres_put(res); |
430 | |||
431 | cond_resched_lock(&dlm->spinlock); | ||
432 | |||
433 | if (dropped) | ||
434 | goto redo_bucket; | ||
365 | } | 435 | } |
436 | num += n; | ||
437 | mlog(0, "%s: touched %d lockreses in bucket %d " | ||
438 | "(tot=%d)\n", dlm->name, n, i, num); | ||
366 | } | 439 | } |
367 | spin_unlock(&dlm->spinlock); | 440 | spin_unlock(&dlm->spinlock); |
368 | 441 | wake_up(&dlm->dlm_thread_wq); | |
442 | |||
443 | /* let the dlm thread take care of purging, keep scanning until | ||
444 | * nothing remains in the hash */ | ||
445 | if (num) { | ||
446 | mlog(0, "%s: %d lock resources in hash last pass\n", | ||
447 | dlm->name, num); | ||
448 | ret = -EAGAIN; | ||
449 | } | ||
369 | mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); | 450 | mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); |
451 | return ret; | ||
370 | } | 452 | } |
371 | 453 | ||
372 | static int dlm_no_joining_node(struct dlm_ctxt *dlm) | 454 | static int dlm_no_joining_node(struct dlm_ctxt *dlm) |
@@ -418,7 +500,8 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm) | |||
418 | printk("\n"); | 500 | printk("\n"); |
419 | } | 501 | } |
420 | 502 | ||
421 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data) | 503 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
504 | void **ret_data) | ||
422 | { | 505 | { |
423 | struct dlm_ctxt *dlm = data; | 506 | struct dlm_ctxt *dlm = data; |
424 | unsigned int node; | 507 | unsigned int node; |
@@ -571,7 +654,9 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
571 | /* We changed dlm state, notify the thread */ | 654 | /* We changed dlm state, notify the thread */ |
572 | dlm_kick_thread(dlm, NULL); | 655 | dlm_kick_thread(dlm, NULL); |
573 | 656 | ||
574 | dlm_migrate_all_locks(dlm); | 657 | while (dlm_migrate_all_locks(dlm)) { |
658 | mlog(0, "%s: more migration to do\n", dlm->name); | ||
659 | } | ||
575 | dlm_mark_domain_leaving(dlm); | 660 | dlm_mark_domain_leaving(dlm); |
576 | dlm_leave_domain(dlm); | 661 | dlm_leave_domain(dlm); |
577 | dlm_complete_dlm_shutdown(dlm); | 662 | dlm_complete_dlm_shutdown(dlm); |
@@ -580,11 +665,13 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
580 | } | 665 | } |
581 | EXPORT_SYMBOL_GPL(dlm_unregister_domain); | 666 | EXPORT_SYMBOL_GPL(dlm_unregister_domain); |
582 | 667 | ||
583 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | 668 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, |
669 | void **ret_data) | ||
584 | { | 670 | { |
585 | struct dlm_query_join_request *query; | 671 | struct dlm_query_join_request *query; |
586 | enum dlm_query_join_response response; | 672 | enum dlm_query_join_response response; |
587 | struct dlm_ctxt *dlm = NULL; | 673 | struct dlm_ctxt *dlm = NULL; |
674 | u8 nodenum; | ||
588 | 675 | ||
589 | query = (struct dlm_query_join_request *) msg->buf; | 676 | query = (struct dlm_query_join_request *) msg->buf; |
590 | 677 | ||
@@ -608,6 +695,28 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
608 | 695 | ||
609 | spin_lock(&dlm_domain_lock); | 696 | spin_lock(&dlm_domain_lock); |
610 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); | 697 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); |
698 | if (!dlm) | ||
699 | goto unlock_respond; | ||
700 | |||
701 | /* | ||
702 | * There is a small window where the joining node may not see the | ||
703 | * node(s) that just left but still part of the cluster. DISALLOW | ||
704 | * join request if joining node has different node map. | ||
705 | */ | ||
706 | nodenum=0; | ||
707 | while (nodenum < O2NM_MAX_NODES) { | ||
708 | if (test_bit(nodenum, dlm->domain_map)) { | ||
709 | if (!byte_test_bit(nodenum, query->node_map)) { | ||
710 | mlog(0, "disallow join as node %u does not " | ||
711 | "have node %u in its nodemap\n", | ||
712 | query->node_idx, nodenum); | ||
713 | response = JOIN_DISALLOW; | ||
714 | goto unlock_respond; | ||
715 | } | ||
716 | } | ||
717 | nodenum++; | ||
718 | } | ||
719 | |||
611 | /* Once the dlm ctxt is marked as leaving then we don't want | 720 | /* Once the dlm ctxt is marked as leaving then we don't want |
612 | * to be put in someone's domain map. | 721 | * to be put in someone's domain map. |
613 | * Also, explicitly disallow joining at certain troublesome | 722 | * Also, explicitly disallow joining at certain troublesome |
@@ -626,15 +735,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
626 | /* Disallow parallel joins. */ | 735 | /* Disallow parallel joins. */ |
627 | response = JOIN_DISALLOW; | 736 | response = JOIN_DISALLOW; |
628 | } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { | 737 | } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { |
629 | mlog(ML_NOTICE, "node %u trying to join, but recovery " | 738 | mlog(0, "node %u trying to join, but recovery " |
630 | "is ongoing.\n", bit); | 739 | "is ongoing.\n", bit); |
631 | response = JOIN_DISALLOW; | 740 | response = JOIN_DISALLOW; |
632 | } else if (test_bit(bit, dlm->recovery_map)) { | 741 | } else if (test_bit(bit, dlm->recovery_map)) { |
633 | mlog(ML_NOTICE, "node %u trying to join, but it " | 742 | mlog(0, "node %u trying to join, but it " |
634 | "still needs recovery.\n", bit); | 743 | "still needs recovery.\n", bit); |
635 | response = JOIN_DISALLOW; | 744 | response = JOIN_DISALLOW; |
636 | } else if (test_bit(bit, dlm->domain_map)) { | 745 | } else if (test_bit(bit, dlm->domain_map)) { |
637 | mlog(ML_NOTICE, "node %u trying to join, but it " | 746 | mlog(0, "node %u trying to join, but it " |
638 | "is still in the domain! needs recovery?\n", | 747 | "is still in the domain! needs recovery?\n", |
639 | bit); | 748 | bit); |
640 | response = JOIN_DISALLOW; | 749 | response = JOIN_DISALLOW; |
@@ -649,6 +758,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
649 | 758 | ||
650 | spin_unlock(&dlm->spinlock); | 759 | spin_unlock(&dlm->spinlock); |
651 | } | 760 | } |
761 | unlock_respond: | ||
652 | spin_unlock(&dlm_domain_lock); | 762 | spin_unlock(&dlm_domain_lock); |
653 | 763 | ||
654 | respond: | 764 | respond: |
@@ -657,7 +767,8 @@ respond: | |||
657 | return response; | 767 | return response; |
658 | } | 768 | } |
659 | 769 | ||
660 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data) | 770 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, |
771 | void **ret_data) | ||
661 | { | 772 | { |
662 | struct dlm_assert_joined *assert; | 773 | struct dlm_assert_joined *assert; |
663 | struct dlm_ctxt *dlm = NULL; | 774 | struct dlm_ctxt *dlm = NULL; |
@@ -694,7 +805,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data) | |||
694 | return 0; | 805 | return 0; |
695 | } | 806 | } |
696 | 807 | ||
697 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data) | 808 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
809 | void **ret_data) | ||
698 | { | 810 | { |
699 | struct dlm_cancel_join *cancel; | 811 | struct dlm_cancel_join *cancel; |
700 | struct dlm_ctxt *dlm = NULL; | 812 | struct dlm_ctxt *dlm = NULL; |
@@ -796,6 +908,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm, | |||
796 | join_msg.name_len = strlen(dlm->name); | 908 | join_msg.name_len = strlen(dlm->name); |
797 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); | 909 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); |
798 | 910 | ||
911 | /* copy live node map to join message */ | ||
912 | byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); | ||
913 | |||
799 | status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, | 914 | status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, |
800 | sizeof(join_msg), node, &retval); | 915 | sizeof(join_msg), node, &retval); |
801 | if (status < 0 && status != -ENOPROTOOPT) { | 916 | if (status < 0 && status != -ENOPROTOOPT) { |
@@ -1036,98 +1151,106 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
1036 | status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, | 1151 | status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, |
1037 | sizeof(struct dlm_master_request), | 1152 | sizeof(struct dlm_master_request), |
1038 | dlm_master_request_handler, | 1153 | dlm_master_request_handler, |
1039 | dlm, &dlm->dlm_domain_handlers); | 1154 | dlm, NULL, &dlm->dlm_domain_handlers); |
1040 | if (status) | 1155 | if (status) |
1041 | goto bail; | 1156 | goto bail; |
1042 | 1157 | ||
1043 | status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, | 1158 | status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, |
1044 | sizeof(struct dlm_assert_master), | 1159 | sizeof(struct dlm_assert_master), |
1045 | dlm_assert_master_handler, | 1160 | dlm_assert_master_handler, |
1046 | dlm, &dlm->dlm_domain_handlers); | 1161 | dlm, dlm_assert_master_post_handler, |
1162 | &dlm->dlm_domain_handlers); | ||
1047 | if (status) | 1163 | if (status) |
1048 | goto bail; | 1164 | goto bail; |
1049 | 1165 | ||
1050 | status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, | 1166 | status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, |
1051 | sizeof(struct dlm_create_lock), | 1167 | sizeof(struct dlm_create_lock), |
1052 | dlm_create_lock_handler, | 1168 | dlm_create_lock_handler, |
1053 | dlm, &dlm->dlm_domain_handlers); | 1169 | dlm, NULL, &dlm->dlm_domain_handlers); |
1054 | if (status) | 1170 | if (status) |
1055 | goto bail; | 1171 | goto bail; |
1056 | 1172 | ||
1057 | status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, | 1173 | status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, |
1058 | DLM_CONVERT_LOCK_MAX_LEN, | 1174 | DLM_CONVERT_LOCK_MAX_LEN, |
1059 | dlm_convert_lock_handler, | 1175 | dlm_convert_lock_handler, |
1060 | dlm, &dlm->dlm_domain_handlers); | 1176 | dlm, NULL, &dlm->dlm_domain_handlers); |
1061 | if (status) | 1177 | if (status) |
1062 | goto bail; | 1178 | goto bail; |
1063 | 1179 | ||
1064 | status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, | 1180 | status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, |
1065 | DLM_UNLOCK_LOCK_MAX_LEN, | 1181 | DLM_UNLOCK_LOCK_MAX_LEN, |
1066 | dlm_unlock_lock_handler, | 1182 | dlm_unlock_lock_handler, |
1067 | dlm, &dlm->dlm_domain_handlers); | 1183 | dlm, NULL, &dlm->dlm_domain_handlers); |
1068 | if (status) | 1184 | if (status) |
1069 | goto bail; | 1185 | goto bail; |
1070 | 1186 | ||
1071 | status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, | 1187 | status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, |
1072 | DLM_PROXY_AST_MAX_LEN, | 1188 | DLM_PROXY_AST_MAX_LEN, |
1073 | dlm_proxy_ast_handler, | 1189 | dlm_proxy_ast_handler, |
1074 | dlm, &dlm->dlm_domain_handlers); | 1190 | dlm, NULL, &dlm->dlm_domain_handlers); |
1075 | if (status) | 1191 | if (status) |
1076 | goto bail; | 1192 | goto bail; |
1077 | 1193 | ||
1078 | status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, | 1194 | status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, |
1079 | sizeof(struct dlm_exit_domain), | 1195 | sizeof(struct dlm_exit_domain), |
1080 | dlm_exit_domain_handler, | 1196 | dlm_exit_domain_handler, |
1081 | dlm, &dlm->dlm_domain_handlers); | 1197 | dlm, NULL, &dlm->dlm_domain_handlers); |
1198 | if (status) | ||
1199 | goto bail; | ||
1200 | |||
1201 | status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key, | ||
1202 | sizeof(struct dlm_deref_lockres), | ||
1203 | dlm_deref_lockres_handler, | ||
1204 | dlm, NULL, &dlm->dlm_domain_handlers); | ||
1082 | if (status) | 1205 | if (status) |
1083 | goto bail; | 1206 | goto bail; |
1084 | 1207 | ||
1085 | status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, | 1208 | status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, |
1086 | sizeof(struct dlm_migrate_request), | 1209 | sizeof(struct dlm_migrate_request), |
1087 | dlm_migrate_request_handler, | 1210 | dlm_migrate_request_handler, |
1088 | dlm, &dlm->dlm_domain_handlers); | 1211 | dlm, NULL, &dlm->dlm_domain_handlers); |
1089 | if (status) | 1212 | if (status) |
1090 | goto bail; | 1213 | goto bail; |
1091 | 1214 | ||
1092 | status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, | 1215 | status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, |
1093 | DLM_MIG_LOCKRES_MAX_LEN, | 1216 | DLM_MIG_LOCKRES_MAX_LEN, |
1094 | dlm_mig_lockres_handler, | 1217 | dlm_mig_lockres_handler, |
1095 | dlm, &dlm->dlm_domain_handlers); | 1218 | dlm, NULL, &dlm->dlm_domain_handlers); |
1096 | if (status) | 1219 | if (status) |
1097 | goto bail; | 1220 | goto bail; |
1098 | 1221 | ||
1099 | status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, | 1222 | status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, |
1100 | sizeof(struct dlm_master_requery), | 1223 | sizeof(struct dlm_master_requery), |
1101 | dlm_master_requery_handler, | 1224 | dlm_master_requery_handler, |
1102 | dlm, &dlm->dlm_domain_handlers); | 1225 | dlm, NULL, &dlm->dlm_domain_handlers); |
1103 | if (status) | 1226 | if (status) |
1104 | goto bail; | 1227 | goto bail; |
1105 | 1228 | ||
1106 | status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, | 1229 | status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, |
1107 | sizeof(struct dlm_lock_request), | 1230 | sizeof(struct dlm_lock_request), |
1108 | dlm_request_all_locks_handler, | 1231 | dlm_request_all_locks_handler, |
1109 | dlm, &dlm->dlm_domain_handlers); | 1232 | dlm, NULL, &dlm->dlm_domain_handlers); |
1110 | if (status) | 1233 | if (status) |
1111 | goto bail; | 1234 | goto bail; |
1112 | 1235 | ||
1113 | status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, | 1236 | status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, |
1114 | sizeof(struct dlm_reco_data_done), | 1237 | sizeof(struct dlm_reco_data_done), |
1115 | dlm_reco_data_done_handler, | 1238 | dlm_reco_data_done_handler, |
1116 | dlm, &dlm->dlm_domain_handlers); | 1239 | dlm, NULL, &dlm->dlm_domain_handlers); |
1117 | if (status) | 1240 | if (status) |
1118 | goto bail; | 1241 | goto bail; |
1119 | 1242 | ||
1120 | status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, | 1243 | status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, |
1121 | sizeof(struct dlm_begin_reco), | 1244 | sizeof(struct dlm_begin_reco), |
1122 | dlm_begin_reco_handler, | 1245 | dlm_begin_reco_handler, |
1123 | dlm, &dlm->dlm_domain_handlers); | 1246 | dlm, NULL, &dlm->dlm_domain_handlers); |
1124 | if (status) | 1247 | if (status) |
1125 | goto bail; | 1248 | goto bail; |
1126 | 1249 | ||
1127 | status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, | 1250 | status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, |
1128 | sizeof(struct dlm_finalize_reco), | 1251 | sizeof(struct dlm_finalize_reco), |
1129 | dlm_finalize_reco_handler, | 1252 | dlm_finalize_reco_handler, |
1130 | dlm, &dlm->dlm_domain_handlers); | 1253 | dlm, NULL, &dlm->dlm_domain_handlers); |
1131 | if (status) | 1254 | if (status) |
1132 | goto bail; | 1255 | goto bail; |
1133 | 1256 | ||
@@ -1141,6 +1264,8 @@ bail: | |||
1141 | static int dlm_join_domain(struct dlm_ctxt *dlm) | 1264 | static int dlm_join_domain(struct dlm_ctxt *dlm) |
1142 | { | 1265 | { |
1143 | int status; | 1266 | int status; |
1267 | unsigned int backoff; | ||
1268 | unsigned int total_backoff = 0; | ||
1144 | 1269 | ||
1145 | BUG_ON(!dlm); | 1270 | BUG_ON(!dlm); |
1146 | 1271 | ||
@@ -1172,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1172 | } | 1297 | } |
1173 | 1298 | ||
1174 | do { | 1299 | do { |
1175 | unsigned int backoff; | ||
1176 | status = dlm_try_to_join_domain(dlm); | 1300 | status = dlm_try_to_join_domain(dlm); |
1177 | 1301 | ||
1178 | /* If we're racing another node to the join, then we | 1302 | /* If we're racing another node to the join, then we |
1179 | * need to back off temporarily and let them | 1303 | * need to back off temporarily and let them |
1180 | * complete. */ | 1304 | * complete. */ |
1305 | #define DLM_JOIN_TIMEOUT_MSECS 90000 | ||
1181 | if (status == -EAGAIN) { | 1306 | if (status == -EAGAIN) { |
1182 | if (signal_pending(current)) { | 1307 | if (signal_pending(current)) { |
1183 | status = -ERESTARTSYS; | 1308 | status = -ERESTARTSYS; |
1184 | goto bail; | 1309 | goto bail; |
1185 | } | 1310 | } |
1186 | 1311 | ||
1312 | if (total_backoff > | ||
1313 | msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) { | ||
1314 | status = -ERESTARTSYS; | ||
1315 | mlog(ML_NOTICE, "Timed out joining dlm domain " | ||
1316 | "%s after %u msecs\n", dlm->name, | ||
1317 | jiffies_to_msecs(total_backoff)); | ||
1318 | goto bail; | ||
1319 | } | ||
1320 | |||
1187 | /* | 1321 | /* |
1188 | * <chip> After you! | 1322 | * <chip> After you! |
1189 | * <dale> No, after you! | 1323 | * <dale> No, after you! |
@@ -1193,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1193 | */ | 1327 | */ |
1194 | backoff = (unsigned int)(jiffies & 0x3); | 1328 | backoff = (unsigned int)(jiffies & 0x3); |
1195 | backoff *= DLM_DOMAIN_BACKOFF_MS; | 1329 | backoff *= DLM_DOMAIN_BACKOFF_MS; |
1330 | total_backoff += backoff; | ||
1196 | mlog(0, "backoff %d\n", backoff); | 1331 | mlog(0, "backoff %d\n", backoff); |
1197 | msleep(backoff); | 1332 | msleep(backoff); |
1198 | } | 1333 | } |
@@ -1421,21 +1556,21 @@ static int dlm_register_net_handlers(void) | |||
1421 | status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, | 1556 | status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, |
1422 | sizeof(struct dlm_query_join_request), | 1557 | sizeof(struct dlm_query_join_request), |
1423 | dlm_query_join_handler, | 1558 | dlm_query_join_handler, |
1424 | NULL, &dlm_join_handlers); | 1559 | NULL, NULL, &dlm_join_handlers); |
1425 | if (status) | 1560 | if (status) |
1426 | goto bail; | 1561 | goto bail; |
1427 | 1562 | ||
1428 | status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, | 1563 | status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, |
1429 | sizeof(struct dlm_assert_joined), | 1564 | sizeof(struct dlm_assert_joined), |
1430 | dlm_assert_joined_handler, | 1565 | dlm_assert_joined_handler, |
1431 | NULL, &dlm_join_handlers); | 1566 | NULL, NULL, &dlm_join_handlers); |
1432 | if (status) | 1567 | if (status) |
1433 | goto bail; | 1568 | goto bail; |
1434 | 1569 | ||
1435 | status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, | 1570 | status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, |
1436 | sizeof(struct dlm_cancel_join), | 1571 | sizeof(struct dlm_cancel_join), |
1437 | dlm_cancel_join_handler, | 1572 | dlm_cancel_join_handler, |
1438 | NULL, &dlm_join_handlers); | 1573 | NULL, NULL, &dlm_join_handlers); |
1439 | 1574 | ||
1440 | bail: | 1575 | bail: |
1441 | if (status < 0) | 1576 | if (status < 0) |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index e5ca3db197f6..52578d907d9a 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -163,6 +163,10 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, | |||
163 | kick_thread = 1; | 163 | kick_thread = 1; |
164 | } | 164 | } |
165 | } | 165 | } |
166 | /* reduce the inflight count, this may result in the lockres | ||
167 | * being purged below during calc_usage */ | ||
168 | if (lock->ml.node == dlm->node_num) | ||
169 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
166 | 170 | ||
167 | spin_unlock(&res->spinlock); | 171 | spin_unlock(&res->spinlock); |
168 | wake_up(&res->wq); | 172 | wake_up(&res->wq); |
@@ -437,7 +441,8 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, | |||
437 | * held on exit: none | 441 | * held on exit: none |
438 | * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED | 442 | * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED |
439 | */ | 443 | */ |
440 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data) | 444 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
445 | void **ret_data) | ||
441 | { | 446 | { |
442 | struct dlm_ctxt *dlm = data; | 447 | struct dlm_ctxt *dlm = data; |
443 | struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf; | 448 | struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 0ad872055cb3..77e4e6169a0d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -99,9 +99,10 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm, | |||
99 | int idx); | 99 | int idx); |
100 | 100 | ||
101 | static void dlm_assert_master_worker(struct dlm_work_item *item, void *data); | 101 | static void dlm_assert_master_worker(struct dlm_work_item *item, void *data); |
102 | static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, | 102 | static int dlm_do_assert_master(struct dlm_ctxt *dlm, |
103 | unsigned int namelen, void *nodemap, | 103 | struct dlm_lock_resource *res, |
104 | u32 flags); | 104 | void *nodemap, u32 flags); |
105 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data); | ||
105 | 106 | ||
106 | static inline int dlm_mle_equal(struct dlm_ctxt *dlm, | 107 | static inline int dlm_mle_equal(struct dlm_ctxt *dlm, |
107 | struct dlm_master_list_entry *mle, | 108 | struct dlm_master_list_entry *mle, |
@@ -237,7 +238,8 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
237 | struct dlm_master_list_entry **mle, | 238 | struct dlm_master_list_entry **mle, |
238 | char *name, unsigned int namelen); | 239 | char *name, unsigned int namelen); |
239 | 240 | ||
240 | static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to); | 241 | static int dlm_do_master_request(struct dlm_lock_resource *res, |
242 | struct dlm_master_list_entry *mle, int to); | ||
241 | 243 | ||
242 | 244 | ||
243 | static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, | 245 | static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, |
@@ -687,6 +689,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
687 | INIT_LIST_HEAD(&res->purge); | 689 | INIT_LIST_HEAD(&res->purge); |
688 | atomic_set(&res->asts_reserved, 0); | 690 | atomic_set(&res->asts_reserved, 0); |
689 | res->migration_pending = 0; | 691 | res->migration_pending = 0; |
692 | res->inflight_locks = 0; | ||
690 | 693 | ||
691 | kref_init(&res->refs); | 694 | kref_init(&res->refs); |
692 | 695 | ||
@@ -700,6 +703,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
700 | res->last_used = 0; | 703 | res->last_used = 0; |
701 | 704 | ||
702 | memset(res->lvb, 0, DLM_LVB_LEN); | 705 | memset(res->lvb, 0, DLM_LVB_LEN); |
706 | memset(res->refmap, 0, sizeof(res->refmap)); | ||
703 | } | 707 | } |
704 | 708 | ||
705 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | 709 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, |
@@ -722,6 +726,42 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
722 | return res; | 726 | return res; |
723 | } | 727 | } |
724 | 728 | ||
729 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
730 | struct dlm_lock_resource *res, | ||
731 | int new_lockres, | ||
732 | const char *file, | ||
733 | int line) | ||
734 | { | ||
735 | if (!new_lockres) | ||
736 | assert_spin_locked(&res->spinlock); | ||
737 | |||
738 | if (!test_bit(dlm->node_num, res->refmap)) { | ||
739 | BUG_ON(res->inflight_locks != 0); | ||
740 | dlm_lockres_set_refmap_bit(dlm->node_num, res); | ||
741 | } | ||
742 | res->inflight_locks++; | ||
743 | mlog(0, "%s:%.*s: inflight++: now %u\n", | ||
744 | dlm->name, res->lockname.len, res->lockname.name, | ||
745 | res->inflight_locks); | ||
746 | } | ||
747 | |||
748 | void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | ||
749 | struct dlm_lock_resource *res, | ||
750 | const char *file, | ||
751 | int line) | ||
752 | { | ||
753 | assert_spin_locked(&res->spinlock); | ||
754 | |||
755 | BUG_ON(res->inflight_locks == 0); | ||
756 | res->inflight_locks--; | ||
757 | mlog(0, "%s:%.*s: inflight--: now %u\n", | ||
758 | dlm->name, res->lockname.len, res->lockname.name, | ||
759 | res->inflight_locks); | ||
760 | if (res->inflight_locks == 0) | ||
761 | dlm_lockres_clear_refmap_bit(dlm->node_num, res); | ||
762 | wake_up(&res->wq); | ||
763 | } | ||
764 | |||
725 | /* | 765 | /* |
726 | * lookup a lock resource by name. | 766 | * lookup a lock resource by name. |
727 | * may already exist in the hashtable. | 767 | * may already exist in the hashtable. |
@@ -752,6 +792,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
752 | unsigned int hash; | 792 | unsigned int hash; |
753 | int tries = 0; | 793 | int tries = 0; |
754 | int bit, wait_on_recovery = 0; | 794 | int bit, wait_on_recovery = 0; |
795 | int drop_inflight_if_nonlocal = 0; | ||
755 | 796 | ||
756 | BUG_ON(!lockid); | 797 | BUG_ON(!lockid); |
757 | 798 | ||
@@ -761,9 +802,30 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
761 | 802 | ||
762 | lookup: | 803 | lookup: |
763 | spin_lock(&dlm->spinlock); | 804 | spin_lock(&dlm->spinlock); |
764 | tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash); | 805 | tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); |
765 | if (tmpres) { | 806 | if (tmpres) { |
807 | int dropping_ref = 0; | ||
808 | |||
809 | spin_lock(&tmpres->spinlock); | ||
810 | if (tmpres->owner == dlm->node_num) { | ||
811 | BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); | ||
812 | dlm_lockres_grab_inflight_ref(dlm, tmpres); | ||
813 | } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) | ||
814 | dropping_ref = 1; | ||
815 | spin_unlock(&tmpres->spinlock); | ||
766 | spin_unlock(&dlm->spinlock); | 816 | spin_unlock(&dlm->spinlock); |
817 | |||
818 | /* wait until done messaging the master, drop our ref to allow | ||
819 | * the lockres to be purged, start over. */ | ||
820 | if (dropping_ref) { | ||
821 | spin_lock(&tmpres->spinlock); | ||
822 | __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF); | ||
823 | spin_unlock(&tmpres->spinlock); | ||
824 | dlm_lockres_put(tmpres); | ||
825 | tmpres = NULL; | ||
826 | goto lookup; | ||
827 | } | ||
828 | |||
767 | mlog(0, "found in hash!\n"); | 829 | mlog(0, "found in hash!\n"); |
768 | if (res) | 830 | if (res) |
769 | dlm_lockres_put(res); | 831 | dlm_lockres_put(res); |
@@ -793,6 +855,7 @@ lookup: | |||
793 | spin_lock(&res->spinlock); | 855 | spin_lock(&res->spinlock); |
794 | dlm_change_lockres_owner(dlm, res, dlm->node_num); | 856 | dlm_change_lockres_owner(dlm, res, dlm->node_num); |
795 | __dlm_insert_lockres(dlm, res); | 857 | __dlm_insert_lockres(dlm, res); |
858 | dlm_lockres_grab_inflight_ref(dlm, res); | ||
796 | spin_unlock(&res->spinlock); | 859 | spin_unlock(&res->spinlock); |
797 | spin_unlock(&dlm->spinlock); | 860 | spin_unlock(&dlm->spinlock); |
798 | /* lockres still marked IN_PROGRESS */ | 861 | /* lockres still marked IN_PROGRESS */ |
@@ -805,29 +868,40 @@ lookup: | |||
805 | /* if we found a block, wait for lock to be mastered by another node */ | 868 | /* if we found a block, wait for lock to be mastered by another node */ |
806 | blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen); | 869 | blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen); |
807 | if (blocked) { | 870 | if (blocked) { |
871 | int mig; | ||
808 | if (mle->type == DLM_MLE_MASTER) { | 872 | if (mle->type == DLM_MLE_MASTER) { |
809 | mlog(ML_ERROR, "master entry for nonexistent lock!\n"); | 873 | mlog(ML_ERROR, "master entry for nonexistent lock!\n"); |
810 | BUG(); | 874 | BUG(); |
811 | } else if (mle->type == DLM_MLE_MIGRATION) { | 875 | } |
812 | /* migration is in progress! */ | 876 | mig = (mle->type == DLM_MLE_MIGRATION); |
813 | /* the good news is that we now know the | 877 | /* if there is a migration in progress, let the migration |
814 | * "current" master (mle->master). */ | 878 | * finish before continuing. we can wait for the absence |
815 | 879 | * of the MIGRATION mle: either the migrate finished or | |
880 | * one of the nodes died and the mle was cleaned up. | ||
881 | * if there is a BLOCK here, but it already has a master | ||
882 | * set, we are too late. the master does not have a ref | ||
883 | * for us in the refmap. detach the mle and drop it. | ||
884 | * either way, go back to the top and start over. */ | ||
885 | if (mig || mle->master != O2NM_MAX_NODES) { | ||
886 | BUG_ON(mig && mle->master == dlm->node_num); | ||
887 | /* we arrived too late. the master does not | ||
888 | * have a ref for us. retry. */ | ||
889 | mlog(0, "%s:%.*s: late on %s\n", | ||
890 | dlm->name, namelen, lockid, | ||
891 | mig ? "MIGRATION" : "BLOCK"); | ||
816 | spin_unlock(&dlm->master_lock); | 892 | spin_unlock(&dlm->master_lock); |
817 | assert_spin_locked(&dlm->spinlock); | ||
818 | |||
819 | /* set the lockres owner and hash it */ | ||
820 | spin_lock(&res->spinlock); | ||
821 | dlm_set_lockres_owner(dlm, res, mle->master); | ||
822 | __dlm_insert_lockres(dlm, res); | ||
823 | spin_unlock(&res->spinlock); | ||
824 | spin_unlock(&dlm->spinlock); | 893 | spin_unlock(&dlm->spinlock); |
825 | 894 | ||
826 | /* master is known, detach */ | 895 | /* master is known, detach */ |
827 | dlm_mle_detach_hb_events(dlm, mle); | 896 | if (!mig) |
897 | dlm_mle_detach_hb_events(dlm, mle); | ||
828 | dlm_put_mle(mle); | 898 | dlm_put_mle(mle); |
829 | mle = NULL; | 899 | mle = NULL; |
830 | goto wake_waiters; | 900 | /* this is lame, but we cant wait on either |
901 | * the mle or lockres waitqueue here */ | ||
902 | if (mig) | ||
903 | msleep(100); | ||
904 | goto lookup; | ||
831 | } | 905 | } |
832 | } else { | 906 | } else { |
833 | /* go ahead and try to master lock on this node */ | 907 | /* go ahead and try to master lock on this node */ |
@@ -858,6 +932,13 @@ lookup: | |||
858 | 932 | ||
859 | /* finally add the lockres to its hash bucket */ | 933 | /* finally add the lockres to its hash bucket */ |
860 | __dlm_insert_lockres(dlm, res); | 934 | __dlm_insert_lockres(dlm, res); |
935 | /* since this lockres is new it does not require the spinlock */ | ||
936 | dlm_lockres_grab_inflight_ref_new(dlm, res); | ||
937 | |||
938 | /* if this node does not become the master make sure to drop | ||
939 | * this inflight reference below */ | ||
940 | drop_inflight_if_nonlocal = 1; | ||
941 | |||
861 | /* get an extra ref on the mle in case this is a BLOCK | 942 | /* get an extra ref on the mle in case this is a BLOCK |
862 | * if so, the creator of the BLOCK may try to put the last | 943 | * if so, the creator of the BLOCK may try to put the last |
863 | * ref at this time in the assert master handler, so we | 944 | * ref at this time in the assert master handler, so we |
@@ -910,7 +991,7 @@ redo_request: | |||
910 | ret = -EINVAL; | 991 | ret = -EINVAL; |
911 | dlm_node_iter_init(mle->vote_map, &iter); | 992 | dlm_node_iter_init(mle->vote_map, &iter); |
912 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { | 993 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { |
913 | ret = dlm_do_master_request(mle, nodenum); | 994 | ret = dlm_do_master_request(res, mle, nodenum); |
914 | if (ret < 0) | 995 | if (ret < 0) |
915 | mlog_errno(ret); | 996 | mlog_errno(ret); |
916 | if (mle->master != O2NM_MAX_NODES) { | 997 | if (mle->master != O2NM_MAX_NODES) { |
@@ -960,6 +1041,8 @@ wait: | |||
960 | 1041 | ||
961 | wake_waiters: | 1042 | wake_waiters: |
962 | spin_lock(&res->spinlock); | 1043 | spin_lock(&res->spinlock); |
1044 | if (res->owner != dlm->node_num && drop_inflight_if_nonlocal) | ||
1045 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
963 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 1046 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
964 | spin_unlock(&res->spinlock); | 1047 | spin_unlock(&res->spinlock); |
965 | wake_up(&res->wq); | 1048 | wake_up(&res->wq); |
@@ -998,7 +1081,7 @@ recheck: | |||
998 | /* this will cause the master to re-assert across | 1081 | /* this will cause the master to re-assert across |
999 | * the whole cluster, freeing up mles */ | 1082 | * the whole cluster, freeing up mles */ |
1000 | if (res->owner != dlm->node_num) { | 1083 | if (res->owner != dlm->node_num) { |
1001 | ret = dlm_do_master_request(mle, res->owner); | 1084 | ret = dlm_do_master_request(res, mle, res->owner); |
1002 | if (ret < 0) { | 1085 | if (ret < 0) { |
1003 | /* give recovery a chance to run */ | 1086 | /* give recovery a chance to run */ |
1004 | mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); | 1087 | mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); |
@@ -1062,6 +1145,8 @@ recheck: | |||
1062 | * now tell other nodes that I am | 1145 | * now tell other nodes that I am |
1063 | * mastering this. */ | 1146 | * mastering this. */ |
1064 | mle->master = dlm->node_num; | 1147 | mle->master = dlm->node_num; |
1148 | /* ref was grabbed in get_lock_resource | ||
1149 | * will be dropped in dlmlock_master */ | ||
1065 | assert = 1; | 1150 | assert = 1; |
1066 | sleep = 0; | 1151 | sleep = 0; |
1067 | } | 1152 | } |
@@ -1087,7 +1172,8 @@ recheck: | |||
1087 | (atomic_read(&mle->woken) == 1), | 1172 | (atomic_read(&mle->woken) == 1), |
1088 | timeo); | 1173 | timeo); |
1089 | if (res->owner == O2NM_MAX_NODES) { | 1174 | if (res->owner == O2NM_MAX_NODES) { |
1090 | mlog(0, "waiting again\n"); | 1175 | mlog(0, "%s:%.*s: waiting again\n", dlm->name, |
1176 | res->lockname.len, res->lockname.name); | ||
1091 | goto recheck; | 1177 | goto recheck; |
1092 | } | 1178 | } |
1093 | mlog(0, "done waiting, master is %u\n", res->owner); | 1179 | mlog(0, "done waiting, master is %u\n", res->owner); |
@@ -1100,8 +1186,7 @@ recheck: | |||
1100 | m = dlm->node_num; | 1186 | m = dlm->node_num; |
1101 | mlog(0, "about to master %.*s here, this=%u\n", | 1187 | mlog(0, "about to master %.*s here, this=%u\n", |
1102 | res->lockname.len, res->lockname.name, m); | 1188 | res->lockname.len, res->lockname.name, m); |
1103 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 1189 | ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0); |
1104 | res->lockname.len, mle->vote_map, 0); | ||
1105 | if (ret) { | 1190 | if (ret) { |
1106 | /* This is a failure in the network path, | 1191 | /* This is a failure in the network path, |
1107 | * not in the response to the assert_master | 1192 | * not in the response to the assert_master |
@@ -1117,6 +1202,8 @@ recheck: | |||
1117 | 1202 | ||
1118 | /* set the lockres owner */ | 1203 | /* set the lockres owner */ |
1119 | spin_lock(&res->spinlock); | 1204 | spin_lock(&res->spinlock); |
1205 | /* mastery reference obtained either during | ||
1206 | * assert_master_handler or in get_lock_resource */ | ||
1120 | dlm_change_lockres_owner(dlm, res, m); | 1207 | dlm_change_lockres_owner(dlm, res, m); |
1121 | spin_unlock(&res->spinlock); | 1208 | spin_unlock(&res->spinlock); |
1122 | 1209 | ||
@@ -1283,7 +1370,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
1283 | * | 1370 | * |
1284 | */ | 1371 | */ |
1285 | 1372 | ||
1286 | static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to) | 1373 | static int dlm_do_master_request(struct dlm_lock_resource *res, |
1374 | struct dlm_master_list_entry *mle, int to) | ||
1287 | { | 1375 | { |
1288 | struct dlm_ctxt *dlm = mle->dlm; | 1376 | struct dlm_ctxt *dlm = mle->dlm; |
1289 | struct dlm_master_request request; | 1377 | struct dlm_master_request request; |
@@ -1339,6 +1427,9 @@ again: | |||
1339 | case DLM_MASTER_RESP_YES: | 1427 | case DLM_MASTER_RESP_YES: |
1340 | set_bit(to, mle->response_map); | 1428 | set_bit(to, mle->response_map); |
1341 | mlog(0, "node %u is the master, response=YES\n", to); | 1429 | mlog(0, "node %u is the master, response=YES\n", to); |
1430 | mlog(0, "%s:%.*s: master node %u now knows I have a " | ||
1431 | "reference\n", dlm->name, res->lockname.len, | ||
1432 | res->lockname.name, to); | ||
1342 | mle->master = to; | 1433 | mle->master = to; |
1343 | break; | 1434 | break; |
1344 | case DLM_MASTER_RESP_NO: | 1435 | case DLM_MASTER_RESP_NO: |
@@ -1379,7 +1470,8 @@ out: | |||
1379 | * | 1470 | * |
1380 | * if possible, TRIM THIS DOWN!!! | 1471 | * if possible, TRIM THIS DOWN!!! |
1381 | */ | 1472 | */ |
1382 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data) | 1473 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, |
1474 | void **ret_data) | ||
1383 | { | 1475 | { |
1384 | u8 response = DLM_MASTER_RESP_MAYBE; | 1476 | u8 response = DLM_MASTER_RESP_MAYBE; |
1385 | struct dlm_ctxt *dlm = data; | 1477 | struct dlm_ctxt *dlm = data; |
@@ -1417,10 +1509,11 @@ way_up_top: | |||
1417 | 1509 | ||
1418 | /* take care of the easy cases up front */ | 1510 | /* take care of the easy cases up front */ |
1419 | spin_lock(&res->spinlock); | 1511 | spin_lock(&res->spinlock); |
1420 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 1512 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
1513 | DLM_LOCK_RES_MIGRATING)) { | ||
1421 | spin_unlock(&res->spinlock); | 1514 | spin_unlock(&res->spinlock); |
1422 | mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " | 1515 | mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " |
1423 | "being recovered\n"); | 1516 | "being recovered/migrated\n"); |
1424 | response = DLM_MASTER_RESP_ERROR; | 1517 | response = DLM_MASTER_RESP_ERROR; |
1425 | if (mle) | 1518 | if (mle) |
1426 | kmem_cache_free(dlm_mle_cache, mle); | 1519 | kmem_cache_free(dlm_mle_cache, mle); |
@@ -1428,8 +1521,10 @@ way_up_top: | |||
1428 | } | 1521 | } |
1429 | 1522 | ||
1430 | if (res->owner == dlm->node_num) { | 1523 | if (res->owner == dlm->node_num) { |
1524 | mlog(0, "%s:%.*s: setting bit %u in refmap\n", | ||
1525 | dlm->name, namelen, name, request->node_idx); | ||
1526 | dlm_lockres_set_refmap_bit(request->node_idx, res); | ||
1431 | spin_unlock(&res->spinlock); | 1527 | spin_unlock(&res->spinlock); |
1432 | // mlog(0, "this node is the master\n"); | ||
1433 | response = DLM_MASTER_RESP_YES; | 1528 | response = DLM_MASTER_RESP_YES; |
1434 | if (mle) | 1529 | if (mle) |
1435 | kmem_cache_free(dlm_mle_cache, mle); | 1530 | kmem_cache_free(dlm_mle_cache, mle); |
@@ -1477,7 +1572,6 @@ way_up_top: | |||
1477 | mlog(0, "node %u is master, but trying to migrate to " | 1572 | mlog(0, "node %u is master, but trying to migrate to " |
1478 | "node %u.\n", tmpmle->master, tmpmle->new_master); | 1573 | "node %u.\n", tmpmle->master, tmpmle->new_master); |
1479 | if (tmpmle->master == dlm->node_num) { | 1574 | if (tmpmle->master == dlm->node_num) { |
1480 | response = DLM_MASTER_RESP_YES; | ||
1481 | mlog(ML_ERROR, "no owner on lockres, but this " | 1575 | mlog(ML_ERROR, "no owner on lockres, but this " |
1482 | "node is trying to migrate it to %u?!\n", | 1576 | "node is trying to migrate it to %u?!\n", |
1483 | tmpmle->new_master); | 1577 | tmpmle->new_master); |
@@ -1494,6 +1588,10 @@ way_up_top: | |||
1494 | * go back and clean the mles on any | 1588 | * go back and clean the mles on any |
1495 | * other nodes */ | 1589 | * other nodes */ |
1496 | dispatch_assert = 1; | 1590 | dispatch_assert = 1; |
1591 | dlm_lockres_set_refmap_bit(request->node_idx, res); | ||
1592 | mlog(0, "%s:%.*s: setting bit %u in refmap\n", | ||
1593 | dlm->name, namelen, name, | ||
1594 | request->node_idx); | ||
1497 | } else | 1595 | } else |
1498 | response = DLM_MASTER_RESP_NO; | 1596 | response = DLM_MASTER_RESP_NO; |
1499 | } else { | 1597 | } else { |
@@ -1607,17 +1705,24 @@ send_response: | |||
1607 | * can periodically run all locks owned by this node | 1705 | * can periodically run all locks owned by this node |
1608 | * and re-assert across the cluster... | 1706 | * and re-assert across the cluster... |
1609 | */ | 1707 | */ |
1610 | static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, | 1708 | int dlm_do_assert_master(struct dlm_ctxt *dlm, |
1611 | unsigned int namelen, void *nodemap, | 1709 | struct dlm_lock_resource *res, |
1612 | u32 flags) | 1710 | void *nodemap, u32 flags) |
1613 | { | 1711 | { |
1614 | struct dlm_assert_master assert; | 1712 | struct dlm_assert_master assert; |
1615 | int to, tmpret; | 1713 | int to, tmpret; |
1616 | struct dlm_node_iter iter; | 1714 | struct dlm_node_iter iter; |
1617 | int ret = 0; | 1715 | int ret = 0; |
1618 | int reassert; | 1716 | int reassert; |
1717 | const char *lockname = res->lockname.name; | ||
1718 | unsigned int namelen = res->lockname.len; | ||
1619 | 1719 | ||
1620 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | 1720 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); |
1721 | |||
1722 | spin_lock(&res->spinlock); | ||
1723 | res->state |= DLM_LOCK_RES_SETREF_INPROG; | ||
1724 | spin_unlock(&res->spinlock); | ||
1725 | |||
1621 | again: | 1726 | again: |
1622 | reassert = 0; | 1727 | reassert = 0; |
1623 | 1728 | ||
@@ -1647,6 +1752,7 @@ again: | |||
1647 | mlog(0, "link to %d went down!\n", to); | 1752 | mlog(0, "link to %d went down!\n", to); |
1648 | /* any nonzero status return will do */ | 1753 | /* any nonzero status return will do */ |
1649 | ret = tmpret; | 1754 | ret = tmpret; |
1755 | r = 0; | ||
1650 | } else if (r < 0) { | 1756 | } else if (r < 0) { |
1651 | /* ok, something horribly messed. kill thyself. */ | 1757 | /* ok, something horribly messed. kill thyself. */ |
1652 | mlog(ML_ERROR,"during assert master of %.*s to %u, " | 1758 | mlog(ML_ERROR,"during assert master of %.*s to %u, " |
@@ -1661,17 +1767,39 @@ again: | |||
1661 | spin_unlock(&dlm->master_lock); | 1767 | spin_unlock(&dlm->master_lock); |
1662 | spin_unlock(&dlm->spinlock); | 1768 | spin_unlock(&dlm->spinlock); |
1663 | BUG(); | 1769 | BUG(); |
1664 | } else if (r == EAGAIN) { | 1770 | } |
1771 | |||
1772 | if (r & DLM_ASSERT_RESPONSE_REASSERT && | ||
1773 | !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) { | ||
1774 | mlog(ML_ERROR, "%.*s: very strange, " | ||
1775 | "master MLE but no lockres on %u\n", | ||
1776 | namelen, lockname, to); | ||
1777 | } | ||
1778 | |||
1779 | if (r & DLM_ASSERT_RESPONSE_REASSERT) { | ||
1665 | mlog(0, "%.*s: node %u create mles on other " | 1780 | mlog(0, "%.*s: node %u create mles on other " |
1666 | "nodes and requests a re-assert\n", | 1781 | "nodes and requests a re-assert\n", |
1667 | namelen, lockname, to); | 1782 | namelen, lockname, to); |
1668 | reassert = 1; | 1783 | reassert = 1; |
1669 | } | 1784 | } |
1785 | if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) { | ||
1786 | mlog(0, "%.*s: node %u has a reference to this " | ||
1787 | "lockres, set the bit in the refmap\n", | ||
1788 | namelen, lockname, to); | ||
1789 | spin_lock(&res->spinlock); | ||
1790 | dlm_lockres_set_refmap_bit(to, res); | ||
1791 | spin_unlock(&res->spinlock); | ||
1792 | } | ||
1670 | } | 1793 | } |
1671 | 1794 | ||
1672 | if (reassert) | 1795 | if (reassert) |
1673 | goto again; | 1796 | goto again; |
1674 | 1797 | ||
1798 | spin_lock(&res->spinlock); | ||
1799 | res->state &= ~DLM_LOCK_RES_SETREF_INPROG; | ||
1800 | spin_unlock(&res->spinlock); | ||
1801 | wake_up(&res->wq); | ||
1802 | |||
1675 | return ret; | 1803 | return ret; |
1676 | } | 1804 | } |
1677 | 1805 | ||
@@ -1684,7 +1812,8 @@ again: | |||
1684 | * | 1812 | * |
1685 | * if possible, TRIM THIS DOWN!!! | 1813 | * if possible, TRIM THIS DOWN!!! |
1686 | */ | 1814 | */ |
1687 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) | 1815 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, |
1816 | void **ret_data) | ||
1688 | { | 1817 | { |
1689 | struct dlm_ctxt *dlm = data; | 1818 | struct dlm_ctxt *dlm = data; |
1690 | struct dlm_master_list_entry *mle = NULL; | 1819 | struct dlm_master_list_entry *mle = NULL; |
@@ -1693,7 +1822,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) | |||
1693 | char *name; | 1822 | char *name; |
1694 | unsigned int namelen, hash; | 1823 | unsigned int namelen, hash; |
1695 | u32 flags; | 1824 | u32 flags; |
1696 | int master_request = 0; | 1825 | int master_request = 0, have_lockres_ref = 0; |
1697 | int ret = 0; | 1826 | int ret = 0; |
1698 | 1827 | ||
1699 | if (!dlm_grab(dlm)) | 1828 | if (!dlm_grab(dlm)) |
@@ -1851,6 +1980,7 @@ ok: | |||
1851 | spin_unlock(&mle->spinlock); | 1980 | spin_unlock(&mle->spinlock); |
1852 | 1981 | ||
1853 | if (res) { | 1982 | if (res) { |
1983 | int wake = 0; | ||
1854 | spin_lock(&res->spinlock); | 1984 | spin_lock(&res->spinlock); |
1855 | if (mle->type == DLM_MLE_MIGRATION) { | 1985 | if (mle->type == DLM_MLE_MIGRATION) { |
1856 | mlog(0, "finishing off migration of lockres %.*s, " | 1986 | mlog(0, "finishing off migration of lockres %.*s, " |
@@ -1858,12 +1988,16 @@ ok: | |||
1858 | res->lockname.len, res->lockname.name, | 1988 | res->lockname.len, res->lockname.name, |
1859 | dlm->node_num, mle->new_master); | 1989 | dlm->node_num, mle->new_master); |
1860 | res->state &= ~DLM_LOCK_RES_MIGRATING; | 1990 | res->state &= ~DLM_LOCK_RES_MIGRATING; |
1991 | wake = 1; | ||
1861 | dlm_change_lockres_owner(dlm, res, mle->new_master); | 1992 | dlm_change_lockres_owner(dlm, res, mle->new_master); |
1862 | BUG_ON(res->state & DLM_LOCK_RES_DIRTY); | 1993 | BUG_ON(res->state & DLM_LOCK_RES_DIRTY); |
1863 | } else { | 1994 | } else { |
1864 | dlm_change_lockres_owner(dlm, res, mle->master); | 1995 | dlm_change_lockres_owner(dlm, res, mle->master); |
1865 | } | 1996 | } |
1866 | spin_unlock(&res->spinlock); | 1997 | spin_unlock(&res->spinlock); |
1998 | have_lockres_ref = 1; | ||
1999 | if (wake) | ||
2000 | wake_up(&res->wq); | ||
1867 | } | 2001 | } |
1868 | 2002 | ||
1869 | /* master is known, detach if not already detached. | 2003 | /* master is known, detach if not already detached. |
@@ -1913,12 +2047,28 @@ ok: | |||
1913 | 2047 | ||
1914 | done: | 2048 | done: |
1915 | ret = 0; | 2049 | ret = 0; |
1916 | if (res) | 2050 | if (res) { |
1917 | dlm_lockres_put(res); | 2051 | spin_lock(&res->spinlock); |
2052 | res->state |= DLM_LOCK_RES_SETREF_INPROG; | ||
2053 | spin_unlock(&res->spinlock); | ||
2054 | *ret_data = (void *)res; | ||
2055 | } | ||
1918 | dlm_put(dlm); | 2056 | dlm_put(dlm); |
1919 | if (master_request) { | 2057 | if (master_request) { |
1920 | mlog(0, "need to tell master to reassert\n"); | 2058 | mlog(0, "need to tell master to reassert\n"); |
1921 | ret = EAGAIN; // positive. negative would shoot down the node. | 2059 | /* positive. negative would shoot down the node. */ |
2060 | ret |= DLM_ASSERT_RESPONSE_REASSERT; | ||
2061 | if (!have_lockres_ref) { | ||
2062 | mlog(ML_ERROR, "strange, got assert from %u, MASTER " | ||
2063 | "mle present here for %s:%.*s, but no lockres!\n", | ||
2064 | assert->node_idx, dlm->name, namelen, name); | ||
2065 | } | ||
2066 | } | ||
2067 | if (have_lockres_ref) { | ||
2068 | /* let the master know we have a reference to the lockres */ | ||
2069 | ret |= DLM_ASSERT_RESPONSE_MASTERY_REF; | ||
2070 | mlog(0, "%s:%.*s: got assert from %u, need a ref\n", | ||
2071 | dlm->name, namelen, name, assert->node_idx); | ||
1922 | } | 2072 | } |
1923 | return ret; | 2073 | return ret; |
1924 | 2074 | ||
@@ -1929,11 +2079,25 @@ kill: | |||
1929 | __dlm_print_one_lock_resource(res); | 2079 | __dlm_print_one_lock_resource(res); |
1930 | spin_unlock(&res->spinlock); | 2080 | spin_unlock(&res->spinlock); |
1931 | spin_unlock(&dlm->spinlock); | 2081 | spin_unlock(&dlm->spinlock); |
1932 | dlm_lockres_put(res); | 2082 | *ret_data = (void *)res; |
1933 | dlm_put(dlm); | 2083 | dlm_put(dlm); |
1934 | return -EINVAL; | 2084 | return -EINVAL; |
1935 | } | 2085 | } |
1936 | 2086 | ||
2087 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data) | ||
2088 | { | ||
2089 | struct dlm_lock_resource *res = (struct dlm_lock_resource *)ret_data; | ||
2090 | |||
2091 | if (ret_data) { | ||
2092 | spin_lock(&res->spinlock); | ||
2093 | res->state &= ~DLM_LOCK_RES_SETREF_INPROG; | ||
2094 | spin_unlock(&res->spinlock); | ||
2095 | wake_up(&res->wq); | ||
2096 | dlm_lockres_put(res); | ||
2097 | } | ||
2098 | return; | ||
2099 | } | ||
2100 | |||
1937 | int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, | 2101 | int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, |
1938 | struct dlm_lock_resource *res, | 2102 | struct dlm_lock_resource *res, |
1939 | int ignore_higher, u8 request_from, u32 flags) | 2103 | int ignore_higher, u8 request_from, u32 flags) |
@@ -2023,9 +2187,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) | |||
2023 | * even if one or more nodes die */ | 2187 | * even if one or more nodes die */ |
2024 | mlog(0, "worker about to master %.*s here, this=%u\n", | 2188 | mlog(0, "worker about to master %.*s here, this=%u\n", |
2025 | res->lockname.len, res->lockname.name, dlm->node_num); | 2189 | res->lockname.len, res->lockname.name, dlm->node_num); |
2026 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 2190 | ret = dlm_do_assert_master(dlm, res, nodemap, flags); |
2027 | res->lockname.len, | ||
2028 | nodemap, flags); | ||
2029 | if (ret < 0) { | 2191 | if (ret < 0) { |
2030 | /* no need to restart, we are done */ | 2192 | /* no need to restart, we are done */ |
2031 | if (!dlm_is_host_down(ret)) | 2193 | if (!dlm_is_host_down(ret)) |
@@ -2097,14 +2259,180 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, | |||
2097 | return ret; | 2259 | return ret; |
2098 | } | 2260 | } |
2099 | 2261 | ||
2262 | /* | ||
2263 | * DLM_DEREF_LOCKRES_MSG | ||
2264 | */ | ||
2265 | |||
2266 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | ||
2267 | { | ||
2268 | struct dlm_deref_lockres deref; | ||
2269 | int ret = 0, r; | ||
2270 | const char *lockname; | ||
2271 | unsigned int namelen; | ||
2272 | |||
2273 | lockname = res->lockname.name; | ||
2274 | namelen = res->lockname.len; | ||
2275 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | ||
2276 | |||
2277 | mlog(0, "%s:%.*s: sending deref to %d\n", | ||
2278 | dlm->name, namelen, lockname, res->owner); | ||
2279 | memset(&deref, 0, sizeof(deref)); | ||
2280 | deref.node_idx = dlm->node_num; | ||
2281 | deref.namelen = namelen; | ||
2282 | memcpy(deref.name, lockname, namelen); | ||
2283 | |||
2284 | ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, | ||
2285 | &deref, sizeof(deref), res->owner, &r); | ||
2286 | if (ret < 0) | ||
2287 | mlog_errno(ret); | ||
2288 | else if (r < 0) { | ||
2289 | /* BAD. other node says I did not have a ref. */ | ||
2290 | mlog(ML_ERROR,"while dropping ref on %s:%.*s " | ||
2291 | "(master=%u) got %d.\n", dlm->name, namelen, | ||
2292 | lockname, res->owner, r); | ||
2293 | dlm_print_one_lock_resource(res); | ||
2294 | BUG(); | ||
2295 | } | ||
2296 | return ret; | ||
2297 | } | ||
2298 | |||
2299 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | ||
2300 | void **ret_data) | ||
2301 | { | ||
2302 | struct dlm_ctxt *dlm = data; | ||
2303 | struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf; | ||
2304 | struct dlm_lock_resource *res = NULL; | ||
2305 | char *name; | ||
2306 | unsigned int namelen; | ||
2307 | int ret = -EINVAL; | ||
2308 | u8 node; | ||
2309 | unsigned int hash; | ||
2310 | struct dlm_work_item *item; | ||
2311 | int cleared = 0; | ||
2312 | int dispatch = 0; | ||
2313 | |||
2314 | if (!dlm_grab(dlm)) | ||
2315 | return 0; | ||
2316 | |||
2317 | name = deref->name; | ||
2318 | namelen = deref->namelen; | ||
2319 | node = deref->node_idx; | ||
2320 | |||
2321 | if (namelen > DLM_LOCKID_NAME_MAX) { | ||
2322 | mlog(ML_ERROR, "Invalid name length!"); | ||
2323 | goto done; | ||
2324 | } | ||
2325 | if (deref->node_idx >= O2NM_MAX_NODES) { | ||
2326 | mlog(ML_ERROR, "Invalid node number: %u\n", node); | ||
2327 | goto done; | ||
2328 | } | ||
2329 | |||
2330 | hash = dlm_lockid_hash(name, namelen); | ||
2331 | |||
2332 | spin_lock(&dlm->spinlock); | ||
2333 | res = __dlm_lookup_lockres_full(dlm, name, namelen, hash); | ||
2334 | if (!res) { | ||
2335 | spin_unlock(&dlm->spinlock); | ||
2336 | mlog(ML_ERROR, "%s:%.*s: bad lockres name\n", | ||
2337 | dlm->name, namelen, name); | ||
2338 | goto done; | ||
2339 | } | ||
2340 | spin_unlock(&dlm->spinlock); | ||
2341 | |||
2342 | spin_lock(&res->spinlock); | ||
2343 | if (res->state & DLM_LOCK_RES_SETREF_INPROG) | ||
2344 | dispatch = 1; | ||
2345 | else { | ||
2346 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | ||
2347 | if (test_bit(node, res->refmap)) { | ||
2348 | dlm_lockres_clear_refmap_bit(node, res); | ||
2349 | cleared = 1; | ||
2350 | } | ||
2351 | } | ||
2352 | spin_unlock(&res->spinlock); | ||
2353 | |||
2354 | if (!dispatch) { | ||
2355 | if (cleared) | ||
2356 | dlm_lockres_calc_usage(dlm, res); | ||
2357 | else { | ||
2358 | mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " | ||
2359 | "but it is already dropped!\n", dlm->name, | ||
2360 | res->lockname.len, res->lockname.name, node); | ||
2361 | __dlm_print_one_lock_resource(res); | ||
2362 | } | ||
2363 | ret = 0; | ||
2364 | goto done; | ||
2365 | } | ||
2366 | |||
2367 | item = kzalloc(sizeof(*item), GFP_NOFS); | ||
2368 | if (!item) { | ||
2369 | ret = -ENOMEM; | ||
2370 | mlog_errno(ret); | ||
2371 | goto done; | ||
2372 | } | ||
2373 | |||
2374 | dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL); | ||
2375 | item->u.dl.deref_res = res; | ||
2376 | item->u.dl.deref_node = node; | ||
2377 | |||
2378 | spin_lock(&dlm->work_lock); | ||
2379 | list_add_tail(&item->list, &dlm->work_list); | ||
2380 | spin_unlock(&dlm->work_lock); | ||
2381 | |||
2382 | queue_work(dlm->dlm_worker, &dlm->dispatched_work); | ||
2383 | return 0; | ||
2384 | |||
2385 | done: | ||
2386 | if (res) | ||
2387 | dlm_lockres_put(res); | ||
2388 | dlm_put(dlm); | ||
2389 | |||
2390 | return ret; | ||
2391 | } | ||
2392 | |||
2393 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | ||
2394 | { | ||
2395 | struct dlm_ctxt *dlm; | ||
2396 | struct dlm_lock_resource *res; | ||
2397 | u8 node; | ||
2398 | u8 cleared = 0; | ||
2399 | |||
2400 | dlm = item->dlm; | ||
2401 | res = item->u.dl.deref_res; | ||
2402 | node = item->u.dl.deref_node; | ||
2403 | |||
2404 | spin_lock(&res->spinlock); | ||
2405 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | ||
2406 | if (test_bit(node, res->refmap)) { | ||
2407 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); | ||
2408 | dlm_lockres_clear_refmap_bit(node, res); | ||
2409 | cleared = 1; | ||
2410 | } | ||
2411 | spin_unlock(&res->spinlock); | ||
2412 | |||
2413 | if (cleared) { | ||
2414 | mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", | ||
2415 | dlm->name, res->lockname.len, res->lockname.name, node); | ||
2416 | dlm_lockres_calc_usage(dlm, res); | ||
2417 | } else { | ||
2418 | mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " | ||
2419 | "but it is already dropped!\n", dlm->name, | ||
2420 | res->lockname.len, res->lockname.name, node); | ||
2421 | __dlm_print_one_lock_resource(res); | ||
2422 | } | ||
2423 | |||
2424 | dlm_lockres_put(res); | ||
2425 | } | ||
2426 | |||
2100 | 2427 | ||
2101 | /* | 2428 | /* |
2102 | * DLM_MIGRATE_LOCKRES | 2429 | * DLM_MIGRATE_LOCKRES |
2103 | */ | 2430 | */ |
2104 | 2431 | ||
2105 | 2432 | ||
2106 | int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 2433 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, |
2107 | u8 target) | 2434 | struct dlm_lock_resource *res, |
2435 | u8 target) | ||
2108 | { | 2436 | { |
2109 | struct dlm_master_list_entry *mle = NULL; | 2437 | struct dlm_master_list_entry *mle = NULL; |
2110 | struct dlm_master_list_entry *oldmle = NULL; | 2438 | struct dlm_master_list_entry *oldmle = NULL; |
@@ -2116,7 +2444,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2116 | struct list_head *queue, *iter; | 2444 | struct list_head *queue, *iter; |
2117 | int i; | 2445 | int i; |
2118 | struct dlm_lock *lock; | 2446 | struct dlm_lock *lock; |
2119 | int empty = 1; | 2447 | int empty = 1, wake = 0; |
2120 | 2448 | ||
2121 | if (!dlm_grab(dlm)) | 2449 | if (!dlm_grab(dlm)) |
2122 | return -EINVAL; | 2450 | return -EINVAL; |
@@ -2241,6 +2569,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2241 | res->lockname.name, target); | 2569 | res->lockname.name, target); |
2242 | spin_lock(&res->spinlock); | 2570 | spin_lock(&res->spinlock); |
2243 | res->state &= ~DLM_LOCK_RES_MIGRATING; | 2571 | res->state &= ~DLM_LOCK_RES_MIGRATING; |
2572 | wake = 1; | ||
2244 | spin_unlock(&res->spinlock); | 2573 | spin_unlock(&res->spinlock); |
2245 | ret = -EINVAL; | 2574 | ret = -EINVAL; |
2246 | } | 2575 | } |
@@ -2268,6 +2597,9 @@ fail: | |||
2268 | * the lockres | 2597 | * the lockres |
2269 | */ | 2598 | */ |
2270 | 2599 | ||
2600 | /* now that remote nodes are spinning on the MIGRATING flag, | ||
2601 | * ensure that all assert_master work is flushed. */ | ||
2602 | flush_workqueue(dlm->dlm_worker); | ||
2271 | 2603 | ||
2272 | /* get an extra reference on the mle. | 2604 | /* get an extra reference on the mle. |
2273 | * otherwise the assert_master from the new | 2605 | * otherwise the assert_master from the new |
@@ -2296,6 +2628,7 @@ fail: | |||
2296 | dlm_put_mle_inuse(mle); | 2628 | dlm_put_mle_inuse(mle); |
2297 | spin_lock(&res->spinlock); | 2629 | spin_lock(&res->spinlock); |
2298 | res->state &= ~DLM_LOCK_RES_MIGRATING; | 2630 | res->state &= ~DLM_LOCK_RES_MIGRATING; |
2631 | wake = 1; | ||
2299 | spin_unlock(&res->spinlock); | 2632 | spin_unlock(&res->spinlock); |
2300 | goto leave; | 2633 | goto leave; |
2301 | } | 2634 | } |
@@ -2322,7 +2655,8 @@ fail: | |||
2322 | res->owner == target) | 2655 | res->owner == target) |
2323 | break; | 2656 | break; |
2324 | 2657 | ||
2325 | mlog(0, "timed out during migration\n"); | 2658 | mlog(0, "%s:%.*s: timed out during migration\n", |
2659 | dlm->name, res->lockname.len, res->lockname.name); | ||
2326 | /* avoid hang during shutdown when migrating lockres | 2660 | /* avoid hang during shutdown when migrating lockres |
2327 | * to a node which also goes down */ | 2661 | * to a node which also goes down */ |
2328 | if (dlm_is_node_dead(dlm, target)) { | 2662 | if (dlm_is_node_dead(dlm, target)) { |
@@ -2330,20 +2664,20 @@ fail: | |||
2330 | "target %u is no longer up, restarting\n", | 2664 | "target %u is no longer up, restarting\n", |
2331 | dlm->name, res->lockname.len, | 2665 | dlm->name, res->lockname.len, |
2332 | res->lockname.name, target); | 2666 | res->lockname.name, target); |
2333 | ret = -ERESTARTSYS; | 2667 | ret = -EINVAL; |
2668 | /* migration failed, detach and clean up mle */ | ||
2669 | dlm_mle_detach_hb_events(dlm, mle); | ||
2670 | dlm_put_mle(mle); | ||
2671 | dlm_put_mle_inuse(mle); | ||
2672 | spin_lock(&res->spinlock); | ||
2673 | res->state &= ~DLM_LOCK_RES_MIGRATING; | ||
2674 | wake = 1; | ||
2675 | spin_unlock(&res->spinlock); | ||
2676 | goto leave; | ||
2334 | } | 2677 | } |
2335 | } | 2678 | } else |
2336 | if (ret == -ERESTARTSYS) { | 2679 | mlog(0, "%s:%.*s: caught signal during migration\n", |
2337 | /* migration failed, detach and clean up mle */ | 2680 | dlm->name, res->lockname.len, res->lockname.name); |
2338 | dlm_mle_detach_hb_events(dlm, mle); | ||
2339 | dlm_put_mle(mle); | ||
2340 | dlm_put_mle_inuse(mle); | ||
2341 | spin_lock(&res->spinlock); | ||
2342 | res->state &= ~DLM_LOCK_RES_MIGRATING; | ||
2343 | spin_unlock(&res->spinlock); | ||
2344 | goto leave; | ||
2345 | } | ||
2346 | /* TODO: if node died: stop, clean up, return error */ | ||
2347 | } | 2681 | } |
2348 | 2682 | ||
2349 | /* all done, set the owner, clear the flag */ | 2683 | /* all done, set the owner, clear the flag */ |
@@ -2366,6 +2700,11 @@ leave: | |||
2366 | if (ret < 0) | 2700 | if (ret < 0) |
2367 | dlm_kick_thread(dlm, res); | 2701 | dlm_kick_thread(dlm, res); |
2368 | 2702 | ||
2703 | /* wake up waiters if the MIGRATING flag got set | ||
2704 | * but migration failed */ | ||
2705 | if (wake) | ||
2706 | wake_up(&res->wq); | ||
2707 | |||
2369 | /* TODO: cleanup */ | 2708 | /* TODO: cleanup */ |
2370 | if (mres) | 2709 | if (mres) |
2371 | free_page((unsigned long)mres); | 2710 | free_page((unsigned long)mres); |
@@ -2376,6 +2715,53 @@ leave: | |||
2376 | return ret; | 2715 | return ret; |
2377 | } | 2716 | } |
2378 | 2717 | ||
2718 | #define DLM_MIGRATION_RETRY_MS 100 | ||
2719 | |||
2720 | /* Should be called only after beginning the domain leave process. | ||
2721 | * There should not be any remaining locks on nonlocal lock resources, | ||
2722 | * and there should be no local locks left on locally mastered resources. | ||
2723 | * | ||
2724 | * Called with the dlm spinlock held, may drop it to do migration, but | ||
2725 | * will re-acquire before exit. | ||
2726 | * | ||
2727 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ | ||
2728 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | ||
2729 | { | ||
2730 | int ret; | ||
2731 | int lock_dropped = 0; | ||
2732 | |||
2733 | if (res->owner != dlm->node_num) { | ||
2734 | if (!__dlm_lockres_unused(res)) { | ||
2735 | mlog(ML_ERROR, "%s:%.*s: this node is not master, " | ||
2736 | "trying to free this but locks remain\n", | ||
2737 | dlm->name, res->lockname.len, res->lockname.name); | ||
2738 | } | ||
2739 | goto leave; | ||
2740 | } | ||
2741 | |||
2742 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ | ||
2743 | spin_unlock(&dlm->spinlock); | ||
2744 | lock_dropped = 1; | ||
2745 | while (1) { | ||
2746 | ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); | ||
2747 | if (ret >= 0) | ||
2748 | break; | ||
2749 | if (ret == -ENOTEMPTY) { | ||
2750 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
2751 | res->lockname.len, res->lockname.name); | ||
2752 | BUG(); | ||
2753 | } | ||
2754 | |||
2755 | mlog(0, "lockres %.*s: migrate failed, " | ||
2756 | "retrying\n", res->lockname.len, | ||
2757 | res->lockname.name); | ||
2758 | msleep(DLM_MIGRATION_RETRY_MS); | ||
2759 | } | ||
2760 | spin_lock(&dlm->spinlock); | ||
2761 | leave: | ||
2762 | return lock_dropped; | ||
2763 | } | ||
2764 | |||
2379 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) | 2765 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) |
2380 | { | 2766 | { |
2381 | int ret; | 2767 | int ret; |
@@ -2405,7 +2791,8 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm, | |||
2405 | return can_proceed; | 2791 | return can_proceed; |
2406 | } | 2792 | } |
2407 | 2793 | ||
2408 | int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | 2794 | static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, |
2795 | struct dlm_lock_resource *res) | ||
2409 | { | 2796 | { |
2410 | int ret; | 2797 | int ret; |
2411 | spin_lock(&res->spinlock); | 2798 | spin_lock(&res->spinlock); |
@@ -2434,8 +2821,15 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, | |||
2434 | __dlm_lockres_reserve_ast(res); | 2821 | __dlm_lockres_reserve_ast(res); |
2435 | spin_unlock(&res->spinlock); | 2822 | spin_unlock(&res->spinlock); |
2436 | 2823 | ||
2437 | /* now flush all the pending asts.. hang out for a bit */ | 2824 | /* now flush all the pending asts */ |
2438 | dlm_kick_thread(dlm, res); | 2825 | dlm_kick_thread(dlm, res); |
2826 | /* before waiting on DIRTY, block processes which may | ||
2827 | * try to dirty the lockres before MIGRATING is set */ | ||
2828 | spin_lock(&res->spinlock); | ||
2829 | BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY); | ||
2830 | res->state |= DLM_LOCK_RES_BLOCK_DIRTY; | ||
2831 | spin_unlock(&res->spinlock); | ||
2832 | /* now wait on any pending asts and the DIRTY state */ | ||
2439 | wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); | 2833 | wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); |
2440 | dlm_lockres_release_ast(dlm, res); | 2834 | dlm_lockres_release_ast(dlm, res); |
2441 | 2835 | ||
@@ -2461,6 +2855,13 @@ again: | |||
2461 | mlog(0, "trying again...\n"); | 2855 | mlog(0, "trying again...\n"); |
2462 | goto again; | 2856 | goto again; |
2463 | } | 2857 | } |
2858 | /* now that we are sure the MIGRATING state is there, drop | ||
2859 | * the unneded state which blocked threads trying to DIRTY */ | ||
2860 | spin_lock(&res->spinlock); | ||
2861 | BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); | ||
2862 | BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); | ||
2863 | res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; | ||
2864 | spin_unlock(&res->spinlock); | ||
2464 | 2865 | ||
2465 | /* did the target go down or die? */ | 2866 | /* did the target go down or die? */ |
2466 | spin_lock(&dlm->spinlock); | 2867 | spin_lock(&dlm->spinlock); |
@@ -2490,7 +2891,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
2490 | { | 2891 | { |
2491 | struct list_head *iter, *iter2; | 2892 | struct list_head *iter, *iter2; |
2492 | struct list_head *queue = &res->granted; | 2893 | struct list_head *queue = &res->granted; |
2493 | int i; | 2894 | int i, bit; |
2494 | struct dlm_lock *lock; | 2895 | struct dlm_lock *lock; |
2495 | 2896 | ||
2496 | assert_spin_locked(&res->spinlock); | 2897 | assert_spin_locked(&res->spinlock); |
@@ -2508,12 +2909,28 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
2508 | BUG_ON(!list_empty(&lock->bast_list)); | 2909 | BUG_ON(!list_empty(&lock->bast_list)); |
2509 | BUG_ON(lock->ast_pending); | 2910 | BUG_ON(lock->ast_pending); |
2510 | BUG_ON(lock->bast_pending); | 2911 | BUG_ON(lock->bast_pending); |
2912 | dlm_lockres_clear_refmap_bit(lock->ml.node, res); | ||
2511 | list_del_init(&lock->list); | 2913 | list_del_init(&lock->list); |
2512 | dlm_lock_put(lock); | 2914 | dlm_lock_put(lock); |
2513 | } | 2915 | } |
2514 | } | 2916 | } |
2515 | queue++; | 2917 | queue++; |
2516 | } | 2918 | } |
2919 | bit = 0; | ||
2920 | while (1) { | ||
2921 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | ||
2922 | if (bit >= O2NM_MAX_NODES) | ||
2923 | break; | ||
2924 | /* do not clear the local node reference, if there is a | ||
2925 | * process holding this, let it drop the ref itself */ | ||
2926 | if (bit != dlm->node_num) { | ||
2927 | mlog(0, "%s:%.*s: node %u had a ref to this " | ||
2928 | "migrating lockres, clearing\n", dlm->name, | ||
2929 | res->lockname.len, res->lockname.name, bit); | ||
2930 | dlm_lockres_clear_refmap_bit(bit, res); | ||
2931 | } | ||
2932 | bit++; | ||
2933 | } | ||
2517 | } | 2934 | } |
2518 | 2935 | ||
2519 | /* for now this is not too intelligent. we will | 2936 | /* for now this is not too intelligent. we will |
@@ -2601,6 +3018,16 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | |||
2601 | mlog(0, "migrate request (node %u) returned %d!\n", | 3018 | mlog(0, "migrate request (node %u) returned %d!\n", |
2602 | nodenum, status); | 3019 | nodenum, status); |
2603 | ret = status; | 3020 | ret = status; |
3021 | } else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) { | ||
3022 | /* during the migration request we short-circuited | ||
3023 | * the mastery of the lockres. make sure we have | ||
3024 | * a mastery ref for nodenum */ | ||
3025 | mlog(0, "%s:%.*s: need ref for node %u\n", | ||
3026 | dlm->name, res->lockname.len, res->lockname.name, | ||
3027 | nodenum); | ||
3028 | spin_lock(&res->spinlock); | ||
3029 | dlm_lockres_set_refmap_bit(nodenum, res); | ||
3030 | spin_unlock(&res->spinlock); | ||
2604 | } | 3031 | } |
2605 | } | 3032 | } |
2606 | 3033 | ||
@@ -2619,7 +3046,8 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | |||
2619 | * we will have no mle in the list to start with. now we can add an mle for | 3046 | * we will have no mle in the list to start with. now we can add an mle for |
2620 | * the migration and this should be the only one found for those scanning the | 3047 | * the migration and this should be the only one found for those scanning the |
2621 | * list. */ | 3048 | * list. */ |
2622 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data) | 3049 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, |
3050 | void **ret_data) | ||
2623 | { | 3051 | { |
2624 | struct dlm_ctxt *dlm = data; | 3052 | struct dlm_ctxt *dlm = data; |
2625 | struct dlm_lock_resource *res = NULL; | 3053 | struct dlm_lock_resource *res = NULL; |
@@ -2745,7 +3173,13 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
2745 | /* remove it from the list so that only one | 3173 | /* remove it from the list so that only one |
2746 | * mle will be found */ | 3174 | * mle will be found */ |
2747 | list_del_init(&tmp->list); | 3175 | list_del_init(&tmp->list); |
2748 | __dlm_mle_detach_hb_events(dlm, mle); | 3176 | /* this was obviously WRONG. mle is uninited here. should be tmp. */ |
3177 | __dlm_mle_detach_hb_events(dlm, tmp); | ||
3178 | ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; | ||
3179 | mlog(0, "%s:%.*s: master=%u, newmaster=%u, " | ||
3180 | "telling master to get ref for cleared out mle " | ||
3181 | "during migration\n", dlm->name, namelen, name, | ||
3182 | master, new_master); | ||
2749 | } | 3183 | } |
2750 | spin_unlock(&tmp->spinlock); | 3184 | spin_unlock(&tmp->spinlock); |
2751 | } | 3185 | } |
@@ -2753,6 +3187,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
2753 | /* now add a migration mle to the tail of the list */ | 3187 | /* now add a migration mle to the tail of the list */ |
2754 | dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen); | 3188 | dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen); |
2755 | mle->new_master = new_master; | 3189 | mle->new_master = new_master; |
3190 | /* the new master will be sending an assert master for this. | ||
3191 | * at that point we will get the refmap reference */ | ||
2756 | mle->master = master; | 3192 | mle->master = master; |
2757 | /* do this for consistency with other mle types */ | 3193 | /* do this for consistency with other mle types */ |
2758 | set_bit(new_master, mle->maybe_map); | 3194 | set_bit(new_master, mle->maybe_map); |
@@ -2902,6 +3338,13 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2902 | clear_bit(dlm->node_num, iter.node_map); | 3338 | clear_bit(dlm->node_num, iter.node_map); |
2903 | spin_unlock(&dlm->spinlock); | 3339 | spin_unlock(&dlm->spinlock); |
2904 | 3340 | ||
3341 | /* ownership of the lockres is changing. account for the | ||
3342 | * mastery reference here since old_master will briefly have | ||
3343 | * a reference after the migration completes */ | ||
3344 | spin_lock(&res->spinlock); | ||
3345 | dlm_lockres_set_refmap_bit(old_master, res); | ||
3346 | spin_unlock(&res->spinlock); | ||
3347 | |||
2905 | mlog(0, "now time to do a migrate request to other nodes\n"); | 3348 | mlog(0, "now time to do a migrate request to other nodes\n"); |
2906 | ret = dlm_do_migrate_request(dlm, res, old_master, | 3349 | ret = dlm_do_migrate_request(dlm, res, old_master, |
2907 | dlm->node_num, &iter); | 3350 | dlm->node_num, &iter); |
@@ -2914,8 +3357,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2914 | res->lockname.len, res->lockname.name); | 3357 | res->lockname.len, res->lockname.name); |
2915 | /* this call now finishes out the nodemap | 3358 | /* this call now finishes out the nodemap |
2916 | * even if one or more nodes die */ | 3359 | * even if one or more nodes die */ |
2917 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 3360 | ret = dlm_do_assert_master(dlm, res, iter.node_map, |
2918 | res->lockname.len, iter.node_map, | ||
2919 | DLM_ASSERT_MASTER_FINISH_MIGRATION); | 3361 | DLM_ASSERT_MASTER_FINISH_MIGRATION); |
2920 | if (ret < 0) { | 3362 | if (ret < 0) { |
2921 | /* no longer need to retry. all living nodes contacted. */ | 3363 | /* no longer need to retry. all living nodes contacted. */ |
@@ -2927,8 +3369,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2927 | set_bit(old_master, iter.node_map); | 3369 | set_bit(old_master, iter.node_map); |
2928 | mlog(0, "doing assert master of %.*s back to %u\n", | 3370 | mlog(0, "doing assert master of %.*s back to %u\n", |
2929 | res->lockname.len, res->lockname.name, old_master); | 3371 | res->lockname.len, res->lockname.name, old_master); |
2930 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 3372 | ret = dlm_do_assert_master(dlm, res, iter.node_map, |
2931 | res->lockname.len, iter.node_map, | ||
2932 | DLM_ASSERT_MASTER_FINISH_MIGRATION); | 3373 | DLM_ASSERT_MASTER_FINISH_MIGRATION); |
2933 | if (ret < 0) { | 3374 | if (ret < 0) { |
2934 | mlog(0, "assert master to original master failed " | 3375 | mlog(0, "assert master to original master failed " |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 367a11e9e2ed..6d4a83d50152 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -163,9 +163,6 @@ void dlm_dispatch_work(struct work_struct *work) | |||
163 | dlm_workfunc_t *workfunc; | 163 | dlm_workfunc_t *workfunc; |
164 | int tot=0; | 164 | int tot=0; |
165 | 165 | ||
166 | if (!dlm_joined(dlm)) | ||
167 | return; | ||
168 | |||
169 | spin_lock(&dlm->work_lock); | 166 | spin_lock(&dlm->work_lock); |
170 | list_splice_init(&dlm->work_list, &tmp_list); | 167 | list_splice_init(&dlm->work_list, &tmp_list); |
171 | spin_unlock(&dlm->work_lock); | 168 | spin_unlock(&dlm->work_lock); |
@@ -821,7 +818,8 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
821 | 818 | ||
822 | } | 819 | } |
823 | 820 | ||
824 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data) | 821 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data, |
822 | void **ret_data) | ||
825 | { | 823 | { |
826 | struct dlm_ctxt *dlm = data; | 824 | struct dlm_ctxt *dlm = data; |
827 | struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf; | 825 | struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf; |
@@ -978,7 +976,8 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) | |||
978 | } | 976 | } |
979 | 977 | ||
980 | 978 | ||
981 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data) | 979 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, |
980 | void **ret_data) | ||
982 | { | 981 | { |
983 | struct dlm_ctxt *dlm = data; | 982 | struct dlm_ctxt *dlm = data; |
984 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; | 983 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; |
@@ -1129,6 +1128,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, | |||
1129 | if (total_locks == mres_total_locks) | 1128 | if (total_locks == mres_total_locks) |
1130 | mres->flags |= DLM_MRES_ALL_DONE; | 1129 | mres->flags |= DLM_MRES_ALL_DONE; |
1131 | 1130 | ||
1131 | mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", | ||
1132 | dlm->name, res->lockname.len, res->lockname.name, | ||
1133 | orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", | ||
1134 | send_to); | ||
1135 | |||
1132 | /* send it */ | 1136 | /* send it */ |
1133 | ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, | 1137 | ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, |
1134 | sz, send_to, &status); | 1138 | sz, send_to, &status); |
@@ -1213,6 +1217,34 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock, | |||
1213 | return 0; | 1217 | return 0; |
1214 | } | 1218 | } |
1215 | 1219 | ||
1220 | static void dlm_add_dummy_lock(struct dlm_ctxt *dlm, | ||
1221 | struct dlm_migratable_lockres *mres) | ||
1222 | { | ||
1223 | struct dlm_lock dummy; | ||
1224 | memset(&dummy, 0, sizeof(dummy)); | ||
1225 | dummy.ml.cookie = 0; | ||
1226 | dummy.ml.type = LKM_IVMODE; | ||
1227 | dummy.ml.convert_type = LKM_IVMODE; | ||
1228 | dummy.ml.highest_blocked = LKM_IVMODE; | ||
1229 | dummy.lksb = NULL; | ||
1230 | dummy.ml.node = dlm->node_num; | ||
1231 | dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST); | ||
1232 | } | ||
1233 | |||
1234 | static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm, | ||
1235 | struct dlm_migratable_lock *ml, | ||
1236 | u8 *nodenum) | ||
1237 | { | ||
1238 | if (unlikely(ml->cookie == 0 && | ||
1239 | ml->type == LKM_IVMODE && | ||
1240 | ml->convert_type == LKM_IVMODE && | ||
1241 | ml->highest_blocked == LKM_IVMODE && | ||
1242 | ml->list == DLM_BLOCKED_LIST)) { | ||
1243 | *nodenum = ml->node; | ||
1244 | return 1; | ||
1245 | } | ||
1246 | return 0; | ||
1247 | } | ||
1216 | 1248 | ||
1217 | int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 1249 | int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, |
1218 | struct dlm_migratable_lockres *mres, | 1250 | struct dlm_migratable_lockres *mres, |
@@ -1260,6 +1292,14 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
1260 | goto error; | 1292 | goto error; |
1261 | } | 1293 | } |
1262 | } | 1294 | } |
1295 | if (total_locks == 0) { | ||
1296 | /* send a dummy lock to indicate a mastery reference only */ | ||
1297 | mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n", | ||
1298 | dlm->name, res->lockname.len, res->lockname.name, | ||
1299 | send_to, flags & DLM_MRES_RECOVERY ? "recovery" : | ||
1300 | "migration"); | ||
1301 | dlm_add_dummy_lock(dlm, mres); | ||
1302 | } | ||
1263 | /* flush any remaining locks */ | 1303 | /* flush any remaining locks */ |
1264 | ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); | 1304 | ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); |
1265 | if (ret < 0) | 1305 | if (ret < 0) |
@@ -1293,7 +1333,8 @@ error: | |||
1293 | * do we spin? returning an error only delays the problem really | 1333 | * do we spin? returning an error only delays the problem really |
1294 | */ | 1334 | */ |
1295 | 1335 | ||
1296 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) | 1336 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
1337 | void **ret_data) | ||
1297 | { | 1338 | { |
1298 | struct dlm_ctxt *dlm = data; | 1339 | struct dlm_ctxt *dlm = data; |
1299 | struct dlm_migratable_lockres *mres = | 1340 | struct dlm_migratable_lockres *mres = |
@@ -1382,17 +1423,21 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) | |||
1382 | spin_lock(&res->spinlock); | 1423 | spin_lock(&res->spinlock); |
1383 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 1424 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
1384 | spin_unlock(&res->spinlock); | 1425 | spin_unlock(&res->spinlock); |
1426 | wake_up(&res->wq); | ||
1385 | 1427 | ||
1386 | /* add an extra ref for just-allocated lockres | 1428 | /* add an extra ref for just-allocated lockres |
1387 | * otherwise the lockres will be purged immediately */ | 1429 | * otherwise the lockres will be purged immediately */ |
1388 | dlm_lockres_get(res); | 1430 | dlm_lockres_get(res); |
1389 | |||
1390 | } | 1431 | } |
1391 | 1432 | ||
1392 | /* at this point we have allocated everything we need, | 1433 | /* at this point we have allocated everything we need, |
1393 | * and we have a hashed lockres with an extra ref and | 1434 | * and we have a hashed lockres with an extra ref and |
1394 | * the proper res->state flags. */ | 1435 | * the proper res->state flags. */ |
1395 | ret = 0; | 1436 | ret = 0; |
1437 | spin_lock(&res->spinlock); | ||
1438 | /* drop this either when master requery finds a different master | ||
1439 | * or when a lock is added by the recovery worker */ | ||
1440 | dlm_lockres_grab_inflight_ref(dlm, res); | ||
1396 | if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) { | 1441 | if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) { |
1397 | /* migration cannot have an unknown master */ | 1442 | /* migration cannot have an unknown master */ |
1398 | BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); | 1443 | BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); |
@@ -1400,10 +1445,11 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) | |||
1400 | "unknown owner.. will need to requery: " | 1445 | "unknown owner.. will need to requery: " |
1401 | "%.*s\n", mres->lockname_len, mres->lockname); | 1446 | "%.*s\n", mres->lockname_len, mres->lockname); |
1402 | } else { | 1447 | } else { |
1403 | spin_lock(&res->spinlock); | 1448 | /* take a reference now to pin the lockres, drop it |
1449 | * when locks are added in the worker */ | ||
1404 | dlm_change_lockres_owner(dlm, res, dlm->node_num); | 1450 | dlm_change_lockres_owner(dlm, res, dlm->node_num); |
1405 | spin_unlock(&res->spinlock); | ||
1406 | } | 1451 | } |
1452 | spin_unlock(&res->spinlock); | ||
1407 | 1453 | ||
1408 | /* queue up work for dlm_mig_lockres_worker */ | 1454 | /* queue up work for dlm_mig_lockres_worker */ |
1409 | dlm_grab(dlm); /* get an extra ref for the work item */ | 1455 | dlm_grab(dlm); /* get an extra ref for the work item */ |
@@ -1459,6 +1505,9 @@ again: | |||
1459 | "this node will take it.\n", | 1505 | "this node will take it.\n", |
1460 | res->lockname.len, res->lockname.name); | 1506 | res->lockname.len, res->lockname.name); |
1461 | } else { | 1507 | } else { |
1508 | spin_lock(&res->spinlock); | ||
1509 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
1510 | spin_unlock(&res->spinlock); | ||
1462 | mlog(0, "master needs to respond to sender " | 1511 | mlog(0, "master needs to respond to sender " |
1463 | "that node %u still owns %.*s\n", | 1512 | "that node %u still owns %.*s\n", |
1464 | real_master, res->lockname.len, | 1513 | real_master, res->lockname.len, |
@@ -1578,7 +1627,8 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
1578 | /* this function cannot error, so unless the sending | 1627 | /* this function cannot error, so unless the sending |
1579 | * or receiving of the message failed, the owner can | 1628 | * or receiving of the message failed, the owner can |
1580 | * be trusted */ | 1629 | * be trusted */ |
1581 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data) | 1630 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, |
1631 | void **ret_data) | ||
1582 | { | 1632 | { |
1583 | struct dlm_ctxt *dlm = data; | 1633 | struct dlm_ctxt *dlm = data; |
1584 | struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; | 1634 | struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; |
@@ -1660,21 +1710,38 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1660 | { | 1710 | { |
1661 | struct dlm_migratable_lock *ml; | 1711 | struct dlm_migratable_lock *ml; |
1662 | struct list_head *queue; | 1712 | struct list_head *queue; |
1713 | struct list_head *tmpq = NULL; | ||
1663 | struct dlm_lock *newlock = NULL; | 1714 | struct dlm_lock *newlock = NULL; |
1664 | struct dlm_lockstatus *lksb = NULL; | 1715 | struct dlm_lockstatus *lksb = NULL; |
1665 | int ret = 0; | 1716 | int ret = 0; |
1666 | int i, bad; | 1717 | int i, j, bad; |
1667 | struct list_head *iter; | 1718 | struct list_head *iter; |
1668 | struct dlm_lock *lock = NULL; | 1719 | struct dlm_lock *lock = NULL; |
1720 | u8 from = O2NM_MAX_NODES; | ||
1721 | unsigned int added = 0; | ||
1669 | 1722 | ||
1670 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); | 1723 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); |
1671 | for (i=0; i<mres->num_locks; i++) { | 1724 | for (i=0; i<mres->num_locks; i++) { |
1672 | ml = &(mres->ml[i]); | 1725 | ml = &(mres->ml[i]); |
1726 | |||
1727 | if (dlm_is_dummy_lock(dlm, ml, &from)) { | ||
1728 | /* placeholder, just need to set the refmap bit */ | ||
1729 | BUG_ON(mres->num_locks != 1); | ||
1730 | mlog(0, "%s:%.*s: dummy lock for %u\n", | ||
1731 | dlm->name, mres->lockname_len, mres->lockname, | ||
1732 | from); | ||
1733 | spin_lock(&res->spinlock); | ||
1734 | dlm_lockres_set_refmap_bit(from, res); | ||
1735 | spin_unlock(&res->spinlock); | ||
1736 | added++; | ||
1737 | break; | ||
1738 | } | ||
1673 | BUG_ON(ml->highest_blocked != LKM_IVMODE); | 1739 | BUG_ON(ml->highest_blocked != LKM_IVMODE); |
1674 | newlock = NULL; | 1740 | newlock = NULL; |
1675 | lksb = NULL; | 1741 | lksb = NULL; |
1676 | 1742 | ||
1677 | queue = dlm_list_num_to_pointer(res, ml->list); | 1743 | queue = dlm_list_num_to_pointer(res, ml->list); |
1744 | tmpq = NULL; | ||
1678 | 1745 | ||
1679 | /* if the lock is for the local node it needs to | 1746 | /* if the lock is for the local node it needs to |
1680 | * be moved to the proper location within the queue. | 1747 | * be moved to the proper location within the queue. |
@@ -1684,11 +1751,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1684 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); | 1751 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); |
1685 | 1752 | ||
1686 | spin_lock(&res->spinlock); | 1753 | spin_lock(&res->spinlock); |
1687 | list_for_each(iter, queue) { | 1754 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { |
1688 | lock = list_entry (iter, struct dlm_lock, list); | 1755 | tmpq = dlm_list_idx_to_ptr(res, j); |
1689 | if (lock->ml.cookie != ml->cookie) | 1756 | list_for_each(iter, tmpq) { |
1690 | lock = NULL; | 1757 | lock = list_entry (iter, struct dlm_lock, list); |
1691 | else | 1758 | if (lock->ml.cookie != ml->cookie) |
1759 | lock = NULL; | ||
1760 | else | ||
1761 | break; | ||
1762 | } | ||
1763 | if (lock) | ||
1692 | break; | 1764 | break; |
1693 | } | 1765 | } |
1694 | 1766 | ||
@@ -1698,12 +1770,20 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1698 | u64 c = ml->cookie; | 1770 | u64 c = ml->cookie; |
1699 | mlog(ML_ERROR, "could not find local lock " | 1771 | mlog(ML_ERROR, "could not find local lock " |
1700 | "with cookie %u:%llu!\n", | 1772 | "with cookie %u:%llu!\n", |
1701 | dlm_get_lock_cookie_node(c), | 1773 | dlm_get_lock_cookie_node(be64_to_cpu(c)), |
1702 | dlm_get_lock_cookie_seq(c)); | 1774 | dlm_get_lock_cookie_seq(be64_to_cpu(c))); |
1775 | __dlm_print_one_lock_resource(res); | ||
1703 | BUG(); | 1776 | BUG(); |
1704 | } | 1777 | } |
1705 | BUG_ON(lock->ml.node != ml->node); | 1778 | BUG_ON(lock->ml.node != ml->node); |
1706 | 1779 | ||
1780 | if (tmpq != queue) { | ||
1781 | mlog(0, "lock was on %u instead of %u for %.*s\n", | ||
1782 | j, ml->list, res->lockname.len, res->lockname.name); | ||
1783 | spin_unlock(&res->spinlock); | ||
1784 | continue; | ||
1785 | } | ||
1786 | |||
1707 | /* see NOTE above about why we do not update | 1787 | /* see NOTE above about why we do not update |
1708 | * to match the master here */ | 1788 | * to match the master here */ |
1709 | 1789 | ||
@@ -1711,6 +1791,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1711 | /* do not alter lock refcount. switching lists. */ | 1791 | /* do not alter lock refcount. switching lists. */ |
1712 | list_move_tail(&lock->list, queue); | 1792 | list_move_tail(&lock->list, queue); |
1713 | spin_unlock(&res->spinlock); | 1793 | spin_unlock(&res->spinlock); |
1794 | added++; | ||
1714 | 1795 | ||
1715 | mlog(0, "just reordered a local lock!\n"); | 1796 | mlog(0, "just reordered a local lock!\n"); |
1716 | continue; | 1797 | continue; |
@@ -1799,14 +1880,14 @@ skip_lvb: | |||
1799 | mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " | 1880 | mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " |
1800 | "exists on this lockres!\n", dlm->name, | 1881 | "exists on this lockres!\n", dlm->name, |
1801 | res->lockname.len, res->lockname.name, | 1882 | res->lockname.len, res->lockname.name, |
1802 | dlm_get_lock_cookie_node(c), | 1883 | dlm_get_lock_cookie_node(be64_to_cpu(c)), |
1803 | dlm_get_lock_cookie_seq(c)); | 1884 | dlm_get_lock_cookie_seq(be64_to_cpu(c))); |
1804 | 1885 | ||
1805 | mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, " | 1886 | mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, " |
1806 | "node=%u, cookie=%u:%llu, queue=%d\n", | 1887 | "node=%u, cookie=%u:%llu, queue=%d\n", |
1807 | ml->type, ml->convert_type, ml->node, | 1888 | ml->type, ml->convert_type, ml->node, |
1808 | dlm_get_lock_cookie_node(ml->cookie), | 1889 | dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)), |
1809 | dlm_get_lock_cookie_seq(ml->cookie), | 1890 | dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)), |
1810 | ml->list); | 1891 | ml->list); |
1811 | 1892 | ||
1812 | __dlm_print_one_lock_resource(res); | 1893 | __dlm_print_one_lock_resource(res); |
@@ -1817,12 +1898,22 @@ skip_lvb: | |||
1817 | if (!bad) { | 1898 | if (!bad) { |
1818 | dlm_lock_get(newlock); | 1899 | dlm_lock_get(newlock); |
1819 | list_add_tail(&newlock->list, queue); | 1900 | list_add_tail(&newlock->list, queue); |
1901 | mlog(0, "%s:%.*s: added lock for node %u, " | ||
1902 | "setting refmap bit\n", dlm->name, | ||
1903 | res->lockname.len, res->lockname.name, ml->node); | ||
1904 | dlm_lockres_set_refmap_bit(ml->node, res); | ||
1905 | added++; | ||
1820 | } | 1906 | } |
1821 | spin_unlock(&res->spinlock); | 1907 | spin_unlock(&res->spinlock); |
1822 | } | 1908 | } |
1823 | mlog(0, "done running all the locks\n"); | 1909 | mlog(0, "done running all the locks\n"); |
1824 | 1910 | ||
1825 | leave: | 1911 | leave: |
1912 | /* balance the ref taken when the work was queued */ | ||
1913 | spin_lock(&res->spinlock); | ||
1914 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
1915 | spin_unlock(&res->spinlock); | ||
1916 | |||
1826 | if (ret < 0) { | 1917 | if (ret < 0) { |
1827 | mlog_errno(ret); | 1918 | mlog_errno(ret); |
1828 | if (newlock) | 1919 | if (newlock) |
@@ -1935,9 +2026,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
1935 | if (res->owner == dead_node) { | 2026 | if (res->owner == dead_node) { |
1936 | list_del_init(&res->recovering); | 2027 | list_del_init(&res->recovering); |
1937 | spin_lock(&res->spinlock); | 2028 | spin_lock(&res->spinlock); |
2029 | /* new_master has our reference from | ||
2030 | * the lock state sent during recovery */ | ||
1938 | dlm_change_lockres_owner(dlm, res, new_master); | 2031 | dlm_change_lockres_owner(dlm, res, new_master); |
1939 | res->state &= ~DLM_LOCK_RES_RECOVERING; | 2032 | res->state &= ~DLM_LOCK_RES_RECOVERING; |
1940 | if (!__dlm_lockres_unused(res)) | 2033 | if (__dlm_lockres_has_locks(res)) |
1941 | __dlm_dirty_lockres(dlm, res); | 2034 | __dlm_dirty_lockres(dlm, res); |
1942 | spin_unlock(&res->spinlock); | 2035 | spin_unlock(&res->spinlock); |
1943 | wake_up(&res->wq); | 2036 | wake_up(&res->wq); |
@@ -1977,9 +2070,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
1977 | dlm_lockres_put(res); | 2070 | dlm_lockres_put(res); |
1978 | } | 2071 | } |
1979 | spin_lock(&res->spinlock); | 2072 | spin_lock(&res->spinlock); |
2073 | /* new_master has our reference from | ||
2074 | * the lock state sent during recovery */ | ||
1980 | dlm_change_lockres_owner(dlm, res, new_master); | 2075 | dlm_change_lockres_owner(dlm, res, new_master); |
1981 | res->state &= ~DLM_LOCK_RES_RECOVERING; | 2076 | res->state &= ~DLM_LOCK_RES_RECOVERING; |
1982 | if (!__dlm_lockres_unused(res)) | 2077 | if (__dlm_lockres_has_locks(res)) |
1983 | __dlm_dirty_lockres(dlm, res); | 2078 | __dlm_dirty_lockres(dlm, res); |
1984 | spin_unlock(&res->spinlock); | 2079 | spin_unlock(&res->spinlock); |
1985 | wake_up(&res->wq); | 2080 | wake_up(&res->wq); |
@@ -2048,6 +2143,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2048 | { | 2143 | { |
2049 | struct list_head *iter, *tmpiter; | 2144 | struct list_head *iter, *tmpiter; |
2050 | struct dlm_lock *lock; | 2145 | struct dlm_lock *lock; |
2146 | unsigned int freed = 0; | ||
2051 | 2147 | ||
2052 | /* this node is the lockres master: | 2148 | /* this node is the lockres master: |
2053 | * 1) remove any stale locks for the dead node | 2149 | * 1) remove any stale locks for the dead node |
@@ -2062,6 +2158,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2062 | if (lock->ml.node == dead_node) { | 2158 | if (lock->ml.node == dead_node) { |
2063 | list_del_init(&lock->list); | 2159 | list_del_init(&lock->list); |
2064 | dlm_lock_put(lock); | 2160 | dlm_lock_put(lock); |
2161 | freed++; | ||
2065 | } | 2162 | } |
2066 | } | 2163 | } |
2067 | list_for_each_safe(iter, tmpiter, &res->converting) { | 2164 | list_for_each_safe(iter, tmpiter, &res->converting) { |
@@ -2069,6 +2166,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2069 | if (lock->ml.node == dead_node) { | 2166 | if (lock->ml.node == dead_node) { |
2070 | list_del_init(&lock->list); | 2167 | list_del_init(&lock->list); |
2071 | dlm_lock_put(lock); | 2168 | dlm_lock_put(lock); |
2169 | freed++; | ||
2072 | } | 2170 | } |
2073 | } | 2171 | } |
2074 | list_for_each_safe(iter, tmpiter, &res->blocked) { | 2172 | list_for_each_safe(iter, tmpiter, &res->blocked) { |
@@ -2076,9 +2174,23 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2076 | if (lock->ml.node == dead_node) { | 2174 | if (lock->ml.node == dead_node) { |
2077 | list_del_init(&lock->list); | 2175 | list_del_init(&lock->list); |
2078 | dlm_lock_put(lock); | 2176 | dlm_lock_put(lock); |
2177 | freed++; | ||
2079 | } | 2178 | } |
2080 | } | 2179 | } |
2081 | 2180 | ||
2181 | if (freed) { | ||
2182 | mlog(0, "%s:%.*s: freed %u locks for dead node %u, " | ||
2183 | "dropping ref from lockres\n", dlm->name, | ||
2184 | res->lockname.len, res->lockname.name, freed, dead_node); | ||
2185 | BUG_ON(!test_bit(dead_node, res->refmap)); | ||
2186 | dlm_lockres_clear_refmap_bit(dead_node, res); | ||
2187 | } else if (test_bit(dead_node, res->refmap)) { | ||
2188 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | ||
2189 | "no locks and had not purged before dying\n", dlm->name, | ||
2190 | res->lockname.len, res->lockname.name, dead_node); | ||
2191 | dlm_lockres_clear_refmap_bit(dead_node, res); | ||
2192 | } | ||
2193 | |||
2082 | /* do not kick thread yet */ | 2194 | /* do not kick thread yet */ |
2083 | __dlm_dirty_lockres(dlm, res); | 2195 | __dlm_dirty_lockres(dlm, res); |
2084 | } | 2196 | } |
@@ -2141,9 +2253,21 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2141 | spin_lock(&res->spinlock); | 2253 | spin_lock(&res->spinlock); |
2142 | /* zero the lvb if necessary */ | 2254 | /* zero the lvb if necessary */ |
2143 | dlm_revalidate_lvb(dlm, res, dead_node); | 2255 | dlm_revalidate_lvb(dlm, res, dead_node); |
2144 | if (res->owner == dead_node) | 2256 | if (res->owner == dead_node) { |
2257 | if (res->state & DLM_LOCK_RES_DROPPING_REF) | ||
2258 | mlog(0, "%s:%.*s: owned by " | ||
2259 | "dead node %u, this node was " | ||
2260 | "dropping its ref when it died. " | ||
2261 | "continue, dropping the flag.\n", | ||
2262 | dlm->name, res->lockname.len, | ||
2263 | res->lockname.name, dead_node); | ||
2264 | |||
2265 | /* the wake_up for this will happen when the | ||
2266 | * RECOVERING flag is dropped later */ | ||
2267 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
2268 | |||
2145 | dlm_move_lockres_to_recovery_list(dlm, res); | 2269 | dlm_move_lockres_to_recovery_list(dlm, res); |
2146 | else if (res->owner == dlm->node_num) { | 2270 | } else if (res->owner == dlm->node_num) { |
2147 | dlm_free_dead_locks(dlm, res, dead_node); | 2271 | dlm_free_dead_locks(dlm, res, dead_node); |
2148 | __dlm_lockres_calc_usage(dlm, res); | 2272 | __dlm_lockres_calc_usage(dlm, res); |
2149 | } | 2273 | } |
@@ -2480,7 +2604,8 @@ retry: | |||
2480 | return ret; | 2604 | return ret; |
2481 | } | 2605 | } |
2482 | 2606 | ||
2483 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) | 2607 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, |
2608 | void **ret_data) | ||
2484 | { | 2609 | { |
2485 | struct dlm_ctxt *dlm = data; | 2610 | struct dlm_ctxt *dlm = data; |
2486 | struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf; | 2611 | struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf; |
@@ -2608,7 +2733,8 @@ stage2: | |||
2608 | return ret; | 2733 | return ret; |
2609 | } | 2734 | } |
2610 | 2735 | ||
2611 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data) | 2736 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, |
2737 | void **ret_data) | ||
2612 | { | 2738 | { |
2613 | struct dlm_ctxt *dlm = data; | 2739 | struct dlm_ctxt *dlm = data; |
2614 | struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; | 2740 | struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 0c822f3ffb05..8ffa0916eb86 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -54,9 +54,6 @@ | |||
54 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
55 | 55 | ||
56 | static int dlm_thread(void *data); | 56 | static int dlm_thread(void *data); |
57 | static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, | ||
58 | struct dlm_lock_resource *lockres); | ||
59 | |||
60 | static void dlm_flush_asts(struct dlm_ctxt *dlm); | 57 | static void dlm_flush_asts(struct dlm_ctxt *dlm); |
61 | 58 | ||
62 | #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) | 59 | #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) |
@@ -82,14 +79,33 @@ repeat: | |||
82 | current->state = TASK_RUNNING; | 79 | current->state = TASK_RUNNING; |
83 | } | 80 | } |
84 | 81 | ||
85 | 82 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |
86 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | ||
87 | { | 83 | { |
88 | if (list_empty(&res->granted) && | 84 | if (list_empty(&res->granted) && |
89 | list_empty(&res->converting) && | 85 | list_empty(&res->converting) && |
90 | list_empty(&res->blocked) && | 86 | list_empty(&res->blocked)) |
91 | list_empty(&res->dirty)) | 87 | return 0; |
92 | return 1; | 88 | return 1; |
89 | } | ||
90 | |||
91 | /* "unused": the lockres has no locks, is not on the dirty list, | ||
92 | * has no inflight locks (in the gap between mastery and acquiring | ||
93 | * the first lock), and has no bits in its refmap. | ||
94 | * truly ready to be freed. */ | ||
95 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | ||
96 | { | ||
97 | if (!__dlm_lockres_has_locks(res) && | ||
98 | (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { | ||
99 | /* try not to scan the bitmap unless the first two | ||
100 | * conditions are already true */ | ||
101 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | ||
102 | if (bit >= O2NM_MAX_NODES) { | ||
103 | /* since the bit for dlm->node_num is not | ||
104 | * set, inflight_locks better be zero */ | ||
105 | BUG_ON(res->inflight_locks != 0); | ||
106 | return 1; | ||
107 | } | ||
108 | } | ||
93 | return 0; | 109 | return 0; |
94 | } | 110 | } |
95 | 111 | ||
@@ -106,46 +122,21 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
106 | assert_spin_locked(&res->spinlock); | 122 | assert_spin_locked(&res->spinlock); |
107 | 123 | ||
108 | if (__dlm_lockres_unused(res)){ | 124 | if (__dlm_lockres_unused(res)){ |
109 | /* For now, just keep any resource we master */ | ||
110 | if (res->owner == dlm->node_num) | ||
111 | { | ||
112 | if (!list_empty(&res->purge)) { | ||
113 | mlog(0, "we master %s:%.*s, but it is on " | ||
114 | "the purge list. Removing\n", | ||
115 | dlm->name, res->lockname.len, | ||
116 | res->lockname.name); | ||
117 | list_del_init(&res->purge); | ||
118 | dlm->purge_count--; | ||
119 | } | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | if (list_empty(&res->purge)) { | 125 | if (list_empty(&res->purge)) { |
124 | mlog(0, "putting lockres %.*s from purge list\n", | 126 | mlog(0, "putting lockres %.*s:%p onto purge list\n", |
125 | res->lockname.len, res->lockname.name); | 127 | res->lockname.len, res->lockname.name, res); |
126 | 128 | ||
127 | res->last_used = jiffies; | 129 | res->last_used = jiffies; |
130 | dlm_lockres_get(res); | ||
128 | list_add_tail(&res->purge, &dlm->purge_list); | 131 | list_add_tail(&res->purge, &dlm->purge_list); |
129 | dlm->purge_count++; | 132 | dlm->purge_count++; |
130 | |||
131 | /* if this node is not the owner, there is | ||
132 | * no way to keep track of who the owner could be. | ||
133 | * unhash it to avoid serious problems. */ | ||
134 | if (res->owner != dlm->node_num) { | ||
135 | mlog(0, "%s:%.*s: doing immediate " | ||
136 | "purge of lockres owned by %u\n", | ||
137 | dlm->name, res->lockname.len, | ||
138 | res->lockname.name, res->owner); | ||
139 | |||
140 | dlm_purge_lockres_now(dlm, res); | ||
141 | } | ||
142 | } | 133 | } |
143 | } else if (!list_empty(&res->purge)) { | 134 | } else if (!list_empty(&res->purge)) { |
144 | mlog(0, "removing lockres %.*s from purge list, " | 135 | mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n", |
145 | "owner=%u\n", res->lockname.len, res->lockname.name, | 136 | res->lockname.len, res->lockname.name, res, res->owner); |
146 | res->owner); | ||
147 | 137 | ||
148 | list_del_init(&res->purge); | 138 | list_del_init(&res->purge); |
139 | dlm_lockres_put(res); | ||
149 | dlm->purge_count--; | 140 | dlm->purge_count--; |
150 | } | 141 | } |
151 | } | 142 | } |
@@ -163,68 +154,65 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
163 | spin_unlock(&dlm->spinlock); | 154 | spin_unlock(&dlm->spinlock); |
164 | } | 155 | } |
165 | 156 | ||
166 | /* TODO: Eventual API: Called with the dlm spinlock held, may drop it | 157 | static int dlm_purge_lockres(struct dlm_ctxt *dlm, |
167 | * to do migration, but will re-acquire before exit. */ | 158 | struct dlm_lock_resource *res) |
168 | void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres) | ||
169 | { | 159 | { |
170 | int master; | 160 | int master; |
171 | int ret; | 161 | int ret = 0; |
172 | |||
173 | spin_lock(&lockres->spinlock); | ||
174 | master = lockres->owner == dlm->node_num; | ||
175 | spin_unlock(&lockres->spinlock); | ||
176 | 162 | ||
177 | mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len, | 163 | spin_lock(&res->spinlock); |
178 | lockres->lockname.name, master); | 164 | if (!__dlm_lockres_unused(res)) { |
179 | 165 | spin_unlock(&res->spinlock); | |
180 | /* Non master is the easy case -- no migration required, just | 166 | mlog(0, "%s:%.*s: tried to purge but not unused\n", |
181 | * quit. */ | 167 | dlm->name, res->lockname.len, res->lockname.name); |
168 | return -ENOTEMPTY; | ||
169 | } | ||
170 | master = (res->owner == dlm->node_num); | ||
182 | if (!master) | 171 | if (!master) |
183 | goto finish; | 172 | res->state |= DLM_LOCK_RES_DROPPING_REF; |
184 | 173 | spin_unlock(&res->spinlock); | |
185 | /* Wheee! Migrate lockres here! */ | ||
186 | spin_unlock(&dlm->spinlock); | ||
187 | again: | ||
188 | 174 | ||
189 | ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES); | 175 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
190 | if (ret == -ENOTEMPTY) { | 176 | res->lockname.name, master); |
191 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
192 | lockres->lockname.len, lockres->lockname.name); | ||
193 | 177 | ||
194 | BUG(); | 178 | if (!master) { |
195 | } else if (ret < 0) { | 179 | spin_lock(&res->spinlock); |
196 | mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", | 180 | /* This ensures that clear refmap is sent after the set */ |
197 | lockres->lockname.len, lockres->lockname.name); | 181 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); |
198 | msleep(100); | 182 | spin_unlock(&res->spinlock); |
199 | goto again; | 183 | /* drop spinlock to do messaging, retake below */ |
184 | spin_unlock(&dlm->spinlock); | ||
185 | /* clear our bit from the master's refmap, ignore errors */ | ||
186 | ret = dlm_drop_lockres_ref(dlm, res); | ||
187 | if (ret < 0) { | ||
188 | mlog_errno(ret); | ||
189 | if (!dlm_is_host_down(ret)) | ||
190 | BUG(); | ||
191 | } | ||
192 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | ||
193 | dlm->name, res->lockname.len, res->lockname.name, ret); | ||
194 | spin_lock(&dlm->spinlock); | ||
200 | } | 195 | } |
201 | 196 | ||
202 | spin_lock(&dlm->spinlock); | 197 | if (!list_empty(&res->purge)) { |
203 | 198 | mlog(0, "removing lockres %.*s:%p from purgelist, " | |
204 | finish: | 199 | "master = %d\n", res->lockname.len, res->lockname.name, |
205 | if (!list_empty(&lockres->purge)) { | 200 | res, master); |
206 | list_del_init(&lockres->purge); | 201 | list_del_init(&res->purge); |
202 | dlm_lockres_put(res); | ||
207 | dlm->purge_count--; | 203 | dlm->purge_count--; |
208 | } | 204 | } |
209 | __dlm_unhash_lockres(lockres); | 205 | __dlm_unhash_lockres(res); |
210 | } | ||
211 | |||
212 | /* make an unused lockres go away immediately. | ||
213 | * as soon as the dlm spinlock is dropped, this lockres | ||
214 | * will not be found. kfree still happens on last put. */ | ||
215 | static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, | ||
216 | struct dlm_lock_resource *lockres) | ||
217 | { | ||
218 | assert_spin_locked(&dlm->spinlock); | ||
219 | assert_spin_locked(&lockres->spinlock); | ||
220 | 206 | ||
221 | BUG_ON(!__dlm_lockres_unused(lockres)); | 207 | /* lockres is not in the hash now. drop the flag and wake up |
222 | 208 | * any processes waiting in dlm_get_lock_resource. */ | |
223 | if (!list_empty(&lockres->purge)) { | 209 | if (!master) { |
224 | list_del_init(&lockres->purge); | 210 | spin_lock(&res->spinlock); |
225 | dlm->purge_count--; | 211 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
212 | spin_unlock(&res->spinlock); | ||
213 | wake_up(&res->wq); | ||
226 | } | 214 | } |
227 | __dlm_unhash_lockres(lockres); | 215 | return 0; |
228 | } | 216 | } |
229 | 217 | ||
230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 218 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
@@ -268,13 +256,17 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
268 | break; | 256 | break; |
269 | } | 257 | } |
270 | 258 | ||
259 | mlog(0, "removing lockres %.*s:%p from purgelist\n", | ||
260 | lockres->lockname.len, lockres->lockname.name, lockres); | ||
271 | list_del_init(&lockres->purge); | 261 | list_del_init(&lockres->purge); |
262 | dlm_lockres_put(lockres); | ||
272 | dlm->purge_count--; | 263 | dlm->purge_count--; |
273 | 264 | ||
274 | /* This may drop and reacquire the dlm spinlock if it | 265 | /* This may drop and reacquire the dlm spinlock if it |
275 | * has to do migration. */ | 266 | * has to do migration. */ |
276 | mlog(0, "calling dlm_purge_lockres!\n"); | 267 | mlog(0, "calling dlm_purge_lockres!\n"); |
277 | dlm_purge_lockres(dlm, lockres); | 268 | if (dlm_purge_lockres(dlm, lockres)) |
269 | BUG(); | ||
278 | mlog(0, "DONE calling dlm_purge_lockres!\n"); | 270 | mlog(0, "DONE calling dlm_purge_lockres!\n"); |
279 | 271 | ||
280 | /* Avoid adding any scheduling latencies */ | 272 | /* Avoid adding any scheduling latencies */ |
@@ -467,12 +459,17 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
467 | assert_spin_locked(&res->spinlock); | 459 | assert_spin_locked(&res->spinlock); |
468 | 460 | ||
469 | /* don't shuffle secondary queues */ | 461 | /* don't shuffle secondary queues */ |
470 | if ((res->owner == dlm->node_num) && | 462 | if ((res->owner == dlm->node_num)) { |
471 | !(res->state & DLM_LOCK_RES_DIRTY)) { | 463 | if (res->state & (DLM_LOCK_RES_MIGRATING | |
472 | /* ref for dirty_list */ | 464 | DLM_LOCK_RES_BLOCK_DIRTY)) |
473 | dlm_lockres_get(res); | 465 | return; |
474 | list_add_tail(&res->dirty, &dlm->dirty_list); | 466 | |
475 | res->state |= DLM_LOCK_RES_DIRTY; | 467 | if (list_empty(&res->dirty)) { |
468 | /* ref for dirty_list */ | ||
469 | dlm_lockres_get(res); | ||
470 | list_add_tail(&res->dirty, &dlm->dirty_list); | ||
471 | res->state |= DLM_LOCK_RES_DIRTY; | ||
472 | } | ||
476 | } | 473 | } |
477 | } | 474 | } |
478 | 475 | ||
@@ -651,7 +648,7 @@ static int dlm_thread(void *data) | |||
651 | dlm_lockres_get(res); | 648 | dlm_lockres_get(res); |
652 | 649 | ||
653 | spin_lock(&res->spinlock); | 650 | spin_lock(&res->spinlock); |
654 | res->state &= ~DLM_LOCK_RES_DIRTY; | 651 | /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */ |
655 | list_del_init(&res->dirty); | 652 | list_del_init(&res->dirty); |
656 | spin_unlock(&res->spinlock); | 653 | spin_unlock(&res->spinlock); |
657 | spin_unlock(&dlm->spinlock); | 654 | spin_unlock(&dlm->spinlock); |
@@ -675,10 +672,11 @@ static int dlm_thread(void *data) | |||
675 | /* it is now ok to move lockreses in these states | 672 | /* it is now ok to move lockreses in these states |
676 | * to the dirty list, assuming that they will only be | 673 | * to the dirty list, assuming that they will only be |
677 | * dirty for a short while. */ | 674 | * dirty for a short while. */ |
675 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); | ||
678 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | | 676 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | |
679 | DLM_LOCK_RES_MIGRATING | | ||
680 | DLM_LOCK_RES_RECOVERING)) { | 677 | DLM_LOCK_RES_RECOVERING)) { |
681 | /* move it to the tail and keep going */ | 678 | /* move it to the tail and keep going */ |
679 | res->state &= ~DLM_LOCK_RES_DIRTY; | ||
682 | spin_unlock(&res->spinlock); | 680 | spin_unlock(&res->spinlock); |
683 | mlog(0, "delaying list shuffling for in-" | 681 | mlog(0, "delaying list shuffling for in-" |
684 | "progress lockres %.*s, state=%d\n", | 682 | "progress lockres %.*s, state=%d\n", |
@@ -699,6 +697,7 @@ static int dlm_thread(void *data) | |||
699 | 697 | ||
700 | /* called while holding lockres lock */ | 698 | /* called while holding lockres lock */ |
701 | dlm_shuffle_lists(dlm, res); | 699 | dlm_shuffle_lists(dlm, res); |
700 | res->state &= ~DLM_LOCK_RES_DIRTY; | ||
702 | spin_unlock(&res->spinlock); | 701 | spin_unlock(&res->spinlock); |
703 | 702 | ||
704 | dlm_lockres_calc_usage(dlm, res); | 703 | dlm_lockres_calc_usage(dlm, res); |
@@ -709,11 +708,8 @@ in_progress: | |||
709 | /* if the lock was in-progress, stick | 708 | /* if the lock was in-progress, stick |
710 | * it on the back of the list */ | 709 | * it on the back of the list */ |
711 | if (delay) { | 710 | if (delay) { |
712 | /* ref for dirty_list */ | ||
713 | dlm_lockres_get(res); | ||
714 | spin_lock(&res->spinlock); | 711 | spin_lock(&res->spinlock); |
715 | list_add_tail(&res->dirty, &dlm->dirty_list); | 712 | __dlm_dirty_lockres(dlm, res); |
716 | res->state |= DLM_LOCK_RES_DIRTY; | ||
717 | spin_unlock(&res->spinlock); | 713 | spin_unlock(&res->spinlock); |
718 | } | 714 | } |
719 | dlm_lockres_put(res); | 715 | dlm_lockres_put(res); |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 37be4b2e0d4a..86ca085ef324 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -147,6 +147,10 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
147 | goto leave; | 147 | goto leave; |
148 | } | 148 | } |
149 | 149 | ||
150 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
151 | status = DLM_MIGRATING; | ||
152 | goto leave; | ||
153 | } | ||
150 | 154 | ||
151 | /* see above for what the spec says about | 155 | /* see above for what the spec says about |
152 | * LKM_CANCEL and the lock queue state */ | 156 | * LKM_CANCEL and the lock queue state */ |
@@ -244,8 +248,8 @@ leave: | |||
244 | /* this should always be coupled with list removal */ | 248 | /* this should always be coupled with list removal */ |
245 | BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); | 249 | BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); |
246 | mlog(0, "lock %u:%llu should be gone now! refs=%d\n", | 250 | mlog(0, "lock %u:%llu should be gone now! refs=%d\n", |
247 | dlm_get_lock_cookie_node(lock->ml.cookie), | 251 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
248 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 252 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
249 | atomic_read(&lock->lock_refs.refcount)-1); | 253 | atomic_read(&lock->lock_refs.refcount)-1); |
250 | dlm_lock_put(lock); | 254 | dlm_lock_put(lock); |
251 | } | 255 | } |
@@ -379,7 +383,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, | |||
379 | * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID, | 383 | * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID, |
380 | * return value from dlmunlock_master | 384 | * return value from dlmunlock_master |
381 | */ | 385 | */ |
382 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) | 386 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
387 | void **ret_data) | ||
383 | { | 388 | { |
384 | struct dlm_ctxt *dlm = data; | 389 | struct dlm_ctxt *dlm = data; |
385 | struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; | 390 | struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; |
@@ -502,8 +507,8 @@ not_found: | |||
502 | if (!found) | 507 | if (!found) |
503 | mlog(ML_ERROR, "failed to find lock to unlock! " | 508 | mlog(ML_ERROR, "failed to find lock to unlock! " |
504 | "cookie=%u:%llu\n", | 509 | "cookie=%u:%llu\n", |
505 | dlm_get_lock_cookie_node(unlock->cookie), | 510 | dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)), |
506 | dlm_get_lock_cookie_seq(unlock->cookie)); | 511 | dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie))); |
507 | else | 512 | else |
508 | dlm_lock_put(lock); | 513 | dlm_lock_put(lock); |
509 | 514 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e1216364d191..d026b4f27757 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -306,8 +306,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
306 | * for the dinode, one for the new block. */ | 306 | * for the dinode, one for the new block. */ |
307 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) | 307 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) |
308 | 308 | ||
309 | /* file update (nlink, etc) + dir entry block */ | 309 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block */ |
310 | #define OCFS2_LINK_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | 310 | #define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1) |
311 | 311 | ||
312 | /* inode + dir inode (if we unlink a dir), + dir entry block + orphan | 312 | /* inode + dir inode (if we unlink a dir), + dir entry block + orphan |
313 | * dir inode link */ | 313 | * dir inode link */ |
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 0afd8b9af70f..f30e63b9910c 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c | |||
@@ -887,7 +887,7 @@ static inline int ocfs2_translate_response(int response) | |||
887 | 887 | ||
888 | static int ocfs2_handle_response_message(struct o2net_msg *msg, | 888 | static int ocfs2_handle_response_message(struct o2net_msg *msg, |
889 | u32 len, | 889 | u32 len, |
890 | void *data) | 890 | void *data, void **ret_data) |
891 | { | 891 | { |
892 | unsigned int response_id, node_num; | 892 | unsigned int response_id, node_num; |
893 | int response_status; | 893 | int response_status; |
@@ -943,7 +943,7 @@ bail: | |||
943 | 943 | ||
944 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, | 944 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, |
945 | u32 len, | 945 | u32 len, |
946 | void *data) | 946 | void *data, void **ret_data) |
947 | { | 947 | { |
948 | int status; | 948 | int status; |
949 | struct ocfs2_super *osb = data; | 949 | struct ocfs2_super *osb = data; |
@@ -1007,7 +1007,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb) | |||
1007 | osb->net_key, | 1007 | osb->net_key, |
1008 | sizeof(struct ocfs2_response_msg), | 1008 | sizeof(struct ocfs2_response_msg), |
1009 | ocfs2_handle_response_message, | 1009 | ocfs2_handle_response_message, |
1010 | osb, &osb->osb_net_handlers); | 1010 | osb, NULL, &osb->osb_net_handlers); |
1011 | if (status) { | 1011 | if (status) { |
1012 | mlog_errno(status); | 1012 | mlog_errno(status); |
1013 | goto bail; | 1013 | goto bail; |
@@ -1017,7 +1017,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb) | |||
1017 | osb->net_key, | 1017 | osb->net_key, |
1018 | sizeof(struct ocfs2_vote_msg), | 1018 | sizeof(struct ocfs2_vote_msg), |
1019 | ocfs2_handle_vote_message, | 1019 | ocfs2_handle_vote_message, |
1020 | osb, &osb->osb_net_handlers); | 1020 | osb, NULL, &osb->osb_net_handlers); |
1021 | if (status) { | 1021 | if (status) { |
1022 | mlog_errno(status); | 1022 | mlog_errno(status); |
1023 | goto bail; | 1023 | goto bail; |
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index e8f540d38d48..d3b9f5f07db1 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | 17 | ||
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include <asm/semaphore.h> | ||
19 | 20 | ||
20 | #include "sysfs.h" | 21 | #include "sysfs.h" |
21 | 22 | ||
@@ -146,7 +147,7 @@ static int open(struct inode * inode, struct file * file) | |||
146 | Error: | 147 | Error: |
147 | module_put(attr->attr.owner); | 148 | module_put(attr->attr.owner); |
148 | Done: | 149 | Done: |
149 | if (error && kobj) | 150 | if (error) |
150 | kobject_put(kobj); | 151 | kobject_put(kobj); |
151 | return error; | 152 | return error; |
152 | } | 153 | } |
@@ -157,8 +158,7 @@ static int release(struct inode * inode, struct file * file) | |||
157 | struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); | 158 | struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); |
158 | u8 * buffer = file->private_data; | 159 | u8 * buffer = file->private_data; |
159 | 160 | ||
160 | if (kobj) | 161 | kobject_put(kobj); |
161 | kobject_put(kobj); | ||
162 | module_put(attr->attr.owner); | 162 | module_put(attr->attr.owner); |
163 | kfree(buffer); | 163 | kfree(buffer); |
164 | return 0; | 164 | return 0; |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 511edef8b321..9dcdf556c99c 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/kobject.h> | 10 | #include <linux/kobject.h> |
11 | #include <linux/namei.h> | 11 | #include <linux/namei.h> |
12 | #include <asm/semaphore.h> | ||
12 | #include "sysfs.h" | 13 | #include "sysfs.h" |
13 | 14 | ||
14 | DECLARE_RWSEM(sysfs_rename_sem); | 15 | DECLARE_RWSEM(sysfs_rename_sem); |
@@ -32,8 +33,7 @@ static struct dentry_operations sysfs_dentry_ops = { | |||
32 | /* | 33 | /* |
33 | * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent | 34 | * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent |
34 | */ | 35 | */ |
35 | static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd, | 36 | static struct sysfs_dirent * __sysfs_new_dirent(void * element) |
36 | void * element) | ||
37 | { | 37 | { |
38 | struct sysfs_dirent * sd; | 38 | struct sysfs_dirent * sd; |
39 | 39 | ||
@@ -45,12 +45,28 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd, | |||
45 | atomic_set(&sd->s_count, 1); | 45 | atomic_set(&sd->s_count, 1); |
46 | atomic_set(&sd->s_event, 1); | 46 | atomic_set(&sd->s_event, 1); |
47 | INIT_LIST_HEAD(&sd->s_children); | 47 | INIT_LIST_HEAD(&sd->s_children); |
48 | list_add(&sd->s_sibling, &parent_sd->s_children); | 48 | INIT_LIST_HEAD(&sd->s_sibling); |
49 | sd->s_element = element; | 49 | sd->s_element = element; |
50 | 50 | ||
51 | return sd; | 51 | return sd; |
52 | } | 52 | } |
53 | 53 | ||
54 | static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, | ||
55 | struct sysfs_dirent *sd) | ||
56 | { | ||
57 | if (sd) | ||
58 | list_add(&sd->s_sibling, &parent_sd->s_children); | ||
59 | } | ||
60 | |||
61 | static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, | ||
62 | void * element) | ||
63 | { | ||
64 | struct sysfs_dirent *sd; | ||
65 | sd = __sysfs_new_dirent(element); | ||
66 | __sysfs_list_dirent(parent_sd, sd); | ||
67 | return sd; | ||
68 | } | ||
69 | |||
54 | /* | 70 | /* |
55 | * | 71 | * |
56 | * Return -EEXIST if there is already a sysfs element with the same name for | 72 | * Return -EEXIST if there is already a sysfs element with the same name for |
@@ -77,14 +93,14 @@ int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, | |||
77 | } | 93 | } |
78 | 94 | ||
79 | 95 | ||
80 | int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, | 96 | static struct sysfs_dirent * |
81 | void * element, umode_t mode, int type) | 97 | __sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) |
82 | { | 98 | { |
83 | struct sysfs_dirent * sd; | 99 | struct sysfs_dirent * sd; |
84 | 100 | ||
85 | sd = sysfs_new_dirent(parent_sd, element); | 101 | sd = __sysfs_new_dirent(element); |
86 | if (!sd) | 102 | if (!sd) |
87 | return -ENOMEM; | 103 | goto out; |
88 | 104 | ||
89 | sd->s_mode = mode; | 105 | sd->s_mode = mode; |
90 | sd->s_type = type; | 106 | sd->s_type = type; |
@@ -94,7 +110,19 @@ int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, | |||
94 | dentry->d_op = &sysfs_dentry_ops; | 110 | dentry->d_op = &sysfs_dentry_ops; |
95 | } | 111 | } |
96 | 112 | ||
97 | return 0; | 113 | out: |
114 | return sd; | ||
115 | } | ||
116 | |||
117 | int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, | ||
118 | void * element, umode_t mode, int type) | ||
119 | { | ||
120 | struct sysfs_dirent *sd; | ||
121 | |||
122 | sd = __sysfs_make_dirent(dentry, element, mode, type); | ||
123 | __sysfs_list_dirent(parent_sd, sd); | ||
124 | |||
125 | return sd ? 0 : -ENOMEM; | ||
98 | } | 126 | } |
99 | 127 | ||
100 | static int init_dir(struct inode * inode) | 128 | static int init_dir(struct inode * inode) |
@@ -165,11 +193,11 @@ int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) | |||
165 | 193 | ||
166 | /** | 194 | /** |
167 | * sysfs_create_dir - create a directory for an object. | 195 | * sysfs_create_dir - create a directory for an object. |
168 | * @parent: parent parent object. | ||
169 | * @kobj: object we're creating directory for. | 196 | * @kobj: object we're creating directory for. |
197 | * @shadow_parent: parent parent object. | ||
170 | */ | 198 | */ |
171 | 199 | ||
172 | int sysfs_create_dir(struct kobject * kobj) | 200 | int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) |
173 | { | 201 | { |
174 | struct dentry * dentry = NULL; | 202 | struct dentry * dentry = NULL; |
175 | struct dentry * parent; | 203 | struct dentry * parent; |
@@ -177,7 +205,9 @@ int sysfs_create_dir(struct kobject * kobj) | |||
177 | 205 | ||
178 | BUG_ON(!kobj); | 206 | BUG_ON(!kobj); |
179 | 207 | ||
180 | if (kobj->parent) | 208 | if (shadow_parent) |
209 | parent = shadow_parent; | ||
210 | else if (kobj->parent) | ||
181 | parent = kobj->parent->dentry; | 211 | parent = kobj->parent->dentry; |
182 | else if (sysfs_mount && sysfs_mount->mnt_sb) | 212 | else if (sysfs_mount && sysfs_mount->mnt_sb) |
183 | parent = sysfs_mount->mnt_sb->s_root; | 213 | parent = sysfs_mount->mnt_sb->s_root; |
@@ -298,21 +328,12 @@ void sysfs_remove_subdir(struct dentry * d) | |||
298 | } | 328 | } |
299 | 329 | ||
300 | 330 | ||
301 | /** | 331 | static void __sysfs_remove_dir(struct dentry *dentry) |
302 | * sysfs_remove_dir - remove an object's directory. | ||
303 | * @kobj: object. | ||
304 | * | ||
305 | * The only thing special about this is that we remove any files in | ||
306 | * the directory before we remove the directory, and we've inlined | ||
307 | * what used to be sysfs_rmdir() below, instead of calling separately. | ||
308 | */ | ||
309 | |||
310 | void sysfs_remove_dir(struct kobject * kobj) | ||
311 | { | 332 | { |
312 | struct dentry * dentry = dget(kobj->dentry); | ||
313 | struct sysfs_dirent * parent_sd; | 333 | struct sysfs_dirent * parent_sd; |
314 | struct sysfs_dirent * sd, * tmp; | 334 | struct sysfs_dirent * sd, * tmp; |
315 | 335 | ||
336 | dget(dentry); | ||
316 | if (!dentry) | 337 | if (!dentry) |
317 | return; | 338 | return; |
318 | 339 | ||
@@ -333,32 +354,60 @@ void sysfs_remove_dir(struct kobject * kobj) | |||
333 | * Drop reference from dget() on entrance. | 354 | * Drop reference from dget() on entrance. |
334 | */ | 355 | */ |
335 | dput(dentry); | 356 | dput(dentry); |
357 | } | ||
358 | |||
359 | /** | ||
360 | * sysfs_remove_dir - remove an object's directory. | ||
361 | * @kobj: object. | ||
362 | * | ||
363 | * The only thing special about this is that we remove any files in | ||
364 | * the directory before we remove the directory, and we've inlined | ||
365 | * what used to be sysfs_rmdir() below, instead of calling separately. | ||
366 | */ | ||
367 | |||
368 | void sysfs_remove_dir(struct kobject * kobj) | ||
369 | { | ||
370 | __sysfs_remove_dir(kobj->dentry); | ||
336 | kobj->dentry = NULL; | 371 | kobj->dentry = NULL; |
337 | } | 372 | } |
338 | 373 | ||
339 | int sysfs_rename_dir(struct kobject * kobj, const char *new_name) | 374 | int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, |
375 | const char *new_name) | ||
340 | { | 376 | { |
341 | int error = 0; | 377 | int error = 0; |
342 | struct dentry * new_dentry, * parent; | 378 | struct dentry * new_dentry; |
343 | |||
344 | if (!strcmp(kobject_name(kobj), new_name)) | ||
345 | return -EINVAL; | ||
346 | 379 | ||
347 | if (!kobj->parent) | 380 | if (!new_parent) |
348 | return -EINVAL; | 381 | return -EFAULT; |
349 | 382 | ||
350 | down_write(&sysfs_rename_sem); | 383 | down_write(&sysfs_rename_sem); |
351 | parent = kobj->parent->dentry; | 384 | mutex_lock(&new_parent->d_inode->i_mutex); |
352 | |||
353 | mutex_lock(&parent->d_inode->i_mutex); | ||
354 | 385 | ||
355 | new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); | 386 | new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); |
356 | if (!IS_ERR(new_dentry)) { | 387 | if (!IS_ERR(new_dentry)) { |
357 | if (!new_dentry->d_inode) { | 388 | /* By allowing two different directories with the |
389 | * same d_parent we allow this routine to move | ||
390 | * between different shadows of the same directory | ||
391 | */ | ||
392 | if (kobj->dentry->d_parent->d_inode != new_parent->d_inode) | ||
393 | return -EINVAL; | ||
394 | else if (new_dentry->d_parent->d_inode != new_parent->d_inode) | ||
395 | error = -EINVAL; | ||
396 | else if (new_dentry == kobj->dentry) | ||
397 | error = -EINVAL; | ||
398 | else if (!new_dentry->d_inode) { | ||
358 | error = kobject_set_name(kobj, "%s", new_name); | 399 | error = kobject_set_name(kobj, "%s", new_name); |
359 | if (!error) { | 400 | if (!error) { |
401 | struct sysfs_dirent *sd, *parent_sd; | ||
402 | |||
360 | d_add(new_dentry, NULL); | 403 | d_add(new_dentry, NULL); |
361 | d_move(kobj->dentry, new_dentry); | 404 | d_move(kobj->dentry, new_dentry); |
405 | |||
406 | sd = kobj->dentry->d_fsdata; | ||
407 | parent_sd = new_parent->d_fsdata; | ||
408 | |||
409 | list_del_init(&sd->s_sibling); | ||
410 | list_add(&sd->s_sibling, &parent_sd->s_children); | ||
362 | } | 411 | } |
363 | else | 412 | else |
364 | d_drop(new_dentry); | 413 | d_drop(new_dentry); |
@@ -366,7 +415,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) | |||
366 | error = -EEXIST; | 415 | error = -EEXIST; |
367 | dput(new_dentry); | 416 | dput(new_dentry); |
368 | } | 417 | } |
369 | mutex_unlock(&parent->d_inode->i_mutex); | 418 | mutex_unlock(&new_parent->d_inode->i_mutex); |
370 | up_write(&sysfs_rename_sem); | 419 | up_write(&sysfs_rename_sem); |
371 | 420 | ||
372 | return error; | 421 | return error; |
@@ -378,12 +427,10 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent) | |||
378 | struct sysfs_dirent *new_parent_sd, *sd; | 427 | struct sysfs_dirent *new_parent_sd, *sd; |
379 | int error; | 428 | int error; |
380 | 429 | ||
381 | if (!new_parent) | ||
382 | return -EINVAL; | ||
383 | |||
384 | old_parent_dentry = kobj->parent ? | 430 | old_parent_dentry = kobj->parent ? |
385 | kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; | 431 | kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; |
386 | new_parent_dentry = new_parent->dentry; | 432 | new_parent_dentry = new_parent ? |
433 | new_parent->dentry : sysfs_mount->mnt_sb->s_root; | ||
387 | 434 | ||
388 | again: | 435 | again: |
389 | mutex_lock(&old_parent_dentry->d_inode->i_mutex); | 436 | mutex_lock(&old_parent_dentry->d_inode->i_mutex); |
@@ -547,6 +594,95 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) | |||
547 | return offset; | 594 | return offset; |
548 | } | 595 | } |
549 | 596 | ||
597 | |||
598 | /** | ||
599 | * sysfs_make_shadowed_dir - Setup so a directory can be shadowed | ||
600 | * @kobj: object we're creating shadow of. | ||
601 | */ | ||
602 | |||
603 | int sysfs_make_shadowed_dir(struct kobject *kobj, | ||
604 | void * (*follow_link)(struct dentry *, struct nameidata *)) | ||
605 | { | ||
606 | struct inode *inode; | ||
607 | struct inode_operations *i_op; | ||
608 | |||
609 | inode = kobj->dentry->d_inode; | ||
610 | if (inode->i_op != &sysfs_dir_inode_operations) | ||
611 | return -EINVAL; | ||
612 | |||
613 | i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); | ||
614 | if (!i_op) | ||
615 | return -ENOMEM; | ||
616 | |||
617 | memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op)); | ||
618 | i_op->follow_link = follow_link; | ||
619 | |||
620 | /* Locking of inode->i_op? | ||
621 | * Since setting i_op is a single word write and they | ||
622 | * are atomic we should be ok here. | ||
623 | */ | ||
624 | inode->i_op = i_op; | ||
625 | return 0; | ||
626 | } | ||
627 | |||
628 | /** | ||
629 | * sysfs_create_shadow_dir - create a shadow directory for an object. | ||
630 | * @kobj: object we're creating directory for. | ||
631 | * | ||
632 | * sysfs_make_shadowed_dir must already have been called on this | ||
633 | * directory. | ||
634 | */ | ||
635 | |||
636 | struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) | ||
637 | { | ||
638 | struct sysfs_dirent *sd; | ||
639 | struct dentry *parent, *dir, *shadow; | ||
640 | struct inode *inode; | ||
641 | |||
642 | dir = kobj->dentry; | ||
643 | inode = dir->d_inode; | ||
644 | parent = dir->d_parent; | ||
645 | shadow = ERR_PTR(-EINVAL); | ||
646 | if (!sysfs_is_shadowed_inode(inode)) | ||
647 | goto out; | ||
648 | |||
649 | shadow = d_alloc(parent, &dir->d_name); | ||
650 | if (!shadow) | ||
651 | goto nomem; | ||
652 | |||
653 | sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); | ||
654 | if (!sd) | ||
655 | goto nomem; | ||
656 | |||
657 | d_instantiate(shadow, igrab(inode)); | ||
658 | inc_nlink(inode); | ||
659 | inc_nlink(parent->d_inode); | ||
660 | shadow->d_op = &sysfs_dentry_ops; | ||
661 | |||
662 | dget(shadow); /* Extra count - pin the dentry in core */ | ||
663 | |||
664 | out: | ||
665 | return shadow; | ||
666 | nomem: | ||
667 | dput(shadow); | ||
668 | shadow = ERR_PTR(-ENOMEM); | ||
669 | goto out; | ||
670 | } | ||
671 | |||
672 | /** | ||
673 | * sysfs_remove_shadow_dir - remove an object's directory. | ||
674 | * @shadow: dentry of shadow directory | ||
675 | * | ||
676 | * The only thing special about this is that we remove any files in | ||
677 | * the directory before we remove the directory, and we've inlined | ||
678 | * what used to be sysfs_rmdir() below, instead of calling separately. | ||
679 | */ | ||
680 | |||
681 | void sysfs_remove_shadow_dir(struct dentry *shadow) | ||
682 | { | ||
683 | __sysfs_remove_dir(shadow); | ||
684 | } | ||
685 | |||
550 | const struct file_operations sysfs_dir_operations = { | 686 | const struct file_operations sysfs_dir_operations = { |
551 | .open = sysfs_dir_open, | 687 | .open = sysfs_dir_open, |
552 | .release = sysfs_dir_close, | 688 | .release = sysfs_dir_close, |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 9cfe53e1e00d..c0e117649a4d 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/kobject.h> | 7 | #include <linux/kobject.h> |
8 | #include <linux/namei.h> | 8 | #include <linux/namei.h> |
9 | #include <linux/poll.h> | 9 | #include <linux/poll.h> |
10 | #include <linux/list.h> | ||
10 | #include <asm/uaccess.h> | 11 | #include <asm/uaccess.h> |
11 | #include <asm/semaphore.h> | 12 | #include <asm/semaphore.h> |
12 | 13 | ||
@@ -50,17 +51,29 @@ static struct sysfs_ops subsys_sysfs_ops = { | |||
50 | .store = subsys_attr_store, | 51 | .store = subsys_attr_store, |
51 | }; | 52 | }; |
52 | 53 | ||
54 | /** | ||
55 | * add_to_collection - add buffer to a collection | ||
56 | * @buffer: buffer to be added | ||
57 | * @node inode of set to add to | ||
58 | */ | ||
53 | 59 | ||
54 | struct sysfs_buffer { | 60 | static inline void |
55 | size_t count; | 61 | add_to_collection(struct sysfs_buffer *buffer, struct inode *node) |
56 | loff_t pos; | 62 | { |
57 | char * page; | 63 | struct sysfs_buffer_collection *set = node->i_private; |
58 | struct sysfs_ops * ops; | ||
59 | struct semaphore sem; | ||
60 | int needs_read_fill; | ||
61 | int event; | ||
62 | }; | ||
63 | 64 | ||
65 | mutex_lock(&node->i_mutex); | ||
66 | list_add(&buffer->associates, &set->associates); | ||
67 | mutex_unlock(&node->i_mutex); | ||
68 | } | ||
69 | |||
70 | static inline void | ||
71 | remove_from_collection(struct sysfs_buffer *buffer, struct inode *node) | ||
72 | { | ||
73 | mutex_lock(&node->i_mutex); | ||
74 | list_del(&buffer->associates); | ||
75 | mutex_unlock(&node->i_mutex); | ||
76 | } | ||
64 | 77 | ||
65 | /** | 78 | /** |
66 | * fill_read_buffer - allocate and fill buffer from object. | 79 | * fill_read_buffer - allocate and fill buffer from object. |
@@ -70,7 +83,8 @@ struct sysfs_buffer { | |||
70 | * Allocate @buffer->page, if it hasn't been already, then call the | 83 | * Allocate @buffer->page, if it hasn't been already, then call the |
71 | * kobject's show() method to fill the buffer with this attribute's | 84 | * kobject's show() method to fill the buffer with this attribute's |
72 | * data. | 85 | * data. |
73 | * This is called only once, on the file's first read. | 86 | * This is called only once, on the file's first read unless an error |
87 | * is returned. | ||
74 | */ | 88 | */ |
75 | static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) | 89 | static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) |
76 | { | 90 | { |
@@ -88,12 +102,13 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer | |||
88 | 102 | ||
89 | buffer->event = atomic_read(&sd->s_event); | 103 | buffer->event = atomic_read(&sd->s_event); |
90 | count = ops->show(kobj,attr,buffer->page); | 104 | count = ops->show(kobj,attr,buffer->page); |
91 | buffer->needs_read_fill = 0; | ||
92 | BUG_ON(count > (ssize_t)PAGE_SIZE); | 105 | BUG_ON(count > (ssize_t)PAGE_SIZE); |
93 | if (count >= 0) | 106 | if (count >= 0) { |
107 | buffer->needs_read_fill = 0; | ||
94 | buffer->count = count; | 108 | buffer->count = count; |
95 | else | 109 | } else { |
96 | ret = count; | 110 | ret = count; |
111 | } | ||
97 | return ret; | 112 | return ret; |
98 | } | 113 | } |
99 | 114 | ||
@@ -153,6 +168,10 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
153 | ssize_t retval = 0; | 168 | ssize_t retval = 0; |
154 | 169 | ||
155 | down(&buffer->sem); | 170 | down(&buffer->sem); |
171 | if (buffer->orphaned) { | ||
172 | retval = -ENODEV; | ||
173 | goto out; | ||
174 | } | ||
156 | if (buffer->needs_read_fill) { | 175 | if (buffer->needs_read_fill) { |
157 | if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) | 176 | if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) |
158 | goto out; | 177 | goto out; |
@@ -165,7 +184,6 @@ out: | |||
165 | return retval; | 184 | return retval; |
166 | } | 185 | } |
167 | 186 | ||
168 | |||
169 | /** | 187 | /** |
170 | * fill_write_buffer - copy buffer from userspace. | 188 | * fill_write_buffer - copy buffer from userspace. |
171 | * @buffer: data buffer for file. | 189 | * @buffer: data buffer for file. |
@@ -243,19 +261,25 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t | |||
243 | ssize_t len; | 261 | ssize_t len; |
244 | 262 | ||
245 | down(&buffer->sem); | 263 | down(&buffer->sem); |
264 | if (buffer->orphaned) { | ||
265 | len = -ENODEV; | ||
266 | goto out; | ||
267 | } | ||
246 | len = fill_write_buffer(buffer, buf, count); | 268 | len = fill_write_buffer(buffer, buf, count); |
247 | if (len > 0) | 269 | if (len > 0) |
248 | len = flush_write_buffer(file->f_path.dentry, buffer, len); | 270 | len = flush_write_buffer(file->f_path.dentry, buffer, len); |
249 | if (len > 0) | 271 | if (len > 0) |
250 | *ppos += len; | 272 | *ppos += len; |
273 | out: | ||
251 | up(&buffer->sem); | 274 | up(&buffer->sem); |
252 | return len; | 275 | return len; |
253 | } | 276 | } |
254 | 277 | ||
255 | static int check_perm(struct inode * inode, struct file * file) | 278 | static int sysfs_open_file(struct inode *inode, struct file *file) |
256 | { | 279 | { |
257 | struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); | 280 | struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); |
258 | struct attribute * attr = to_attr(file->f_path.dentry); | 281 | struct attribute * attr = to_attr(file->f_path.dentry); |
282 | struct sysfs_buffer_collection *set; | ||
259 | struct sysfs_buffer * buffer; | 283 | struct sysfs_buffer * buffer; |
260 | struct sysfs_ops * ops = NULL; | 284 | struct sysfs_ops * ops = NULL; |
261 | int error = 0; | 285 | int error = 0; |
@@ -285,6 +309,18 @@ static int check_perm(struct inode * inode, struct file * file) | |||
285 | if (!ops) | 309 | if (!ops) |
286 | goto Eaccess; | 310 | goto Eaccess; |
287 | 311 | ||
312 | /* make sure we have a collection to add our buffers to */ | ||
313 | mutex_lock(&inode->i_mutex); | ||
314 | if (!(set = inode->i_private)) { | ||
315 | if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) { | ||
316 | error = -ENOMEM; | ||
317 | goto Done; | ||
318 | } else { | ||
319 | INIT_LIST_HEAD(&set->associates); | ||
320 | } | ||
321 | } | ||
322 | mutex_unlock(&inode->i_mutex); | ||
323 | |||
288 | /* File needs write support. | 324 | /* File needs write support. |
289 | * The inode's perms must say it's ok, | 325 | * The inode's perms must say it's ok, |
290 | * and we must have a store method. | 326 | * and we must have a store method. |
@@ -310,9 +346,11 @@ static int check_perm(struct inode * inode, struct file * file) | |||
310 | */ | 346 | */ |
311 | buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); | 347 | buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); |
312 | if (buffer) { | 348 | if (buffer) { |
349 | INIT_LIST_HEAD(&buffer->associates); | ||
313 | init_MUTEX(&buffer->sem); | 350 | init_MUTEX(&buffer->sem); |
314 | buffer->needs_read_fill = 1; | 351 | buffer->needs_read_fill = 1; |
315 | buffer->ops = ops; | 352 | buffer->ops = ops; |
353 | add_to_collection(buffer, inode); | ||
316 | file->private_data = buffer; | 354 | file->private_data = buffer; |
317 | } else | 355 | } else |
318 | error = -ENOMEM; | 356 | error = -ENOMEM; |
@@ -325,16 +363,11 @@ static int check_perm(struct inode * inode, struct file * file) | |||
325 | error = -EACCES; | 363 | error = -EACCES; |
326 | module_put(attr->owner); | 364 | module_put(attr->owner); |
327 | Done: | 365 | Done: |
328 | if (error && kobj) | 366 | if (error) |
329 | kobject_put(kobj); | 367 | kobject_put(kobj); |
330 | return error; | 368 | return error; |
331 | } | 369 | } |
332 | 370 | ||
333 | static int sysfs_open_file(struct inode * inode, struct file * filp) | ||
334 | { | ||
335 | return check_perm(inode,filp); | ||
336 | } | ||
337 | |||
338 | static int sysfs_release(struct inode * inode, struct file * filp) | 371 | static int sysfs_release(struct inode * inode, struct file * filp) |
339 | { | 372 | { |
340 | struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); | 373 | struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); |
@@ -342,8 +375,9 @@ static int sysfs_release(struct inode * inode, struct file * filp) | |||
342 | struct module * owner = attr->owner; | 375 | struct module * owner = attr->owner; |
343 | struct sysfs_buffer * buffer = filp->private_data; | 376 | struct sysfs_buffer * buffer = filp->private_data; |
344 | 377 | ||
345 | if (kobj) | 378 | if (buffer) |
346 | kobject_put(kobj); | 379 | remove_from_collection(buffer, inode); |
380 | kobject_put(kobj); | ||
347 | /* After this point, attr should not be accessed. */ | 381 | /* After this point, attr should not be accessed. */ |
348 | module_put(owner); | 382 | module_put(owner); |
349 | 383 | ||
@@ -548,7 +582,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); | |||
548 | 582 | ||
549 | void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) | 583 | void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) |
550 | { | 584 | { |
551 | sysfs_hash_and_remove(kobj->dentry,attr->name); | 585 | sysfs_hash_and_remove(kobj->dentry, attr->name); |
552 | } | 586 | } |
553 | 587 | ||
554 | 588 | ||
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 122145b0895c..b20951c93761 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <linux/dcache.h> | 13 | #include <linux/dcache.h> |
14 | #include <linux/namei.h> | 14 | #include <linux/namei.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/fs.h> | ||
17 | #include <asm/semaphore.h> | ||
16 | #include "sysfs.h" | 18 | #include "sysfs.h" |
17 | 19 | ||
18 | 20 | ||
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index e79e38d52c00..542d2bcc73df 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/backing-dev.h> | 13 | #include <linux/backing-dev.h> |
14 | #include <linux/capability.h> | 14 | #include <linux/capability.h> |
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <asm/semaphore.h> | ||
16 | #include "sysfs.h" | 17 | #include "sysfs.h" |
17 | 18 | ||
18 | extern struct super_block * sysfs_sb; | 19 | extern struct super_block * sysfs_sb; |
@@ -32,6 +33,16 @@ static struct inode_operations sysfs_inode_operations ={ | |||
32 | .setattr = sysfs_setattr, | 33 | .setattr = sysfs_setattr, |
33 | }; | 34 | }; |
34 | 35 | ||
36 | void sysfs_delete_inode(struct inode *inode) | ||
37 | { | ||
38 | /* Free the shadowed directory inode operations */ | ||
39 | if (sysfs_is_shadowed_inode(inode)) { | ||
40 | kfree(inode->i_op); | ||
41 | inode->i_op = NULL; | ||
42 | } | ||
43 | return generic_delete_inode(inode); | ||
44 | } | ||
45 | |||
35 | int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) | 46 | int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) |
36 | { | 47 | { |
37 | struct inode * inode = dentry->d_inode; | 48 | struct inode * inode = dentry->d_inode; |
@@ -209,6 +220,22 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) | |||
209 | return NULL; | 220 | return NULL; |
210 | } | 221 | } |
211 | 222 | ||
223 | static inline void orphan_all_buffers(struct inode *node) | ||
224 | { | ||
225 | struct sysfs_buffer_collection *set = node->i_private; | ||
226 | struct sysfs_buffer *buf; | ||
227 | |||
228 | mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); | ||
229 | if (node->i_private) { | ||
230 | list_for_each_entry(buf, &set->associates, associates) { | ||
231 | down(&buf->sem); | ||
232 | buf->orphaned = 1; | ||
233 | up(&buf->sem); | ||
234 | } | ||
235 | } | ||
236 | mutex_unlock(&node->i_mutex); | ||
237 | } | ||
238 | |||
212 | 239 | ||
213 | /* | 240 | /* |
214 | * Unhashes the dentry corresponding to given sysfs_dirent | 241 | * Unhashes the dentry corresponding to given sysfs_dirent |
@@ -217,16 +244,23 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) | |||
217 | void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) | 244 | void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) |
218 | { | 245 | { |
219 | struct dentry * dentry = sd->s_dentry; | 246 | struct dentry * dentry = sd->s_dentry; |
247 | struct inode *inode; | ||
220 | 248 | ||
221 | if (dentry) { | 249 | if (dentry) { |
222 | spin_lock(&dcache_lock); | 250 | spin_lock(&dcache_lock); |
223 | spin_lock(&dentry->d_lock); | 251 | spin_lock(&dentry->d_lock); |
224 | if (!(d_unhashed(dentry) && dentry->d_inode)) { | 252 | if (!(d_unhashed(dentry) && dentry->d_inode)) { |
253 | inode = dentry->d_inode; | ||
254 | spin_lock(&inode->i_lock); | ||
255 | __iget(inode); | ||
256 | spin_unlock(&inode->i_lock); | ||
225 | dget_locked(dentry); | 257 | dget_locked(dentry); |
226 | __d_drop(dentry); | 258 | __d_drop(dentry); |
227 | spin_unlock(&dentry->d_lock); | 259 | spin_unlock(&dentry->d_lock); |
228 | spin_unlock(&dcache_lock); | 260 | spin_unlock(&dcache_lock); |
229 | simple_unlink(parent->d_inode, dentry); | 261 | simple_unlink(parent->d_inode, dentry); |
262 | orphan_all_buffers(inode); | ||
263 | iput(inode); | ||
230 | } else { | 264 | } else { |
231 | spin_unlock(&dentry->d_lock); | 265 | spin_unlock(&dentry->d_lock); |
232 | spin_unlock(&dcache_lock); | 266 | spin_unlock(&dcache_lock); |
@@ -248,7 +282,7 @@ int sysfs_hash_and_remove(struct dentry * dir, const char * name) | |||
248 | return -ENOENT; | 282 | return -ENOENT; |
249 | 283 | ||
250 | parent_sd = dir->d_fsdata; | 284 | parent_sd = dir->d_fsdata; |
251 | mutex_lock(&dir->d_inode->i_mutex); | 285 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
252 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { | 286 | list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { |
253 | if (!sd->s_element) | 287 | if (!sd->s_element) |
254 | continue; | 288 | continue; |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index e503f858fba8..f6a87a824883 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/mount.h> | 8 | #include <linux/mount.h> |
9 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <asm/semaphore.h> | ||
11 | 12 | ||
12 | #include "sysfs.h" | 13 | #include "sysfs.h" |
13 | 14 | ||
@@ -18,9 +19,12 @@ struct vfsmount *sysfs_mount; | |||
18 | struct super_block * sysfs_sb = NULL; | 19 | struct super_block * sysfs_sb = NULL; |
19 | struct kmem_cache *sysfs_dir_cachep; | 20 | struct kmem_cache *sysfs_dir_cachep; |
20 | 21 | ||
22 | static void sysfs_clear_inode(struct inode *inode); | ||
23 | |||
21 | static struct super_operations sysfs_ops = { | 24 | static struct super_operations sysfs_ops = { |
22 | .statfs = simple_statfs, | 25 | .statfs = simple_statfs, |
23 | .drop_inode = generic_delete_inode, | 26 | .drop_inode = sysfs_delete_inode, |
27 | .clear_inode = sysfs_clear_inode, | ||
24 | }; | 28 | }; |
25 | 29 | ||
26 | static struct sysfs_dirent sysfs_root = { | 30 | static struct sysfs_dirent sysfs_root = { |
@@ -31,6 +35,11 @@ static struct sysfs_dirent sysfs_root = { | |||
31 | .s_iattr = NULL, | 35 | .s_iattr = NULL, |
32 | }; | 36 | }; |
33 | 37 | ||
38 | static void sysfs_clear_inode(struct inode *inode) | ||
39 | { | ||
40 | kfree(inode->i_private); | ||
41 | } | ||
42 | |||
34 | static int sysfs_fill_super(struct super_block *sb, void *data, int silent) | 43 | static int sysfs_fill_super(struct super_block *sb, void *data, int silent) |
35 | { | 44 | { |
36 | struct inode *inode; | 45 | struct inode *inode; |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index f50e3cc2ded8..4869f611192f 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/kobject.h> | 8 | #include <linux/kobject.h> |
9 | #include <linux/namei.h> | 9 | #include <linux/namei.h> |
10 | #include <asm/semaphore.h> | ||
10 | 11 | ||
11 | #include "sysfs.h" | 12 | #include "sysfs.h" |
12 | 13 | ||
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index bd7cec295dab..fe1cbfd208ed 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -2,6 +2,7 @@ | |||
2 | extern struct vfsmount * sysfs_mount; | 2 | extern struct vfsmount * sysfs_mount; |
3 | extern struct kmem_cache *sysfs_dir_cachep; | 3 | extern struct kmem_cache *sysfs_dir_cachep; |
4 | 4 | ||
5 | extern void sysfs_delete_inode(struct inode *inode); | ||
5 | extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); | 6 | extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); |
6 | extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); | 7 | extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); |
7 | 8 | ||
@@ -33,6 +34,22 @@ struct sysfs_symlink { | |||
33 | struct kobject * target_kobj; | 34 | struct kobject * target_kobj; |
34 | }; | 35 | }; |
35 | 36 | ||
37 | struct sysfs_buffer { | ||
38 | struct list_head associates; | ||
39 | size_t count; | ||
40 | loff_t pos; | ||
41 | char * page; | ||
42 | struct sysfs_ops * ops; | ||
43 | struct semaphore sem; | ||
44 | int orphaned; | ||
45 | int needs_read_fill; | ||
46 | int event; | ||
47 | }; | ||
48 | |||
49 | struct sysfs_buffer_collection { | ||
50 | struct list_head associates; | ||
51 | }; | ||
52 | |||
36 | static inline struct kobject * to_kobj(struct dentry * dentry) | 53 | static inline struct kobject * to_kobj(struct dentry * dentry) |
37 | { | 54 | { |
38 | struct sysfs_dirent * sd = dentry->d_fsdata; | 55 | struct sysfs_dirent * sd = dentry->d_fsdata; |
@@ -96,3 +113,7 @@ static inline void sysfs_put(struct sysfs_dirent * sd) | |||
96 | release_sysfs_dirent(sd); | 113 | release_sysfs_dirent(sd); |
97 | } | 114 | } |
98 | 115 | ||
116 | static inline int sysfs_is_shadowed_inode(struct inode *inode) | ||
117 | { | ||
118 | return S_ISDIR(inode->i_mode) && inode->i_op->follow_link; | ||
119 | } | ||