diff options
| -rw-r--r-- | fs/ocfs2/cluster/masklog.c | 1 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/masklog.h | 2 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/nodemanager.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/tcp.c | 14 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/tcp.h | 5 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 8 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmdebug.c | 12 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 39 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 23 | ||||
| -rw-r--r-- | fs/ocfs2/extent_map.c | 38 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 51 | ||||
| -rw-r--r-- | fs/ocfs2/heartbeat.c | 1 | ||||
| -rw-r--r-- | fs/ocfs2/inode.c | 46 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 124 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2.h | 7 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 1 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 11 |
18 files changed, 249 insertions, 142 deletions
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index fd741cea5705..636593bf4d17 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
| @@ -74,6 +74,7 @@ struct mlog_attribute { | |||
| 74 | #define define_mask(_name) { \ | 74 | #define define_mask(_name) { \ |
| 75 | .attr = { \ | 75 | .attr = { \ |
| 76 | .name = #_name, \ | 76 | .name = #_name, \ |
| 77 | .owner = THIS_MODULE, \ | ||
| 77 | .mode = S_IRUGO | S_IWUSR, \ | 78 | .mode = S_IRUGO | S_IWUSR, \ |
| 78 | }, \ | 79 | }, \ |
| 79 | .mask = ML_##_name, \ | 80 | .mask = ML_##_name, \ |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index e8c56a3d9c64..2cadc3009c83 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
| @@ -256,7 +256,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; | |||
| 256 | } \ | 256 | } \ |
| 257 | } while (0) | 257 | } while (0) |
| 258 | 258 | ||
| 259 | #if (BITS_PER_LONG == 32) || defined(CONFIG_X86_64) | 259 | #if (BITS_PER_LONG == 32) || defined(CONFIG_X86_64) || (defined(CONFIG_UML_X86) && defined(CONFIG_64BIT)) |
| 260 | #define MLFi64 "lld" | 260 | #define MLFi64 "lld" |
| 261 | #define MLFu64 "llu" | 261 | #define MLFu64 "llu" |
| 262 | #define MLFx64 "llx" | 262 | #define MLFx64 "llx" |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cf7828f23361..e1fceb8aa32d 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -756,7 +756,7 @@ static int __init init_o2nm(void) | |||
| 756 | if (!ocfs2_table_header) { | 756 | if (!ocfs2_table_header) { |
| 757 | printk(KERN_ERR "nodemanager: unable to register sysctl\n"); | 757 | printk(KERN_ERR "nodemanager: unable to register sysctl\n"); |
| 758 | ret = -ENOMEM; /* or something. */ | 758 | ret = -ENOMEM; /* or something. */ |
| 759 | goto out; | 759 | goto out_o2net; |
| 760 | } | 760 | } |
| 761 | 761 | ||
| 762 | ret = o2net_register_hb_callbacks(); | 762 | ret = o2net_register_hb_callbacks(); |
| @@ -780,6 +780,8 @@ out_callbacks: | |||
| 780 | o2net_unregister_hb_callbacks(); | 780 | o2net_unregister_hb_callbacks(); |
| 781 | out_sysctl: | 781 | out_sysctl: |
| 782 | unregister_sysctl_table(ocfs2_table_header); | 782 | unregister_sysctl_table(ocfs2_table_header); |
| 783 | out_o2net: | ||
| 784 | o2net_exit(); | ||
| 783 | out: | 785 | out: |
| 784 | return ret; | 786 | return ret; |
| 785 | } | 787 | } |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d22d4cf08db1..0f60cc0d3985 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -1318,7 +1318,7 @@ static void o2net_start_connect(void *arg) | |||
| 1318 | { | 1318 | { |
| 1319 | struct o2net_node *nn = arg; | 1319 | struct o2net_node *nn = arg; |
| 1320 | struct o2net_sock_container *sc = NULL; | 1320 | struct o2net_sock_container *sc = NULL; |
| 1321 | struct o2nm_node *node = NULL; | 1321 | struct o2nm_node *node = NULL, *mynode = NULL; |
| 1322 | struct socket *sock = NULL; | 1322 | struct socket *sock = NULL; |
| 1323 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1323 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
| 1324 | int ret = 0; | 1324 | int ret = 0; |
| @@ -1334,6 +1334,12 @@ static void o2net_start_connect(void *arg) | |||
| 1334 | goto out; | 1334 | goto out; |
| 1335 | } | 1335 | } |
| 1336 | 1336 | ||
| 1337 | mynode = o2nm_get_node_by_num(o2nm_this_node()); | ||
| 1338 | if (mynode == NULL) { | ||
| 1339 | ret = 0; | ||
| 1340 | goto out; | ||
| 1341 | } | ||
| 1342 | |||
| 1337 | spin_lock(&nn->nn_lock); | 1343 | spin_lock(&nn->nn_lock); |
| 1338 | /* see if we already have one pending or have given up */ | 1344 | /* see if we already have one pending or have given up */ |
| 1339 | if (nn->nn_sc || nn->nn_persistent_error) | 1345 | if (nn->nn_sc || nn->nn_persistent_error) |
| @@ -1361,12 +1367,14 @@ static void o2net_start_connect(void *arg) | |||
| 1361 | sock->sk->sk_allocation = GFP_ATOMIC; | 1367 | sock->sk->sk_allocation = GFP_ATOMIC; |
| 1362 | 1368 | ||
| 1363 | myaddr.sin_family = AF_INET; | 1369 | myaddr.sin_family = AF_INET; |
| 1370 | myaddr.sin_addr.s_addr = (__force u32)mynode->nd_ipv4_address; | ||
| 1364 | myaddr.sin_port = (__force u16)htons(0); /* any port */ | 1371 | myaddr.sin_port = (__force u16)htons(0); /* any port */ |
| 1365 | 1372 | ||
| 1366 | ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, | 1373 | ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, |
| 1367 | sizeof(myaddr)); | 1374 | sizeof(myaddr)); |
| 1368 | if (ret) { | 1375 | if (ret) { |
| 1369 | mlog(0, "bind failed: %d\n", ret); | 1376 | mlog(ML_ERROR, "bind failed with %d at address %u.%u.%u.%u\n", |
| 1377 | ret, NIPQUAD(mynode->nd_ipv4_address)); | ||
| 1370 | goto out; | 1378 | goto out; |
| 1371 | } | 1379 | } |
| 1372 | 1380 | ||
| @@ -1407,6 +1415,8 @@ out: | |||
| 1407 | sc_put(sc); | 1415 | sc_put(sc); |
| 1408 | if (node) | 1416 | if (node) |
| 1409 | o2nm_node_put(node); | 1417 | o2nm_node_put(node); |
| 1418 | if (mynode) | ||
| 1419 | o2nm_node_put(mynode); | ||
| 1410 | 1420 | ||
| 1411 | return; | 1421 | return; |
| 1412 | } | 1422 | } |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index a6f4585501c8..616ff2b8434a 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
| @@ -85,13 +85,10 @@ enum { | |||
| 85 | O2NET_DRIVER_READY, | 85 | O2NET_DRIVER_READY, |
| 86 | }; | 86 | }; |
| 87 | 87 | ||
| 88 | int o2net_init_tcp_sock(struct inode *inode); | ||
| 89 | int o2net_send_message(u32 msg_type, u32 key, void *data, u32 len, | 88 | int o2net_send_message(u32 msg_type, u32 key, void *data, u32 len, |
| 90 | u8 target_node, int *status); | 89 | u8 target_node, int *status); |
| 91 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *vec, | 90 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *vec, |
| 92 | size_t veclen, u8 target_node, int *status); | 91 | size_t veclen, u8 target_node, int *status); |
| 93 | int o2net_broadcast_message(u32 msg_type, u32 key, void *data, u32 len, | ||
| 94 | struct inode *group); | ||
| 95 | 92 | ||
| 96 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | 93 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, |
| 97 | o2net_msg_handler_func *func, void *data, | 94 | o2net_msg_handler_func *func, void *data, |
| @@ -107,7 +104,5 @@ void o2net_disconnect_node(struct o2nm_node *node); | |||
| 107 | 104 | ||
| 108 | int o2net_init(void); | 105 | int o2net_init(void); |
| 109 | void o2net_exit(void); | 106 | void o2net_exit(void); |
| 110 | int o2net_proc_init(struct proc_dir_entry *parent); | ||
| 111 | void o2net_proc_exit(struct proc_dir_entry *parent); | ||
| 112 | 107 | ||
| 113 | #endif /* O2CLUSTER_TCP_H */ | 108 | #endif /* O2CLUSTER_TCP_H */ |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 23ceaa7127b4..9c772583744a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -37,9 +37,7 @@ | |||
| 37 | #define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes | 37 | #define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes |
| 38 | #define DLM_THREAD_MS 200 // flush at least every 200 ms | 38 | #define DLM_THREAD_MS 200 // flush at least every 200 ms |
| 39 | 39 | ||
| 40 | #define DLM_HASH_BITS 7 | 40 | #define DLM_HASH_BUCKETS (PAGE_SIZE / sizeof(struct hlist_head)) |
| 41 | #define DLM_HASH_SIZE (1 << DLM_HASH_BITS) | ||
| 42 | #define DLM_HASH_MASK (DLM_HASH_SIZE - 1) | ||
| 43 | 41 | ||
| 44 | enum dlm_ast_type { | 42 | enum dlm_ast_type { |
| 45 | DLM_AST = 0, | 43 | DLM_AST = 0, |
| @@ -87,7 +85,7 @@ enum dlm_ctxt_state { | |||
| 87 | struct dlm_ctxt | 85 | struct dlm_ctxt |
| 88 | { | 86 | { |
| 89 | struct list_head list; | 87 | struct list_head list; |
| 90 | struct list_head *resources; | 88 | struct hlist_head *lockres_hash; |
| 91 | struct list_head dirty_list; | 89 | struct list_head dirty_list; |
| 92 | struct list_head purge_list; | 90 | struct list_head purge_list; |
| 93 | struct list_head pending_asts; | 91 | struct list_head pending_asts; |
| @@ -217,7 +215,7 @@ struct dlm_lock_resource | |||
| 217 | { | 215 | { |
| 218 | /* WARNING: Please see the comment in dlm_init_lockres before | 216 | /* WARNING: Please see the comment in dlm_init_lockres before |
| 219 | * adding fields here. */ | 217 | * adding fields here. */ |
| 220 | struct list_head list; | 218 | struct hlist_node hash_node; |
| 221 | struct kref refs; | 219 | struct kref refs; |
| 222 | 220 | ||
| 223 | /* please keep these next 3 in this order | 221 | /* please keep these next 3 in this order |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index f339fe27975a..54f61b76ab51 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
| @@ -117,8 +117,8 @@ EXPORT_SYMBOL_GPL(dlm_print_one_lock); | |||
| 117 | void dlm_dump_lock_resources(struct dlm_ctxt *dlm) | 117 | void dlm_dump_lock_resources(struct dlm_ctxt *dlm) |
| 118 | { | 118 | { |
| 119 | struct dlm_lock_resource *res; | 119 | struct dlm_lock_resource *res; |
| 120 | struct list_head *iter; | 120 | struct hlist_node *iter; |
| 121 | struct list_head *bucket; | 121 | struct hlist_head *bucket; |
| 122 | int i; | 122 | int i; |
| 123 | 123 | ||
| 124 | mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n", | 124 | mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n", |
| @@ -129,12 +129,10 @@ void dlm_dump_lock_resources(struct dlm_ctxt *dlm) | |||
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | spin_lock(&dlm->spinlock); | 131 | spin_lock(&dlm->spinlock); |
| 132 | for (i=0; i<DLM_HASH_SIZE; i++) { | 132 | for (i=0; i<DLM_HASH_BUCKETS; i++) { |
| 133 | bucket = &(dlm->resources[i]); | 133 | bucket = &(dlm->lockres_hash[i]); |
| 134 | list_for_each(iter, bucket) { | 134 | hlist_for_each_entry(res, iter, bucket, hash_node) |
| 135 | res = list_entry(iter, struct dlm_lock_resource, list); | ||
| 136 | dlm_print_one_lock_resource(res); | 135 | dlm_print_one_lock_resource(res); |
| 137 | } | ||
| 138 | } | 136 | } |
| 139 | spin_unlock(&dlm->spinlock); | 137 | spin_unlock(&dlm->spinlock); |
| 140 | } | 138 | } |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6ee30837389c..8f3a9e3106fd 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -77,26 +77,26 @@ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); | |||
| 77 | 77 | ||
| 78 | void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) | 78 | void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) |
| 79 | { | 79 | { |
| 80 | list_del_init(&lockres->list); | 80 | hlist_del_init(&lockres->hash_node); |
| 81 | dlm_lockres_put(lockres); | 81 | dlm_lockres_put(lockres); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, | 84 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, |
| 85 | struct dlm_lock_resource *res) | 85 | struct dlm_lock_resource *res) |
| 86 | { | 86 | { |
| 87 | struct list_head *bucket; | 87 | struct hlist_head *bucket; |
| 88 | struct qstr *q; | 88 | struct qstr *q; |
| 89 | 89 | ||
| 90 | assert_spin_locked(&dlm->spinlock); | 90 | assert_spin_locked(&dlm->spinlock); |
| 91 | 91 | ||
| 92 | q = &res->lockname; | 92 | q = &res->lockname; |
| 93 | q->hash = full_name_hash(q->name, q->len); | 93 | q->hash = full_name_hash(q->name, q->len); |
| 94 | bucket = &(dlm->resources[q->hash & DLM_HASH_MASK]); | 94 | bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]); |
| 95 | 95 | ||
| 96 | /* get a reference for our hashtable */ | 96 | /* get a reference for our hashtable */ |
| 97 | dlm_lockres_get(res); | 97 | dlm_lockres_get(res); |
| 98 | 98 | ||
| 99 | list_add_tail(&res->list, bucket); | 99 | hlist_add_head(&res->hash_node, bucket); |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | 102 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, |
| @@ -104,9 +104,9 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | |||
| 104 | unsigned int len) | 104 | unsigned int len) |
| 105 | { | 105 | { |
| 106 | unsigned int hash; | 106 | unsigned int hash; |
| 107 | struct list_head *iter; | 107 | struct hlist_node *iter; |
| 108 | struct dlm_lock_resource *tmpres=NULL; | 108 | struct dlm_lock_resource *tmpres=NULL; |
| 109 | struct list_head *bucket; | 109 | struct hlist_head *bucket; |
| 110 | 110 | ||
| 111 | mlog_entry("%.*s\n", len, name); | 111 | mlog_entry("%.*s\n", len, name); |
| 112 | 112 | ||
| @@ -114,11 +114,11 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | |||
| 114 | 114 | ||
| 115 | hash = full_name_hash(name, len); | 115 | hash = full_name_hash(name, len); |
| 116 | 116 | ||
| 117 | bucket = &(dlm->resources[hash & DLM_HASH_MASK]); | 117 | bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]); |
| 118 | 118 | ||
| 119 | /* check for pre-existing lock */ | 119 | /* check for pre-existing lock */ |
| 120 | list_for_each(iter, bucket) { | 120 | hlist_for_each(iter, bucket) { |
| 121 | tmpres = list_entry(iter, struct dlm_lock_resource, list); | 121 | tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node); |
| 122 | if (tmpres->lockname.len == len && | 122 | if (tmpres->lockname.len == len && |
| 123 | memcmp(tmpres->lockname.name, name, len) == 0) { | 123 | memcmp(tmpres->lockname.name, name, len) == 0) { |
| 124 | dlm_lockres_get(tmpres); | 124 | dlm_lockres_get(tmpres); |
| @@ -193,8 +193,8 @@ static int dlm_wait_on_domain_helper(const char *domain) | |||
| 193 | 193 | ||
| 194 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) | 194 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) |
| 195 | { | 195 | { |
| 196 | if (dlm->resources) | 196 | if (dlm->lockres_hash) |
| 197 | free_page((unsigned long) dlm->resources); | 197 | free_page((unsigned long) dlm->lockres_hash); |
| 198 | 198 | ||
| 199 | if (dlm->name) | 199 | if (dlm->name) |
| 200 | kfree(dlm->name); | 200 | kfree(dlm->name); |
| @@ -303,10 +303,10 @@ static void dlm_migrate_all_locks(struct dlm_ctxt *dlm) | |||
| 303 | mlog(0, "Migrating locks from domain %s\n", dlm->name); | 303 | mlog(0, "Migrating locks from domain %s\n", dlm->name); |
| 304 | restart: | 304 | restart: |
| 305 | spin_lock(&dlm->spinlock); | 305 | spin_lock(&dlm->spinlock); |
| 306 | for (i=0; i<DLM_HASH_SIZE; i++) { | 306 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
| 307 | while (!list_empty(&dlm->resources[i])) { | 307 | while (!hlist_empty(&dlm->lockres_hash[i])) { |
| 308 | res = list_entry(dlm->resources[i].next, | 308 | res = hlist_entry(dlm->lockres_hash[i].first, |
| 309 | struct dlm_lock_resource, list); | 309 | struct dlm_lock_resource, hash_node); |
| 310 | /* need reference when manually grabbing lockres */ | 310 | /* need reference when manually grabbing lockres */ |
| 311 | dlm_lockres_get(res); | 311 | dlm_lockres_get(res); |
| 312 | /* this should unhash the lockres | 312 | /* this should unhash the lockres |
| @@ -1191,18 +1191,17 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1191 | goto leave; | 1191 | goto leave; |
| 1192 | } | 1192 | } |
| 1193 | 1193 | ||
| 1194 | dlm->resources = (struct list_head *) __get_free_page(GFP_KERNEL); | 1194 | dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL); |
| 1195 | if (!dlm->resources) { | 1195 | if (!dlm->lockres_hash) { |
| 1196 | mlog_errno(-ENOMEM); | 1196 | mlog_errno(-ENOMEM); |
| 1197 | kfree(dlm->name); | 1197 | kfree(dlm->name); |
| 1198 | kfree(dlm); | 1198 | kfree(dlm); |
| 1199 | dlm = NULL; | 1199 | dlm = NULL; |
| 1200 | goto leave; | 1200 | goto leave; |
| 1201 | } | 1201 | } |
| 1202 | memset(dlm->resources, 0, PAGE_SIZE); | ||
| 1203 | 1202 | ||
| 1204 | for (i=0; i<DLM_HASH_SIZE; i++) | 1203 | for (i=0; i<DLM_HASH_BUCKETS; i++) |
| 1205 | INIT_LIST_HEAD(&dlm->resources[i]); | 1204 | INIT_HLIST_HEAD(&dlm->lockres_hash[i]); |
| 1206 | 1205 | ||
| 1207 | strcpy(dlm->name, domain); | 1206 | strcpy(dlm->name, domain); |
| 1208 | dlm->key = key; | 1207 | dlm->key = key; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 2e2e95e69499..847dd3cc4cf5 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -564,7 +564,7 @@ static void dlm_lockres_release(struct kref *kref) | |||
| 564 | 564 | ||
| 565 | /* By the time we're ready to blow this guy away, we shouldn't | 565 | /* By the time we're ready to blow this guy away, we shouldn't |
| 566 | * be on any lists. */ | 566 | * be on any lists. */ |
| 567 | BUG_ON(!list_empty(&res->list)); | 567 | BUG_ON(!hlist_unhashed(&res->hash_node)); |
| 568 | BUG_ON(!list_empty(&res->granted)); | 568 | BUG_ON(!list_empty(&res->granted)); |
| 569 | BUG_ON(!list_empty(&res->converting)); | 569 | BUG_ON(!list_empty(&res->converting)); |
| 570 | BUG_ON(!list_empty(&res->blocked)); | 570 | BUG_ON(!list_empty(&res->blocked)); |
| @@ -605,7 +605,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 605 | 605 | ||
| 606 | init_waitqueue_head(&res->wq); | 606 | init_waitqueue_head(&res->wq); |
| 607 | spin_lock_init(&res->spinlock); | 607 | spin_lock_init(&res->spinlock); |
| 608 | INIT_LIST_HEAD(&res->list); | 608 | INIT_HLIST_NODE(&res->hash_node); |
| 609 | INIT_LIST_HEAD(&res->granted); | 609 | INIT_LIST_HEAD(&res->granted); |
| 610 | INIT_LIST_HEAD(&res->converting); | 610 | INIT_LIST_HEAD(&res->converting); |
| 611 | INIT_LIST_HEAD(&res->blocked); | 611 | INIT_LIST_HEAD(&res->blocked); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ed76bda1a534..1e232000f3f7 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -1693,7 +1693,10 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
| 1693 | u8 dead_node, u8 new_master) | 1693 | u8 dead_node, u8 new_master) |
| 1694 | { | 1694 | { |
| 1695 | int i; | 1695 | int i; |
| 1696 | struct list_head *iter, *iter2, *bucket; | 1696 | struct list_head *iter, *iter2; |
| 1697 | struct hlist_node *hash_iter; | ||
| 1698 | struct hlist_head *bucket; | ||
| 1699 | |||
| 1697 | struct dlm_lock_resource *res; | 1700 | struct dlm_lock_resource *res; |
| 1698 | 1701 | ||
| 1699 | mlog_entry_void(); | 1702 | mlog_entry_void(); |
| @@ -1717,10 +1720,9 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
| 1717 | * for now we need to run the whole hash, clear | 1720 | * for now we need to run the whole hash, clear |
| 1718 | * the RECOVERING state and set the owner | 1721 | * the RECOVERING state and set the owner |
| 1719 | * if necessary */ | 1722 | * if necessary */ |
| 1720 | for (i=0; i<DLM_HASH_SIZE; i++) { | 1723 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
| 1721 | bucket = &(dlm->resources[i]); | 1724 | bucket = &(dlm->lockres_hash[i]); |
| 1722 | list_for_each(iter, bucket) { | 1725 | hlist_for_each_entry(res, hash_iter, bucket, hash_node) { |
| 1723 | res = list_entry (iter, struct dlm_lock_resource, list); | ||
| 1724 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 1726 | if (res->state & DLM_LOCK_RES_RECOVERING) { |
| 1725 | if (res->owner == dead_node) { | 1727 | if (res->owner == dead_node) { |
| 1726 | mlog(0, "(this=%u) res %.*s owner=%u " | 1728 | mlog(0, "(this=%u) res %.*s owner=%u " |
| @@ -1852,10 +1854,10 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
| 1852 | 1854 | ||
| 1853 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | 1855 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) |
| 1854 | { | 1856 | { |
| 1855 | struct list_head *iter; | 1857 | struct hlist_node *iter; |
| 1856 | struct dlm_lock_resource *res; | 1858 | struct dlm_lock_resource *res; |
| 1857 | int i; | 1859 | int i; |
| 1858 | struct list_head *bucket; | 1860 | struct hlist_head *bucket; |
| 1859 | struct dlm_lock *lock; | 1861 | struct dlm_lock *lock; |
| 1860 | 1862 | ||
| 1861 | 1863 | ||
| @@ -1876,10 +1878,9 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 1876 | * can be kicked again to see if any ASTs or BASTs | 1878 | * can be kicked again to see if any ASTs or BASTs |
| 1877 | * need to be fired as a result. | 1879 | * need to be fired as a result. |
| 1878 | */ | 1880 | */ |
| 1879 | for (i=0; i<DLM_HASH_SIZE; i++) { | 1881 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
| 1880 | bucket = &(dlm->resources[i]); | 1882 | bucket = &(dlm->lockres_hash[i]); |
| 1881 | list_for_each(iter, bucket) { | 1883 | hlist_for_each_entry(res, iter, bucket, hash_node) { |
| 1882 | res = list_entry (iter, struct dlm_lock_resource, list); | ||
| 1883 | /* always prune any $RECOVERY entries for dead nodes, | 1884 | /* always prune any $RECOVERY entries for dead nodes, |
| 1884 | * otherwise hangs can occur during later recovery */ | 1885 | * otherwise hangs can occur during later recovery */ |
| 1885 | if (dlm_is_recovery_lock(res->lockname.name, | 1886 | if (dlm_is_recovery_lock(res->lockname.name, |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index b6ba292e9544..e6f207eebab4 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
| @@ -181,6 +181,12 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, | |||
| 181 | ret = -EBADR; | 181 | ret = -EBADR; |
| 182 | if (rec_end > OCFS2_I(inode)->ip_clusters) { | 182 | if (rec_end > OCFS2_I(inode)->ip_clusters) { |
| 183 | mlog_errno(ret); | 183 | mlog_errno(ret); |
| 184 | ocfs2_error(inode->i_sb, | ||
| 185 | "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n", | ||
| 186 | i, | ||
| 187 | le64_to_cpu(rec->e_blkno), | ||
| 188 | OCFS2_I(inode)->ip_blkno, | ||
| 189 | OCFS2_I(inode)->ip_clusters); | ||
| 184 | goto out_free; | 190 | goto out_free; |
| 185 | } | 191 | } |
| 186 | 192 | ||
| @@ -226,6 +232,12 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, | |||
| 226 | ret = -EBADR; | 232 | ret = -EBADR; |
| 227 | if (blkno) { | 233 | if (blkno) { |
| 228 | mlog_errno(ret); | 234 | mlog_errno(ret); |
| 235 | ocfs2_error(inode->i_sb, | ||
| 236 | "Multiple extents for (cpos = %u, clusters = %u) on inode %"MLFu64"; e_blkno %"MLFu64" and rec %d at e_blkno %"MLFu64"\n", | ||
| 237 | cpos, clusters, | ||
| 238 | OCFS2_I(inode)->ip_blkno, | ||
| 239 | blkno, i, | ||
| 240 | le64_to_cpu(rec->e_blkno)); | ||
| 229 | goto out_free; | 241 | goto out_free; |
| 230 | } | 242 | } |
| 231 | 243 | ||
| @@ -238,6 +250,10 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, | |||
| 238 | */ | 250 | */ |
| 239 | ret = -EBADR; | 251 | ret = -EBADR; |
| 240 | if (!blkno) { | 252 | if (!blkno) { |
| 253 | ocfs2_error(inode->i_sb, | ||
| 254 | "No record found for (cpos = %u, clusters = %u) on inode %"MLFu64"\n", | ||
| 255 | cpos, clusters, | ||
| 256 | OCFS2_I(inode)->ip_blkno); | ||
| 241 | mlog_errno(ret); | 257 | mlog_errno(ret); |
| 242 | goto out_free; | 258 | goto out_free; |
| 243 | } | 259 | } |
| @@ -266,6 +282,20 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, | |||
| 266 | 282 | ||
| 267 | for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | 283 | for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { |
| 268 | rec = &el->l_recs[i]; | 284 | rec = &el->l_recs[i]; |
| 285 | |||
| 286 | if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) > | ||
| 287 | OCFS2_I(inode)->ip_clusters) { | ||
| 288 | ret = -EBADR; | ||
| 289 | mlog_errno(ret); | ||
| 290 | ocfs2_error(inode->i_sb, | ||
| 291 | "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n", | ||
| 292 | i, | ||
| 293 | le64_to_cpu(rec->e_blkno), | ||
| 294 | OCFS2_I(inode)->ip_blkno, | ||
| 295 | OCFS2_I(inode)->ip_clusters); | ||
| 296 | return ret; | ||
| 297 | } | ||
| 298 | |||
| 269 | ret = ocfs2_extent_map_insert(inode, rec, | 299 | ret = ocfs2_extent_map_insert(inode, rec, |
| 270 | le16_to_cpu(el->l_tree_depth)); | 300 | le16_to_cpu(el->l_tree_depth)); |
| 271 | if (ret) { | 301 | if (ret) { |
| @@ -526,6 +556,10 @@ static int ocfs2_extent_map_insert(struct inode *inode, | |||
| 526 | OCFS2_I(inode)->ip_map.em_clusters) { | 556 | OCFS2_I(inode)->ip_map.em_clusters) { |
| 527 | ret = -EBADR; | 557 | ret = -EBADR; |
| 528 | mlog_errno(ret); | 558 | mlog_errno(ret); |
| 559 | ocfs2_error(inode->i_sb, | ||
| 560 | "Zero e_clusters on non-tail extent record at e_blkno %"MLFu64" on inode %"MLFu64"\n", | ||
| 561 | le64_to_cpu(rec->e_blkno), | ||
| 562 | OCFS2_I(inode)->ip_blkno); | ||
| 529 | return ret; | 563 | return ret; |
| 530 | } | 564 | } |
| 531 | 565 | ||
| @@ -588,12 +622,12 @@ static int ocfs2_extent_map_insert(struct inode *inode, | |||
| 588 | * Existing record in the extent map: | 622 | * Existing record in the extent map: |
| 589 | * | 623 | * |
| 590 | * cpos = 10, len = 10 | 624 | * cpos = 10, len = 10 |
| 591 | * |---------| | 625 | * |---------| |
| 592 | * | 626 | * |
| 593 | * New Record: | 627 | * New Record: |
| 594 | * | 628 | * |
| 595 | * cpos = 10, len = 20 | 629 | * cpos = 10, len = 20 |
| 596 | * |------------------| | 630 | * |------------------| |
| 597 | * | 631 | * |
| 598 | * The passed record is the new on-disk record. The new_clusters value | 632 | * The passed record is the new on-disk record. The new_clusters value |
| 599 | * is how many clusters were added to the file. If the append is a | 633 | * is how many clusters were added to the file. If the append is a |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1715bc90e705..8a4048b55fdc 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -933,9 +933,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 933 | struct file *filp = iocb->ki_filp; | 933 | struct file *filp = iocb->ki_filp; |
| 934 | struct inode *inode = filp->f_dentry->d_inode; | 934 | struct inode *inode = filp->f_dentry->d_inode; |
| 935 | loff_t newsize, saved_pos; | 935 | loff_t newsize, saved_pos; |
| 936 | #ifdef OCFS2_ORACORE_WORKAROUNDS | ||
| 937 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 938 | #endif | ||
| 939 | 936 | ||
| 940 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, | 937 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, |
| 941 | (unsigned int)count, | 938 | (unsigned int)count, |
| @@ -951,14 +948,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 951 | return -EIO; | 948 | return -EIO; |
| 952 | } | 949 | } |
| 953 | 950 | ||
| 954 | #ifdef OCFS2_ORACORE_WORKAROUNDS | ||
| 955 | /* ugh, work around some applications which open everything O_DIRECT + | ||
| 956 | * O_APPEND and really don't mean to use O_DIRECT. */ | ||
| 957 | if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS && | ||
| 958 | (filp->f_flags & O_APPEND) && (filp->f_flags & O_DIRECT)) | ||
| 959 | filp->f_flags &= ~O_DIRECT; | ||
| 960 | #endif | ||
| 961 | |||
| 962 | mutex_lock(&inode->i_mutex); | 951 | mutex_lock(&inode->i_mutex); |
| 963 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ | 952 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ |
| 964 | if (filp->f_flags & O_DIRECT) { | 953 | if (filp->f_flags & O_DIRECT) { |
| @@ -1079,27 +1068,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 1079 | /* communicate with ocfs2_dio_end_io */ | 1068 | /* communicate with ocfs2_dio_end_io */ |
| 1080 | ocfs2_iocb_set_rw_locked(iocb); | 1069 | ocfs2_iocb_set_rw_locked(iocb); |
| 1081 | 1070 | ||
| 1082 | #ifdef OCFS2_ORACORE_WORKAROUNDS | 1071 | ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); |
| 1083 | if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS && | ||
| 1084 | filp->f_flags & O_DIRECT) { | ||
| 1085 | unsigned int saved_flags = filp->f_flags; | ||
| 1086 | int sector_size = 1 << osb->s_sectsize_bits; | ||
| 1087 | |||
| 1088 | if ((saved_pos & (sector_size - 1)) || | ||
| 1089 | (count & (sector_size - 1)) || | ||
| 1090 | ((unsigned long)buf & (sector_size - 1))) { | ||
| 1091 | filp->f_flags |= O_SYNC; | ||
| 1092 | filp->f_flags &= ~O_DIRECT; | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, | ||
| 1096 | &iocb->ki_pos); | ||
| 1097 | |||
| 1098 | filp->f_flags = saved_flags; | ||
| 1099 | } else | ||
| 1100 | #endif | ||
| 1101 | ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, | ||
| 1102 | &iocb->ki_pos); | ||
| 1103 | 1072 | ||
| 1104 | /* buffered aio wouldn't have proper lock coverage today */ | 1073 | /* buffered aio wouldn't have proper lock coverage today */ |
| 1105 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); | 1074 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); |
| @@ -1140,9 +1109,6 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
| 1140 | int ret = 0, rw_level = -1, have_alloc_sem = 0; | 1109 | int ret = 0, rw_level = -1, have_alloc_sem = 0; |
| 1141 | struct file *filp = iocb->ki_filp; | 1110 | struct file *filp = iocb->ki_filp; |
| 1142 | struct inode *inode = filp->f_dentry->d_inode; | 1111 | struct inode *inode = filp->f_dentry->d_inode; |
| 1143 | #ifdef OCFS2_ORACORE_WORKAROUNDS | ||
| 1144 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1145 | #endif | ||
| 1146 | 1112 | ||
| 1147 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, | 1113 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, |
| 1148 | (unsigned int)count, | 1114 | (unsigned int)count, |
| @@ -1155,21 +1121,6 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
| 1155 | goto bail; | 1121 | goto bail; |
| 1156 | } | 1122 | } |
| 1157 | 1123 | ||
| 1158 | #ifdef OCFS2_ORACORE_WORKAROUNDS | ||
| 1159 | if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) { | ||
| 1160 | if (filp->f_flags & O_DIRECT) { | ||
| 1161 | int sector_size = 1 << osb->s_sectsize_bits; | ||
| 1162 | |||
| 1163 | if ((pos & (sector_size - 1)) || | ||
| 1164 | (count & (sector_size - 1)) || | ||
| 1165 | ((unsigned long)buf & (sector_size - 1)) || | ||
| 1166 | (i_size_read(inode) & (sector_size -1))) { | ||
| 1167 | filp->f_flags &= ~O_DIRECT; | ||
| 1168 | } | ||
| 1169 | } | ||
| 1170 | } | ||
| 1171 | #endif | ||
| 1172 | |||
| 1173 | /* | 1124 | /* |
| 1174 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads | 1125 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads |
| 1175 | * need locks to protect pending reads from racing with truncate. | 1126 | * need locks to protect pending reads from racing with truncate. |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0bbd22f46c80..cbfd45a97a63 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -67,6 +67,7 @@ void ocfs2_init_node_maps(struct ocfs2_super *osb) | |||
| 67 | ocfs2_node_map_init(&osb->mounted_map); | 67 | ocfs2_node_map_init(&osb->mounted_map); |
| 68 | ocfs2_node_map_init(&osb->recovery_map); | 68 | ocfs2_node_map_init(&osb->recovery_map); |
| 69 | ocfs2_node_map_init(&osb->umount_map); | 69 | ocfs2_node_map_init(&osb->umount_map); |
| 70 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | ||
| 70 | } | 71 | } |
| 71 | 72 | ||
| 72 | static void ocfs2_do_node_down(int node_num, | 73 | static void ocfs2_do_node_down(int node_num, |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 8122489c5762..315472a5c192 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include "dlmglue.h" | 41 | #include "dlmglue.h" |
| 42 | #include "extent_map.h" | 42 | #include "extent_map.h" |
| 43 | #include "file.h" | 43 | #include "file.h" |
| 44 | #include "heartbeat.h" | ||
| 44 | #include "inode.h" | 45 | #include "inode.h" |
| 45 | #include "journal.h" | 46 | #include "journal.h" |
| 46 | #include "namei.h" | 47 | #include "namei.h" |
| @@ -544,6 +545,42 @@ bail: | |||
| 544 | return status; | 545 | return status; |
| 545 | } | 546 | } |
| 546 | 547 | ||
| 548 | /* | ||
| 549 | * Serialize with orphan dir recovery. If the process doing | ||
| 550 | * recovery on this orphan dir does an iget() with the dir | ||
| 551 | * i_mutex held, we'll deadlock here. Instead we detect this | ||
| 552 | * and exit early - recovery will wipe this inode for us. | ||
| 553 | */ | ||
| 554 | static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb, | ||
| 555 | int slot) | ||
| 556 | { | ||
| 557 | int ret = 0; | ||
| 558 | |||
| 559 | spin_lock(&osb->osb_lock); | ||
| 560 | if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) { | ||
| 561 | mlog(0, "Recovery is happening on orphan dir %d, will skip " | ||
| 562 | "this inode\n", slot); | ||
| 563 | ret = -EDEADLK; | ||
| 564 | goto out; | ||
| 565 | } | ||
| 566 | /* This signals to the orphan recovery process that it should | ||
| 567 | * wait for us to handle the wipe. */ | ||
| 568 | osb->osb_orphan_wipes[slot]++; | ||
| 569 | out: | ||
| 570 | spin_unlock(&osb->osb_lock); | ||
| 571 | return ret; | ||
| 572 | } | ||
| 573 | |||
| 574 | static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb, | ||
| 575 | int slot) | ||
| 576 | { | ||
| 577 | spin_lock(&osb->osb_lock); | ||
| 578 | osb->osb_orphan_wipes[slot]--; | ||
| 579 | spin_unlock(&osb->osb_lock); | ||
| 580 | |||
| 581 | wake_up(&osb->osb_wipe_event); | ||
| 582 | } | ||
| 583 | |||
| 547 | static int ocfs2_wipe_inode(struct inode *inode, | 584 | static int ocfs2_wipe_inode(struct inode *inode, |
| 548 | struct buffer_head *di_bh) | 585 | struct buffer_head *di_bh) |
| 549 | { | 586 | { |
| @@ -555,6 +592,11 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 555 | /* We've already voted on this so it should be readonly - no | 592 | /* We've already voted on this so it should be readonly - no |
| 556 | * spinlock needed. */ | 593 | * spinlock needed. */ |
| 557 | orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; | 594 | orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; |
| 595 | |||
| 596 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); | ||
| 597 | if (status) | ||
| 598 | return status; | ||
| 599 | |||
| 558 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 600 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
| 559 | ORPHAN_DIR_SYSTEM_INODE, | 601 | ORPHAN_DIR_SYSTEM_INODE, |
| 560 | orphaned_slot); | 602 | orphaned_slot); |
| @@ -597,6 +639,7 @@ bail_unlock_dir: | |||
| 597 | brelse(orphan_dir_bh); | 639 | brelse(orphan_dir_bh); |
| 598 | bail: | 640 | bail: |
| 599 | iput(orphan_dir_inode); | 641 | iput(orphan_dir_inode); |
| 642 | ocfs2_signal_wipe_completion(osb, orphaned_slot); | ||
| 600 | 643 | ||
| 601 | return status; | 644 | return status; |
| 602 | } | 645 | } |
| @@ -822,7 +865,8 @@ void ocfs2_delete_inode(struct inode *inode) | |||
| 822 | 865 | ||
| 823 | status = ocfs2_wipe_inode(inode, di_bh); | 866 | status = ocfs2_wipe_inode(inode, di_bh); |
| 824 | if (status < 0) { | 867 | if (status < 0) { |
| 825 | mlog_errno(status); | 868 | if (status != -EDEADLK) |
| 869 | mlog_errno(status); | ||
| 826 | goto bail_unlock_inode; | 870 | goto bail_unlock_inode; |
| 827 | } | 871 | } |
| 828 | 872 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index d329c9df90ae..4be801f4559b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -1408,21 +1408,17 @@ bail: | |||
| 1408 | return status; | 1408 | return status; |
| 1409 | } | 1409 | } |
| 1410 | 1410 | ||
| 1411 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 1411 | static int ocfs2_queue_orphans(struct ocfs2_super *osb, |
| 1412 | int slot) | 1412 | int slot, |
| 1413 | struct inode **head) | ||
| 1413 | { | 1414 | { |
| 1414 | int status = 0; | 1415 | int status; |
| 1415 | int have_disk_lock = 0; | ||
| 1416 | struct inode *inode = NULL; | ||
| 1417 | struct inode *iter; | ||
| 1418 | struct inode *orphan_dir_inode = NULL; | 1416 | struct inode *orphan_dir_inode = NULL; |
| 1417 | struct inode *iter; | ||
| 1419 | unsigned long offset, blk, local; | 1418 | unsigned long offset, blk, local; |
| 1420 | struct buffer_head *bh = NULL; | 1419 | struct buffer_head *bh = NULL; |
| 1421 | struct ocfs2_dir_entry *de; | 1420 | struct ocfs2_dir_entry *de; |
| 1422 | struct super_block *sb = osb->sb; | 1421 | struct super_block *sb = osb->sb; |
| 1423 | struct ocfs2_inode_info *oi; | ||
| 1424 | |||
| 1425 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | ||
| 1426 | 1422 | ||
| 1427 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1423 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
| 1428 | ORPHAN_DIR_SYSTEM_INODE, | 1424 | ORPHAN_DIR_SYSTEM_INODE, |
| @@ -1430,17 +1426,15 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1430 | if (!orphan_dir_inode) { | 1426 | if (!orphan_dir_inode) { |
| 1431 | status = -ENOENT; | 1427 | status = -ENOENT; |
| 1432 | mlog_errno(status); | 1428 | mlog_errno(status); |
| 1433 | goto out; | 1429 | return status; |
| 1434 | } | 1430 | } |
| 1435 | 1431 | ||
| 1436 | mutex_lock(&orphan_dir_inode->i_mutex); | 1432 | mutex_lock(&orphan_dir_inode->i_mutex); |
| 1437 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); | 1433 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); |
| 1438 | if (status < 0) { | 1434 | if (status < 0) { |
| 1439 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1440 | mlog_errno(status); | 1435 | mlog_errno(status); |
| 1441 | goto out; | 1436 | goto out; |
| 1442 | } | 1437 | } |
| 1443 | have_disk_lock = 1; | ||
| 1444 | 1438 | ||
| 1445 | offset = 0; | 1439 | offset = 0; |
| 1446 | iter = NULL; | 1440 | iter = NULL; |
| @@ -1451,11 +1445,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1451 | if (!bh) | 1445 | if (!bh) |
| 1452 | status = -EINVAL; | 1446 | status = -EINVAL; |
| 1453 | if (status < 0) { | 1447 | if (status < 0) { |
| 1454 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1455 | if (bh) | 1448 | if (bh) |
| 1456 | brelse(bh); | 1449 | brelse(bh); |
| 1457 | mlog_errno(status); | 1450 | mlog_errno(status); |
| 1458 | goto out; | 1451 | goto out_unlock; |
| 1459 | } | 1452 | } |
| 1460 | 1453 | ||
| 1461 | local = 0; | 1454 | local = 0; |
| @@ -1465,11 +1458,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1465 | 1458 | ||
| 1466 | if (!ocfs2_check_dir_entry(orphan_dir_inode, | 1459 | if (!ocfs2_check_dir_entry(orphan_dir_inode, |
| 1467 | de, bh, local)) { | 1460 | de, bh, local)) { |
| 1468 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1469 | status = -EINVAL; | 1461 | status = -EINVAL; |
| 1470 | mlog_errno(status); | 1462 | mlog_errno(status); |
| 1471 | brelse(bh); | 1463 | brelse(bh); |
| 1472 | goto out; | 1464 | goto out_unlock; |
| 1473 | } | 1465 | } |
| 1474 | 1466 | ||
| 1475 | local += le16_to_cpu(de->rec_len); | 1467 | local += le16_to_cpu(de->rec_len); |
| @@ -1504,18 +1496,95 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1504 | 1496 | ||
| 1505 | mlog(0, "queue orphan %"MLFu64"\n", | 1497 | mlog(0, "queue orphan %"MLFu64"\n", |
| 1506 | OCFS2_I(iter)->ip_blkno); | 1498 | OCFS2_I(iter)->ip_blkno); |
| 1507 | OCFS2_I(iter)->ip_next_orphan = inode; | 1499 | /* No locking is required for the next_orphan |
| 1508 | inode = iter; | 1500 | * queue as there is only ever a single |
| 1501 | * process doing orphan recovery. */ | ||
| 1502 | OCFS2_I(iter)->ip_next_orphan = *head; | ||
| 1503 | *head = iter; | ||
| 1509 | } | 1504 | } |
| 1510 | brelse(bh); | 1505 | brelse(bh); |
| 1511 | } | 1506 | } |
| 1512 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1513 | 1507 | ||
| 1508 | out_unlock: | ||
| 1514 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1509 | ocfs2_meta_unlock(orphan_dir_inode, 0); |
| 1515 | have_disk_lock = 0; | 1510 | out: |
| 1516 | 1511 | mutex_unlock(&orphan_dir_inode->i_mutex); | |
| 1517 | iput(orphan_dir_inode); | 1512 | iput(orphan_dir_inode); |
| 1518 | orphan_dir_inode = NULL; | 1513 | return status; |
| 1514 | } | ||
| 1515 | |||
| 1516 | static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, | ||
| 1517 | int slot) | ||
| 1518 | { | ||
| 1519 | int ret; | ||
| 1520 | |||
| 1521 | spin_lock(&osb->osb_lock); | ||
| 1522 | ret = !osb->osb_orphan_wipes[slot]; | ||
| 1523 | spin_unlock(&osb->osb_lock); | ||
| 1524 | return ret; | ||
| 1525 | } | ||
| 1526 | |||
| 1527 | static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb, | ||
| 1528 | int slot) | ||
| 1529 | { | ||
| 1530 | spin_lock(&osb->osb_lock); | ||
| 1531 | /* Mark ourselves such that new processes in delete_inode() | ||
| 1532 | * know to quit early. */ | ||
| 1533 | ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | ||
| 1534 | while (osb->osb_orphan_wipes[slot]) { | ||
| 1535 | /* If any processes are already in the middle of an | ||
| 1536 | * orphan wipe on this dir, then we need to wait for | ||
| 1537 | * them. */ | ||
| 1538 | spin_unlock(&osb->osb_lock); | ||
| 1539 | wait_event_interruptible(osb->osb_wipe_event, | ||
| 1540 | ocfs2_orphan_recovery_can_continue(osb, slot)); | ||
| 1541 | spin_lock(&osb->osb_lock); | ||
| 1542 | } | ||
| 1543 | spin_unlock(&osb->osb_lock); | ||
| 1544 | } | ||
| 1545 | |||
| 1546 | static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, | ||
| 1547 | int slot) | ||
| 1548 | { | ||
| 1549 | ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | /* | ||
| 1553 | * Orphan recovery. Each mounted node has its own orphan dir which we | ||
| 1554 | * must run during recovery. Our strategy here is to build a list of | ||
| 1555 | * the inodes in the orphan dir and iget/iput them. The VFS does | ||
| 1556 | * (most) of the rest of the work. | ||
| 1557 | * | ||
| 1558 | * Orphan recovery can happen at any time, not just mount so we have a | ||
| 1559 | * couple of extra considerations. | ||
| 1560 | * | ||
| 1561 | * - We grab as many inodes as we can under the orphan dir lock - | ||
| 1562 | * doing iget() outside the orphan dir lock risks getting a reference on | ||
| 1563 | * an invalid inode. | ||
| 1564 | * - We must be sure not to deadlock with other processes on the | ||
| 1565 | * system wanting to run delete_inode(). This can happen when they go | ||
| 1566 | * to lock the orphan dir and the orphan recovery process attempts to | ||
| 1567 | * iget() inside the orphan dir lock. This can be avoided by | ||
| 1568 | * advertising our state to ocfs2_delete_inode(). | ||
| 1569 | */ | ||
| 1570 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | ||
| 1571 | int slot) | ||
| 1572 | { | ||
| 1573 | int ret = 0; | ||
| 1574 | struct inode *inode = NULL; | ||
| 1575 | struct inode *iter; | ||
| 1576 | struct ocfs2_inode_info *oi; | ||
| 1577 | |||
| 1578 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | ||
| 1579 | |||
| 1580 | ocfs2_mark_recovering_orphan_dir(osb, slot); | ||
| 1581 | ret = ocfs2_queue_orphans(osb, slot, &inode); | ||
| 1582 | ocfs2_clear_recovering_orphan_dir(osb, slot); | ||
| 1583 | |||
| 1584 | /* Error here should be noted, but we want to continue with as | ||
| 1585 | * many queued inodes as we've got. */ | ||
| 1586 | if (ret) | ||
| 1587 | mlog_errno(ret); | ||
| 1519 | 1588 | ||
| 1520 | while (inode) { | 1589 | while (inode) { |
| 1521 | oi = OCFS2_I(inode); | 1590 | oi = OCFS2_I(inode); |
| @@ -1541,14 +1610,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1541 | inode = iter; | 1610 | inode = iter; |
| 1542 | } | 1611 | } |
| 1543 | 1612 | ||
| 1544 | out: | 1613 | return ret; |
| 1545 | if (have_disk_lock) | ||
| 1546 | ocfs2_meta_unlock(orphan_dir_inode, 0); | ||
| 1547 | |||
| 1548 | if (orphan_dir_inode) | ||
| 1549 | iput(orphan_dir_inode); | ||
| 1550 | |||
| 1551 | return status; | ||
| 1552 | } | 1614 | } |
| 1553 | 1615 | ||
| 1554 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 1616 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8d8e4779df92..e89de9b6e491 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -174,9 +174,6 @@ enum ocfs2_mount_options | |||
| 174 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 174 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
| 175 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 175 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
| 176 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 176 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
| 177 | #ifdef OCFS2_ORACORE_WORKAROUNDS | ||
| 178 | OCFS2_MOUNT_COMPAT_OCFS = 1 << 30, /* ocfs1 compatibility mode */ | ||
| 179 | #endif | ||
| 180 | }; | 177 | }; |
| 181 | 178 | ||
| 182 | #define OCFS2_OSB_SOFT_RO 0x0001 | 179 | #define OCFS2_OSB_SOFT_RO 0x0001 |
| @@ -290,6 +287,10 @@ struct ocfs2_super | |||
| 290 | struct inode *osb_tl_inode; | 287 | struct inode *osb_tl_inode; |
| 291 | struct buffer_head *osb_tl_bh; | 288 | struct buffer_head *osb_tl_bh; |
| 292 | struct work_struct osb_truncate_log_wq; | 289 | struct work_struct osb_truncate_log_wq; |
| 290 | |||
| 291 | struct ocfs2_node_map osb_recovering_orphan_dirs; | ||
| 292 | unsigned int *osb_orphan_wipes; | ||
| 293 | wait_queue_head_t osb_wipe_event; | ||
| 293 | }; | 294 | }; |
| 294 | 295 | ||
| 295 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 296 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index dfb8a5bedfc8..c5b1ac547c15 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -138,7 +138,6 @@ | |||
| 138 | 138 | ||
| 139 | /* Journal limits (in bytes) */ | 139 | /* Journal limits (in bytes) */ |
| 140 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 140 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
| 141 | #define OCFS2_MAX_JOURNAL_SIZE (500 * 1024 * 1024) | ||
| 142 | 141 | ||
| 143 | struct ocfs2_system_inode_info { | 142 | struct ocfs2_system_inode_info { |
| 144 | char *si_name; | 143 | char *si_name; |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 046824b6b625..8dd3aafec499 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1325,6 +1325,16 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1325 | } | 1325 | } |
| 1326 | mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots); | 1326 | mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots); |
| 1327 | 1327 | ||
| 1328 | init_waitqueue_head(&osb->osb_wipe_event); | ||
| 1329 | osb->osb_orphan_wipes = kcalloc(osb->max_slots, | ||
| 1330 | sizeof(*osb->osb_orphan_wipes), | ||
| 1331 | GFP_KERNEL); | ||
| 1332 | if (!osb->osb_orphan_wipes) { | ||
| 1333 | status = -ENOMEM; | ||
| 1334 | mlog_errno(status); | ||
| 1335 | goto bail; | ||
| 1336 | } | ||
| 1337 | |||
| 1328 | osb->s_feature_compat = | 1338 | osb->s_feature_compat = |
| 1329 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); | 1339 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); |
| 1330 | osb->s_feature_ro_compat = | 1340 | osb->s_feature_ro_compat = |
| @@ -1638,6 +1648,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
| 1638 | if (osb->slot_info) | 1648 | if (osb->slot_info) |
| 1639 | ocfs2_free_slot_info(osb->slot_info); | 1649 | ocfs2_free_slot_info(osb->slot_info); |
| 1640 | 1650 | ||
| 1651 | kfree(osb->osb_orphan_wipes); | ||
| 1641 | /* FIXME | 1652 | /* FIXME |
| 1642 | * This belongs in journal shutdown, but because we have to | 1653 | * This belongs in journal shutdown, but because we have to |
| 1643 | * allocate osb->journal at the start of ocfs2_initalize_osb(), | 1654 | * allocate osb->journal at the start of ocfs2_initalize_osb(), |
