aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.osdl.org> 2006-12-12 13:21:01 -0500
committer Linus Torvalds <torvalds@woody.osdl.org> 2006-12-12 13:21:01 -0500
commit 741441ab7800f1eb031e74fd720f4f8f361678ed (patch)
tree cd265afa96c3753116f570e483408ed8a94fe1d7
parent 659dba34807692a6ebd55e7859dff2c7cb1b005d (diff)
parent 828ae6afbef03bfe107a4a8cc38798419d6a2765 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2:
  [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  [patch 2/3] OCFS2 Configurable timeouts
  [patch 1/3] OCFS2 - Expose struct o2nm_cluster
  ocfs2: Synchronize feature incompat flags in ocfs2_fs.h
  ocfs2: update mount option documentation
  ocfs2: local mounts
-rw-r--r-- Documentation/filesystems/ocfs2.txt 3
-rw-r--r-- fs/ocfs2/cluster/nodemanager.c 192
-rw-r--r-- fs/ocfs2/cluster/nodemanager.h 17
-rw-r--r-- fs/ocfs2/cluster/tcp.c 152
-rw-r--r-- fs/ocfs2/cluster/tcp.h 8
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 15
-rw-r--r-- fs/ocfs2/dlmglue.c 79
-rw-r--r-- fs/ocfs2/heartbeat.c 9
-rw-r--r-- fs/ocfs2/inode.c 3
-rw-r--r-- fs/ocfs2/journal.c 46
-rw-r--r-- fs/ocfs2/journal.h 5
-rw-r--r-- fs/ocfs2/mmap.c 6
-rw-r--r-- fs/ocfs2/namei.c 8
-rw-r--r-- fs/ocfs2/ocfs2.h 5
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 14
-rw-r--r-- fs/ocfs2/super.c 90
-rw-r--r-- fs/ocfs2/vote.c 3
17 files changed, 549 insertions, 106 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index af6defd10cb6..8ccf0c1b58ed 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -54,3 +54,6 @@ errors=panic Panic and halt the machine if an error occurs.
54intr (*) Allow signals to interrupt cluster operations. 54intr (*) Allow signals to interrupt cluster operations.
55nointr Do not allow signals to interrupt cluster 55nointr Do not allow signals to interrupt cluster
56 operations. 56 operations.
57atime_quantum=60(*) OCFS2 will not update atime unless this number
58 of seconds has passed since the last update.
59 Set to zero to always update atime.
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index d11753c50bc1..357f1d551771 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,7 +35,7 @@
35/* for now we operate under the assertion that there can be only one 35/* for now we operate under the assertion that there can be only one
36 * cluster active at a time. Changing this will require trickling 36 * cluster active at a time. Changing this will require trickling
37 * cluster references throughout where nodes are looked up */ 37 * cluster references throughout where nodes are looked up */
38static struct o2nm_cluster *o2nm_single_cluster = NULL; 38struct o2nm_cluster *o2nm_single_cluster = NULL;
39 39
40#define OCFS2_MAX_HB_CTL_PATH 256 40#define OCFS2_MAX_HB_CTL_PATH 256
41static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; 41static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
@@ -97,17 +97,6 @@ const char *o2nm_get_hb_ctl_path(void)
97} 97}
98EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); 98EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path);
99 99
100struct o2nm_cluster {
101 struct config_group cl_group;
102 unsigned cl_has_local:1;
103 u8 cl_local_node;
104 rwlock_t cl_nodes_lock;
105 struct o2nm_node *cl_nodes[O2NM_MAX_NODES];
106 struct rb_root cl_node_ip_tree;
107 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
108 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
109};
110
111struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 100struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
112{ 101{
113 struct o2nm_node *node = NULL; 102 struct o2nm_node *node = NULL;
@@ -543,6 +532,179 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group)
543} 532}
544#endif 533#endif
545 534
535struct o2nm_cluster_attribute {
536 struct configfs_attribute attr;
537 ssize_t (*show)(struct o2nm_cluster *, char *);
538 ssize_t (*store)(struct o2nm_cluster *, const char *, size_t);
539};
540
541static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count,
542 unsigned int *val)
543{
544 unsigned long tmp;
545 char *p = (char *)page;
546
547 tmp = simple_strtoul(p, &p, 0);
548 if (!p || (*p && (*p != '\n')))
549 return -EINVAL;
550
551 if (tmp == 0)
552 return -EINVAL;
553 if (tmp >= (u32)-1)
554 return -ERANGE;
555
556 *val = tmp;
557
558 return count;
559}
560
561static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(
562 struct o2nm_cluster *cluster, char *page)
563{
564 return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms);
565}
566
567static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(
568 struct o2nm_cluster *cluster, const char *page, size_t count)
569{
570 ssize_t ret;
571 unsigned int val;
572
573 ret = o2nm_cluster_attr_write(page, count, &val);
574
575 if (ret > 0) {
576 if (cluster->cl_idle_timeout_ms != val
577 && o2net_num_connected_peers()) {
578 mlog(ML_NOTICE,
579 "o2net: cannot change idle timeout after "
580 "the first peer has agreed to it."
581 " %d connected peers\n",
582 o2net_num_connected_peers());
583 ret = -EINVAL;
584 } else if (val <= cluster->cl_keepalive_delay_ms) {
585 mlog(ML_NOTICE, "o2net: idle timeout must be larger "
586 "than keepalive delay\n");
587 ret = -EINVAL;
588 } else {
589 cluster->cl_idle_timeout_ms = val;
590 }
591 }
592
593 return ret;
594}
595
596static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(
597 struct o2nm_cluster *cluster, char *page)
598{
599 return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms);
600}
601
602static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(
603 struct o2nm_cluster *cluster, const char *page, size_t count)
604{
605 ssize_t ret;
606 unsigned int val;
607
608 ret = o2nm_cluster_attr_write(page, count, &val);
609
610 if (ret > 0) {
611 if (cluster->cl_keepalive_delay_ms != val
612 && o2net_num_connected_peers()) {
613 mlog(ML_NOTICE,
614 "o2net: cannot change keepalive delay after"
615 " the first peer has agreed to it."
616 " %d connected peers\n",
617 o2net_num_connected_peers());
618 ret = -EINVAL;
619 } else if (val >= cluster->cl_idle_timeout_ms) {
620 mlog(ML_NOTICE, "o2net: keepalive delay must be "
621 "smaller than idle timeout\n");
622 ret = -EINVAL;
623 } else {
624 cluster->cl_keepalive_delay_ms = val;
625 }
626 }
627
628 return ret;
629}
630
631static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read(
632 struct o2nm_cluster *cluster, char *page)
633{
634 return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms);
635}
636
637static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
638 struct o2nm_cluster *cluster, const char *page, size_t count)
639{
640 return o2nm_cluster_attr_write(page, count,
641 &cluster->cl_reconnect_delay_ms);
642}
643static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
644 .attr = { .ca_owner = THIS_MODULE,
645 .ca_name = "idle_timeout_ms",
646 .ca_mode = S_IRUGO | S_IWUSR },
647 .show = o2nm_cluster_attr_idle_timeout_ms_read,
648 .store = o2nm_cluster_attr_idle_timeout_ms_write,
649};
650
651static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = {
652 .attr = { .ca_owner = THIS_MODULE,
653 .ca_name = "keepalive_delay_ms",
654 .ca_mode = S_IRUGO | S_IWUSR },
655 .show = o2nm_cluster_attr_keepalive_delay_ms_read,
656 .store = o2nm_cluster_attr_keepalive_delay_ms_write,
657};
658
659static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
660 .attr = { .ca_owner = THIS_MODULE,
661 .ca_name = "reconnect_delay_ms",
662 .ca_mode = S_IRUGO | S_IWUSR },
663 .show = o2nm_cluster_attr_reconnect_delay_ms_read,
664 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
665};
666
667static struct configfs_attribute *o2nm_cluster_attrs[] = {
668 &o2nm_cluster_attr_idle_timeout_ms.attr,
669 &o2nm_cluster_attr_keepalive_delay_ms.attr,
670 &o2nm_cluster_attr_reconnect_delay_ms.attr,
671 NULL,
672};
673static ssize_t o2nm_cluster_show(struct config_item *item,
674 struct configfs_attribute *attr,
675 char *page)
676{
677 struct o2nm_cluster *cluster = to_o2nm_cluster(item);
678 struct o2nm_cluster_attribute *o2nm_cluster_attr =
679 container_of(attr, struct o2nm_cluster_attribute, attr);
680 ssize_t ret = 0;
681
682 if (o2nm_cluster_attr->show)
683 ret = o2nm_cluster_attr->show(cluster, page);
684 return ret;
685}
686
687static ssize_t o2nm_cluster_store(struct config_item *item,
688 struct configfs_attribute *attr,
689 const char *page, size_t count)
690{
691 struct o2nm_cluster *cluster = to_o2nm_cluster(item);
692 struct o2nm_cluster_attribute *o2nm_cluster_attr =
693 container_of(attr, struct o2nm_cluster_attribute, attr);
694 ssize_t ret;
695
696 if (o2nm_cluster_attr->store == NULL) {
697 ret = -EINVAL;
698 goto out;
699 }
700
701 ret = o2nm_cluster_attr->store(cluster, page, count);
702 if (ret < count)
703 goto out;
704out:
705 return ret;
706}
707
546static struct config_item *o2nm_node_group_make_item(struct config_group *group, 708static struct config_item *o2nm_node_group_make_item(struct config_group *group,
547 const char *name) 709 const char *name)
548{ 710{
@@ -624,10 +786,13 @@ static void o2nm_cluster_release(struct config_item *item)
624 786
625static struct configfs_item_operations o2nm_cluster_item_ops = { 787static struct configfs_item_operations o2nm_cluster_item_ops = {
626 .release = o2nm_cluster_release, 788 .release = o2nm_cluster_release,
789 .show_attribute = o2nm_cluster_show,
790 .store_attribute = o2nm_cluster_store,
627}; 791};
628 792
629static struct config_item_type o2nm_cluster_type = { 793static struct config_item_type o2nm_cluster_type = {
630 .ct_item_ops = &o2nm_cluster_item_ops, 794 .ct_item_ops = &o2nm_cluster_item_ops,
795 .ct_attrs = o2nm_cluster_attrs,
631 .ct_owner = THIS_MODULE, 796 .ct_owner = THIS_MODULE,
632}; 797};
633 798
@@ -678,6 +843,9 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
678 cluster->cl_group.default_groups[2] = NULL; 843 cluster->cl_group.default_groups[2] = NULL;
679 rwlock_init(&cluster->cl_nodes_lock); 844 rwlock_init(&cluster->cl_nodes_lock);
680 cluster->cl_node_ip_tree = RB_ROOT; 845 cluster->cl_node_ip_tree = RB_ROOT;
846 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
847 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
848 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
681 849
682 ret = &cluster->cl_group; 850 ret = &cluster->cl_group;
683 o2nm_single_cluster = cluster; 851 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index fce8033c310f..8fb23cacc2f5 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -53,6 +53,23 @@ struct o2nm_node {
53 unsigned long nd_set_attributes; 53 unsigned long nd_set_attributes;
54}; 54};
55 55
56struct o2nm_cluster {
57 struct config_group cl_group;
58 unsigned cl_has_local:1;
59 u8 cl_local_node;
60 rwlock_t cl_nodes_lock;
61 struct o2nm_node *cl_nodes[O2NM_MAX_NODES];
62 struct rb_root cl_node_ip_tree;
63 unsigned int cl_idle_timeout_ms;
64 unsigned int cl_keepalive_delay_ms;
65 unsigned int cl_reconnect_delay_ms;
66
67 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
68 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
69};
70
71extern struct o2nm_cluster *o2nm_single_cluster;
72
56u8 o2nm_this_node(void); 73u8 o2nm_this_node(void);
57 74
58int o2nm_configured_node_map(unsigned long *map, unsigned bytes); 75int o2nm_configured_node_map(unsigned long *map, unsigned bytes);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9b3209dc0b16..457753df1ae7 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes);
147static void o2net_sc_send_keep_req(struct work_struct *work); 147static void o2net_sc_send_keep_req(struct work_struct *work);
148static void o2net_idle_timer(unsigned long data); 148static void o2net_idle_timer(unsigned long data);
149static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 149static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
150static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
151
152/*
153 * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
154 * losing our parent link to the cluster during shutdown. This can be
155 * solved by adding a pre-removal callback to configfs, or passing
156 * around the cluster with the node. -jeffm
157 */
158static inline int o2net_reconnect_delay(struct o2nm_node *node)
159{
160 return o2nm_single_cluster->cl_reconnect_delay_ms;
161}
162
163static inline int o2net_keepalive_delay(struct o2nm_node *node)
164{
165 return o2nm_single_cluster->cl_keepalive_delay_ms;
166}
167
168static inline int o2net_idle_timeout(struct o2nm_node *node)
169{
170 return o2nm_single_cluster->cl_idle_timeout_ms;
171}
150 172
151static inline int o2net_sys_err_to_errno(enum o2net_system_error err) 173static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
152{ 174{
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref)
271{ 293{
272 struct o2net_sock_container *sc = container_of(kref, 294 struct o2net_sock_container *sc = container_of(kref,
273 struct o2net_sock_container, sc_kref); 295 struct o2net_sock_container, sc_kref);
296 BUG_ON(timer_pending(&sc->sc_idle_timeout));
297
274 sclog(sc, "releasing\n"); 298 sclog(sc, "releasing\n");
275 299
276 if (sc->sc_sock) { 300 if (sc->sc_sock) {
@@ -356,6 +380,13 @@ static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc,
356 sc_put(sc); 380 sc_put(sc);
357} 381}
358 382
383static atomic_t o2net_connected_peers = ATOMIC_INIT(0);
384
385int o2net_num_connected_peers(void)
386{
387 return atomic_read(&o2net_connected_peers);
388}
389
359static void o2net_set_nn_state(struct o2net_node *nn, 390static void o2net_set_nn_state(struct o2net_node *nn,
360 struct o2net_sock_container *sc, 391 struct o2net_sock_container *sc,
361 unsigned valid, int err) 392 unsigned valid, int err)
@@ -366,6 +397,11 @@ static void o2net_set_nn_state(struct o2net_node *nn,
366 397
367 assert_spin_locked(&nn->nn_lock); 398 assert_spin_locked(&nn->nn_lock);
368 399
400 if (old_sc && !sc)
401 atomic_dec(&o2net_connected_peers);
402 else if (!old_sc && sc)
403 atomic_inc(&o2net_connected_peers);
404
369 /* the node num comparison and single connect/accept path should stop 405 /* the node num comparison and single connect/accept path should stop
370 * a non-null sc from being overwritten with another */ 406 * a non-null sc from being overwritten with another */
371 BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc); 407 BUG_ON(sc && nn->nn_sc && nn->nn_sc != sc);
@@ -424,9 +460,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
424 /* delay if we're within a RECONNECT_DELAY of the 460 /* delay if we're within a RECONNECT_DELAY of the
425 * last attempt */ 461 * last attempt */
426 delay = (nn->nn_last_connect_attempt + 462 delay = (nn->nn_last_connect_attempt +
427 msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 463 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
428 - jiffies; 464 - jiffies;
429 if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS)) 465 if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
430 delay = 0; 466 delay = 0;
431 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); 467 mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
432 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); 468 queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1099,13 +1135,51 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
1099 return -1; 1135 return -1;
1100 } 1136 }
1101 1137
1138 /*
1139 * Ensure timeouts are consistent with other nodes, otherwise
1140 * we can end up with one node thinking that the other must be down,
1141 * but isn't. This can ultimately cause corruption.
1142 */
1143 if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
1144 o2net_idle_timeout(sc->sc_node)) {
1145 mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of "
1146 "%u ms, but we use %u ms locally. disconnecting\n",
1147 SC_NODEF_ARGS(sc),
1148 be32_to_cpu(hand->o2net_idle_timeout_ms),
1149 o2net_idle_timeout(sc->sc_node));
1150 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1151 return -1;
1152 }
1153
1154 if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
1155 o2net_keepalive_delay(sc->sc_node)) {
1156 mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of "
1157 "%u ms, but we use %u ms locally. disconnecting\n",
1158 SC_NODEF_ARGS(sc),
1159 be32_to_cpu(hand->o2net_keepalive_delay_ms),
1160 o2net_keepalive_delay(sc->sc_node));
1161 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1162 return -1;
1163 }
1164
1165 if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) !=
1166 O2HB_MAX_WRITE_TIMEOUT_MS) {
1167 mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of "
1168 "%u ms, but we use %u ms locally. disconnecting\n",
1169 SC_NODEF_ARGS(sc),
1170 be32_to_cpu(hand->o2hb_heartbeat_timeout_ms),
1171 O2HB_MAX_WRITE_TIMEOUT_MS);
1172 o2net_ensure_shutdown(nn, sc, -ENOTCONN);
1173 return -1;
1174 }
1175
1102 sc->sc_handshake_ok = 1; 1176 sc->sc_handshake_ok = 1;
1103 1177
1104 spin_lock(&nn->nn_lock); 1178 spin_lock(&nn->nn_lock);
1105 /* set valid and queue the idle timers only if it hasn't been 1179 /* set valid and queue the idle timers only if it hasn't been
1106 * shut down already */ 1180 * shut down already */
1107 if (nn->nn_sc == sc) { 1181 if (nn->nn_sc == sc) {
1108 o2net_sc_postpone_idle(sc); 1182 o2net_sc_reset_idle_timer(sc);
1109 o2net_set_nn_state(nn, sc, 1, 0); 1183 o2net_set_nn_state(nn, sc, 1, 0);
1110 } 1184 }
1111 spin_unlock(&nn->nn_lock); 1185 spin_unlock(&nn->nn_lock);
@@ -1131,6 +1205,23 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1131 sclog(sc, "receiving\n"); 1205 sclog(sc, "receiving\n");
1132 do_gettimeofday(&sc->sc_tv_advance_start); 1206 do_gettimeofday(&sc->sc_tv_advance_start);
1133 1207
1208 if (unlikely(sc->sc_handshake_ok == 0)) {
1209 if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
1210 data = page_address(sc->sc_page) + sc->sc_page_off;
1211 datalen = sizeof(struct o2net_handshake) - sc->sc_page_off;
1212 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen);
1213 if (ret > 0)
1214 sc->sc_page_off += ret;
1215 }
1216
1217 if (sc->sc_page_off == sizeof(struct o2net_handshake)) {
1218 o2net_check_handshake(sc);
1219 if (unlikely(sc->sc_handshake_ok == 0))
1220 ret = -EPROTO;
1221 }
1222 goto out;
1223 }
1224
1134 /* do we need more header? */ 1225 /* do we need more header? */
1135 if (sc->sc_page_off < sizeof(struct o2net_msg)) { 1226 if (sc->sc_page_off < sizeof(struct o2net_msg)) {
1136 data = page_address(sc->sc_page) + sc->sc_page_off; 1227 data = page_address(sc->sc_page) + sc->sc_page_off;
@@ -1138,15 +1229,6 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
1138 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); 1229 ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen);
1139 if (ret > 0) { 1230 if (ret > 0) {
1140 sc->sc_page_off += ret; 1231 sc->sc_page_off += ret;
1141
1142 /* this working relies on the handshake being
1143 * smaller than the normal message header */
1144 if (sc->sc_page_off >= sizeof(struct o2net_handshake)&&
1145 !sc->sc_handshake_ok && o2net_check_handshake(sc)) {
1146 ret = -EPROTO;
1147 goto out;
1148 }
1149
1150 /* only swab incoming here.. we can 1232 /* only swab incoming here.. we can
1151 * only get here once as we cross from 1233 * only get here once as we cross from
1152 * being under to over */ 1234 * being under to over */
@@ -1248,6 +1330,18 @@ static int o2net_set_nodelay(struct socket *sock)
1248 return ret; 1330 return ret;
1249} 1331}
1250 1332
1333static void o2net_initialize_handshake(void)
1334{
1335 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
1336 O2HB_MAX_WRITE_TIMEOUT_MS);
1337 o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
1338 o2net_idle_timeout(NULL));
1339 o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
1340 o2net_keepalive_delay(NULL));
1341 o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
1342 o2net_reconnect_delay(NULL));
1343}
1344
1251/* ------------------------------------------------------------ */ 1345/* ------------------------------------------------------------ */
1252 1346
1253/* called when a connect completes and after a sock is accepted. the 1347/* called when a connect completes and after a sock is accepted. the
@@ -1262,6 +1356,7 @@ static void o2net_sc_connect_completed(struct work_struct *work)
1262 (unsigned long long)O2NET_PROTOCOL_VERSION, 1356 (unsigned long long)O2NET_PROTOCOL_VERSION,
1263 (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); 1357 (unsigned long long)be64_to_cpu(o2net_hand->connector_id));
1264 1358
1359 o2net_initialize_handshake();
1265 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 1360 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
1266 sc_put(sc); 1361 sc_put(sc);
1267} 1362}
@@ -1287,8 +1382,10 @@ static void o2net_idle_timer(unsigned long data)
1287 1382
1288 do_gettimeofday(&now); 1383 do_gettimeofday(&now);
1289 1384
1290 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " 1385 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
1291 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); 1386 "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
1387 o2net_idle_timeout(sc->sc_node) / 1000,
1388 o2net_idle_timeout(sc->sc_node) % 1000);
1292 mlog(ML_NOTICE, "here are some times that might help debug the " 1389 mlog(ML_NOTICE, "here are some times that might help debug the "
1293 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1390 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1294 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1391 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1306,14 +1403,21 @@ static void o2net_idle_timer(unsigned long data)
1306 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1403 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1307} 1404}
1308 1405
1309static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1406static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1310{ 1407{
1311 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); 1408 o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
1312 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, 1409 o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
1313 O2NET_KEEPALIVE_DELAY_SECS * HZ); 1410 msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
1314 do_gettimeofday(&sc->sc_tv_timer); 1411 do_gettimeofday(&sc->sc_tv_timer);
1315 mod_timer(&sc->sc_idle_timeout, 1412 mod_timer(&sc->sc_idle_timeout,
1316 jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ)); 1413 jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
1414}
1415
1416static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
1417{
1418 /* Only push out an existing timer */
1419 if (timer_pending(&sc->sc_idle_timeout))
1420 o2net_sc_reset_idle_timer(sc);
1317} 1421}
1318 1422
1319/* this work func is kicked whenever a path sets the nn state which doesn't 1423/* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1435,9 +1539,12 @@ static void o2net_connect_expired(struct work_struct *work)
1435 1539
1436 spin_lock(&nn->nn_lock); 1540 spin_lock(&nn->nn_lock);
1437 if (!nn->nn_sc_valid) { 1541 if (!nn->nn_sc_valid) {
1542 struct o2nm_node *node = nn->nn_sc->sc_node;
1438 mlog(ML_ERROR, "no connection established with node %u after " 1543 mlog(ML_ERROR, "no connection established with node %u after "
1439 "%u seconds, giving up and returning errors.\n", 1544 "%u.%u seconds, giving up and returning errors.\n",
1440 o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS); 1545 o2net_num_from_nn(nn),
1546 o2net_idle_timeout(node) / 1000,
1547 o2net_idle_timeout(node) % 1000);
1441 1548
1442 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); 1549 o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
1443 } 1550 }
@@ -1478,6 +1585,8 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
1478 1585
1479 if (node_num != o2nm_this_node()) 1586 if (node_num != o2nm_this_node())
1480 o2net_disconnect_node(node); 1587 o2net_disconnect_node(node);
1588
1589 BUG_ON(atomic_read(&o2net_connected_peers) < 0);
1481} 1590}
1482 1591
1483static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, 1592static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
@@ -1489,14 +1598,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1489 1598
1490 /* ensure an immediate connect attempt */ 1599 /* ensure an immediate connect attempt */
1491 nn->nn_last_connect_attempt = jiffies - 1600 nn->nn_last_connect_attempt = jiffies -
1492 (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1); 1601 (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
1493 1602
1494 if (node_num != o2nm_this_node()) { 1603 if (node_num != o2nm_this_node()) {
1495 /* heartbeat doesn't work unless a local node number is 1604 /* heartbeat doesn't work unless a local node number is
1496 * configured and doing so brings up the o2net_wq, so we can 1605 * configured and doing so brings up the o2net_wq, so we can
1497 * use it.. */ 1606 * use it.. */
1498 queue_delayed_work(o2net_wq, &nn->nn_connect_expired, 1607 queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
1499 O2NET_IDLE_TIMEOUT_SECS * HZ); 1608 msecs_to_jiffies(o2net_idle_timeout(node)));
1500 1609
1501 /* believe it or not, accept and node heartbeating testing 1610 /* believe it or not, accept and node heartbeating testing
1502 * can succeed for this node before we got here.. so 1611 * can succeed for this node before we got here.. so
@@ -1641,6 +1750,7 @@ static int o2net_accept_one(struct socket *sock)
1641 o2net_register_callbacks(sc->sc_sock->sk, sc); 1750 o2net_register_callbacks(sc->sc_sock->sk, sc);
1642 o2net_sc_queue_work(sc, &sc->sc_rx_work); 1751 o2net_sc_queue_work(sc, &sc->sc_rx_work);
1643 1752
1753 o2net_initialize_handshake();
1644 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); 1754 o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
1645 1755
1646out: 1756out:
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 616ff2b8434a..21a4e43df836 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data)
54 54
55#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) 55#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg))
56 56
57/* same as hb delay, we're waiting for another node to recognize our hb */
58#define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000
59
60#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000
61#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000
62
63
57/* TODO: figure this out.... */ 64/* TODO: figure this out.... */
58static inline int o2net_link_down(int err, struct socket *sock) 65static inline int o2net_link_down(int err, struct socket *sock)
59{ 66{
@@ -101,6 +108,7 @@ void o2net_unregister_hb_callbacks(void);
101int o2net_start_listening(struct o2nm_node *node); 108int o2net_start_listening(struct o2nm_node *node);
102void o2net_stop_listening(struct o2nm_node *node); 109void o2net_stop_listening(struct o2nm_node *node);
103void o2net_disconnect_node(struct o2nm_node *node); 110void o2net_disconnect_node(struct o2nm_node *node);
111int o2net_num_connected_peers(void);
104 112
105int o2net_init(void); 113int o2net_init(void);
106void o2net_exit(void); 114void o2net_exit(void);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index daebbd3a2c8c..b700dc9624d1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -27,23 +27,20 @@
27#define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57) 27#define O2NET_MSG_KEEP_REQ_MAGIC ((u16)0xfa57)
28#define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58) 28#define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58)
29 29
30/* same as hb delay, we're waiting for another node to recognize our hb */
31#define O2NET_RECONNECT_DELAY_MS O2HB_REGION_TIMEOUT_MS
32
33/* we're delaying our quorum decision so that heartbeat will have timed 30/* we're delaying our quorum decision so that heartbeat will have timed
34 * out truly dead nodes by the time we come around to making decisions 31 * out truly dead nodes by the time we come around to making decisions
35 * on their number */ 32 * on their number */
36#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) 33#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
37 34
38#define O2NET_KEEPALIVE_DELAY_SECS 5
39#define O2NET_IDLE_TIMEOUT_SECS 10
40
41/* 35/*
42 * This version number represents quite a lot, unfortunately. It not 36 * This version number represents quite a lot, unfortunately. It not
43 * only represents the raw network message protocol on the wire but also 37 * only represents the raw network message protocol on the wire but also
44 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
45 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
46 * 40 *
41 * New in version 5:
42 * - Network timeout checking protocol
43 *
47 * New in version 4: 44 * New in version 4:
48 * - Remove i_generation from lock names for better stat performance. 45 * - Remove i_generation from lock names for better stat performance.
49 * 46 *
@@ -54,10 +51,14 @@
54 * - full 64 bit i_size in the metadata lock lvbs 51 * - full 64 bit i_size in the metadata lock lvbs
55 * - introduction of "rw" lock and pushing meta/data locking down 52 * - introduction of "rw" lock and pushing meta/data locking down
56 */ 53 */
57#define O2NET_PROTOCOL_VERSION 4ULL 54#define O2NET_PROTOCOL_VERSION 5ULL
58struct o2net_handshake { 55struct o2net_handshake {
59 __be64 protocol_version; 56 __be64 protocol_version;
60 __be64 connector_id; 57 __be64 connector_id;
58 __be32 o2hb_heartbeat_timeout_ms;
59 __be32 o2net_idle_timeout_ms;
60 __be32 o2net_keepalive_delay_ms;
61 __be32 o2net_reconnect_delay_ms;
61}; 62};
62 63
63struct o2net_node { 64struct o2net_node {
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 69fba16efbd1..e6220137bf69 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -770,7 +770,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
770 int dlm_flags) 770 int dlm_flags)
771{ 771{
772 int ret = 0; 772 int ret = 0;
773 enum dlm_status status; 773 enum dlm_status status = DLM_NORMAL;
774 unsigned long flags; 774 unsigned long flags;
775 775
776 mlog_entry_void(); 776 mlog_entry_void();
@@ -1138,6 +1138,7 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1138{ 1138{
1139 int status, level; 1139 int status, level;
1140 struct ocfs2_lock_res *lockres; 1140 struct ocfs2_lock_res *lockres;
1141 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1141 1142
1142 BUG_ON(!inode); 1143 BUG_ON(!inode);
1143 1144
@@ -1147,6 +1148,9 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1147 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1148 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1148 write ? "EXMODE" : "PRMODE"); 1149 write ? "EXMODE" : "PRMODE");
1149 1150
1151 if (ocfs2_mount_local(osb))
1152 return 0;
1153
1150 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1154 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1151 1155
1152 level = write ? LKM_EXMODE : LKM_PRMODE; 1156 level = write ? LKM_EXMODE : LKM_PRMODE;
@@ -1164,6 +1168,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
1164{ 1168{
1165 int level = write ? LKM_EXMODE : LKM_PRMODE; 1169 int level = write ? LKM_EXMODE : LKM_PRMODE;
1166 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1170 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1171 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1167 1172
1168 mlog_entry_void(); 1173 mlog_entry_void();
1169 1174
@@ -1171,7 +1176,8 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
1171 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1176 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1172 write ? "EXMODE" : "PRMODE"); 1177 write ? "EXMODE" : "PRMODE");
1173 1178
1174 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1179 if (!ocfs2_mount_local(osb))
1180 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1175 1181
1176 mlog_exit_void(); 1182 mlog_exit_void();
1177} 1183}
@@ -1182,6 +1188,7 @@ int ocfs2_data_lock_full(struct inode *inode,
1182{ 1188{
1183 int status = 0, level; 1189 int status = 0, level;
1184 struct ocfs2_lock_res *lockres; 1190 struct ocfs2_lock_res *lockres;
1191 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1185 1192
1186 BUG_ON(!inode); 1193 BUG_ON(!inode);
1187 1194
@@ -1201,6 +1208,9 @@ int ocfs2_data_lock_full(struct inode *inode,
1201 goto out; 1208 goto out;
1202 } 1209 }
1203 1210
1211 if (ocfs2_mount_local(osb))
1212 goto out;
1213
1204 lockres = &OCFS2_I(inode)->ip_data_lockres; 1214 lockres = &OCFS2_I(inode)->ip_data_lockres;
1205 1215
1206 level = write ? LKM_EXMODE : LKM_PRMODE; 1216 level = write ? LKM_EXMODE : LKM_PRMODE;
@@ -1269,6 +1279,7 @@ void ocfs2_data_unlock(struct inode *inode,
1269{ 1279{
1270 int level = write ? LKM_EXMODE : LKM_PRMODE; 1280 int level = write ? LKM_EXMODE : LKM_PRMODE;
1271 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; 1281 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres;
1282 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1272 1283
1273 mlog_entry_void(); 1284 mlog_entry_void();
1274 1285
@@ -1276,7 +1287,8 @@ void ocfs2_data_unlock(struct inode *inode,
1276 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1287 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1277 write ? "EXMODE" : "PRMODE"); 1288 write ? "EXMODE" : "PRMODE");
1278 1289
1279 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) 1290 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
1291 !ocfs2_mount_local(osb))
1280 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1292 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1281 1293
1282 mlog_exit_void(); 1294 mlog_exit_void();
@@ -1467,8 +1479,9 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1467{ 1479{
1468 int status = 0; 1480 int status = 0;
1469 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1481 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1470 struct ocfs2_lock_res *lockres; 1482 struct ocfs2_lock_res *lockres = NULL;
1471 struct ocfs2_dinode *fe; 1483 struct ocfs2_dinode *fe;
1484 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1472 1485
1473 mlog_entry_void(); 1486 mlog_entry_void();
1474 1487
@@ -1483,10 +1496,12 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1483 } 1496 }
1484 spin_unlock(&oi->ip_lock); 1497 spin_unlock(&oi->ip_lock);
1485 1498
1486 lockres = &oi->ip_meta_lockres; 1499 if (!ocfs2_mount_local(osb)) {
1500 lockres = &oi->ip_meta_lockres;
1487 1501
1488 if (!ocfs2_should_refresh_lock_res(lockres)) 1502 if (!ocfs2_should_refresh_lock_res(lockres))
1489 goto bail; 1503 goto bail;
1504 }
1490 1505
1491 /* This will discard any caching information we might have had 1506 /* This will discard any caching information we might have had
1492 * for the inode metadata. */ 1507 * for the inode metadata. */
@@ -1496,7 +1511,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1496 * map (directories, bitmap files, etc) */ 1511 * map (directories, bitmap files, etc) */
1497 ocfs2_extent_map_trunc(inode, 0); 1512 ocfs2_extent_map_trunc(inode, 0);
1498 1513
1499 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 1514 if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) {
1500 mlog(0, "Trusting LVB on inode %llu\n", 1515 mlog(0, "Trusting LVB on inode %llu\n",
1501 (unsigned long long)oi->ip_blkno); 1516 (unsigned long long)oi->ip_blkno);
1502 ocfs2_refresh_inode_from_lvb(inode); 1517 ocfs2_refresh_inode_from_lvb(inode);
@@ -1543,7 +1558,8 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1543 1558
1544 status = 0; 1559 status = 0;
1545bail_refresh: 1560bail_refresh:
1546 ocfs2_complete_lock_res_refresh(lockres, status); 1561 if (lockres)
1562 ocfs2_complete_lock_res_refresh(lockres, status);
1547bail: 1563bail:
1548 mlog_exit(status); 1564 mlog_exit(status);
1549 return status; 1565 return status;
@@ -1585,7 +1601,7 @@ int ocfs2_meta_lock_full(struct inode *inode,
1585 int arg_flags) 1601 int arg_flags)
1586{ 1602{
1587 int status, level, dlm_flags, acquired; 1603 int status, level, dlm_flags, acquired;
1588 struct ocfs2_lock_res *lockres; 1604 struct ocfs2_lock_res *lockres = NULL;
1589 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1605 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1590 struct buffer_head *local_bh = NULL; 1606 struct buffer_head *local_bh = NULL;
1591 1607
@@ -1607,6 +1623,9 @@ int ocfs2_meta_lock_full(struct inode *inode,
1607 goto bail; 1623 goto bail;
1608 } 1624 }
1609 1625
1626 if (ocfs2_mount_local(osb))
1627 goto local;
1628
1610 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 1629 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
1611 wait_event(osb->recovery_event, 1630 wait_event(osb->recovery_event,
1612 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1631 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
@@ -1636,6 +1655,7 @@ int ocfs2_meta_lock_full(struct inode *inode,
1636 wait_event(osb->recovery_event, 1655 wait_event(osb->recovery_event,
1637 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1656 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1638 1657
1658local:
1639 /* 1659 /*
1640 * We only see this flag if we're being called from 1660 * We only see this flag if we're being called from
1641 * ocfs2_read_locked_inode(). It means we're locking an inode 1661 * ocfs2_read_locked_inode(). It means we're locking an inode
@@ -1644,7 +1664,8 @@ int ocfs2_meta_lock_full(struct inode *inode,
1644 */ 1664 */
1645 if (inode->i_state & I_NEW) { 1665 if (inode->i_state & I_NEW) {
1646 status = 0; 1666 status = 0;
1647 ocfs2_complete_lock_res_refresh(lockres, 0); 1667 if (lockres)
1668 ocfs2_complete_lock_res_refresh(lockres, 0);
1648 goto bail; 1669 goto bail;
1649 } 1670 }
1650 1671
@@ -1767,6 +1788,7 @@ void ocfs2_meta_unlock(struct inode *inode,
1767{ 1788{
1768 int level = ex ? LKM_EXMODE : LKM_PRMODE; 1789 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1769 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; 1790 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
1791 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1770 1792
1771 mlog_entry_void(); 1793 mlog_entry_void();
1772 1794
@@ -1774,7 +1796,8 @@ void ocfs2_meta_unlock(struct inode *inode,
1774 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1796 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1775 ex ? "EXMODE" : "PRMODE"); 1797 ex ? "EXMODE" : "PRMODE");
1776 1798
1777 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) 1799 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
1800 !ocfs2_mount_local(osb))
1778 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1801 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1779 1802
1780 mlog_exit_void(); 1803 mlog_exit_void();
@@ -1783,7 +1806,7 @@ void ocfs2_meta_unlock(struct inode *inode,
1783int ocfs2_super_lock(struct ocfs2_super *osb, 1806int ocfs2_super_lock(struct ocfs2_super *osb,
1784 int ex) 1807 int ex)
1785{ 1808{
1786 int status; 1809 int status = 0;
1787 int level = ex ? LKM_EXMODE : LKM_PRMODE; 1810 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1788 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 1811 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
1789 struct buffer_head *bh; 1812 struct buffer_head *bh;
@@ -1794,6 +1817,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
1794 if (ocfs2_is_hard_readonly(osb)) 1817 if (ocfs2_is_hard_readonly(osb))
1795 return -EROFS; 1818 return -EROFS;
1796 1819
1820 if (ocfs2_mount_local(osb))
1821 goto bail;
1822
1797 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 1823 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
1798 if (status < 0) { 1824 if (status < 0) {
1799 mlog_errno(status); 1825 mlog_errno(status);
@@ -1832,7 +1858,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
1832 int level = ex ? LKM_EXMODE : LKM_PRMODE; 1858 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1833 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 1859 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
1834 1860
1835 ocfs2_cluster_unlock(osb, lockres, level); 1861 if (!ocfs2_mount_local(osb))
1862 ocfs2_cluster_unlock(osb, lockres, level);
1836} 1863}
1837 1864
1838int ocfs2_rename_lock(struct ocfs2_super *osb) 1865int ocfs2_rename_lock(struct ocfs2_super *osb)
@@ -1843,6 +1870,9 @@ int ocfs2_rename_lock(struct ocfs2_super *osb)
1843 if (ocfs2_is_hard_readonly(osb)) 1870 if (ocfs2_is_hard_readonly(osb))
1844 return -EROFS; 1871 return -EROFS;
1845 1872
1873 if (ocfs2_mount_local(osb))
1874 return 0;
1875
1846 status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); 1876 status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0);
1847 if (status < 0) 1877 if (status < 0)
1848 mlog_errno(status); 1878 mlog_errno(status);
@@ -1854,7 +1884,8 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
1854{ 1884{
1855 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 1885 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
1856 1886
1857 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1887 if (!ocfs2_mount_local(osb))
1888 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
1858} 1889}
1859 1890
1860int ocfs2_dentry_lock(struct dentry *dentry, int ex) 1891int ocfs2_dentry_lock(struct dentry *dentry, int ex)
@@ -1869,6 +1900,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex)
1869 if (ocfs2_is_hard_readonly(osb)) 1900 if (ocfs2_is_hard_readonly(osb))
1870 return -EROFS; 1901 return -EROFS;
1871 1902
1903 if (ocfs2_mount_local(osb))
1904 return 0;
1905
1872 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 1906 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
1873 if (ret < 0) 1907 if (ret < 0)
1874 mlog_errno(ret); 1908 mlog_errno(ret);
@@ -1882,7 +1916,8 @@ void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
1882 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 1916 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1883 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 1917 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1884 1918
1885 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 1919 if (!ocfs2_mount_local(osb))
1920 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
1886} 1921}
1887 1922
1888/* Reference counting of the dlm debug structure. We want this because 1923/* Reference counting of the dlm debug structure. We want this because
@@ -2145,12 +2180,15 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2145 2180
2146int ocfs2_dlm_init(struct ocfs2_super *osb) 2181int ocfs2_dlm_init(struct ocfs2_super *osb)
2147{ 2182{
2148 int status; 2183 int status = 0;
2149 u32 dlm_key; 2184 u32 dlm_key;
2150 struct dlm_ctxt *dlm; 2185 struct dlm_ctxt *dlm = NULL;
2151 2186
2152 mlog_entry_void(); 2187 mlog_entry_void();
2153 2188
2189 if (ocfs2_mount_local(osb))
2190 goto local;
2191
2154 status = ocfs2_dlm_init_debug(osb); 2192 status = ocfs2_dlm_init_debug(osb);
2155 if (status < 0) { 2193 if (status < 0) {
2156 mlog_errno(status); 2194 mlog_errno(status);
@@ -2178,11 +2216,12 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
2178 goto bail; 2216 goto bail;
2179 } 2217 }
2180 2218
2219 dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb);
2220
2221local:
2181 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2222 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
2182 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2223 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
2183 2224
2184 dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb);
2185
2186 osb->dlm = dlm; 2225 osb->dlm = dlm;
2187 2226
2188 status = 0; 2227 status = 0;
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index cbfd45a97a63..8fc52d6d0ce7 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -154,6 +154,9 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
154{ 154{
155 int status; 155 int status;
156 156
157 if (ocfs2_mount_local(osb))
158 return 0;
159
157 status = o2hb_register_callback(&osb->osb_hb_down); 160 status = o2hb_register_callback(&osb->osb_hb_down);
158 if (status < 0) { 161 if (status < 0) {
159 mlog_errno(status); 162 mlog_errno(status);
@@ -172,6 +175,9 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
172{ 175{
173 int status; 176 int status;
174 177
178 if (ocfs2_mount_local(osb))
179 return;
180
175 status = o2hb_unregister_callback(&osb->osb_hb_down); 181 status = o2hb_unregister_callback(&osb->osb_hb_down);
176 if (status < 0) 182 if (status < 0)
177 mlog_errno(status); 183 mlog_errno(status);
@@ -186,6 +192,9 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
186 int ret; 192 int ret;
187 char *argv[5], *envp[3]; 193 char *argv[5], *envp[3];
188 194
195 if (ocfs2_mount_local(osb))
196 return;
197
189 if (!osb->uuid_str) { 198 if (!osb->uuid_str) {
190 /* This can happen if we don't get far enough in mount... */ 199 /* This can happen if we don't get far enough in mount... */
191 mlog(0, "No UUID with which to stop heartbeat!\n\n"); 200 mlog(0, "No UUID with which to stop heartbeat!\n\n");
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 42e361f3054f..e4d91493d7d7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -423,7 +423,8 @@ static int ocfs2_read_locked_inode(struct inode *inode,
423 * cluster lock before trusting anything anyway. 423 * cluster lock before trusting anything anyway.
424 */ 424 */
425 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 425 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
426 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK); 426 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK)
427 && !ocfs2_mount_local(osb);
427 428
428 /* 429 /*
429 * To maintain backwards compatibility with older versions of 430 * To maintain backwards compatibility with older versions of
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1d7f4ab1e5ed..825cb0ae1b4c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -144,8 +144,10 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
144 ocfs2_abort(osb->sb, "Detected aborted journal"); 144 ocfs2_abort(osb->sb, "Detected aborted journal");
145 handle = ERR_PTR(-EROFS); 145 handle = ERR_PTR(-EROFS);
146 } 146 }
147 } else 147 } else {
148 atomic_inc(&(osb->journal->j_num_trans)); 148 if (!ocfs2_mount_local(osb))
149 atomic_inc(&(osb->journal->j_num_trans));
150 }
149 151
150 return handle; 152 return handle;
151} 153}
@@ -507,9 +509,23 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
507 509
508 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); 510 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
509 511
510 status = ocfs2_journal_toggle_dirty(osb, 0); 512 if (ocfs2_mount_local(osb)) {
511 if (status < 0) 513 journal_lock_updates(journal->j_journal);
512 mlog_errno(status); 514 status = journal_flush(journal->j_journal);
515 journal_unlock_updates(journal->j_journal);
516 if (status < 0)
517 mlog_errno(status);
518 }
519
520 if (status == 0) {
521 /*
522 * Do not toggle if flush was unsuccessful otherwise
523 * will leave dirty metadata in a "clean" journal
524 */
525 status = ocfs2_journal_toggle_dirty(osb, 0);
526 if (status < 0)
527 mlog_errno(status);
528 }
513 529
514 /* Shutdown the kernel journal system */ 530 /* Shutdown the kernel journal system */
515 journal_destroy(journal->j_journal); 531 journal_destroy(journal->j_journal);
@@ -549,7 +565,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
549 } 565 }
550} 566}
551 567
552int ocfs2_journal_load(struct ocfs2_journal *journal) 568int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
553{ 569{
554 int status = 0; 570 int status = 0;
555 struct ocfs2_super *osb; 571 struct ocfs2_super *osb;
@@ -576,14 +592,18 @@ int ocfs2_journal_load(struct ocfs2_journal *journal)
576 } 592 }
577 593
578 /* Launch the commit thread */ 594 /* Launch the commit thread */
579 osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt"); 595 if (!local) {
580 if (IS_ERR(osb->commit_task)) { 596 osb->commit_task = kthread_run(ocfs2_commit_thread, osb,
581 status = PTR_ERR(osb->commit_task); 597 "ocfs2cmt");
598 if (IS_ERR(osb->commit_task)) {
599 status = PTR_ERR(osb->commit_task);
600 osb->commit_task = NULL;
601 mlog(ML_ERROR, "unable to launch ocfs2commit thread, "
602 "error=%d", status);
603 goto done;
604 }
605 } else
582 osb->commit_task = NULL; 606 osb->commit_task = NULL;
583 mlog(ML_ERROR, "unable to launch ocfs2commit thread, error=%d",
584 status);
585 goto done;
586 }
587 607
588done: 608done:
589 mlog_exit(status); 609 mlog_exit(status);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 899112ad8136..e1216364d191 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -157,7 +157,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
157void ocfs2_journal_shutdown(struct ocfs2_super *osb); 157void ocfs2_journal_shutdown(struct ocfs2_super *osb);
158int ocfs2_journal_wipe(struct ocfs2_journal *journal, 158int ocfs2_journal_wipe(struct ocfs2_journal *journal,
159 int full); 159 int full);
160int ocfs2_journal_load(struct ocfs2_journal *journal); 160int ocfs2_journal_load(struct ocfs2_journal *journal, int local);
161int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 161int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
162void ocfs2_recovery_thread(struct ocfs2_super *osb, 162void ocfs2_recovery_thread(struct ocfs2_super *osb,
163 int node_num); 163 int node_num);
@@ -174,6 +174,9 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
174{ 174{
175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
176 176
177 if (ocfs2_mount_local(osb))
178 return;
179
177 if (!ocfs2_inode_fully_checkpointed(inode)) { 180 if (!ocfs2_inode_fully_checkpointed(inode)) {
178 /* WARNING: This only kicks off a single 181 /* WARNING: This only kicks off a single
179 * checkpoint. If someone races you and adds more 182 * checkpoint. If someone races you and adds more
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 69f85ae392dc..51b020447683 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -83,10 +83,12 @@ static struct vm_operations_struct ocfs2_file_vm_ops = {
83int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 83int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
84{ 84{
85 int ret = 0, lock_level = 0; 85 int ret = 0, lock_level = 0;
86 struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);
86 87
87 /* We don't want to support shared writable mappings yet. */ 88 /* We don't want to support shared writable mappings yet. */
88 if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) 89 if (!ocfs2_mount_local(osb) &&
89 && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { 90 ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
91 ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
90 mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); 92 mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
91 /* This is -EINVAL because generic_file_readonly_mmap 93 /* This is -EINVAL because generic_file_readonly_mmap
92 * returns it in a similar situation. */ 94 * returns it in a similar situation. */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 21db45ddf144..9637039c2633 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -587,9 +587,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
587 } 587 }
588 588
589 ocfs2_inode_set_new(osb, inode); 589 ocfs2_inode_set_new(osb, inode);
590 status = ocfs2_create_new_inode_locks(inode); 590 if (!ocfs2_mount_local(osb)) {
591 if (status < 0) 591 status = ocfs2_create_new_inode_locks(inode);
592 mlog_errno(status); 592 if (status < 0)
593 mlog_errno(status);
594 }
593 595
594 status = 0; /* error in ocfs2_create_new_inode_locks is not 596 status = 0; /* error in ocfs2_create_new_inode_locks is not
595 * critical */ 597 * critical */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index b767fd7da6eb..db8e77cd35d3 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -349,6 +349,11 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
349 return ret; 349 return ret;
350} 350}
351 351
352static inline int ocfs2_mount_local(struct ocfs2_super *osb)
353{
354 return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
355}
356
352#define OCFS2_IS_VALID_DINODE(ptr) \ 357#define OCFS2_IS_VALID_DINODE(ptr) \
353 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) 358 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
354 359
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3330a5dc6be2..b5c68567077e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -86,7 +86,7 @@
86 OCFS2_SB(sb)->s_feature_incompat &= ~(mask) 86 OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
87 87
88#define OCFS2_FEATURE_COMPAT_SUPP 0 88#define OCFS2_FEATURE_COMPAT_SUPP 0
89#define OCFS2_FEATURE_INCOMPAT_SUPP 0 89#define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT
90#define OCFS2_FEATURE_RO_COMPAT_SUPP 0 90#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
91 91
92/* 92/*
@@ -96,6 +96,18 @@
96 */ 96 */
97#define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV 0x0002 97#define OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV 0x0002
98 98
99/*
100 * tunefs sets this incompat flag before starting the resize and clears it
101 * at the end. This flag protects users from inadvertently mounting the fs
102 * after an aborted run without fsck-ing.
103 */
104#define OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG 0x0004
105
106/* Used to denote a non-clustered volume */
107#define OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT 0x0008
108
109/* Support for sparse allocation in b-trees */
110#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010
99 111
100/* 112/*
101 * Flags on ocfs2_dinode.i_flags 113 * Flags on ocfs2_dinode.i_flags
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4bf39540e652..a6d2f8cc165b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -508,6 +508,27 @@ bail:
508 return status; 508 return status;
509} 509}
510 510
511static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
512{
513 if (ocfs2_mount_local(osb)) {
514 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
515 mlog(ML_ERROR, "Cannot heartbeat on a locally "
516 "mounted device.\n");
517 return -EINVAL;
518 }
519 }
520
521 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
522 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) {
523 mlog(ML_ERROR, "Heartbeat has to be started to mount "
524 "a read-write clustered device.\n");
525 return -EINVAL;
526 }
527 }
528
529 return 0;
530}
531
511static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 532static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
512{ 533{
513 struct dentry *root; 534 struct dentry *root;
@@ -516,16 +537,24 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
516 struct inode *inode = NULL; 537 struct inode *inode = NULL;
517 struct ocfs2_super *osb = NULL; 538 struct ocfs2_super *osb = NULL;
518 struct buffer_head *bh = NULL; 539 struct buffer_head *bh = NULL;
540 char nodestr[8];
519 541
520 mlog_entry("%p, %p, %i", sb, data, silent); 542 mlog_entry("%p, %p, %i", sb, data, silent);
521 543
522 /* for now we only have one cluster/node, make sure we see it 544 if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
523 * in the heartbeat universe */
524 if (!o2hb_check_local_node_heartbeating()) {
525 status = -EINVAL; 545 status = -EINVAL;
526 goto read_super_error; 546 goto read_super_error;
527 } 547 }
528 548
549 /* for now we only have one cluster/node, make sure we see it
550 * in the heartbeat universe */
551 if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) {
552 if (!o2hb_check_local_node_heartbeating()) {
553 status = -EINVAL;
554 goto read_super_error;
555 }
556 }
557
529 /* probe for superblock */ 558 /* probe for superblock */
530 status = ocfs2_sb_probe(sb, &bh, &sector_size); 559 status = ocfs2_sb_probe(sb, &bh, &sector_size);
531 if (status < 0) { 560 if (status < 0) {
@@ -541,11 +570,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
541 } 570 }
542 brelse(bh); 571 brelse(bh);
543 bh = NULL; 572 bh = NULL;
544
545 if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
546 status = -EINVAL;
547 goto read_super_error;
548 }
549 osb->s_mount_opt = parsed_opt; 573 osb->s_mount_opt = parsed_opt;
550 574
551 sb->s_magic = OCFS2_SUPER_MAGIC; 575 sb->s_magic = OCFS2_SUPER_MAGIC;
@@ -588,21 +612,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
588 } 612 }
589 613
590 if (!ocfs2_is_hard_readonly(osb)) { 614 if (!ocfs2_is_hard_readonly(osb)) {
591 /* If this isn't a hard readonly mount, then we need
592 * to make sure that heartbeat is in a valid state,
593 * and that we mark ourselves soft readonly is -oro
594 * was specified. */
595 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
596 mlog(ML_ERROR, "No heartbeat for device (%s)\n",
597 sb->s_id);
598 status = -EINVAL;
599 goto read_super_error;
600 }
601
602 if (sb->s_flags & MS_RDONLY) 615 if (sb->s_flags & MS_RDONLY)
603 ocfs2_set_ro_flag(osb, 0); 616 ocfs2_set_ro_flag(osb, 0);
604 } 617 }
605 618
619 status = ocfs2_verify_heartbeat(osb);
620 if (status < 0) {
621 mlog_errno(status);
622 goto read_super_error;
623 }
624
606 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, 625 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
607 ocfs2_debugfs_root); 626 ocfs2_debugfs_root);
608 if (!osb->osb_debug_root) { 627 if (!osb->osb_debug_root) {
@@ -635,9 +654,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
635 654
636 ocfs2_complete_mount_recovery(osb); 655 ocfs2_complete_mount_recovery(osb);
637 656
638 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) " 657 if (ocfs2_mount_local(osb))
658 snprintf(nodestr, sizeof(nodestr), "local");
659 else
660 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
661
662 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) "
639 "with %s data mode.\n", 663 "with %s data mode.\n",
640 osb->dev_str, osb->node_num, osb->slot_num, 664 osb->dev_str, nodestr, osb->slot_num,
641 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : 665 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
642 "ordered"); 666 "ordered");
643 667
@@ -999,7 +1023,11 @@ static int ocfs2_fill_local_node_info(struct ocfs2_super *osb)
999 1023
1000 /* XXX hold a ref on the node while mounte? easy enough, if 1024 /* XXX hold a ref on the node while mounte? easy enough, if
1001 * desirable. */ 1025 * desirable. */
1002 osb->node_num = o2nm_this_node(); 1026 if (ocfs2_mount_local(osb))
1027 osb->node_num = 0;
1028 else
1029 osb->node_num = o2nm_this_node();
1030
1003 if (osb->node_num == O2NM_MAX_NODES) { 1031 if (osb->node_num == O2NM_MAX_NODES) {
1004 mlog(ML_ERROR, "could not find this host's node number\n"); 1032 mlog(ML_ERROR, "could not find this host's node number\n");
1005 status = -ENOENT; 1033 status = -ENOENT;
@@ -1084,6 +1112,9 @@ static int ocfs2_mount_volume(struct super_block *sb)
1084 goto leave; 1112 goto leave;
1085 } 1113 }
1086 1114
1115 if (ocfs2_mount_local(osb))
1116 goto leave;
1117
1087 /* This should be sent *after* we recovered our journal as it 1118 /* This should be sent *after* we recovered our journal as it
1088 * will cause other nodes to unmark us as needing 1119 * will cause other nodes to unmark us as needing
1089 * recovery. However, we need to send it *before* dropping the 1120 * recovery. However, we need to send it *before* dropping the
@@ -1114,6 +1145,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1114{ 1145{
1115 int tmp; 1146 int tmp;
1116 struct ocfs2_super *osb = NULL; 1147 struct ocfs2_super *osb = NULL;
1148 char nodestr[8];
1117 1149
1118 mlog_entry("(0x%p)\n", sb); 1150 mlog_entry("(0x%p)\n", sb);
1119 1151
@@ -1177,8 +1209,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1177 1209
1178 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1210 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);
1179 1211
1180 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %d)\n", 1212 if (ocfs2_mount_local(osb))
1181 osb->dev_str, osb->node_num); 1213 snprintf(nodestr, sizeof(nodestr), "local");
1214 else
1215 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num);
1216
1217 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n",
1218 osb->dev_str, nodestr);
1182 1219
1183 ocfs2_delete_osb(osb); 1220 ocfs2_delete_osb(osb);
1184 kfree(osb); 1221 kfree(osb);
@@ -1536,6 +1573,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1536{ 1573{
1537 int status = 0; 1574 int status = 0;
1538 int dirty; 1575 int dirty;
1576 int local;
1539 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 1577 struct ocfs2_dinode *local_alloc = NULL; /* only used if we
1540 * recover 1578 * recover
1541 * ourselves. */ 1579 * ourselves. */
@@ -1563,8 +1601,10 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1563 "recovering volume.\n"); 1601 "recovering volume.\n");
1564 } 1602 }
1565 1603
1604 local = ocfs2_mount_local(osb);
1605
1566 /* will play back anything left in the journal. */ 1606 /* will play back anything left in the journal. */
1567 ocfs2_journal_load(osb->journal); 1607 ocfs2_journal_load(osb->journal, local);
1568 1608
1569 if (dirty) { 1609 if (dirty) {
1570 /* recover my local alloc if we didn't unmount cleanly. */ 1610 /* recover my local alloc if we didn't unmount cleanly. */
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 5b4dca79990b..0315a8b61ed6 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -1000,6 +1000,9 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb)
1000{ 1000{
1001 int status = 0; 1001 int status = 0;
1002 1002
1003 if (ocfs2_mount_local(osb))
1004 return 0;
1005
1003 status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, 1006 status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE,
1004 osb->net_key, 1007 osb->net_key,
1005 sizeof(struct ocfs2_response_msg), 1008 sizeof(struct ocfs2_response_msg),