aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/journal.c
diff options
context:
space:
mode:
authorJoel Becker <joel.becker@oracle.com>2008-02-01 15:03:57 -0500
committerMark Fasheh <mfasheh@suse.com>2008-04-18 11:56:02 -0400
commit553abd046af609191a91af7289d87d477adc659f (patch)
treecff21f65d49c0041993095a051edf76840c2af28 /fs/ocfs2/journal.c
parentd85b20e4b300edfd290f21fc2d790ba16d2f225b (diff)
ocfs2: Change the recovery map to an array of node numbers.
The old recovery map was a bitmap of node numbers. This was sufficient for the maximum node number of 254. Going forward, we want node numbers to be UINT32. Thus, we need a new recovery map. Note that we can't keep track of slots here. We must write down the node number to recover *before* we get the locks needed to convert a node number into a slot number. The recovery map is now an array of unsigned ints, max_slots in size. It moves to journal.c with the rest of recovery. Because it needs to be initialized, we move all of recovery initialization into a new function, ocfs2_recovery_init(). This actually cleans up ocfs2_initialize_super() a little as well. Following on, recovery cleanup becomes part of ocfs2_recovery_exit(). A number of node map functions are rendered obsolete and are removed. Finally, waiting on recovery is wrapped in a function rather than naked checks on the recovery_event. This is a cleanup from Mark. Signed-off-by: Joel Becker <joel.becker@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2/journal.c')
-rw-r--r--fs/ocfs2/journal.c181
1 files changed, 166 insertions, 15 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ed0c6d0850d7..ca4c0ea5a4cd 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
64 int slot); 64 int slot);
65static int ocfs2_commit_thread(void *arg); 65static int ocfs2_commit_thread(void *arg);
66 66
67
/*
 * The recovery map is a simple array of node numbers to recover.
 * Entries are added, looked up and removed under osb->osb_lock.
 */

struct ocfs2_recovery_map {
	/* Number of valid entries at the front of rm_entries. */
	int rm_used;
	/* Node numbers awaiting recovery; holds up to osb->max_slots. */
	unsigned int *rm_entries;
};
77
78int ocfs2_recovery_init(struct ocfs2_super *osb)
79{
80 struct ocfs2_recovery_map *rm;
81
82 mutex_init(&osb->recovery_lock);
83 osb->disable_recovery = 0;
84 osb->recovery_thread_task = NULL;
85 init_waitqueue_head(&osb->recovery_event);
86
87 rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
88 osb->max_slots * sizeof(unsigned int),
89 GFP_KERNEL);
90 if (!rm) {
91 mlog_errno(-ENOMEM);
92 return -ENOMEM;
93 }
94
95 rm->rm_entries = (unsigned int *)((char *)rm +
96 sizeof(struct ocfs2_recovery_map));
97 osb->recovery_map = rm;
98
99 return 0;
100}
101
102/* we can't grab the goofy sem lock from inside wait_event, so we use
103 * memory barriers to make sure that we'll see the null task before
104 * being woken up */
105static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
106{
107 mb();
108 return osb->recovery_thread_task != NULL;
109}
110
111void ocfs2_recovery_exit(struct ocfs2_super *osb)
112{
113 struct ocfs2_recovery_map *rm;
114
115 /* disable any new recovery threads and wait for any currently
116 * running ones to exit. Do this before setting the vol_state. */
117 mutex_lock(&osb->recovery_lock);
118 osb->disable_recovery = 1;
119 mutex_unlock(&osb->recovery_lock);
120 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
121
122 /* At this point, we know that no more recovery threads can be
123 * launched, so wait for any recovery completion work to
124 * complete. */
125 flush_workqueue(ocfs2_wq);
126
127 /*
128 * Now that recovery is shut down, and the osb is about to be
129 * freed, the osb_lock is not taken here.
130 */
131 rm = osb->recovery_map;
132 /* XXX: Should we bug if there are dirty entries? */
133
134 kfree(rm);
135}
136
137static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
138 unsigned int node_num)
139{
140 int i;
141 struct ocfs2_recovery_map *rm = osb->recovery_map;
142
143 assert_spin_locked(&osb->osb_lock);
144
145 for (i = 0; i < rm->rm_used; i++) {
146 if (rm->rm_entries[i] == node_num)
147 return 1;
148 }
149
150 return 0;
151}
152
153/* Behaves like test-and-set. Returns the previous value */
154static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
155 unsigned int node_num)
156{
157 struct ocfs2_recovery_map *rm = osb->recovery_map;
158
159 spin_lock(&osb->osb_lock);
160 if (__ocfs2_recovery_map_test(osb, node_num)) {
161 spin_unlock(&osb->osb_lock);
162 return 1;
163 }
164
165 /* XXX: Can this be exploited? Not from o2dlm... */
166 BUG_ON(rm->rm_used >= osb->max_slots);
167
168 rm->rm_entries[rm->rm_used] = node_num;
169 rm->rm_used++;
170 spin_unlock(&osb->osb_lock);
171
172 return 0;
173}
174
175static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
176 unsigned int node_num)
177{
178 int i;
179 struct ocfs2_recovery_map *rm = osb->recovery_map;
180
181 spin_lock(&osb->osb_lock);
182
183 for (i = 0; i < rm->rm_used; i++) {
184 if (rm->rm_entries[i] == node_num)
185 break;
186 }
187
188 if (i < rm->rm_used) {
189 /* XXX: be careful with the pointer math */
190 memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
191 (rm->rm_used - i - 1) * sizeof(unsigned int));
192 rm->rm_used--;
193 }
194
195 spin_unlock(&osb->osb_lock);
196}
197
67static int ocfs2_commit_cache(struct ocfs2_super *osb) 198static int ocfs2_commit_cache(struct ocfs2_super *osb)
68{ 199{
69 int status = 0; 200 int status = 0;
@@ -650,6 +781,23 @@ bail:
650 return status; 781 return status;
651} 782}
652 783
784static int ocfs2_recovery_completed(struct ocfs2_super *osb)
785{
786 int empty;
787 struct ocfs2_recovery_map *rm = osb->recovery_map;
788
789 spin_lock(&osb->osb_lock);
790 empty = (rm->rm_used == 0);
791 spin_unlock(&osb->osb_lock);
792
793 return empty;
794}
795
796void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
797{
798 wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
799}
800
653/* 801/*
654 * JBD Might read a cached version of another nodes journal file. We 802 * JBD Might read a cached version of another nodes journal file. We
655 * don't want this as this file changes often and we get no 803 * don't want this as this file changes often and we get no
@@ -848,6 +996,7 @@ static int __ocfs2_recovery_thread(void *arg)
848{ 996{
849 int status, node_num; 997 int status, node_num;
850 struct ocfs2_super *osb = arg; 998 struct ocfs2_super *osb = arg;
999 struct ocfs2_recovery_map *rm = osb->recovery_map;
851 1000
852 mlog_entry_void(); 1001 mlog_entry_void();
853 1002
@@ -863,26 +1012,29 @@ restart:
863 goto bail; 1012 goto bail;
864 } 1013 }
865 1014
866 while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { 1015 spin_lock(&osb->osb_lock);
867 node_num = ocfs2_node_map_first_set_bit(osb, 1016 while (rm->rm_used) {
868 &osb->recovery_map); 1017 /* It's always safe to remove entry zero, as we won't
869 if (node_num == O2NM_INVALID_NODE_NUM) { 1018 * clear it until ocfs2_recover_node() has succeeded. */
870 mlog(0, "Out of nodes to recover.\n"); 1019 node_num = rm->rm_entries[0];
871 break; 1020 spin_unlock(&osb->osb_lock);
872 }
873 1021
874 status = ocfs2_recover_node(osb, node_num); 1022 status = ocfs2_recover_node(osb, node_num);
875 if (status < 0) { 1023 if (!status) {
1024 ocfs2_recovery_map_clear(osb, node_num);
1025 } else {
876 mlog(ML_ERROR, 1026 mlog(ML_ERROR,
877 "Error %d recovering node %d on device (%u,%u)!\n", 1027 "Error %d recovering node %d on device (%u,%u)!\n",
878 status, node_num, 1028 status, node_num,
879 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1029 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
880 mlog(ML_ERROR, "Volume requires unmount.\n"); 1030 mlog(ML_ERROR, "Volume requires unmount.\n");
881 continue;
882 } 1031 }
883 1032
884 ocfs2_recovery_map_clear(osb, node_num); 1033 spin_lock(&osb->osb_lock);
885 } 1034 }
1035 spin_unlock(&osb->osb_lock);
1036 mlog(0, "All nodes recovered\n");
1037
886 ocfs2_super_unlock(osb, 1); 1038 ocfs2_super_unlock(osb, 1);
887 1039
888 /* We always run recovery on our own orphan dir - the dead 1040 /* We always run recovery on our own orphan dir - the dead
@@ -893,8 +1045,7 @@ restart:
893 1045
894bail: 1046bail:
895 mutex_lock(&osb->recovery_lock); 1047 mutex_lock(&osb->recovery_lock);
896 if (!status && 1048 if (!status && !ocfs2_recovery_completed(osb)) {
897 !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
898 mutex_unlock(&osb->recovery_lock); 1049 mutex_unlock(&osb->recovery_lock);
899 goto restart; 1050 goto restart;
900 } 1051 }
@@ -924,8 +1075,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
924 1075
925 /* People waiting on recovery will wait on 1076 /* People waiting on recovery will wait on
926 * the recovery map to empty. */ 1077 * the recovery map to empty. */
927 if (!ocfs2_recovery_map_set(osb, node_num)) 1078 if (ocfs2_recovery_map_set(osb, node_num))
928 mlog(0, "node %d already be in recovery.\n", node_num); 1079 mlog(0, "node %d already in recovery map.\n", node_num);
929 1080
930 mlog(0, "starting recovery thread...\n"); 1081 mlog(0, "starting recovery thread...\n");
931 1082
@@ -1197,7 +1348,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1197 if (status == -ENOENT) 1348 if (status == -ENOENT)
1198 continue; 1349 continue;
1199 1350
1200 if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) 1351 if (__ocfs2_recovery_map_test(osb, node_num))
1201 continue; 1352 continue;
1202 spin_unlock(&osb->osb_lock); 1353 spin_unlock(&osb->osb_lock);
1203 1354