aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mmp.c
diff options
context:
space:
mode:
authorJohann Lombardi <johann@whamcloud.com>2011-05-24 18:31:25 -0400
committerTheodore Ts'o <tytso@mit.edu>2011-05-24 18:31:25 -0400
commitc5e06d101aaf72f1f2192a661414459775e9bd74 (patch)
tree96d05d41be2bfea6d51be915ce196f033a5d9bf3 /fs/ext4/mmp.c
parentd02a9391f79cab65cde74cd9e8ccd2290a565229 (diff)
ext4: add support for multiple mount protection
Prevent an ext4 filesystem from being mounted multiple times. A sequence number is stored on disk and is periodically updated (every 5 seconds by default) by a mounted filesystem. At mount time, we now wait for s_mmp_update_interval seconds to make sure that the MMP sequence does not change. In case of failure, the nodename, bdevname and the time at which the MMP block was last updated is displayed. Signed-off-by: Andreas Dilger <adilger@whamcloud.com> Signed-off-by: Johann Lombardi <johann@whamcloud.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mmp.c')
-rw-r--r--fs/ext4/mmp.c351
1 files changed, 351 insertions, 0 deletions
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
new file mode 100644
index 000000000000..9bdef3f537c5
--- /dev/null
+++ b/fs/ext4/mmp.c
@@ -0,0 +1,351 @@
1#include <linux/fs.h>
2#include <linux/random.h>
3#include <linux/buffer_head.h>
4#include <linux/utsname.h>
5#include <linux/kthread.h>
6
7#include "ext4.h"
8
9/*
10 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
11 * faster.
12 */
13static int write_mmp_block(struct buffer_head *bh)
14{
15 mark_buffer_dirty(bh);
16 lock_buffer(bh);
17 bh->b_end_io = end_buffer_write_sync;
18 get_bh(bh);
19 submit_bh(WRITE_SYNC, bh);
20 wait_on_buffer(bh);
21 if (unlikely(!buffer_uptodate(bh)))
22 return 1;
23
24 return 0;
25}
26
27/*
28 * Read the MMP block. It _must_ be read from disk and hence we clear the
29 * uptodate flag on the buffer.
30 */
31static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
32 ext4_fsblk_t mmp_block)
33{
34 struct mmp_struct *mmp;
35
36 if (*bh)
37 clear_buffer_uptodate(*bh);
38
39 /* This would be sb_bread(sb, mmp_block), except we need to be sure
40 * that the MD RAID device cache has been bypassed, and that the read
41 * is not blocked in the elevator. */
42 if (!*bh)
43 *bh = sb_getblk(sb, mmp_block);
44 if (*bh) {
45 get_bh(*bh);
46 lock_buffer(*bh);
47 (*bh)->b_end_io = end_buffer_read_sync;
48 submit_bh(READ_SYNC, *bh);
49 wait_on_buffer(*bh);
50 if (!buffer_uptodate(*bh)) {
51 brelse(*bh);
52 *bh = NULL;
53 }
54 }
55 if (!*bh) {
56 ext4_warning(sb, "Error while reading MMP block %llu",
57 mmp_block);
58 return -EIO;
59 }
60
61 mmp = (struct mmp_struct *)((*bh)->b_data);
62 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
63 return -EINVAL;
64
65 return 0;
66}
67
68/*
69 * Dump as much information as possible to help the admin.
70 */
71void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
72 const char *function, unsigned int line, const char *msg)
73{
74 __ext4_warning(sb, function, line, msg);
75 __ext4_warning(sb, function, line,
76 "MMP failure info: last update time: %llu, last update "
77 "node: %s, last update device: %s\n",
78 (long long unsigned int) le64_to_cpu(mmp->mmp_time),
79 mmp->mmp_nodename, mmp->mmp_bdevname);
80}
81
82/*
83 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
84 */
85static int kmmpd(void *data)
86{
87 struct super_block *sb = ((struct mmpd_data *) data)->sb;
88 struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
89 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
90 struct mmp_struct *mmp;
91 ext4_fsblk_t mmp_block;
92 u32 seq = 0;
93 unsigned long failed_writes = 0;
94 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
95 unsigned mmp_check_interval;
96 unsigned long last_update_time;
97 unsigned long diff;
98 int retval;
99
100 mmp_block = le64_to_cpu(es->s_mmp_block);
101 mmp = (struct mmp_struct *)(bh->b_data);
102 mmp->mmp_time = cpu_to_le64(get_seconds());
103 /*
104 * Start with the higher mmp_check_interval and reduce it if
105 * the MMP block is being updated on time.
106 */
107 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
108 EXT4_MMP_MIN_CHECK_INTERVAL);
109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
110 bdevname(bh->b_bdev, mmp->mmp_bdevname);
111
112 memcpy(mmp->mmp_nodename, init_utsname()->sysname,
113 sizeof(mmp->mmp_nodename));
114
115 while (!kthread_should_stop()) {
116 if (++seq > EXT4_MMP_SEQ_MAX)
117 seq = 1;
118
119 mmp->mmp_seq = cpu_to_le32(seq);
120 mmp->mmp_time = cpu_to_le64(get_seconds());
121 last_update_time = jiffies;
122
123 retval = write_mmp_block(bh);
124 /*
125 * Don't spew too many error messages. Print one every
126 * (s_mmp_update_interval * 60) seconds.
127 */
128 if (retval && (failed_writes % 60) == 0) {
129 ext4_error(sb, "Error writing to MMP block");
130 failed_writes++;
131 }
132
133 if (!(le32_to_cpu(es->s_feature_incompat) &
134 EXT4_FEATURE_INCOMPAT_MMP)) {
135 ext4_warning(sb, "kmmpd being stopped since MMP feature"
136 " has been disabled.");
137 EXT4_SB(sb)->s_mmp_tsk = NULL;
138 goto failed;
139 }
140
141 if (sb->s_flags & MS_RDONLY) {
142 ext4_warning(sb, "kmmpd being stopped since filesystem "
143 "has been remounted as readonly.");
144 EXT4_SB(sb)->s_mmp_tsk = NULL;
145 goto failed;
146 }
147
148 diff = jiffies - last_update_time;
149 if (diff < mmp_update_interval * HZ)
150 schedule_timeout_interruptible(mmp_update_interval *
151 HZ - diff);
152
153 /*
154 * We need to make sure that more than mmp_check_interval
155 * seconds have not passed since writing. If that has happened
156 * we need to check if the MMP block is as we left it.
157 */
158 diff = jiffies - last_update_time;
159 if (diff > mmp_check_interval * HZ) {
160 struct buffer_head *bh_check = NULL;
161 struct mmp_struct *mmp_check;
162
163 retval = read_mmp_block(sb, &bh_check, mmp_block);
164 if (retval) {
165 ext4_error(sb, "error reading MMP data: %d",
166 retval);
167
168 EXT4_SB(sb)->s_mmp_tsk = NULL;
169 goto failed;
170 }
171
172 mmp_check = (struct mmp_struct *)(bh_check->b_data);
173 if (mmp->mmp_seq != mmp_check->mmp_seq ||
174 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
175 sizeof(mmp->mmp_nodename))) {
176 dump_mmp_msg(sb, mmp_check,
177 "Error while updating MMP info. "
178 "The filesystem seems to have been"
179 " multiply mounted.");
180 ext4_error(sb, "abort");
181 goto failed;
182 }
183 put_bh(bh_check);
184 }
185
186 /*
187 * Adjust the mmp_check_interval depending on how much time
188 * it took for the MMP block to be written.
189 */
190 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
191 EXT4_MMP_MAX_CHECK_INTERVAL),
192 EXT4_MMP_MIN_CHECK_INTERVAL);
193 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
194 }
195
196 /*
197 * Unmount seems to be clean.
198 */
199 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
200 mmp->mmp_time = cpu_to_le64(get_seconds());
201
202 retval = write_mmp_block(bh);
203
204failed:
205 kfree(data);
206 brelse(bh);
207 return retval;
208}
209
210/*
211 * Get a random new sequence number but make sure it is not greater than
212 * EXT4_MMP_SEQ_MAX.
213 */
214static unsigned int mmp_new_seq(void)
215{
216 u32 new_seq;
217
218 do {
219 get_random_bytes(&new_seq, sizeof(u32));
220 } while (new_seq > EXT4_MMP_SEQ_MAX);
221
222 return new_seq;
223}
224
225/*
226 * Protect the filesystem from being mounted more than once.
227 */
228int ext4_multi_mount_protect(struct super_block *sb,
229 ext4_fsblk_t mmp_block)
230{
231 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
232 struct buffer_head *bh = NULL;
233 struct mmp_struct *mmp = NULL;
234 struct mmpd_data *mmpd_data;
235 u32 seq;
236 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
237 unsigned int wait_time = 0;
238 int retval;
239
240 if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
241 mmp_block >= ext4_blocks_count(es)) {
242 ext4_warning(sb, "Invalid MMP block in superblock");
243 goto failed;
244 }
245
246 retval = read_mmp_block(sb, &bh, mmp_block);
247 if (retval)
248 goto failed;
249
250 mmp = (struct mmp_struct *)(bh->b_data);
251
252 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
253 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
254
255 /*
256 * If check_interval in MMP block is larger, use that instead of
257 * update_interval from the superblock.
258 */
259 if (mmp->mmp_check_interval > mmp_check_interval)
260 mmp_check_interval = mmp->mmp_check_interval;
261
262 seq = le32_to_cpu(mmp->mmp_seq);
263 if (seq == EXT4_MMP_SEQ_CLEAN)
264 goto skip;
265
266 if (seq == EXT4_MMP_SEQ_FSCK) {
267 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
268 goto failed;
269 }
270
271 wait_time = min(mmp_check_interval * 2 + 1,
272 mmp_check_interval + 60);
273
274 /* Print MMP interval if more than 20 secs. */
275 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
276 ext4_warning(sb, "MMP interval %u higher than expected, please"
277 " wait.\n", wait_time * 2);
278
279 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
280 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
281 goto failed;
282 }
283
284 retval = read_mmp_block(sb, &bh, mmp_block);
285 if (retval)
286 goto failed;
287 mmp = (struct mmp_struct *)(bh->b_data);
288 if (seq != le32_to_cpu(mmp->mmp_seq)) {
289 dump_mmp_msg(sb, mmp,
290 "Device is already active on another node.");
291 goto failed;
292 }
293
294skip:
295 /*
296 * write a new random sequence number.
297 */
298 mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
299
300 retval = write_mmp_block(bh);
301 if (retval)
302 goto failed;
303
304 /*
305 * wait for MMP interval and check mmp_seq.
306 */
307 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
308 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
309 goto failed;
310 }
311
312 retval = read_mmp_block(sb, &bh, mmp_block);
313 if (retval)
314 goto failed;
315 mmp = (struct mmp_struct *)(bh->b_data);
316 if (seq != le32_to_cpu(mmp->mmp_seq)) {
317 dump_mmp_msg(sb, mmp,
318 "Device is already active on another node.");
319 goto failed;
320 }
321
322 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
323 if (!mmpd_data) {
324 ext4_warning(sb, "not enough memory for mmpd_data");
325 goto failed;
326 }
327 mmpd_data->sb = sb;
328 mmpd_data->bh = bh;
329
330 /*
331 * Start a kernel thread to update the MMP block periodically.
332 */
333 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
334 bdevname(bh->b_bdev,
335 mmp->mmp_bdevname));
336 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
337 EXT4_SB(sb)->s_mmp_tsk = NULL;
338 kfree(mmpd_data);
339 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
340 sb->s_id);
341 goto failed;
342 }
343
344 return 0;
345
346failed:
347 brelse(bh);
348 return 1;
349}
350
351