aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/bitmap.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@cse.unsw.edu.au>2005-06-21 20:17:14 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 22:07:43 -0400
commit32a7627cf3a35396a8e834faf34e38ae9f3b1309 (patch)
tree3fe7764f5d8e39d835a397e1099358d924b02981 /drivers/md/bitmap.c
parent57afd89f98a990747445f01c458ecae64263b2f8 (diff)
[PATCH] md: optimised resync using Bitmap based intent logging
With this patch, the intent to write to some block in the array can be logged to a bitmap file. Each bit represents some number of sectors and is set before any update happens, and only cleared when all writes relating to all sectors are complete. After an unclean shutdown, information in this bitmap can be used to optimise resync - only sectors which could be out-of-sync need to be updated. Also if a drive is removed and then added back into an array, the recovery can make use of the bitmap to optimise reconstruction. This is not implemented in this patch. Currently the bitmap is stored in a file which must (obviously) be stored on a separate device. The patch only provided infrastructure. It does not update any personalities to bitmap intent logging. Md arrays can still be used with no bitmap file. This patch has minimal impact on such arrays. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/bitmap.c')
-rw-r--r--drivers/md/bitmap.c1519
1 files changed, 1519 insertions, 0 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
new file mode 100644
index 000000000000..34ffc133db05
--- /dev/null
+++ b/drivers/md/bitmap.c
@@ -0,0 +1,1519 @@
1/*
2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3 *
4 * bitmap_create - sets up the bitmap structure
5 * bitmap_destroy - destroys the bitmap structure
6 *
7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8 * - added disk storage for bitmap
9 * - changes to allow various bitmap chunk sizes
10 * - added bitmap daemon (to asynchronously clear bitmap bits from disk)
11 */
12
13/*
14 * Still to do:
15 *
16 * flush after percent set rather than just time based. (maybe both).
17 * wait if count gets too high, wake when it drops to half.
18 * allow bitmap to be mirrored with superblock (before or after...)
19 * allow hot-add to re-instate a current device.
20 * allow hot-add of bitmap after quiescing device
21 */
22
23#include <linux/module.h>
24#include <linux/version.h>
25#include <linux/errno.h>
26#include <linux/slab.h>
27#include <linux/init.h>
28#include <linux/config.h>
29#include <linux/timer.h>
30#include <linux/sched.h>
31#include <linux/list.h>
32#include <linux/file.h>
33#include <linux/mount.h>
34#include <linux/buffer_head.h>
35#include <linux/raid/md.h>
36#include <linux/raid/bitmap.h>
37
38/* debug macros */
39
40#define DEBUG 0
41
42#if DEBUG
43/* these are for debugging purposes only! */
44
45/* define one and only one of these */
46#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */
47#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/
48#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */
49#define INJECT_FAULTS_4 0 /* undef */
50#define INJECT_FAULTS_5 0 /* undef */
51#define INJECT_FAULTS_6 0
52
53/* if these are defined, the driver will fail! debug only */
54#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */
55#define INJECT_FATAL_FAULT_2 0 /* undef */
56#define INJECT_FATAL_FAULT_3 0 /* undef */
57#endif
58
59//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */
60#define DPRINTK(x...) do { } while(0)
61
62#ifndef PRINTK
63# if DEBUG > 0
64# define PRINTK(x...) printk(KERN_DEBUG x)
65# else
66# define PRINTK(x...)
67# endif
68#endif
69
70static inline char * bmname(struct bitmap *bitmap)
71{
72 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
73}
74
75
76/*
77 * test if the bitmap is active
78 */
79int bitmap_active(struct bitmap *bitmap)
80{
81 unsigned long flags;
82 int res = 0;
83
84 if (!bitmap)
85 return res;
86 spin_lock_irqsave(&bitmap->lock, flags);
87 res = bitmap->flags & BITMAP_ACTIVE;
88 spin_unlock_irqrestore(&bitmap->lock, flags);
89 return res;
90}
91
92#define WRITE_POOL_SIZE 256
93/* mempool for queueing pending writes on the bitmap file */
94static void *write_pool_alloc(unsigned int gfp_flags, void *data)
95{
96 return kmalloc(sizeof(struct page_list), gfp_flags);
97}
98
/*
 * Release callback for the write mempool: frees an element with kfree().
 * The pool data argument is not used.
 */
static void write_pool_free(void *ptr, void *data)
{
	void *entry = ptr;

	kfree(entry);
}
103
104/*
105 * just a placeholder - calls kmalloc for bitmap pages
106 */
107static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
108{
109 unsigned char *page;
110
111#if INJECT_FAULTS_1
112 page = NULL;
113#else
114 page = kmalloc(PAGE_SIZE, GFP_NOIO);
115#endif
116 if (!page)
117 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
118 else
119 printk("%s: bitmap_alloc_page: allocated page at %p\n",
120 bmname(bitmap), page);
121 return page;
122}
123
/*
 * bitmap_free_page - release a buffer obtained from bitmap_alloc_page().
 * For now this is only a kfree() plus a debug trace.
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);

	kfree(page);
}
132
133/*
134 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
135 *
136 * 1) check to see if this page is allocated, if it's not then try to alloc
137 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
138 * page pointer directly as a counter
139 *
140 * if we find our page, we increment the page's refcount so that it stays
141 * allocated while we're using it
142 */
143static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
144{
145 unsigned char *mappage;
146
147 if (page >= bitmap->pages) {
148 printk(KERN_ALERT
149 "%s: invalid bitmap page request: %lu (> %lu)\n",
150 bmname(bitmap), page, bitmap->pages-1);
151 return -EINVAL;
152 }
153
154
155 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
156 return 0;
157
158 if (bitmap->bp[page].map) /* page is already allocated, just return */
159 return 0;
160
161 if (!create)
162 return -ENOENT;
163
164 spin_unlock_irq(&bitmap->lock);
165
166 /* this page has not been allocated yet */
167
168 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
169 PRINTK("%s: bitmap map page allocation failed, hijacking\n",
170 bmname(bitmap));
171 /* failed - set the hijacked flag so that we can use the
172 * pointer as a counter */
173 spin_lock_irq(&bitmap->lock);
174 if (!bitmap->bp[page].map)
175 bitmap->bp[page].hijacked = 1;
176 goto out;
177 }
178
179 /* got a page */
180
181 spin_lock_irq(&bitmap->lock);
182
183 /* recheck the page */
184
185 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
186 /* somebody beat us to getting the page */
187 bitmap_free_page(bitmap, mappage);
188 return 0;
189 }
190
191 /* no page was in place and we have one, so install it */
192
193 memset(mappage, 0, PAGE_SIZE);
194 bitmap->bp[page].map = mappage;
195 bitmap->missing_pages--;
196out:
197 return 0;
198}
199
200
201/* if page is completely empty, put it back on the free list, or dealloc it */
202/* if page was hijacked, unmark the flag so it might get alloced next time */
203/* Note: lock should be held when calling this */
204static inline void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
205{
206 char *ptr;
207
208 if (bitmap->bp[page].count) /* page is still busy */
209 return;
210
211 /* page is no longer in use, it can be released */
212
213 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
214 bitmap->bp[page].hijacked = 0;
215 bitmap->bp[page].map = NULL;
216 return;
217 }
218
219 /* normal case, free the page */
220
221#if 0
222/* actually ... let's not. We will probably need the page again exactly when
223 * memory is tight and we are flusing to disk
224 */
225 return;
226#else
227 ptr = bitmap->bp[page].map;
228 bitmap->bp[page].map = NULL;
229 bitmap->missing_pages++;
230 bitmap_free_page(bitmap, ptr);
231 return;
232#endif
233}
234
235
236/*
237 * bitmap file handling - read and write the bitmap file and its superblock
238 */
239
240/* copy the pathname of a file to a buffer */
241char *file_path(struct file *file, char *buf, int count)
242{
243 struct dentry *d;
244 struct vfsmount *v;
245
246 if (!buf)
247 return NULL;
248
249 d = file->f_dentry;
250 v = file->f_vfsmnt;
251
252 buf = d_path(d, v, buf, count);
253
254 return IS_ERR(buf) ? NULL : buf;
255}
256
257/*
258 * basic page I/O operations
259 */
260
261/*
262 * write out a page
263 */
264static int write_page(struct page *page, int wait)
265{
266 int ret = -ENOMEM;
267
268 lock_page(page);
269
270 if (page->mapping == NULL)
271 goto unlock_out;
272 else if (i_size_read(page->mapping->host) < page->index << PAGE_SHIFT) {
273 ret = -ENOENT;
274 goto unlock_out;
275 }
276
277 ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
278 if (!ret)
279 ret = page->mapping->a_ops->commit_write(NULL, page, 0,
280 PAGE_SIZE);
281 if (ret) {
282unlock_out:
283 unlock_page(page);
284 return ret;
285 }
286
287 set_page_dirty(page); /* force it to be written out */
288 return write_one_page(page, wait);
289}
290
291/* read a page from a file, pinning it into cache, and return bytes_read */
292static struct page *read_page(struct file *file, unsigned long index,
293 unsigned long *bytes_read)
294{
295 struct inode *inode = file->f_mapping->host;
296 struct page *page = NULL;
297 loff_t isize = i_size_read(inode);
298 unsigned long end_index = isize >> PAGE_CACHE_SHIFT;
299
300 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_CACHE_SIZE,
301 (unsigned long long)index << PAGE_CACHE_SHIFT);
302
303 page = read_cache_page(inode->i_mapping, index,
304 (filler_t *)inode->i_mapping->a_ops->readpage, file);
305 if (IS_ERR(page))
306 goto out;
307 wait_on_page_locked(page);
308 if (!PageUptodate(page) || PageError(page)) {
309 page_cache_release(page);
310 page = ERR_PTR(-EIO);
311 goto out;
312 }
313
314 if (index > end_index) /* we have read beyond EOF */
315 *bytes_read = 0;
316 else if (index == end_index) /* possible short read */
317 *bytes_read = isize & ~PAGE_CACHE_MASK;
318 else
319 *bytes_read = PAGE_CACHE_SIZE; /* got a full page */
320out:
321 if (IS_ERR(page))
322 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
323 (int)PAGE_CACHE_SIZE,
324 (unsigned long long)index << PAGE_CACHE_SHIFT,
325 PTR_ERR(page));
326 return page;
327}
328
329/*
330 * bitmap file superblock operations
331 */
332
333/* update the event counter and sync the superblock to disk */
334int bitmap_update_sb(struct bitmap *bitmap)
335{
336 bitmap_super_t *sb;
337 unsigned long flags;
338
339 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
340 return 0;
341 spin_lock_irqsave(&bitmap->lock, flags);
342 if (!bitmap->sb_page) { /* no superblock */
343 spin_unlock_irqrestore(&bitmap->lock, flags);
344 return 0;
345 }
346 page_cache_get(bitmap->sb_page);
347 spin_unlock_irqrestore(&bitmap->lock, flags);
348 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
349 sb->events = cpu_to_le64(bitmap->mddev->events);
350 if (!bitmap->mddev->degraded)
351 sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
352 kunmap(bitmap->sb_page);
353 write_page(bitmap->sb_page, 0);
354 return 0;
355}
356
357/* print out the bitmap file superblock */
358void bitmap_print_sb(struct bitmap *bitmap)
359{
360 bitmap_super_t *sb;
361
362 if (!bitmap || !bitmap->sb_page)
363 return;
364 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
365 printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
366 printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
367 printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
368 printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n",
369 *(__u32 *)(sb->uuid+0),
370 *(__u32 *)(sb->uuid+4),
371 *(__u32 *)(sb->uuid+8),
372 *(__u32 *)(sb->uuid+12));
373 printk(KERN_DEBUG " events: %llu\n",
374 (unsigned long long) le64_to_cpu(sb->events));
375 printk(KERN_DEBUG "events_clred: %llu\n",
376 (unsigned long long) le64_to_cpu(sb->events_cleared));
377 printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state));
378 printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize));
379 printk(KERN_DEBUG "daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
380 printk(KERN_DEBUG " sync size: %llu KB\n", le64_to_cpu(sb->sync_size));
381 kunmap(bitmap->sb_page);
382}
383
384/* read the superblock from the bitmap file and initialize some bitmap fields */
385static int bitmap_read_sb(struct bitmap *bitmap)
386{
387 char *reason = NULL;
388 bitmap_super_t *sb;
389 unsigned long chunksize, daemon_sleep;
390 unsigned long bytes_read;
391 unsigned long long events;
392 int err = -EINVAL;
393
394 /* page 0 is the superblock, read it... */
395 bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
396 if (IS_ERR(bitmap->sb_page)) {
397 err = PTR_ERR(bitmap->sb_page);
398 bitmap->sb_page = NULL;
399 return err;
400 }
401
402 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
403
404 if (bytes_read < sizeof(*sb)) { /* short read */
405 printk(KERN_INFO "%s: bitmap file superblock truncated\n",
406 bmname(bitmap));
407 err = -ENOSPC;
408 goto out;
409 }
410
411 chunksize = le32_to_cpu(sb->chunksize);
412 daemon_sleep = le32_to_cpu(sb->daemon_sleep);
413
414 /* verify that the bitmap-specific fields are valid */
415 if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
416 reason = "bad magic";
417 else if (sb->version != cpu_to_le32(BITMAP_MAJOR))
418 reason = "unrecognized superblock version";
419 else if (chunksize < 512 || chunksize > (1024 * 1024 * 4))
420 reason = "bitmap chunksize out of range (512B - 4MB)";
421 else if ((1 << ffz(~chunksize)) != chunksize)
422 reason = "bitmap chunksize not a power of 2";
423 else if (daemon_sleep < 1 || daemon_sleep > 15)
424 reason = "daemon sleep period out of range";
425 if (reason) {
426 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
427 bmname(bitmap), reason);
428 goto out;
429 }
430
431 /* keep the array size field of the bitmap superblock up to date */
432 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
433
434 if (!bitmap->mddev->persistent)
435 goto success;
436
437 /*
438 * if we have a persistent array superblock, compare the
439 * bitmap's UUID and event counter to the mddev's
440 */
441 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
442 printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
443 bmname(bitmap));
444 goto out;
445 }
446 events = le64_to_cpu(sb->events);
447 if (events < bitmap->mddev->events) {
448 printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
449 "-- forcing full recovery\n", bmname(bitmap), events,
450 (unsigned long long) bitmap->mddev->events);
451 sb->state |= BITMAP_STALE;
452 }
453success:
454 /* assign fields using values from superblock */
455 bitmap->chunksize = chunksize;
456 bitmap->daemon_sleep = daemon_sleep;
457 bitmap->flags |= sb->state;
458 bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
459 err = 0;
460out:
461 kunmap(bitmap->sb_page);
462 if (err)
463 bitmap_print_sb(bitmap);
464 return err;
465}
466
/* selects what bitmap_mask_state() does with the bits it is given */
enum bitmap_mask_op {
	MASK_SET,	/* OR the bits into the superblock state */
	MASK_UNSET	/* clear the bits from the superblock state */
};
471
472/* record the state of the bitmap in the superblock */
473static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
474 enum bitmap_mask_op op)
475{
476 bitmap_super_t *sb;
477 unsigned long flags;
478
479 spin_lock_irqsave(&bitmap->lock, flags);
480 if (!bitmap || !bitmap->sb_page) { /* can't set the state */
481 spin_unlock_irqrestore(&bitmap->lock, flags);
482 return;
483 }
484 page_cache_get(bitmap->sb_page);
485 spin_unlock_irqrestore(&bitmap->lock, flags);
486 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
487 switch (op) {
488 case MASK_SET: sb->state |= bits;
489 break;
490 case MASK_UNSET: sb->state &= ~bits;
491 break;
492 default: BUG();
493 }
494 kunmap(bitmap->sb_page);
495 page_cache_release(bitmap->sb_page);
496}
497
498/*
499 * general bitmap file operations
500 */
501
502/* calculate the index of the page that contains this bit */
503static inline unsigned long file_page_index(unsigned long chunk)
504{
505 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
506}
507
508/* calculate the (bit) offset of this bit within a page */
509static inline unsigned long file_page_offset(unsigned long chunk)
510{
511 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
512}
513
514/*
515 * return a pointer to the page in the filemap that contains the given bit
516 *
517 * this lookup is complicated by the fact that the bitmap sb might be exactly
518 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
519 * 0 or page 1
520 */
521static inline struct page *filemap_get_page(struct bitmap *bitmap,
522 unsigned long chunk)
523{
524 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
525}
526
527
528static void bitmap_file_unmap(struct bitmap *bitmap)
529{
530 struct page **map, *sb_page;
531 unsigned long *attr;
532 int pages;
533 unsigned long flags;
534
535 spin_lock_irqsave(&bitmap->lock, flags);
536 map = bitmap->filemap;
537 bitmap->filemap = NULL;
538 attr = bitmap->filemap_attr;
539 bitmap->filemap_attr = NULL;
540 pages = bitmap->file_pages;
541 bitmap->file_pages = 0;
542 sb_page = bitmap->sb_page;
543 bitmap->sb_page = NULL;
544 spin_unlock_irqrestore(&bitmap->lock, flags);
545
546 while (pages--)
547 if (map[pages]->index != 0) /* 0 is sb_page, release it below */
548 page_cache_release(map[pages]);
549 kfree(map);
550 kfree(attr);
551
552 if (sb_page)
553 page_cache_release(sb_page);
554}
555
556static void bitmap_stop_daemons(struct bitmap *bitmap);
557
558/* dequeue the next item in a page list -- don't call from irq context */
559static struct page_list *dequeue_page(struct bitmap *bitmap,
560 struct list_head *head)
561{
562 struct page_list *item = NULL;
563
564 spin_lock(&bitmap->write_lock);
565 if (list_empty(head))
566 goto out;
567 item = list_entry(head->prev, struct page_list, list);
568 list_del(head->prev);
569out:
570 spin_unlock(&bitmap->write_lock);
571 return item;
572}
573
574static void drain_write_queues(struct bitmap *bitmap)
575{
576 struct list_head *queues[] = { &bitmap->complete_pages, NULL };
577 struct list_head *head;
578 struct page_list *item;
579 int i;
580
581 for (i = 0; queues[i]; i++) {
582 head = queues[i];
583 while ((item = dequeue_page(bitmap, head))) {
584 page_cache_release(item->page);
585 mempool_free(item, bitmap->write_pool);
586 }
587 }
588
589 spin_lock(&bitmap->write_lock);
590 bitmap->writes_pending = 0; /* make sure waiters continue */
591 wake_up(&bitmap->write_wait);
592 spin_unlock(&bitmap->write_lock);
593}
594
595static void bitmap_file_put(struct bitmap *bitmap)
596{
597 struct file *file;
598 struct inode *inode;
599 unsigned long flags;
600
601 spin_lock_irqsave(&bitmap->lock, flags);
602 file = bitmap->file;
603 bitmap->file = NULL;
604 spin_unlock_irqrestore(&bitmap->lock, flags);
605
606 bitmap_stop_daemons(bitmap);
607
608 drain_write_queues(bitmap);
609
610 bitmap_file_unmap(bitmap);
611
612 if (file) {
613 inode = file->f_mapping->host;
614 spin_lock(&inode->i_lock);
615 atomic_set(&inode->i_writecount, 1); /* allow writes again */
616 spin_unlock(&inode->i_lock);
617 fput(file);
618 }
619}
620
621
622/*
623 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
624 * then it is no longer reliable, so we stop using it and we mark the file
625 * as failed in the superblock
626 */
627static void bitmap_file_kick(struct bitmap *bitmap)
628{
629 char *path, *ptr = NULL;
630
631 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
632 bitmap_update_sb(bitmap);
633
634 path = kmalloc(PAGE_SIZE, GFP_KERNEL);
635 if (path)
636 ptr = file_path(bitmap->file, path, PAGE_SIZE);
637
638 printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
639 bmname(bitmap), ptr ? ptr : "");
640
641 kfree(path);
642
643 bitmap_file_put(bitmap);
644
645 return;
646}
647
/* per-file-page attribute bits kept in bitmap->filemap_attr[] */
enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 1,		/* there are set bits that need to be synced */
	BITMAP_PAGE_CLEAN = 2,		/* there are bits that might need to be cleared */
	BITMAP_PAGE_NEEDWRITE = 4,	/* there are cleared bits that need to be synced */
};
653
654static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
655 enum bitmap_page_attr attr)
656{
657 bitmap->filemap_attr[page->index] |= attr;
658}
659
660static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
661 enum bitmap_page_attr attr)
662{
663 bitmap->filemap_attr[page->index] &= ~attr;
664}
665
666static inline unsigned long get_page_attr(struct bitmap *bitmap, struct page *page)
667{
668 return bitmap->filemap_attr[page->index];
669}
670
671/*
672 * bitmap_file_set_bit -- called before performing a write to the md device
673 * to set (and eventually sync) a particular bit in the bitmap file
674 *
675 * we set the bit immediately, then we record the page number so that
676 * when an unplug occurs, we can flush the dirty pages out to disk
677 */
678static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
679{
680 unsigned long bit;
681 struct page *page;
682 void *kaddr;
683 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
684
685 if (!bitmap->file || !bitmap->filemap) {
686 return;
687 }
688
689 page = filemap_get_page(bitmap, chunk);
690 bit = file_page_offset(chunk);
691
692
693 /* make sure the page stays cached until it gets written out */
694 if (! (get_page_attr(bitmap, page) & BITMAP_PAGE_DIRTY))
695 page_cache_get(page);
696
697 /* set the bit */
698 kaddr = kmap_atomic(page, KM_USER0);
699 set_bit(bit, kaddr);
700 kunmap_atomic(kaddr, KM_USER0);
701 PRINTK("set file bit %lu page %lu\n", bit, page->index);
702
703 /* record page number so it gets flushed to disk when unplug occurs */
704 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
705
706}
707
708/* this gets called when the md device is ready to unplug its underlying
709 * (slave) device queues -- before we let any writes go down, we need to
710 * sync the dirty pages of the bitmap file to disk */
711int bitmap_unplug(struct bitmap *bitmap)
712{
713 unsigned long i, attr, flags;
714 struct page *page;
715 int wait = 0;
716
717 if (!bitmap)
718 return 0;
719
720 /* look at each page to see if there are any set bits that need to be
721 * flushed out to disk */
722 for (i = 0; i < bitmap->file_pages; i++) {
723 spin_lock_irqsave(&bitmap->lock, flags);
724 if (!bitmap->file || !bitmap->filemap) {
725 spin_unlock_irqrestore(&bitmap->lock, flags);
726 return 0;
727 }
728 page = bitmap->filemap[i];
729 attr = get_page_attr(bitmap, page);
730 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
731 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
732 if ((attr & BITMAP_PAGE_DIRTY))
733 wait = 1;
734 spin_unlock_irqrestore(&bitmap->lock, flags);
735
736 if (attr & (BITMAP_PAGE_DIRTY | BITMAP_PAGE_NEEDWRITE))
737 write_page(page, 0);
738 }
739 if (wait) { /* if any writes were performed, we need to wait on them */
740 spin_lock_irq(&bitmap->write_lock);
741 wait_event_lock_irq(bitmap->write_wait,
742 bitmap->writes_pending == 0, bitmap->write_lock,
743 wake_up_process(bitmap->writeback_daemon->tsk));
744 spin_unlock_irq(&bitmap->write_lock);
745 }
746 return 0;
747}
748
749static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
750 unsigned long sectors, int set);
751/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
752 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
753 * memory mapping of the bitmap file
754 * Special cases:
755 * if there's no bitmap file, or if the bitmap file had been
756 * previously kicked from the array, we mark all the bits as
757 * 1's in order to cause a full resync.
758 */
759static int bitmap_init_from_disk(struct bitmap *bitmap)
760{
761 unsigned long i, chunks, index, oldindex, bit;
762 struct page *page = NULL, *oldpage = NULL;
763 unsigned long num_pages, bit_cnt = 0;
764 struct file *file;
765 unsigned long bytes, offset, dummy;
766 int outofdate;
767 int ret = -ENOSPC;
768
769 chunks = bitmap->chunks;
770 file = bitmap->file;
771
772 if (!file) { /* no file, dirty all the in-memory bits */
773 printk(KERN_INFO "%s: no bitmap file, doing full recovery\n",
774 bmname(bitmap));
775 bitmap_set_memory_bits(bitmap, 0,
776 chunks << CHUNK_BLOCK_SHIFT(bitmap), 1);
777 return 0;
778 }
779
780#if INJECT_FAULTS_3
781 outofdate = 1;
782#else
783 outofdate = bitmap->flags & BITMAP_STALE;
784#endif
785 if (outofdate)
786 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
787 "recovery\n", bmname(bitmap));
788
789 bytes = (chunks + 7) / 8;
790 num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
791 if (i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
792 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
793 bmname(bitmap),
794 (unsigned long) i_size_read(file->f_mapping->host),
795 bytes + sizeof(bitmap_super_t));
796 goto out;
797 }
798 num_pages++;
799 bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
800 if (!bitmap->filemap) {
801 ret = -ENOMEM;
802 goto out;
803 }
804
805 bitmap->filemap_attr = kmalloc(sizeof(long) * num_pages, GFP_KERNEL);
806 if (!bitmap->filemap_attr) {
807 ret = -ENOMEM;
808 goto out;
809 }
810
811 memset(bitmap->filemap_attr, 0, sizeof(long) * num_pages);
812
813 oldindex = ~0L;
814
815 for (i = 0; i < chunks; i++) {
816 index = file_page_index(i);
817 bit = file_page_offset(i);
818 if (index != oldindex) { /* this is a new page, read it in */
819 /* unmap the old page, we're done with it */
820 if (oldpage != NULL)
821 kunmap(oldpage);
822 if (index == 0) {
823 /*
824 * if we're here then the superblock page
825 * contains some bits (PAGE_SIZE != sizeof sb)
826 * we've already read it in, so just use it
827 */
828 page = bitmap->sb_page;
829 offset = sizeof(bitmap_super_t);
830 } else {
831 page = read_page(file, index, &dummy);
832 if (IS_ERR(page)) { /* read error */
833 ret = PTR_ERR(page);
834 goto out;
835 }
836 offset = 0;
837 }
838 oldindex = index;
839 oldpage = page;
840 kmap(page);
841
842 if (outofdate) {
843 /*
844 * if bitmap is out of date, dirty the
845 * whole page and write it out
846 */
847 memset(page_address(page) + offset, 0xff,
848 PAGE_SIZE - offset);
849 ret = write_page(page, 1);
850 if (ret) {
851 kunmap(page);
852 /* release, page not in filemap yet */
853 page_cache_release(page);
854 goto out;
855 }
856 }
857
858 bitmap->filemap[bitmap->file_pages++] = page;
859 }
860 if (test_bit(bit, page_address(page))) {
861 /* if the disk bit is set, set the memory bit */
862 bitmap_set_memory_bits(bitmap,
863 i << CHUNK_BLOCK_SHIFT(bitmap), 1, 1);
864 bit_cnt++;
865 }
866#if 0
867 else
868 bitmap_set_memory_bits(bitmap,
869 i << CHUNK_BLOCK_SHIFT(bitmap), 1, 0);
870#endif
871 }
872
873 /* everything went OK */
874 ret = 0;
875 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
876
877 if (page) /* unmap the last page */
878 kunmap(page);
879
880 if (bit_cnt) { /* Kick recovery if any bits were set */
881 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
882 md_wakeup_thread(bitmap->mddev->thread);
883 }
884
885out:
886 printk(KERN_INFO "%s: bitmap initialized from disk: "
887 "read %lu/%lu pages, set %lu bits, status: %d\n",
888 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, ret);
889
890 return ret;
891}
892
893
894static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
895{
896 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
897 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
898 bitmap->bp[page].count += inc;
899/*
900 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n",
901 (unsigned long long)offset, inc, bitmap->bp[page].count);
902*/
903 bitmap_checkfree(bitmap, page);
904}
905static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
906 sector_t offset, int *blocks,
907 int create);
908
909/*
910 * bitmap daemon -- periodically wakes up to clean bits and flush pages
911 * out to disk
912 */
913
914int bitmap_daemon_work(struct bitmap *bitmap)
915{
916 unsigned long bit, j;
917 unsigned long flags;
918 struct page *page = NULL, *lastpage = NULL;
919 int err = 0;
920 int blocks;
921 int attr;
922
923 if (bitmap == NULL)
924 return 0;
925 if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
926 return 0;
927 bitmap->daemon_lastrun = jiffies;
928
929 for (j = 0; j < bitmap->chunks; j++) {
930 bitmap_counter_t *bmc;
931 spin_lock_irqsave(&bitmap->lock, flags);
932 if (!bitmap->file || !bitmap->filemap) {
933 /* error or shutdown */
934 spin_unlock_irqrestore(&bitmap->lock, flags);
935 break;
936 }
937
938 page = filemap_get_page(bitmap, j);
939 /* skip this page unless it's marked as needing cleaning */
940 if (!((attr=get_page_attr(bitmap, page)) & BITMAP_PAGE_CLEAN)) {
941 if (attr & BITMAP_PAGE_NEEDWRITE) {
942 page_cache_get(page);
943 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
944 }
945 spin_unlock_irqrestore(&bitmap->lock, flags);
946 if (attr & BITMAP_PAGE_NEEDWRITE) {
947 if (write_page(page, 0))
948 bitmap_file_kick(bitmap);
949 page_cache_release(page);
950 }
951 continue;
952 }
953
954 bit = file_page_offset(j);
955
956 if (page != lastpage) {
957 /* grab the new page, sync and release the old */
958 page_cache_get(page);
959 if (lastpage != NULL) {
960 if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) {
961 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
962 spin_unlock_irqrestore(&bitmap->lock, flags);
963 write_page(lastpage, 0);
964 } else {
965 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
966 spin_unlock_irqrestore(&bitmap->lock, flags);
967 }
968 kunmap(lastpage);
969 page_cache_release(lastpage);
970 if (err)
971 bitmap_file_kick(bitmap);
972 } else
973 spin_unlock_irqrestore(&bitmap->lock, flags);
974 lastpage = page;
975 kmap(page);
976/*
977 printk("bitmap clean at page %lu\n", j);
978*/
979 spin_lock_irqsave(&bitmap->lock, flags);
980 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
981 }
982 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
983 &blocks, 0);
984 if (bmc) {
985/*
986 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
987*/
988 if (*bmc == 2) {
989 *bmc=1; /* maybe clear the bit next time */
990 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
991 } else if (*bmc == 1) {
992 /* we can clear the bit */
993 *bmc = 0;
994 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
995 -1);
996
997 /* clear the bit */
998 clear_bit(bit, page_address(page));
999 }
1000 }
1001 spin_unlock_irqrestore(&bitmap->lock, flags);
1002 }
1003
1004 /* now sync the final page */
1005 if (lastpage != NULL) {
1006 kunmap(lastpage);
1007 spin_lock_irqsave(&bitmap->lock, flags);
1008 if (get_page_attr(bitmap, lastpage) &BITMAP_PAGE_NEEDWRITE) {
1009 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1010 spin_unlock_irqrestore(&bitmap->lock, flags);
1011 write_page(lastpage, 0);
1012 } else {
1013 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1014 spin_unlock_irqrestore(&bitmap->lock, flags);
1015 }
1016
1017 page_cache_release(lastpage);
1018 }
1019
1020 return err;
1021}
1022
1023static void daemon_exit(struct bitmap *bitmap, mdk_thread_t **daemon)
1024{
1025 mdk_thread_t *dmn;
1026 unsigned long flags;
1027
1028 /* if no one is waiting on us, we'll free the md thread struct
1029 * and exit, otherwise we let the waiter clean things up */
1030 spin_lock_irqsave(&bitmap->lock, flags);
1031 if ((dmn = *daemon)) { /* no one is waiting, cleanup and exit */
1032 *daemon = NULL;
1033 spin_unlock_irqrestore(&bitmap->lock, flags);
1034 kfree(dmn);
1035 complete_and_exit(NULL, 0); /* do_exit not exported */
1036 }
1037 spin_unlock_irqrestore(&bitmap->lock, flags);
1038}
1039
1040static void bitmap_writeback_daemon(mddev_t *mddev)
1041{
1042 struct bitmap *bitmap = mddev->bitmap;
1043 struct page *page;
1044 struct page_list *item;
1045 int err = 0;
1046
1047 while (1) {
1048 PRINTK("%s: bitmap writeback daemon waiting...\n", bmname(bitmap));
1049 down_interruptible(&bitmap->write_done);
1050 if (signal_pending(current)) {
1051 printk(KERN_INFO
1052 "%s: bitmap writeback daemon got signal, exiting...\n",
1053 bmname(bitmap));
1054 break;
1055 }
1056
1057 PRINTK("%s: bitmap writeback daemon woke up...\n", bmname(bitmap));
1058 /* wait on bitmap page writebacks */
1059 while ((item = dequeue_page(bitmap, &bitmap->complete_pages))) {
1060 page = item->page;
1061 mempool_free(item, bitmap->write_pool);
1062 PRINTK("wait on page writeback: %p %lu\n", page, bitmap->writes_pending);
1063 wait_on_page_writeback(page);
1064 PRINTK("finished page writeback: %p %lu\n", page, bitmap->writes_pending);
1065 spin_lock(&bitmap->write_lock);
1066 if (!--bitmap->writes_pending)
1067 wake_up(&bitmap->write_wait);
1068 spin_unlock(&bitmap->write_lock);
1069 err = PageError(page);
1070 page_cache_release(page);
1071 if (err) {
1072 printk(KERN_WARNING "%s: bitmap file writeback "
1073 "failed (page %lu): %d\n",
1074 bmname(bitmap), page->index, err);
1075 bitmap_file_kick(bitmap);
1076 goto out;
1077 }
1078 }
1079 }
1080out:
1081 if (err) {
1082 printk(KERN_INFO "%s: bitmap writeback daemon exiting (%d)\n",
1083 bmname(bitmap), err);
1084 daemon_exit(bitmap, &bitmap->writeback_daemon);
1085 }
1086 return;
1087}
1088
1089static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
1090 void (*func)(mddev_t *), char *name)
1091{
1092 mdk_thread_t *daemon;
1093 unsigned long flags;
1094 char namebuf[32];
1095
1096 spin_lock_irqsave(&bitmap->lock, flags);
1097 *ptr = NULL;
1098 if (!bitmap->file) /* no need for daemon if there's no backing file */
1099 goto out_unlock;
1100
1101 spin_unlock_irqrestore(&bitmap->lock, flags);
1102
1103#if INJECT_FATAL_FAULT_2
1104 daemon = NULL;
1105#else
1106 sprintf(namebuf, "%%s_%s", name);
1107 daemon = md_register_thread(func, bitmap->mddev, namebuf);
1108#endif
1109 if (!daemon) {
1110 printk(KERN_ERR "%s: failed to start bitmap daemon\n",
1111 bmname(bitmap));
1112 return -ECHILD;
1113 }
1114
1115 spin_lock_irqsave(&bitmap->lock, flags);
1116 *ptr = daemon;
1117
1118 md_wakeup_thread(daemon); /* start it running */
1119
1120 PRINTK("%s: %s daemon (pid %d) started...\n",
1121 bmname(bitmap), name, bitmap->daemon->tsk->pid);
1122out_unlock:
1123 spin_unlock_irqrestore(&bitmap->lock, flags);
1124 return 0;
1125}
1126
1127static int bitmap_start_daemons(struct bitmap *bitmap)
1128{
1129 int err = bitmap_start_daemon(bitmap, &bitmap->writeback_daemon,
1130 bitmap_writeback_daemon, "bitmap_wb");
1131 return err;
1132}
1133
1134static void bitmap_stop_daemon(struct bitmap *bitmap, mdk_thread_t **ptr)
1135{
1136 mdk_thread_t *daemon;
1137 unsigned long flags;
1138
1139 spin_lock_irqsave(&bitmap->lock, flags);
1140 daemon = *ptr;
1141 *ptr = NULL;
1142 spin_unlock_irqrestore(&bitmap->lock, flags);
1143 if (daemon)
1144 md_unregister_thread(daemon); /* destroy the thread */
1145}
1146
1147static void bitmap_stop_daemons(struct bitmap *bitmap)
1148{
1149 /* the daemons can't stop themselves... they'll just exit instead... */
1150 if (bitmap->writeback_daemon &&
1151 current->pid != bitmap->writeback_daemon->tsk->pid)
1152 bitmap_stop_daemon(bitmap, &bitmap->writeback_daemon);
1153}
1154
1155static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1156 sector_t offset, int *blocks,
1157 int create)
1158{
1159 /* If 'create', we might release the lock and reclaim it.
1160 * The lock must have been taken with interrupts enabled.
1161 * If !create, we don't release the lock.
1162 */
1163 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
1164 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1165 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1166 sector_t csize;
1167
1168 if (bitmap_checkpage(bitmap, page, create) < 0) {
1169 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1170 *blocks = csize - (offset & (csize- 1));
1171 return NULL;
1172 }
1173 /* now locked ... */
1174
1175 if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1176 /* should we use the first or second counter field
1177 * of the hijacked pointer? */
1178 int hi = (pageoff > PAGE_COUNTER_MASK);
1179 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
1180 PAGE_COUNTER_SHIFT - 1);
1181 *blocks = csize - (offset & (csize- 1));
1182 return &((bitmap_counter_t *)
1183 &bitmap->bp[page].map)[hi];
1184 } else { /* page is allocated */
1185 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1186 *blocks = csize - (offset & (csize- 1));
1187 return (bitmap_counter_t *)
1188 &(bitmap->bp[page].map[pageoff]);
1189 }
1190}
1191
1192int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors)
1193{
1194 if (!bitmap) return 0;
1195 while (sectors) {
1196 int blocks;
1197 bitmap_counter_t *bmc;
1198
1199 spin_lock_irq(&bitmap->lock);
1200 bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
1201 if (!bmc) {
1202 spin_unlock_irq(&bitmap->lock);
1203 return 0;
1204 }
1205
1206 switch(*bmc) {
1207 case 0:
1208 bitmap_file_set_bit(bitmap, offset);
1209 bitmap_count_page(bitmap,offset, 1);
1210 blk_plug_device(bitmap->mddev->queue);
1211 /* fall through */
1212 case 1:
1213 *bmc = 2;
1214 }
1215 if ((*bmc & COUNTER_MAX) == COUNTER_MAX) BUG();
1216 (*bmc)++;
1217
1218 spin_unlock_irq(&bitmap->lock);
1219
1220 offset += blocks;
1221 if (sectors > blocks)
1222 sectors -= blocks;
1223 else sectors = 0;
1224 }
1225 return 0;
1226}
1227
1228void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1229 int success)
1230{
1231 if (!bitmap) return;
1232 while (sectors) {
1233 int blocks;
1234 unsigned long flags;
1235 bitmap_counter_t *bmc;
1236
1237 spin_lock_irqsave(&bitmap->lock, flags);
1238 bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
1239 if (!bmc) {
1240 spin_unlock_irqrestore(&bitmap->lock, flags);
1241 return;
1242 }
1243
1244 if (!success && ! (*bmc & NEEDED_MASK))
1245 *bmc |= NEEDED_MASK;
1246
1247 (*bmc)--;
1248 if (*bmc <= 2) {
1249 set_page_attr(bitmap,
1250 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1251 BITMAP_PAGE_CLEAN);
1252 }
1253 spin_unlock_irqrestore(&bitmap->lock, flags);
1254 offset += blocks;
1255 if (sectors > blocks)
1256 sectors -= blocks;
1257 else sectors = 0;
1258 }
1259}
1260
1261int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
1262{
1263 bitmap_counter_t *bmc;
1264 int rv;
1265 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1266 *blocks = 1024;
1267 return 1; /* always resync if no bitmap */
1268 }
1269 spin_lock_irq(&bitmap->lock);
1270 bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1271 rv = 0;
1272 if (bmc) {
1273 /* locked */
1274 if (RESYNC(*bmc))
1275 rv = 1;
1276 else if (NEEDED(*bmc)) {
1277 rv = 1;
1278 *bmc |= RESYNC_MASK;
1279 *bmc &= ~NEEDED_MASK;
1280 }
1281 }
1282 spin_unlock_irq(&bitmap->lock);
1283 return rv;
1284}
1285
1286void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
1287{
1288 bitmap_counter_t *bmc;
1289 unsigned long flags;
1290/*
1291 if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted);
1292*/ if (bitmap == NULL) {
1293 *blocks = 1024;
1294 return;
1295 }
1296 spin_lock_irqsave(&bitmap->lock, flags);
1297 bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1298 if (bmc == NULL)
1299 goto unlock;
1300 /* locked */
1301/*
1302 if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks);
1303*/
1304 if (RESYNC(*bmc)) {
1305 *bmc &= ~RESYNC_MASK;
1306
1307 if (!NEEDED(*bmc) && aborted)
1308 *bmc |= NEEDED_MASK;
1309 else {
1310 if (*bmc <= 2) {
1311 set_page_attr(bitmap,
1312 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1313 BITMAP_PAGE_CLEAN);
1314 }
1315 }
1316 }
1317 unlock:
1318 spin_unlock_irqrestore(&bitmap->lock, flags);
1319}
1320
1321void bitmap_close_sync(struct bitmap *bitmap)
1322{
1323 /* Sync has finished, and any bitmap chunks that weren't synced
1324 * properly have been aborted. It remains to us to clear the
1325 * RESYNC bit wherever it is still on
1326 */
1327 sector_t sector = 0;
1328 int blocks;
1329 if (!bitmap) return;
1330 while (sector < bitmap->mddev->resync_max_sectors) {
1331 bitmap_end_sync(bitmap, sector, &blocks, 0);
1332/*
1333 if (sector < 500) printk("bitmap_close_sync: sec %llu blks %d\n",
1334 (unsigned long long)sector, blocks);
1335*/ sector += blocks;
1336 }
1337}
1338
1339static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
1340 unsigned long sectors, int set)
1341{
1342 /* For each chunk covered by any of these sectors, set the
1343 * resync needed bit, and the counter to 1. They should all
1344 * be 0 at this point
1345 */
1346 while (sectors) {
1347 int secs;
1348 bitmap_counter_t *bmc;
1349 spin_lock_irq(&bitmap->lock);
1350 bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
1351 if (!bmc) {
1352 spin_unlock_irq(&bitmap->lock);
1353 return;
1354 }
1355 if (set && !NEEDED(*bmc)) {
1356 BUG_ON(*bmc);
1357 *bmc = NEEDED_MASK | 1;
1358 bitmap_count_page(bitmap, offset, 1);
1359 }
1360 spin_unlock_irq(&bitmap->lock);
1361 if (sectors > secs)
1362 sectors -= secs;
1363 else
1364 sectors = 0;
1365 }
1366}
1367
1368/* dirty the entire bitmap */
1369int bitmap_setallbits(struct bitmap *bitmap)
1370{
1371 unsigned long flags;
1372 unsigned long j;
1373
1374 /* dirty the in-memory bitmap */
1375 bitmap_set_memory_bits(bitmap, 0, bitmap->chunks << CHUNK_BLOCK_SHIFT(bitmap), 1);
1376
1377 /* dirty the bitmap file */
1378 for (j = 0; j < bitmap->file_pages; j++) {
1379 struct page *page = bitmap->filemap[j];
1380
1381 spin_lock_irqsave(&bitmap->lock, flags);
1382 page_cache_get(page);
1383 spin_unlock_irqrestore(&bitmap->lock, flags);
1384 memset(kmap(page), 0xff, PAGE_SIZE);
1385 kunmap(page);
1386 write_page(page, 0);
1387 }
1388
1389 return 0;
1390}
1391
1392/*
1393 * free memory that was allocated
1394 */
1395void bitmap_destroy(mddev_t *mddev)
1396{
1397 unsigned long k, pages;
1398 struct bitmap_page *bp;
1399 struct bitmap *bitmap = mddev->bitmap;
1400
1401 if (!bitmap) /* there was no bitmap */
1402 return;
1403
1404 mddev->bitmap = NULL; /* disconnect from the md device */
1405
1406 /* release the bitmap file and kill the daemon */
1407 bitmap_file_put(bitmap);
1408
1409 bp = bitmap->bp;
1410 pages = bitmap->pages;
1411
1412 /* free all allocated memory */
1413
1414 mempool_destroy(bitmap->write_pool);
1415
1416 if (bp) /* deallocate the page memory */
1417 for (k = 0; k < pages; k++)
1418 if (bp[k].map && !bp[k].hijacked)
1419 kfree(bp[k].map);
1420 kfree(bp);
1421 kfree(bitmap);
1422}
1423
1424/*
1425 * initialize the bitmap structure
1426 * if this returns an error, bitmap_destroy must be called to do clean up
1427 */
1428int bitmap_create(mddev_t *mddev)
1429{
1430 struct bitmap *bitmap;
1431 unsigned long blocks = mddev->resync_max_sectors;
1432 unsigned long chunks;
1433 unsigned long pages;
1434 struct file *file = mddev->bitmap_file;
1435 int err;
1436
1437 BUG_ON(sizeof(bitmap_super_t) != 256);
1438
1439 if (!file) /* bitmap disabled, nothing to do */
1440 return 0;
1441
1442 bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
1443 if (!bitmap)
1444 return -ENOMEM;
1445
1446 memset(bitmap, 0, sizeof(*bitmap));
1447
1448 spin_lock_init(&bitmap->lock);
1449 bitmap->mddev = mddev;
1450 mddev->bitmap = bitmap;
1451
1452 spin_lock_init(&bitmap->write_lock);
1453 init_MUTEX_LOCKED(&bitmap->write_done);
1454 INIT_LIST_HEAD(&bitmap->complete_pages);
1455 init_waitqueue_head(&bitmap->write_wait);
1456 bitmap->write_pool = mempool_create(WRITE_POOL_SIZE, write_pool_alloc,
1457 write_pool_free, NULL);
1458 if (!bitmap->write_pool)
1459 return -ENOMEM;
1460
1461 bitmap->file = file;
1462 get_file(file);
1463 /* read superblock from bitmap file (this sets bitmap->chunksize) */
1464 err = bitmap_read_sb(bitmap);
1465 if (err)
1466 return err;
1467
1468 bitmap->chunkshift = find_first_bit(&bitmap->chunksize,
1469 sizeof(bitmap->chunksize));
1470
1471 /* now that chunksize and chunkshift are set, we can use these macros */
1472 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) /
1473 CHUNK_BLOCK_RATIO(bitmap);
1474 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
1475
1476 BUG_ON(!pages);
1477
1478 bitmap->chunks = chunks;
1479 bitmap->pages = pages;
1480 bitmap->missing_pages = pages;
1481 bitmap->counter_bits = COUNTER_BITS;
1482
1483 bitmap->syncchunk = ~0UL;
1484
1485#if INJECT_FATAL_FAULT_1
1486 bitmap->bp = NULL;
1487#else
1488 bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
1489#endif
1490 if (!bitmap->bp)
1491 return -ENOMEM;
1492 memset(bitmap->bp, 0, pages * sizeof(*bitmap->bp));
1493
1494 bitmap->flags |= BITMAP_ACTIVE;
1495
1496 /* now that we have some pages available, initialize the in-memory
1497 * bitmap from the on-disk bitmap */
1498 err = bitmap_init_from_disk(bitmap);
1499 if (err)
1500 return err;
1501
1502 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1503 pages, bmname(bitmap));
1504
1505 /* kick off the bitmap daemons */
1506 err = bitmap_start_daemons(bitmap);
1507 if (err)
1508 return err;
1509 return bitmap_update_sb(bitmap);
1510}
1511
/*
 * the bitmap API -- for raid personalities
 *
 * These are the entry points exported from this file for use by other
 * modules.
 */
EXPORT_SYMBOL(bitmap_startwrite);
EXPORT_SYMBOL(bitmap_endwrite);
EXPORT_SYMBOL(bitmap_start_sync);
EXPORT_SYMBOL(bitmap_end_sync);
EXPORT_SYMBOL(bitmap_unplug);
EXPORT_SYMBOL(bitmap_close_sync);
EXPORT_SYMBOL(bitmap_daemon_work);