diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-30 10:24:55 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-30 10:24:55 -0400 |
commit | 4660d3d240ac6c92cd3ad33657ca302026bdc24b (patch) | |
tree | b4ef5c5d2d1e127e9ea80bd047247fdb2d1547ed | |
parent | be3fd3cc7c2142c46d5dcfec05e6031990d1f2ca (diff) | |
parent | 6be7fa06eb4d721df734bd0946b5e63b27c0589b (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/joern/logfs
* git://git.kernel.org/pub/scm/linux/kernel/git/joern/logfs:
[LogFS] Erase new journal segments
[LogFS] Move reserved segments with journal
[LogFS] Clear PagePrivate when moving journal
Simplify and fix pad_wbuf
Prevent data corruption in logfs_rewrite_block()
Use deactivate_locked_super
Fix logfs_get_sb_final error path
Write out both superblocks on mismatch
Prevent schedule while atomic in __logfs_readdir
Plug memory leak in writeseg_end_io
Limit max_pages for insane devices
Open segment file before using it
-rw-r--r-- | fs/logfs/dev_bdev.c | 9 | ||||
-rw-r--r-- | fs/logfs/dir.c | 4 | ||||
-rw-r--r-- | fs/logfs/journal.c | 7 | ||||
-rw-r--r-- | fs/logfs/logfs.h | 1 | ||||
-rw-r--r-- | fs/logfs/readwrite.c | 13 | ||||
-rw-r--r-- | fs/logfs/segment.c | 54 | ||||
-rw-r--r-- | fs/logfs/super.c | 15 |
7 files changed, 67 insertions, 36 deletions
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 9718c22f186d..a5d0c56d3ebc 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c | |||
@@ -80,6 +80,7 @@ static void writeseg_end_io(struct bio *bio, int err) | |||
80 | prefetchw(&bvec->bv_page->flags); | 80 | prefetchw(&bvec->bv_page->flags); |
81 | 81 | ||
82 | end_page_writeback(page); | 82 | end_page_writeback(page); |
83 | page_cache_release(page); | ||
83 | } while (bvec >= bio->bi_io_vec); | 84 | } while (bvec >= bio->bi_io_vec); |
84 | bio_put(bio); | 85 | bio_put(bio); |
85 | if (atomic_dec_and_test(&super->s_pending_writes)) | 86 | if (atomic_dec_and_test(&super->s_pending_writes)) |
@@ -97,8 +98,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
97 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); | 98 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); |
98 | int i; | 99 | int i; |
99 | 100 | ||
101 | if (max_pages > BIO_MAX_PAGES) | ||
102 | max_pages = BIO_MAX_PAGES; | ||
100 | bio = bio_alloc(GFP_NOFS, max_pages); | 103 | bio = bio_alloc(GFP_NOFS, max_pages); |
101 | BUG_ON(!bio); /* FIXME: handle this */ | 104 | BUG_ON(!bio); |
102 | 105 | ||
103 | for (i = 0; i < nr_pages; i++) { | 106 | for (i = 0; i < nr_pages; i++) { |
104 | if (i >= max_pages) { | 107 | if (i >= max_pages) { |
@@ -191,8 +194,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, | |||
191 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); | 194 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); |
192 | int i; | 195 | int i; |
193 | 196 | ||
197 | if (max_pages > BIO_MAX_PAGES) | ||
198 | max_pages = BIO_MAX_PAGES; | ||
194 | bio = bio_alloc(GFP_NOFS, max_pages); | 199 | bio = bio_alloc(GFP_NOFS, max_pages); |
195 | BUG_ON(!bio); /* FIXME: handle this */ | 200 | BUG_ON(!bio); |
196 | 201 | ||
197 | for (i = 0; i < nr_pages; i++) { | 202 | for (i = 0; i < nr_pages; i++) { |
198 | if (i >= max_pages) { | 203 | if (i >= max_pages) { |
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 56a8bfbb0120..c76b4b5c7ff6 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) | |||
303 | (filler_t *)logfs_readpage, NULL); | 303 | (filler_t *)logfs_readpage, NULL); |
304 | if (IS_ERR(page)) | 304 | if (IS_ERR(page)) |
305 | return PTR_ERR(page); | 305 | return PTR_ERR(page); |
306 | dd = kmap_atomic(page, KM_USER0); | 306 | dd = kmap(page); |
307 | BUG_ON(dd->namelen == 0); | 307 | BUG_ON(dd->namelen == 0); |
308 | 308 | ||
309 | full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), | 309 | full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), |
310 | pos, be64_to_cpu(dd->ino), dd->type); | 310 | pos, be64_to_cpu(dd->ino), dd->type); |
311 | kunmap_atomic(dd, KM_USER0); | 311 | kunmap(page); |
312 | page_cache_release(page); | 312 | page_cache_release(page); |
313 | if (full) | 313 | if (full) |
314 | break; | 314 | break; |
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 6ad30a4c9052..d57c7b07b60b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c | |||
@@ -800,6 +800,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) | |||
800 | { | 800 | { |
801 | struct logfs_super *super = logfs_super(sb); | 801 | struct logfs_super *super = logfs_super(sb); |
802 | struct logfs_area *area = super->s_journal_area; | 802 | struct logfs_area *area = super->s_journal_area; |
803 | struct btree_head32 *head = &super->s_reserved_segments; | ||
803 | u32 segno, ec; | 804 | u32 segno, ec; |
804 | int i, err; | 805 | int i, err; |
805 | 806 | ||
@@ -807,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) | |||
807 | /* Drop old segments */ | 808 | /* Drop old segments */ |
808 | journal_for_each(i) | 809 | journal_for_each(i) |
809 | if (super->s_journal_seg[i]) { | 810 | if (super->s_journal_seg[i]) { |
811 | btree_remove32(head, super->s_journal_seg[i]); | ||
810 | logfs_set_segment_unreserved(sb, | 812 | logfs_set_segment_unreserved(sb, |
811 | super->s_journal_seg[i], | 813 | super->s_journal_seg[i], |
812 | super->s_journal_ec[i]); | 814 | super->s_journal_ec[i]); |
@@ -819,8 +821,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb) | |||
819 | super->s_journal_seg[i] = segno; | 821 | super->s_journal_seg[i] = segno; |
820 | super->s_journal_ec[i] = ec; | 822 | super->s_journal_ec[i] = ec; |
821 | logfs_set_segment_reserved(sb, segno); | 823 | logfs_set_segment_reserved(sb, segno); |
824 | err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); | ||
825 | BUG_ON(err); /* mempool should prevent this */ | ||
826 | err = logfs_erase_segment(sb, segno, 1); | ||
827 | BUG_ON(err); /* FIXME: remount-ro would be nicer */ | ||
822 | } | 828 | } |
823 | /* Manually move journal_area */ | 829 | /* Manually move journal_area */ |
830 | freeseg(sb, area->a_segno); | ||
824 | area->a_segno = super->s_journal_seg[0]; | 831 | area->a_segno = super->s_journal_seg[0]; |
825 | area->a_is_open = 0; | 832 | area->a_is_open = 0; |
826 | area->a_used_bytes = 0; | 833 | area->a_used_bytes = 0; |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 129779431373..b84b0eec6024 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -587,6 +587,7 @@ void move_page_to_btree(struct page *page); | |||
587 | int logfs_init_mapping(struct super_block *sb); | 587 | int logfs_init_mapping(struct super_block *sb); |
588 | void logfs_sync_area(struct logfs_area *area); | 588 | void logfs_sync_area(struct logfs_area *area); |
589 | void logfs_sync_segments(struct super_block *sb); | 589 | void logfs_sync_segments(struct super_block *sb); |
590 | void freeseg(struct super_block *sb, u32 segno); | ||
590 | 591 | ||
591 | /* area handling */ | 592 | /* area handling */ |
592 | int logfs_init_areas(struct super_block *sb); | 593 | int logfs_init_areas(struct super_block *sb); |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7a23b3e7c0a7..c3a3a6814b84 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -1594,7 +1594,6 @@ int logfs_delete(struct inode *inode, pgoff_t index, | |||
1594 | return ret; | 1594 | return ret; |
1595 | } | 1595 | } |
1596 | 1596 | ||
1597 | /* Rewrite cannot mark the inode dirty but has to write it immediatly. */ | ||
1598 | int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, | 1597 | int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, |
1599 | gc_level_t gc_level, long flags) | 1598 | gc_level_t gc_level, long flags) |
1600 | { | 1599 | { |
@@ -1611,6 +1610,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, | |||
1611 | if (level != 0) | 1610 | if (level != 0) |
1612 | alloc_indirect_block(inode, page, 0); | 1611 | alloc_indirect_block(inode, page, 0); |
1613 | err = logfs_write_buf(inode, page, flags); | 1612 | err = logfs_write_buf(inode, page, flags); |
1613 | if (!err && shrink_level(gc_level) == 0) { | ||
1614 | /* Rewrite cannot mark the inode dirty but has to | ||
1615 | * write it immediatly. | ||
1616 | * Q: Can't we just create an alias for the inode | ||
1617 | * instead? And if not, why not? | ||
1618 | */ | ||
1619 | if (inode->i_ino == LOGFS_INO_MASTER) | ||
1620 | logfs_write_anchor(inode->i_sb); | ||
1621 | else { | ||
1622 | err = __logfs_write_inode(inode, flags); | ||
1623 | } | ||
1624 | } | ||
1614 | } | 1625 | } |
1615 | logfs_put_write_page(page); | 1626 | logfs_put_write_page(page); |
1616 | return err; | 1627 | return err; |
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 1a14f9910d55..0ecd8f07c11e 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -93,50 +93,58 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, | |||
93 | } while (len); | 93 | } while (len); |
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | static void pad_partial_page(struct logfs_area *area) |
97 | * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. | ||
98 | */ | ||
99 | static void pad_wbuf(struct logfs_area *area, int final) | ||
100 | { | 97 | { |
101 | struct super_block *sb = area->a_sb; | 98 | struct super_block *sb = area->a_sb; |
102 | struct logfs_super *super = logfs_super(sb); | ||
103 | struct page *page; | 99 | struct page *page; |
104 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | 100 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); |
105 | pgoff_t index = ofs >> PAGE_SHIFT; | 101 | pgoff_t index = ofs >> PAGE_SHIFT; |
106 | long offset = ofs & (PAGE_SIZE-1); | 102 | long offset = ofs & (PAGE_SIZE-1); |
107 | u32 len = PAGE_SIZE - offset; | 103 | u32 len = PAGE_SIZE - offset; |
108 | 104 | ||
109 | if (len == PAGE_SIZE) { | 105 | if (len % PAGE_SIZE) { |
110 | /* The math in this function can surely use some love */ | 106 | page = get_mapping_page(sb, index, 0); |
111 | len = 0; | ||
112 | } | ||
113 | if (len) { | ||
114 | BUG_ON(area->a_used_bytes >= super->s_segsize); | ||
115 | |||
116 | page = get_mapping_page(area->a_sb, index, 0); | ||
117 | BUG_ON(!page); /* FIXME: reserve a pool */ | 107 | BUG_ON(!page); /* FIXME: reserve a pool */ |
118 | memset(page_address(page) + offset, 0xff, len); | 108 | memset(page_address(page) + offset, 0xff, len); |
119 | SetPagePrivate(page); | 109 | SetPagePrivate(page); |
120 | page_cache_release(page); | 110 | page_cache_release(page); |
121 | } | 111 | } |
112 | } | ||
122 | 113 | ||
123 | if (!final) | 114 | static void pad_full_pages(struct logfs_area *area) |
124 | return; | 115 | { |
116 | struct super_block *sb = area->a_sb; | ||
117 | struct logfs_super *super = logfs_super(sb); | ||
118 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | ||
119 | u32 len = super->s_segsize - area->a_used_bytes; | ||
120 | pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; | ||
121 | pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; | ||
122 | struct page *page; | ||
125 | 123 | ||
126 | area->a_used_bytes += len; | 124 | while (no_indizes) { |
127 | for ( ; area->a_used_bytes < super->s_segsize; | 125 | page = get_mapping_page(sb, index, 0); |
128 | area->a_used_bytes += PAGE_SIZE) { | ||
129 | /* Memset another page */ | ||
130 | index++; | ||
131 | page = get_mapping_page(area->a_sb, index, 0); | ||
132 | BUG_ON(!page); /* FIXME: reserve a pool */ | 126 | BUG_ON(!page); /* FIXME: reserve a pool */ |
133 | memset(page_address(page), 0xff, PAGE_SIZE); | 127 | SetPageUptodate(page); |
128 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
134 | SetPagePrivate(page); | 129 | SetPagePrivate(page); |
135 | page_cache_release(page); | 130 | page_cache_release(page); |
131 | index++; | ||
132 | no_indizes--; | ||
136 | } | 133 | } |
137 | } | 134 | } |
138 | 135 | ||
139 | /* | 136 | /* |
137 | * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. | ||
138 | * Also make sure we allocate (and memset) all pages for final writeout. | ||
139 | */ | ||
140 | static void pad_wbuf(struct logfs_area *area, int final) | ||
141 | { | ||
142 | pad_partial_page(area); | ||
143 | if (final) | ||
144 | pad_full_pages(area); | ||
145 | } | ||
146 | |||
147 | /* | ||
140 | * We have to be careful with the alias tree. Since lookup is done by bix, | 148 | * We have to be careful with the alias tree. Since lookup is done by bix, |
141 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with | 149 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with |
142 | * indirect blocks. So always use it through accessor functions. | 150 | * indirect blocks. So always use it through accessor functions. |
@@ -683,7 +691,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) | |||
683 | return 0; | 691 | return 0; |
684 | } | 692 | } |
685 | 693 | ||
686 | static void freeseg(struct super_block *sb, u32 segno) | 694 | void freeseg(struct super_block *sb, u32 segno) |
687 | { | 695 | { |
688 | struct logfs_super *super = logfs_super(sb); | 696 | struct logfs_super *super = logfs_super(sb); |
689 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | 697 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
diff --git a/fs/logfs/super.c b/fs/logfs/super.c index c66beab78dee..9d856c49afc5 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c | |||
@@ -277,7 +277,7 @@ static int logfs_recover_sb(struct super_block *sb) | |||
277 | } | 277 | } |
278 | if (valid0 && valid1 && ds_cmp(ds0, ds1)) { | 278 | if (valid0 && valid1 && ds_cmp(ds0, ds1)) { |
279 | printk(KERN_INFO"Superblocks don't match - fixing.\n"); | 279 | printk(KERN_INFO"Superblocks don't match - fixing.\n"); |
280 | return write_one_sb(sb, super->s_devops->find_last_sb); | 280 | return logfs_write_sb(sb); |
281 | } | 281 | } |
282 | /* If neither is valid now, something's wrong. Didn't we properly | 282 | /* If neither is valid now, something's wrong. Didn't we properly |
283 | * check them before?!? */ | 283 | * check them before?!? */ |
@@ -289,6 +289,10 @@ static int logfs_make_writeable(struct super_block *sb) | |||
289 | { | 289 | { |
290 | int err; | 290 | int err; |
291 | 291 | ||
292 | err = logfs_open_segfile(sb); | ||
293 | if (err) | ||
294 | return err; | ||
295 | |||
292 | /* Repair any broken superblock copies */ | 296 | /* Repair any broken superblock copies */ |
293 | err = logfs_recover_sb(sb); | 297 | err = logfs_recover_sb(sb); |
294 | if (err) | 298 | if (err) |
@@ -299,10 +303,6 @@ static int logfs_make_writeable(struct super_block *sb) | |||
299 | if (err) | 303 | if (err) |
300 | return err; | 304 | return err; |
301 | 305 | ||
302 | err = logfs_open_segfile(sb); | ||
303 | if (err) | ||
304 | return err; | ||
305 | |||
306 | /* Do one GC pass before any data gets dirtied */ | 306 | /* Do one GC pass before any data gets dirtied */ |
307 | logfs_gc_pass(sb); | 307 | logfs_gc_pass(sb); |
308 | 308 | ||
@@ -328,7 +328,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) | |||
328 | 328 | ||
329 | sb->s_root = d_alloc_root(rootdir); | 329 | sb->s_root = d_alloc_root(rootdir); |
330 | if (!sb->s_root) | 330 | if (!sb->s_root) |
331 | goto fail; | 331 | goto fail2; |
332 | 332 | ||
333 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); | 333 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); |
334 | if (!super->s_erase_page) | 334 | if (!super->s_erase_page) |
@@ -572,8 +572,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, | |||
572 | return 0; | 572 | return 0; |
573 | 573 | ||
574 | err1: | 574 | err1: |
575 | up_write(&sb->s_umount); | 575 | deactivate_locked_super(sb); |
576 | deactivate_super(sb); | ||
577 | return err; | 576 | return err; |
578 | err0: | 577 | err0: |
579 | kfree(super); | 578 | kfree(super); |