-rw-r--r--	fs/ext4/ext4_jbd2.h |  72
-rw-r--r--	fs/ext4/mballoc.c   | 151
-rw-r--r--	fs/ext4/mballoc.h   |  18
-rw-r--r--	fs/ext4/super.c     |  18
4 files changed, 171 insertions(+), 88 deletions(-)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 95af6f878501..83b20fcf9400 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -104,6 +104,78 @@
 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
 
+/**
+ * struct ext4_journal_cb_entry - Base structure for callback information.
+ *
+ * This struct is a 'seed' structure for use with your own callback
+ * structs. If you are using callbacks you must allocate one of these
+ * or another struct of your own definition which has this struct
+ * as its first element and pass it to ext4_journal_callback_add().
+ */
+struct ext4_journal_cb_entry {
+	/* list information for other callbacks attached to the same handle */
+	struct list_head jce_list;
+
+	/* Function to call with this callback structure */
+	void (*jce_func)(struct super_block *sb,
+			 struct ext4_journal_cb_entry *jce, int error);
+
+	/* user data goes here */
+};
+
+/**
+ * ext4_journal_callback_add: add a function to call after transaction commit
+ * @handle: active journal transaction handle to register callback on
+ * @func: callback function to call after the transaction has committed:
+ *        @sb: superblock of current filesystem for transaction
+ *        @jce: returned journal callback data
+ *        @rc: journal state at commit (0 = transaction committed properly)
+ * @jce: journal callback data (internal and function private data struct)
+ *
+ * The registered function will be called in the context of the journal thread
+ * after the transaction for which the handle was created has completed.
+ *
+ * No locks are held when the callback function is called, so it is safe to
+ * call blocking functions from within the callback, but the callback should
+ * not block or run for too long, or the filesystem will be blocked waiting for
+ * the next transaction to commit. No journaling functions can be used, or
+ * there is a risk of deadlock.
+ *
+ * There is no guaranteed calling order of multiple registered callbacks on
+ * the same transaction.
+ */
+static inline void ext4_journal_callback_add(handle_t *handle,
+			void (*func)(struct super_block *sb,
+				     struct ext4_journal_cb_entry *jce,
+				     int rc),
+			struct ext4_journal_cb_entry *jce)
+{
+	struct ext4_sb_info *sbi =
+			EXT4_SB(handle->h_transaction->t_journal->j_private);
+
+	/* Add the jce to transaction's private list */
+	jce->jce_func = func;
+	spin_lock(&sbi->s_md_lock);
+	list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
+	spin_unlock(&sbi->s_md_lock);
+}
+
+/**
+ * ext4_journal_callback_del: delete a registered callback
+ * @handle: active journal transaction handle on which callback was registered
+ * @jce: registered journal callback entry to unregister
+ */
+static inline void ext4_journal_callback_del(handle_t *handle,
+					     struct ext4_journal_cb_entry *jce)
+{
+	struct ext4_sb_info *sbi =
+			EXT4_SB(handle->h_transaction->t_journal->j_private);
+
+	spin_lock(&sbi->s_md_lock);
+	list_del_init(&jce->jce_list);
+	spin_unlock(&sbi->s_md_lock);
+}
+
 int
 ext4_mark_iloc_dirty(handle_t *handle,
 		     struct inode *inode,
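
The comment block above describes the intended usage pattern; a minimal sketch follows (not part of the patch, and the names my_commit_work, my_commit_done and my_register_work are hypothetical). A caller embeds struct ext4_journal_cb_entry as the first member of its own structure, registers it while the handle is still active, and casts back to the enclosing structure in the callback. This mirrors what the mballoc.c changes below do with struct ext4_free_data and ext4_free_data_callback().

/* Hypothetical example, assuming the ext4 internal headers are available. */
struct my_commit_work {
	struct ext4_journal_cb_entry mcw_jce;	/* must be the first member */
	ext4_group_t mcw_group;			/* caller-private payload */
};

/* Runs in the jbd2 commit thread; rc != 0 means the journal was aborted. */
static void my_commit_done(struct super_block *sb,
			   struct ext4_journal_cb_entry *jce, int rc)
{
	struct my_commit_work *work = (struct my_commit_work *)jce;

	if (rc == 0)
		pr_debug("work for group %u committed\n", work->mcw_group);
	kfree(work);
}

/* Register the callback against a still-active handle. */
static int my_register_work(handle_t *handle, ext4_group_t group)
{
	struct my_commit_work *work = kmalloc(sizeof(*work), GFP_NOFS);

	if (!work)
		return -ENOMEM;
	work->mcw_group = group;
	ext4_journal_callback_add(handle, my_commit_done, &work->mcw_jce);
	return 0;
}
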
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 545fa0256606..2e467718d413 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,6 +21,7 @@
  * mballoc.c contains the multiblocks allocation routines
  */
 
+#include "ext4_jbd2.h"
 #include "mballoc.h"
 #include <linux/debugfs.h>
 #include <linux/slab.h>
@@ -339,7 +340,7 @@
  */
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
-static struct kmem_cache *ext4_free_ext_cachep;
+static struct kmem_cache *ext4_free_data_cachep;
 
 /* We create slab caches for groupinfo data structures based on the
  * superblock block size. There will be one per mounted filesystem for
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group);
-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
+static void ext4_free_data_callback(struct super_block *sb,
+				    struct ext4_journal_cb_entry *jce, int rc);
 
 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
@@ -2473,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
 			 &ext4_mb_seq_groups_fops, sb);
 
-	if (sbi->s_journal)
-		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
-
 	return 0;
 
 out_free_locality_groups:
@@ -2588,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
  * This function is called by the jbd2 layer once the commit has finished,
  * so we know we can free the blocks that were released with that commit.
  */
-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
+static void ext4_free_data_callback(struct super_block *sb,
+				    struct ext4_journal_cb_entry *jce,
+				    int rc)
 {
-	struct super_block *sb = journal->j_private;
+	struct ext4_free_data *entry = (struct ext4_free_data *)jce;
 	struct ext4_buddy e4b;
 	struct ext4_group_info *db;
 	int err, count = 0, count2 = 0;
-	struct ext4_free_data *entry;
-	struct list_head *l, *ltmp;
 
-	list_for_each_safe(l, ltmp, &txn->t_private_list) {
-		entry = list_entry(l, struct ext4_free_data, list);
-
-		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-			 entry->count, entry->group, entry);
-
-		if (test_opt(sb, DISCARD))
-			ext4_issue_discard(sb, entry->group,
-					   entry->start_cluster, entry->count);
-
-		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
-		/* we expect to find existing buddy because it's pinned */
-		BUG_ON(err != 0);
-
-		db = e4b.bd_info;
-		/* there are blocks to put in buddy to make them really free */
-		count += entry->count;
-		count2++;
-		ext4_lock_group(sb, entry->group);
-		/* Take it out of per group rb tree */
-		rb_erase(&entry->node, &(db->bb_free_root));
-		mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
-
-		/*
-		 * Clear the trimmed flag for the group so that the next
-		 * ext4_trim_fs can trim it.
-		 * If the volume is mounted with -o discard, online discard
-		 * is supported and the free blocks will be trimmed online.
-		 */
-		if (!test_opt(sb, DISCARD))
-			EXT4_MB_GRP_CLEAR_TRIMMED(db);
-
-		if (!db->bb_free_root.rb_node) {
-			/* No more items in the per group rb tree
-			 * balance refcounts from ext4_mb_free_metadata()
-			 */
-			page_cache_release(e4b.bd_buddy_page);
-			page_cache_release(e4b.bd_bitmap_page);
-		}
-		ext4_unlock_group(sb, entry->group);
-		kmem_cache_free(ext4_free_ext_cachep, entry);
-		ext4_mb_unload_buddy(&e4b);
+	mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+		 entry->efd_count, entry->efd_group, entry);
+
+	if (test_opt(sb, DISCARD))
+		ext4_issue_discard(sb, entry->efd_group,
+				   entry->efd_start_cluster, entry->efd_count);
+
+	err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
+	/* we expect to find existing buddy because it's pinned */
+	BUG_ON(err != 0);
+
+
+	db = e4b.bd_info;
+	/* there are blocks to put in buddy to make them really free */
+	count += entry->efd_count;
+	count2++;
+	ext4_lock_group(sb, entry->efd_group);
+	/* Take it out of per group rb tree */
+	rb_erase(&entry->efd_node, &(db->bb_free_root));
+	mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
+
+	/*
+	 * Clear the trimmed flag for the group so that the next
+	 * ext4_trim_fs can trim it.
+	 * If the volume is mounted with -o discard, online discard
+	 * is supported and the free blocks will be trimmed online.
+	 */
+	if (!test_opt(sb, DISCARD))
+		EXT4_MB_GRP_CLEAR_TRIMMED(db);
+
+	if (!db->bb_free_root.rb_node) {
+		/* No more items in the per group rb tree
+		 * balance refcounts from ext4_mb_free_metadata()
+		 */
+		page_cache_release(e4b.bd_buddy_page);
+		page_cache_release(e4b.bd_bitmap_page);
 	}
+	ext4_unlock_group(sb, entry->efd_group);
+	kmem_cache_free(ext4_free_data_cachep, entry);
+	ext4_mb_unload_buddy(&e4b);
 
 	mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
 }
@@ -2692,9 +2688,9 @@ int __init ext4_init_mballoc(void)
 		return -ENOMEM;
 	}
 
-	ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
-				     SLAB_RECLAIM_ACCOUNT);
-	if (ext4_free_ext_cachep == NULL) {
+	ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
+					   SLAB_RECLAIM_ACCOUNT);
+	if (ext4_free_data_cachep == NULL) {
 		kmem_cache_destroy(ext4_pspace_cachep);
 		kmem_cache_destroy(ext4_ac_cachep);
 		return -ENOMEM;
@@ -2712,7 +2708,7 @@ void ext4_exit_mballoc(void)
 	rcu_barrier();
 	kmem_cache_destroy(ext4_pspace_cachep);
 	kmem_cache_destroy(ext4_ac_cachep);
-	kmem_cache_destroy(ext4_free_ext_cachep);
+	kmem_cache_destroy(ext4_free_data_cachep);
 	ext4_groupinfo_destroy_slabs();
 	ext4_remove_debugfs_entry();
 }
@@ -3272,8 +3268,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 	n = rb_first(&(grp->bb_free_root));
 
 	while (n) {
-		entry = rb_entry(n, struct ext4_free_data, node);
-		ext4_set_bits(bitmap, entry->start_cluster, entry->count);
+		entry = rb_entry(n, struct ext4_free_data, efd_node);
+		ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
 		n = rb_next(n);
 	}
 	return;
@@ -4379,9 +4375,9 @@ out:
 static int can_merge(struct ext4_free_data *entry1,
 		     struct ext4_free_data *entry2)
 {
-	if ((entry1->t_tid == entry2->t_tid) &&
-	    (entry1->group == entry2->group) &&
-	    ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
+	if ((entry1->efd_tid == entry2->efd_tid) &&
+	    (entry1->efd_group == entry2->efd_group) &&
+	    ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
 		return 1;
 	return 0;
 }
@@ -4403,8 +4399,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	BUG_ON(e4b->bd_bitmap_page == NULL);
 	BUG_ON(e4b->bd_buddy_page == NULL);
 
-	new_node = &new_entry->node;
-	cluster = new_entry->start_cluster;
+	new_node = &new_entry->efd_node;
+	cluster = new_entry->efd_start_cluster;
 
 	if (!*n) {
 		/* first free block exent. We need to
@@ -4417,10 +4413,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	}
 	while (*n) {
 		parent = *n;
-		entry = rb_entry(parent, struct ext4_free_data, node);
-		if (cluster < entry->start_cluster)
+		entry = rb_entry(parent, struct ext4_free_data, efd_node);
+		if (cluster < entry->efd_start_cluster)
 			n = &(*n)->rb_left;
-		else if (cluster >= (entry->start_cluster + entry->count))
+		else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
 			n = &(*n)->rb_right;
 		else {
 			ext4_grp_locked_error(sb, group, 0,
@@ -4437,34 +4433,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	/* Now try to see the extent can be merged to left and right */
 	node = rb_prev(new_node);
 	if (node) {
-		entry = rb_entry(node, struct ext4_free_data, node);
+		entry = rb_entry(node, struct ext4_free_data, efd_node);
 		if (can_merge(entry, new_entry)) {
-			new_entry->start_cluster = entry->start_cluster;
-			new_entry->count += entry->count;
+			new_entry->efd_start_cluster = entry->efd_start_cluster;
+			new_entry->efd_count += entry->efd_count;
 			rb_erase(node, &(db->bb_free_root));
-			spin_lock(&sbi->s_md_lock);
-			list_del(&entry->list);
-			spin_unlock(&sbi->s_md_lock);
-			kmem_cache_free(ext4_free_ext_cachep, entry);
+			ext4_journal_callback_del(handle, &entry->efd_jce);
+			kmem_cache_free(ext4_free_data_cachep, entry);
 		}
 	}
 
 	node = rb_next(new_node);
 	if (node) {
-		entry = rb_entry(node, struct ext4_free_data, node);
+		entry = rb_entry(node, struct ext4_free_data, efd_node);
 		if (can_merge(new_entry, entry)) {
-			new_entry->count += entry->count;
+			new_entry->efd_count += entry->efd_count;
 			rb_erase(node, &(db->bb_free_root));
-			spin_lock(&sbi->s_md_lock);
-			list_del(&entry->list);
-			spin_unlock(&sbi->s_md_lock);
-			kmem_cache_free(ext4_free_ext_cachep, entry);
+			ext4_journal_callback_del(handle, &entry->efd_jce);
+			kmem_cache_free(ext4_free_data_cachep, entry);
 		}
 	}
 	/* Add the extent to transaction's private list */
-	spin_lock(&sbi->s_md_lock);
-	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
-	spin_unlock(&sbi->s_md_lock);
+	ext4_journal_callback_add(handle, ext4_free_data_callback,
+				  &new_entry->efd_jce);
 	return 0;
 }
 
@@ -4642,15 +4633,15 @@ do_more:
 		 * blocks being freed are metadata. these blocks shouldn't
 		 * be used until this transaction is committed
 		 */
-		new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+		new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
 		if (!new_entry) {
 			err = -ENOMEM;
 			goto error_return;
 		}
-		new_entry->start_cluster = bit;
-		new_entry->group = block_group;
-		new_entry->count = count_clusters;
-		new_entry->t_tid = handle->h_transaction->t_tid;
+		new_entry->efd_start_cluster = bit;
+		new_entry->efd_group = block_group;
+		new_entry->efd_count = count_clusters;
+		new_entry->efd_tid = handle->h_transaction->t_tid;
 
 		ext4_lock_group(sb, block_group);
 		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 47705f3285e3..7128f46d8501 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -96,21 +96,23 @@ extern u8 mb_enable_debug;
 
 
 struct ext4_free_data {
-	/* this links the free block information from group_info */
-	struct rb_node node;
+	/* MUST be the first member */
+	struct ext4_journal_cb_entry efd_jce;
+
+	/* ext4_free_data private data starts from here */
 
-	/* this links the free block information from ext4_sb_info */
-	struct list_head list;
+	/* this links the free block information from group_info */
+	struct rb_node efd_node;
 
 	/* group which free block extent belongs */
-	ext4_group_t group;
+	ext4_group_t efd_group;
 
 	/* free block extent */
-	ext4_grpblk_t start_cluster;
-	ext4_grpblk_t count;
+	ext4_grpblk_t efd_start_cluster;
+	ext4_grpblk_t efd_count;
 
 	/* transaction which freed this extent */
-	tid_t t_tid;
+	tid_t efd_tid;
 };
 
 struct ext4_prealloc_space {
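
One note on the "MUST be the first member" comment above: ext4_free_data_callback() in mballoc.c recovers the entry with a plain cast from the struct ext4_journal_cb_entry pointer, which stays correct only while efd_jce remains the first field. A container_of()-based helper (a hypothetical sketch, not in the patch; efd_from_jce is an invented name) would not depend on the field's position:

/* Hypothetical helper: recover the enclosing ext4_free_data regardless
 * of where efd_jce sits inside the structure. */
static inline struct ext4_free_data *efd_from_jce(struct ext4_journal_cb_entry *jce)
{
	return container_of(jce, struct ext4_free_data, efd_jce);
}
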
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 502c61fd7392..3e697ec7feca 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -431,6 +431,22 @@ static int block_device_ejected(struct super_block *sb)
 	return bdi->dev == NULL;
 }
 
+static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
+{
+	struct super_block *sb = journal->j_private;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	int error = is_journal_aborted(journal);
+	struct ext4_journal_cb_entry *jce, *tmp;
+
+	spin_lock(&sbi->s_md_lock);
+	list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
+		list_del_init(&jce->jce_list);
+		spin_unlock(&sbi->s_md_lock);
+		jce->jce_func(sb, jce, error);
+		spin_lock(&sbi->s_md_lock);
+	}
+	spin_unlock(&sbi->s_md_lock);
+}
 
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
@@ -3694,6 +3710,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
+	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
+
 	/*
 	 * The journal may have updated the bg summary counts, so we
 	 * need to update the global counters.