aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2007-06-22 18:45:27 -0400
committerMark Fasheh <mark.fasheh@oracle.com>2007-07-10 20:31:54 -0400
commit2b604351bc99b4e4504758cbac369b660b71de0b (patch)
tree293fa51f1ae9d19db0d09c721cc8433303cc8974
parentbce997682fe3121516f5a20cf7bad2e6029ba018 (diff)
ocfs2: simplify deallocation locking
Deallocation of suballocator blocks, most notably extent blocks, might involve multiple suballocator inodes. The locking for this can get extremely complicated, especially when the suballocator inodes to delete from aren't known until deep within an unrelated codepath. Implement a simple scheme for recording the blocks to be unlinked so that the actual deallocation can be done in a context which won't deadlock. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r--fs/ocfs2/alloc.c204
-rw-r--r--fs/ocfs2/alloc.h19
-rw-r--r--fs/ocfs2/suballoc.c27
-rw-r--r--fs/ocfs2/suballoc.h13
4 files changed, 242 insertions, 21 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 02b6e7af8edb..873bb99fc2ff 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2957,6 +2957,210 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
2957 return status; 2957 return status;
2958} 2958}
2959 2959
2960/*
2961 * Delayed de-allocation of suballocator blocks.
2962 *
2963 * Some sets of block de-allocations might involve multiple suballocator inodes.
2964 *
2965 * The locking for this can get extremely complicated, especially when
2966 * the suballocator inodes to delete from aren't known until deep
2967 * within an unrelated codepath.
2968 *
2969 * ocfs2_extent_block structures are a good example of this - an inode
2970 * btree could have been grown by any number of nodes each allocating
2971 * out of their own suballoc inode.
2972 *
2973 * These structures allow the delay of block de-allocation until a
2974 * later time, when locking of multiple cluster inodes won't cause
2975 * deadlock.
2976 */
2977
2978/*
2979 * Describes a single block free from a suballocator
2980 */
2981struct ocfs2_cached_block_free {
2982 struct ocfs2_cached_block_free *free_next;
2983 u64 free_blk;
2984 unsigned int free_bit;
2985};
2986
/*
 * All deferred block frees destined for a single suballocator system
 * file, identified by the (f_inode_type, f_slot) pair.
 */
struct ocfs2_per_slot_free_list {
	/* Next per-suballocator list in the owning dealloc context. */
	struct ocfs2_per_slot_free_list		*f_next_suballocator;
	/* System file type handed to ocfs2_get_system_file_inode(). */
	int					f_inode_type;
	/* Slot number handed to ocfs2_get_system_file_inode(). */
	int					f_slot;
	/* Head of the list of blocks waiting to be freed (may be NULL). */
	struct ocfs2_cached_block_free		*f_first;
};
2993
2994static int ocfs2_free_cached_items(struct ocfs2_super *osb,
2995 int sysfile_type,
2996 int slot,
2997 struct ocfs2_cached_block_free *head)
2998{
2999 int ret;
3000 u64 bg_blkno;
3001 handle_t *handle;
3002 struct inode *inode;
3003 struct buffer_head *di_bh = NULL;
3004 struct ocfs2_cached_block_free *tmp;
3005
3006 inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
3007 if (!inode) {
3008 ret = -EINVAL;
3009 mlog_errno(ret);
3010 goto out;
3011 }
3012
3013 mutex_lock(&inode->i_mutex);
3014
3015 ret = ocfs2_meta_lock(inode, &di_bh, 1);
3016 if (ret) {
3017 mlog_errno(ret);
3018 goto out_mutex;
3019 }
3020
3021 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
3022 if (IS_ERR(handle)) {
3023 ret = PTR_ERR(handle);
3024 mlog_errno(ret);
3025 goto out_unlock;
3026 }
3027
3028 while (head) {
3029 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
3030 head->free_bit);
3031 mlog(0, "Free bit: (bit %u, blkno %llu)\n",
3032 head->free_bit, (unsigned long long)head->free_blk);
3033
3034 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
3035 head->free_bit, bg_blkno, 1);
3036 if (ret) {
3037 mlog_errno(ret);
3038 goto out_journal;
3039 }
3040
3041 ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
3042 if (ret) {
3043 mlog_errno(ret);
3044 goto out_journal;
3045 }
3046
3047 tmp = head;
3048 head = head->free_next;
3049 kfree(tmp);
3050 }
3051
3052out_journal:
3053 ocfs2_commit_trans(osb, handle);
3054
3055out_unlock:
3056 ocfs2_meta_unlock(inode, 1);
3057 brelse(di_bh);
3058out_mutex:
3059 mutex_unlock(&inode->i_mutex);
3060 iput(inode);
3061out:
3062 while(head) {
3063 /* Premature exit may have left some dangling items. */
3064 tmp = head;
3065 head = head->free_next;
3066 kfree(tmp);
3067 }
3068
3069 return ret;
3070}
3071
3072int ocfs2_run_deallocs(struct ocfs2_super *osb,
3073 struct ocfs2_cached_dealloc_ctxt *ctxt)
3074{
3075 int ret = 0, ret2;
3076 struct ocfs2_per_slot_free_list *fl;
3077
3078 if (!ctxt)
3079 return 0;
3080
3081 while (ctxt->c_first_suballocator) {
3082 fl = ctxt->c_first_suballocator;
3083
3084 if (fl->f_first) {
3085 mlog(0, "Free items: (type %u, slot %d)\n",
3086 fl->f_inode_type, fl->f_slot);
3087 ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
3088 fl->f_slot, fl->f_first);
3089 if (ret2)
3090 mlog_errno(ret2);
3091 if (!ret)
3092 ret = ret2;
3093 }
3094
3095 ctxt->c_first_suballocator = fl->f_next_suballocator;
3096 kfree(fl);
3097 }
3098
3099 return ret;
3100}
3101
3102static struct ocfs2_per_slot_free_list *
3103ocfs2_find_per_slot_free_list(int type,
3104 int slot,
3105 struct ocfs2_cached_dealloc_ctxt *ctxt)
3106{
3107 struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
3108
3109 while (fl) {
3110 if (fl->f_inode_type == type && fl->f_slot == slot)
3111 return fl;
3112
3113 fl = fl->f_next_suballocator;
3114 }
3115
3116 fl = kmalloc(sizeof(*fl), GFP_NOFS);
3117 if (fl) {
3118 fl->f_inode_type = type;
3119 fl->f_slot = slot;
3120 fl->f_first = NULL;
3121 fl->f_next_suballocator = ctxt->c_first_suballocator;
3122
3123 ctxt->c_first_suballocator = fl;
3124 }
3125 return fl;
3126}
3127
3128static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
3129 int type, int slot, u64 blkno,
3130 unsigned int bit)
3131{
3132 int ret;
3133 struct ocfs2_per_slot_free_list *fl;
3134 struct ocfs2_cached_block_free *item;
3135
3136 fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
3137 if (fl == NULL) {
3138 ret = -ENOMEM;
3139 mlog_errno(ret);
3140 goto out;
3141 }
3142
3143 item = kmalloc(sizeof(*item), GFP_NOFS);
3144 if (item == NULL) {
3145 ret = -ENOMEM;
3146 mlog_errno(ret);
3147 goto out;
3148 }
3149
3150 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
3151 type, slot, bit, (unsigned long long)blkno);
3152
3153 item->free_blk = blkno;
3154 item->free_bit = bit;
3155 item->free_next = fl->f_first;
3156
3157 fl->f_first = item;
3158
3159 ret = 0;
3160out:
3161 return ret;
3162}
3163
2960/* This function will figure out whether the currently last extent 3164/* This function will figure out whether the currently last extent
2961 * block will be deleted, and if it will, what the new last extent 3165 * block will be deleted, and if it will, what the new last extent
2962 * block will be so we can update his h_next_leaf_blk field, as well 3166 * block will be so we can update his h_next_leaf_blk field, as well
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index fbcb5934a081..01db0adc2150 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -63,6 +63,25 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
63int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 63int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
64 struct ocfs2_dinode *tl_copy); 64 struct ocfs2_dinode *tl_copy);
65 65
66/*
67 * Process local structure which describes the block unlinks done
68 * during an operation. This is populated via
69 * ocfs2_cache_block_dealloc().
70 *
71 * ocfs2_run_deallocs() should be called after the potentially
72 * de-allocating routines. No journal handles should be open, and most
73 * locks should have been dropped.
74 */
struct ocfs2_cached_dealloc_ctxt {
	/* Head of the chain of per-suballocator free lists; NULL when empty. */
	struct ocfs2_per_slot_free_list		*c_first_suballocator;
};
78static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
79{
80 c->c_first_suballocator = NULL;
81}
82int ocfs2_run_deallocs(struct ocfs2_super *osb,
83 struct ocfs2_cached_dealloc_ctxt *ctxt);
84
66struct ocfs2_truncate_context { 85struct ocfs2_truncate_context {
67 struct inode *tc_ext_alloc_inode; 86 struct inode *tc_ext_alloc_inode;
68 struct buffer_head *tc_ext_alloc_bh; 87 struct buffer_head *tc_ext_alloc_bh;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index e3437626d183..6788f2f1a667 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle,
98 u16 chain); 98 u16 chain);
99static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 99static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
100 u32 wanted); 100 u32 wanted);
101static int ocfs2_free_suballoc_bits(handle_t *handle,
102 struct inode *alloc_inode,
103 struct buffer_head *alloc_bh,
104 unsigned int start_bit,
105 u64 bg_blkno,
106 unsigned int count);
107static inline u64 ocfs2_which_suballoc_group(u64 block,
108 unsigned int bit);
109static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 101static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
110 u64 bg_blkno, 102 u64 bg_blkno,
111 u16 bg_bit_off); 103 u16 bg_bit_off);
@@ -1626,12 +1618,12 @@ bail:
1626/* 1618/*
1627 * expects the suballoc inode to already be locked. 1619 * expects the suballoc inode to already be locked.
1628 */ 1620 */
1629static int ocfs2_free_suballoc_bits(handle_t *handle, 1621int ocfs2_free_suballoc_bits(handle_t *handle,
1630 struct inode *alloc_inode, 1622 struct inode *alloc_inode,
1631 struct buffer_head *alloc_bh, 1623 struct buffer_head *alloc_bh,
1632 unsigned int start_bit, 1624 unsigned int start_bit,
1633 u64 bg_blkno, 1625 u64 bg_blkno,
1634 unsigned int count) 1626 unsigned int count)
1635{ 1627{
1636 int status = 0; 1628 int status = 0;
1637 u32 tmp_used; 1629 u32 tmp_used;
@@ -1703,13 +1695,6 @@ bail:
1703 return status; 1695 return status;
1704} 1696}
1705 1697
1706static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
1707{
1708 u64 group = block - (u64) bit;
1709
1710 return group;
1711}
1712
1713int ocfs2_free_dinode(handle_t *handle, 1698int ocfs2_free_dinode(handle_t *handle,
1714 struct inode *inode_alloc_inode, 1699 struct inode *inode_alloc_inode,
1715 struct buffer_head *inode_alloc_bh, 1700 struct buffer_head *inode_alloc_bh,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 1a3c94cb9250..7bc4819db4db 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -86,6 +86,12 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
86 u32 *cluster_start, 86 u32 *cluster_start,
87 u32 *num_clusters); 87 u32 *num_clusters);
88 88
89int ocfs2_free_suballoc_bits(handle_t *handle,
90 struct inode *alloc_inode,
91 struct buffer_head *alloc_bh,
92 unsigned int start_bit,
93 u64 bg_blkno,
94 unsigned int count);
89int ocfs2_free_dinode(handle_t *handle, 95int ocfs2_free_dinode(handle_t *handle,
90 struct inode *inode_alloc_inode, 96 struct inode *inode_alloc_inode,
91 struct buffer_head *inode_alloc_bh, 97 struct buffer_head *inode_alloc_bh,
@@ -100,6 +106,13 @@ int ocfs2_free_clusters(handle_t *handle,
100 u64 start_blk, 106 u64 start_blk,
101 unsigned int num_clusters); 107 unsigned int num_clusters);
102 108
109static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
110{
111 u64 group = block - (u64) bit;
112
113 return group;
114}
115
103static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, 116static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
104 u64 bg_blkno) 117 u64 bg_blkno)
105{ 118{