aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/alloc.c
diff options
context:
space:
mode:
author: Mark Fasheh <mark.fasheh@oracle.com> 2007-06-22 18:45:27 -0400
committer: Mark Fasheh <mark.fasheh@oracle.com> 2007-07-10 20:31:54 -0400
commit: 2b604351bc99b4e4504758cbac369b660b71de0b (patch)
tree: 293fa51f1ae9d19db0d09c721cc8433303cc8974 /fs/ocfs2/alloc.c
parent: bce997682fe3121516f5a20cf7bad2e6029ba018 (diff)
ocfs2: simplify deallocation locking
Deallocation of suballocator blocks, most notably extent blocks, might involve multiple suballocator inodes. The locking for this can get extremely complicated, especially when the suballocator inodes to delete from aren't known until deep within an unrelated codepath. Implement a simple scheme for recording the blocks to be unlinked so that the actual deallocation can be done in a context which won't deadlock.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r--fs/ocfs2/alloc.c204
1 files changed, 204 insertions, 0 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 02b6e7af8edb..873bb99fc2ff 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2957,6 +2957,210 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
2957 return status; 2957 return status;
2958} 2958}
2959 2959
2960/*
2961 * Delayed de-allocation of suballocator blocks.
2962 *
2963 * Some sets of block de-allocations might involve multiple suballocator inodes.
2964 *
2965 * The locking for this can get extremely complicated, especially when
2966 * the suballocator inodes to delete from aren't known until deep
2967 * within an unrelated codepath.
2968 *
2969 * ocfs2_extent_block structures are a good example of this - an inode
2970 * btree could have been grown by any number of nodes each allocating
2971 * out of their own suballoc inode.
2972 *
2973 * These structures allow the delay of block de-allocation until a
2974 * later time, when locking of multiple cluster inodes won't cause
2975 * deadlock.
2976 */
2977
2978/*
2979 * Describes a single block to be freed from a suballocator.
 *
 * Items are chained through free_next into a singly linked list whose
 * head is the f_first member of a struct ocfs2_per_slot_free_list.
 *
 * free_blk and free_bit are handed together to
 * ocfs2_which_suballoc_group() to find the group the bit belongs to;
 * free_bit is then the bit passed to ocfs2_free_suballoc_bits().
 *
 * Items are kmalloc()ed in ocfs2_cache_block_dealloc() and kfree()d by
 * ocfs2_free_cached_items().
2980 */
2981struct ocfs2_cached_block_free {
2982 struct ocfs2_cached_block_free *free_next;
2983 u64 free_blk;
2984 unsigned int free_bit;
2985};
2986
/*
 * A list of cached block frees that all target the same suballocator,
 * identified by the (f_inode_type, f_slot) pair. That pair is what
 * ocfs2_find_per_slot_free_list() matches on and what
 * ocfs2_run_deallocs() passes to ocfs2_free_cached_items() as the
 * system file type and slot.
 *
 * f_next_suballocator chains the per-slot lists hanging off a
 * dealloc context's c_first_suballocator pointer.
 *
 * f_first is the head of the list of blocks to free; it is NULL while
 * the list is empty.
 */
2987struct ocfs2_per_slot_free_list {
2988 struct ocfs2_per_slot_free_list *f_next_suballocator;
2989 int f_inode_type;
2990 int f_slot;
2991 struct ocfs2_cached_block_free *f_first;
2992};
2993
2994static int ocfs2_free_cached_items(struct ocfs2_super *osb,
2995 int sysfile_type,
2996 int slot,
2997 struct ocfs2_cached_block_free *head)
2998{
2999 int ret;
3000 u64 bg_blkno;
3001 handle_t *handle;
3002 struct inode *inode;
3003 struct buffer_head *di_bh = NULL;
3004 struct ocfs2_cached_block_free *tmp;
3005
3006 inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
3007 if (!inode) {
3008 ret = -EINVAL;
3009 mlog_errno(ret);
3010 goto out;
3011 }
3012
3013 mutex_lock(&inode->i_mutex);
3014
3015 ret = ocfs2_meta_lock(inode, &di_bh, 1);
3016 if (ret) {
3017 mlog_errno(ret);
3018 goto out_mutex;
3019 }
3020
3021 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
3022 if (IS_ERR(handle)) {
3023 ret = PTR_ERR(handle);
3024 mlog_errno(ret);
3025 goto out_unlock;
3026 }
3027
3028 while (head) {
3029 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
3030 head->free_bit);
3031 mlog(0, "Free bit: (bit %u, blkno %llu)\n",
3032 head->free_bit, (unsigned long long)head->free_blk);
3033
3034 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
3035 head->free_bit, bg_blkno, 1);
3036 if (ret) {
3037 mlog_errno(ret);
3038 goto out_journal;
3039 }
3040
3041 ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
3042 if (ret) {
3043 mlog_errno(ret);
3044 goto out_journal;
3045 }
3046
3047 tmp = head;
3048 head = head->free_next;
3049 kfree(tmp);
3050 }
3051
3052out_journal:
3053 ocfs2_commit_trans(osb, handle);
3054
3055out_unlock:
3056 ocfs2_meta_unlock(inode, 1);
3057 brelse(di_bh);
3058out_mutex:
3059 mutex_unlock(&inode->i_mutex);
3060 iput(inode);
3061out:
3062 while(head) {
3063 /* Premature exit may have left some dangling items. */
3064 tmp = head;
3065 head = head->free_next;
3066 kfree(tmp);
3067 }
3068
3069 return ret;
3070}
3071
3072int ocfs2_run_deallocs(struct ocfs2_super *osb,
3073 struct ocfs2_cached_dealloc_ctxt *ctxt)
3074{
3075 int ret = 0, ret2;
3076 struct ocfs2_per_slot_free_list *fl;
3077
3078 if (!ctxt)
3079 return 0;
3080
3081 while (ctxt->c_first_suballocator) {
3082 fl = ctxt->c_first_suballocator;
3083
3084 if (fl->f_first) {
3085 mlog(0, "Free items: (type %u, slot %d)\n",
3086 fl->f_inode_type, fl->f_slot);
3087 ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
3088 fl->f_slot, fl->f_first);
3089 if (ret2)
3090 mlog_errno(ret2);
3091 if (!ret)
3092 ret = ret2;
3093 }
3094
3095 ctxt->c_first_suballocator = fl->f_next_suballocator;
3096 kfree(fl);
3097 }
3098
3099 return ret;
3100}
3101
3102static struct ocfs2_per_slot_free_list *
3103ocfs2_find_per_slot_free_list(int type,
3104 int slot,
3105 struct ocfs2_cached_dealloc_ctxt *ctxt)
3106{
3107 struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
3108
3109 while (fl) {
3110 if (fl->f_inode_type == type && fl->f_slot == slot)
3111 return fl;
3112
3113 fl = fl->f_next_suballocator;
3114 }
3115
3116 fl = kmalloc(sizeof(*fl), GFP_NOFS);
3117 if (fl) {
3118 fl->f_inode_type = type;
3119 fl->f_slot = slot;
3120 fl->f_first = NULL;
3121 fl->f_next_suballocator = ctxt->c_first_suballocator;
3122
3123 ctxt->c_first_suballocator = fl;
3124 }
3125 return fl;
3126}
3127
3128static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
3129 int type, int slot, u64 blkno,
3130 unsigned int bit)
3131{
3132 int ret;
3133 struct ocfs2_per_slot_free_list *fl;
3134 struct ocfs2_cached_block_free *item;
3135
3136 fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
3137 if (fl == NULL) {
3138 ret = -ENOMEM;
3139 mlog_errno(ret);
3140 goto out;
3141 }
3142
3143 item = kmalloc(sizeof(*item), GFP_NOFS);
3144 if (item == NULL) {
3145 ret = -ENOMEM;
3146 mlog_errno(ret);
3147 goto out;
3148 }
3149
3150 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
3151 type, slot, bit, (unsigned long long)blkno);
3152
3153 item->free_blk = blkno;
3154 item->free_bit = bit;
3155 item->free_next = fl->f_first;
3156
3157 fl->f_first = item;
3158
3159 ret = 0;
3160out:
3161 return ret;
3162}
3163
2960/* This function will figure out whether the currently last extent 3164/* This function will figure out whether the currently last extent
2961 * block will be deleted, and if it will, what the new last extent 3165 * block will be deleted, and if it will, what the new last extent
2962 * block will be so we can update his h_next_leaf_blk field, as well 3166 * block will be so we can update his h_next_leaf_blk field, as well