diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-06-22 18:45:27 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-10 20:31:54 -0400 |
commit | 2b604351bc99b4e4504758cbac369b660b71de0b (patch) | |
tree | 293fa51f1ae9d19db0d09c721cc8433303cc8974 /fs/ocfs2 | |
parent | bce997682fe3121516f5a20cf7bad2e6029ba018 (diff) |
ocfs2: simplify deallocation locking
Deallocation of suballocator blocks, most notably extent blocks, might
involve multiple suballocator inodes.
The locking for this can get extremely complicated, especially when the
suballocator inodes to delete from aren't known until deep within an
unrelated codepath.
Implement a simple scheme for recording the blocks to be unlinked so that
the actual deallocation can be done in a context which won't deadlock.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/alloc.c | 204 | ||||
-rw-r--r-- | fs/ocfs2/alloc.h | 19 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.c | 27 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.h | 13 |
4 files changed, 242 insertions, 21 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 02b6e7af8edb..873bb99fc2ff 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -2957,6 +2957,210 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) | |||
2957 | return status; | 2957 | return status; |
2958 | } | 2958 | } |
2959 | 2959 | ||
2960 | /* | ||
2961 | * Delayed de-allocation of suballocator blocks. | ||
2962 | * | ||
2963 | * Some sets of block de-allocations might involve multiple suballocator inodes. | ||
2964 | * | ||
2965 | * The locking for this can get extremely complicated, especially when | ||
2966 | * the suballocator inodes to delete from aren't known until deep | ||
2967 | * within an unrelated codepath. | ||
2968 | * | ||
2969 | * ocfs2_extent_block structures are a good example of this - an inode | ||
2970 | * btree could have been grown by any number of nodes each allocating | ||
2971 | * out of their own suballoc inode. | ||
2972 | * | ||
2973 | * These structures allow the delay of block de-allocation until a | ||
2974 | * later time, when locking of multiple cluster inodes won't cause | ||
2975 | * deadlock. | ||
2976 | */ | ||
2977 | |||
2978 | /* | ||
2979 | * Describes a single block free from a suballocator | ||
2980 | */ | ||
2981 | struct ocfs2_cached_block_free { | ||
2982 | struct ocfs2_cached_block_free *free_next; | ||
2983 | u64 free_blk; | ||
2984 | unsigned int free_bit; | ||
2985 | }; | ||
2986 | |||
/*
 * All pending frees that target one suballocator system file,
 * identified by the (f_inode_type, f_slot) pair.
 */
struct ocfs2_per_slot_free_list {
	/* Next per-suballocator list in the dealloc context, or NULL. */
	struct ocfs2_per_slot_free_list		*f_next_suballocator;
	/* System file type handed to ocfs2_get_system_file_inode(). */
	int					f_inode_type;
	/* Slot number handed to ocfs2_get_system_file_inode(). */
	int					f_slot;
	/* Head of the list of blocks waiting to be freed. */
	struct ocfs2_cached_block_free		*f_first;
};
2993 | |||
2994 | static int ocfs2_free_cached_items(struct ocfs2_super *osb, | ||
2995 | int sysfile_type, | ||
2996 | int slot, | ||
2997 | struct ocfs2_cached_block_free *head) | ||
2998 | { | ||
2999 | int ret; | ||
3000 | u64 bg_blkno; | ||
3001 | handle_t *handle; | ||
3002 | struct inode *inode; | ||
3003 | struct buffer_head *di_bh = NULL; | ||
3004 | struct ocfs2_cached_block_free *tmp; | ||
3005 | |||
3006 | inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot); | ||
3007 | if (!inode) { | ||
3008 | ret = -EINVAL; | ||
3009 | mlog_errno(ret); | ||
3010 | goto out; | ||
3011 | } | ||
3012 | |||
3013 | mutex_lock(&inode->i_mutex); | ||
3014 | |||
3015 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
3016 | if (ret) { | ||
3017 | mlog_errno(ret); | ||
3018 | goto out_mutex; | ||
3019 | } | ||
3020 | |||
3021 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
3022 | if (IS_ERR(handle)) { | ||
3023 | ret = PTR_ERR(handle); | ||
3024 | mlog_errno(ret); | ||
3025 | goto out_unlock; | ||
3026 | } | ||
3027 | |||
3028 | while (head) { | ||
3029 | bg_blkno = ocfs2_which_suballoc_group(head->free_blk, | ||
3030 | head->free_bit); | ||
3031 | mlog(0, "Free bit: (bit %u, blkno %llu)\n", | ||
3032 | head->free_bit, (unsigned long long)head->free_blk); | ||
3033 | |||
3034 | ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, | ||
3035 | head->free_bit, bg_blkno, 1); | ||
3036 | if (ret) { | ||
3037 | mlog_errno(ret); | ||
3038 | goto out_journal; | ||
3039 | } | ||
3040 | |||
3041 | ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); | ||
3042 | if (ret) { | ||
3043 | mlog_errno(ret); | ||
3044 | goto out_journal; | ||
3045 | } | ||
3046 | |||
3047 | tmp = head; | ||
3048 | head = head->free_next; | ||
3049 | kfree(tmp); | ||
3050 | } | ||
3051 | |||
3052 | out_journal: | ||
3053 | ocfs2_commit_trans(osb, handle); | ||
3054 | |||
3055 | out_unlock: | ||
3056 | ocfs2_meta_unlock(inode, 1); | ||
3057 | brelse(di_bh); | ||
3058 | out_mutex: | ||
3059 | mutex_unlock(&inode->i_mutex); | ||
3060 | iput(inode); | ||
3061 | out: | ||
3062 | while(head) { | ||
3063 | /* Premature exit may have left some dangling items. */ | ||
3064 | tmp = head; | ||
3065 | head = head->free_next; | ||
3066 | kfree(tmp); | ||
3067 | } | ||
3068 | |||
3069 | return ret; | ||
3070 | } | ||
3071 | |||
3072 | int ocfs2_run_deallocs(struct ocfs2_super *osb, | ||
3073 | struct ocfs2_cached_dealloc_ctxt *ctxt) | ||
3074 | { | ||
3075 | int ret = 0, ret2; | ||
3076 | struct ocfs2_per_slot_free_list *fl; | ||
3077 | |||
3078 | if (!ctxt) | ||
3079 | return 0; | ||
3080 | |||
3081 | while (ctxt->c_first_suballocator) { | ||
3082 | fl = ctxt->c_first_suballocator; | ||
3083 | |||
3084 | if (fl->f_first) { | ||
3085 | mlog(0, "Free items: (type %u, slot %d)\n", | ||
3086 | fl->f_inode_type, fl->f_slot); | ||
3087 | ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, | ||
3088 | fl->f_slot, fl->f_first); | ||
3089 | if (ret2) | ||
3090 | mlog_errno(ret2); | ||
3091 | if (!ret) | ||
3092 | ret = ret2; | ||
3093 | } | ||
3094 | |||
3095 | ctxt->c_first_suballocator = fl->f_next_suballocator; | ||
3096 | kfree(fl); | ||
3097 | } | ||
3098 | |||
3099 | return ret; | ||
3100 | } | ||
3101 | |||
3102 | static struct ocfs2_per_slot_free_list * | ||
3103 | ocfs2_find_per_slot_free_list(int type, | ||
3104 | int slot, | ||
3105 | struct ocfs2_cached_dealloc_ctxt *ctxt) | ||
3106 | { | ||
3107 | struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; | ||
3108 | |||
3109 | while (fl) { | ||
3110 | if (fl->f_inode_type == type && fl->f_slot == slot) | ||
3111 | return fl; | ||
3112 | |||
3113 | fl = fl->f_next_suballocator; | ||
3114 | } | ||
3115 | |||
3116 | fl = kmalloc(sizeof(*fl), GFP_NOFS); | ||
3117 | if (fl) { | ||
3118 | fl->f_inode_type = type; | ||
3119 | fl->f_slot = slot; | ||
3120 | fl->f_first = NULL; | ||
3121 | fl->f_next_suballocator = ctxt->c_first_suballocator; | ||
3122 | |||
3123 | ctxt->c_first_suballocator = fl; | ||
3124 | } | ||
3125 | return fl; | ||
3126 | } | ||
3127 | |||
3128 | static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
3129 | int type, int slot, u64 blkno, | ||
3130 | unsigned int bit) | ||
3131 | { | ||
3132 | int ret; | ||
3133 | struct ocfs2_per_slot_free_list *fl; | ||
3134 | struct ocfs2_cached_block_free *item; | ||
3135 | |||
3136 | fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); | ||
3137 | if (fl == NULL) { | ||
3138 | ret = -ENOMEM; | ||
3139 | mlog_errno(ret); | ||
3140 | goto out; | ||
3141 | } | ||
3142 | |||
3143 | item = kmalloc(sizeof(*item), GFP_NOFS); | ||
3144 | if (item == NULL) { | ||
3145 | ret = -ENOMEM; | ||
3146 | mlog_errno(ret); | ||
3147 | goto out; | ||
3148 | } | ||
3149 | |||
3150 | mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", | ||
3151 | type, slot, bit, (unsigned long long)blkno); | ||
3152 | |||
3153 | item->free_blk = blkno; | ||
3154 | item->free_bit = bit; | ||
3155 | item->free_next = fl->f_first; | ||
3156 | |||
3157 | fl->f_first = item; | ||
3158 | |||
3159 | ret = 0; | ||
3160 | out: | ||
3161 | return ret; | ||
3162 | } | ||
3163 | |||
2960 | /* This function will figure out whether the currently last extent | 3164 | /* This function will figure out whether the currently last extent |
2961 | * block will be deleted, and if it will, what the new last extent | 3165 | * block will be deleted, and if it will, what the new last extent |
2962 | * block will be so we can update his h_next_leaf_blk field, as well | 3166 | * block will be so we can update his h_next_leaf_blk field, as well |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index fbcb5934a081..01db0adc2150 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -63,6 +63,25 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
63 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, | 63 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, |
64 | struct ocfs2_dinode *tl_copy); | 64 | struct ocfs2_dinode *tl_copy); |
65 | 65 | ||
/*
 * Per-process record of the block unlinks performed while an operation
 * runs.  Entries are added through ocfs2_cache_block_dealloc().
 *
 * Once the routines that may de-allocate have finished, call
 * ocfs2_run_deallocs().  At that point no journal handle should be
 * open and most locks should already have been released.
 */
struct ocfs2_cached_dealloc_ctxt {
	/* Head of the per-suballocator free lists; NULL when empty. */
	struct ocfs2_per_slot_free_list		*c_first_suballocator;
};
78 | static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) | ||
79 | { | ||
80 | c->c_first_suballocator = NULL; | ||
81 | } | ||
82 | int ocfs2_run_deallocs(struct ocfs2_super *osb, | ||
83 | struct ocfs2_cached_dealloc_ctxt *ctxt); | ||
84 | |||
66 | struct ocfs2_truncate_context { | 85 | struct ocfs2_truncate_context { |
67 | struct inode *tc_ext_alloc_inode; | 86 | struct inode *tc_ext_alloc_inode; |
68 | struct buffer_head *tc_ext_alloc_bh; | 87 | struct buffer_head *tc_ext_alloc_bh; |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index e3437626d183..6788f2f1a667 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
98 | u16 chain); | 98 | u16 chain); |
99 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | 99 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
100 | u32 wanted); | 100 | u32 wanted); |
101 | static int ocfs2_free_suballoc_bits(handle_t *handle, | ||
102 | struct inode *alloc_inode, | ||
103 | struct buffer_head *alloc_bh, | ||
104 | unsigned int start_bit, | ||
105 | u64 bg_blkno, | ||
106 | unsigned int count); | ||
107 | static inline u64 ocfs2_which_suballoc_group(u64 block, | ||
108 | unsigned int bit); | ||
109 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
110 | u64 bg_blkno, | 102 | u64 bg_blkno, |
111 | u16 bg_bit_off); | 103 | u16 bg_bit_off); |
@@ -1626,12 +1618,12 @@ bail: | |||
1626 | /* | 1618 | /* |
1627 | * expects the suballoc inode to already be locked. | 1619 | * expects the suballoc inode to already be locked. |
1628 | */ | 1620 | */ |
1629 | static int ocfs2_free_suballoc_bits(handle_t *handle, | 1621 | int ocfs2_free_suballoc_bits(handle_t *handle, |
1630 | struct inode *alloc_inode, | 1622 | struct inode *alloc_inode, |
1631 | struct buffer_head *alloc_bh, | 1623 | struct buffer_head *alloc_bh, |
1632 | unsigned int start_bit, | 1624 | unsigned int start_bit, |
1633 | u64 bg_blkno, | 1625 | u64 bg_blkno, |
1634 | unsigned int count) | 1626 | unsigned int count) |
1635 | { | 1627 | { |
1636 | int status = 0; | 1628 | int status = 0; |
1637 | u32 tmp_used; | 1629 | u32 tmp_used; |
@@ -1703,13 +1695,6 @@ bail: | |||
1703 | return status; | 1695 | return status; |
1704 | } | 1696 | } |
1705 | 1697 | ||
1706 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | ||
1707 | { | ||
1708 | u64 group = block - (u64) bit; | ||
1709 | |||
1710 | return group; | ||
1711 | } | ||
1712 | |||
1713 | int ocfs2_free_dinode(handle_t *handle, | 1698 | int ocfs2_free_dinode(handle_t *handle, |
1714 | struct inode *inode_alloc_inode, | 1699 | struct inode *inode_alloc_inode, |
1715 | struct buffer_head *inode_alloc_bh, | 1700 | struct buffer_head *inode_alloc_bh, |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 1a3c94cb9250..7bc4819db4db 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -86,6 +86,12 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
86 | u32 *cluster_start, | 86 | u32 *cluster_start, |
87 | u32 *num_clusters); | 87 | u32 *num_clusters); |
88 | 88 | ||
89 | int ocfs2_free_suballoc_bits(handle_t *handle, | ||
90 | struct inode *alloc_inode, | ||
91 | struct buffer_head *alloc_bh, | ||
92 | unsigned int start_bit, | ||
93 | u64 bg_blkno, | ||
94 | unsigned int count); | ||
89 | int ocfs2_free_dinode(handle_t *handle, | 95 | int ocfs2_free_dinode(handle_t *handle, |
90 | struct inode *inode_alloc_inode, | 96 | struct inode *inode_alloc_inode, |
91 | struct buffer_head *inode_alloc_bh, | 97 | struct buffer_head *inode_alloc_bh, |
@@ -100,6 +106,13 @@ int ocfs2_free_clusters(handle_t *handle, | |||
100 | u64 start_blk, | 106 | u64 start_blk, |
101 | unsigned int num_clusters); | 107 | unsigned int num_clusters); |
102 | 108 | ||
109 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | ||
110 | { | ||
111 | u64 group = block - (u64) bit; | ||
112 | |||
113 | return group; | ||
114 | } | ||
115 | |||
103 | static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, | 116 | static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, |
104 | u64 bg_blkno) | 117 | u64 bg_blkno) |
105 | { | 118 | { |