 fs/ext3/inode.c               |   33
 fs/jbd/revoke.c               |   12
 fs/reiserfs/bitmap.c          |  259
 fs/reiserfs/dir.c             |  156
 fs/reiserfs/do_balan.c        | 2449
 fs/reiserfs/file.c            |   90
 fs/reiserfs/fix_node.c        | 1008
 fs/reiserfs/hashes.c          |   15
 fs/reiserfs/ibalance.c        |  271
 fs/reiserfs/inode.c           | 1206
 fs/reiserfs/ioctl.c           |   27
 fs/reiserfs/item_ops.c        |  108
 fs/reiserfs/journal.c         | 1339
 fs/reiserfs/lbalance.c        |  501
 fs/reiserfs/namei.c           |  513
 fs/reiserfs/objectid.c        |  101
 fs/reiserfs/prints.c          |  176
 fs/reiserfs/reiserfs.h        | 1921
 fs/reiserfs/resize.c          |   75
 fs/reiserfs/stree.c           |  884
 fs/reiserfs/super.c           |  552
 fs/reiserfs/tail_conversion.c |  161
 fs/reiserfs/xattr.c           |   70
 fs/reiserfs/xattr.h           |    3
 fs/reiserfs/xattr_acl.c       |   38
 25 files changed, 7052 insertions(+), 4916 deletions(-)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f5157d0d1b43..695abe738a24 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1716,17 +1716,17 @@ static int ext3_journalled_writepage(struct page *page,
 	WARN_ON_ONCE(IS_RDONLY(inode) &&
 		     !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
 
-	if (ext3_journal_current_handle())
-		goto no_write;
-
 	trace_ext3_journalled_writepage(page);
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto no_write;
-	}
-
 	if (!page_has_buffers(page) || PageChecked(page)) {
+		if (ext3_journal_current_handle())
+			goto no_write;
+
+		handle = ext3_journal_start(inode,
+					    ext3_writepage_trans_blocks(inode));
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto no_write;
+		}
 		/*
 		 * It's mmapped pagecache.  Add buffers and journal it.  There
 		 * doesn't seem much point in redirtying the page here.
@@ -1749,17 +1749,18 @@ static int ext3_journalled_writepage(struct page *page,
 		atomic_set(&EXT3_I(inode)->i_datasync_tid,
 			   handle->h_transaction->t_tid);
 		unlock_page(page);
+		err = ext3_journal_stop(handle);
+		if (!ret)
+			ret = err;
 	} else {
 		/*
-		 * It may be a page full of checkpoint-mode buffers.  We don't
-		 * really know unless we go poke around in the buffer_heads.
-		 * But block_write_full_page will do the right thing.
+		 * It is a page full of checkpoint-mode buffers.  Go and write
+		 * them.  They should have been already mapped when they went
+		 * to the journal so provide NULL get_block function to catch
+		 * errors.
 		 */
-		ret = block_write_full_page(page, ext3_get_block, wbc);
+		ret = block_write_full_page(page, NULL, wbc);
 	}
-	err = ext3_journal_stop(handle);
-	if (!ret)
-		ret = err;
 out:
 	return ret;
 
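The hunks above move the journal-handle setup into the branch that actually journals buffers, so ext3_journal_stop() has to move with it, and its return code is still folded into ret so that the first error wins. A minimal user-space sketch of that first-error-wins idiom follows; do_work and finish are hypothetical stand-ins for illustration, not ext3 functions:

#include <stdio.h>

static int do_work(void) { return 0; }	/* stands in for the journalled write */
static int finish(void)  { return -5; }	/* stands in for ext3_journal_stop() */

int main(void)
{
	int ret = do_work();
	int err = finish();	/* cleanup always runs... */

	if (!ret)		/* ...but the earliest failure is what we report */
		ret = err;
	printf("ret = %d\n", ret);	/* -5 only when do_work() itself succeeded */
	return 0;
}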
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 25c713e7071c..8898bbd2b61e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -231,19 +231,15 @@ record_cache_failure:
 
 static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
 {
-	int shift = 0;
-	int tmp = hash_size;
+	int i;
 	struct jbd_revoke_table_s *table;
 
 	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
 	if (!table)
 		goto out;
 
-	while((tmp >>= 1UL) != 0UL)
-		shift++;
-
 	table->hash_size = hash_size;
-	table->hash_shift = shift;
+	table->hash_shift = ilog2(hash_size);
 	table->hash_table =
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
 	if (!table->hash_table) {
@@ -252,8 +248,8 @@ static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
 		goto out;
 	}
 
-	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&table->hash_table[tmp]);
+	for (i = 0; i < hash_size; i++)
+		INIT_LIST_HEAD(&table->hash_table[i]);
 
 out:
 	return table;
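The deleted while ((tmp >>= 1UL) != 0UL) loop computed floor(log2(hash_size)), which is exactly what the kernel's ilog2() returns, so the substitution is behaviour-preserving for the power-of-two bucket counts the revoke table uses. A user-space sketch of the equivalence, using __builtin_clz as a stand-in for ilog2() (an assumption for illustration only):

#include <assert.h>

/* the open-coded loop removed from journal_init_revoke_table() */
static int shift_log2(unsigned int n)
{
	int shift = 0;

	while ((n >>= 1) != 0)
		shift++;
	return shift;
}

/* user-space stand-in for the kernel's ilog2(); undefined for n == 0 */
static int ilog2_approx(unsigned int n)
{
	return 31 - __builtin_clz(n);
}

int main(void)
{
	unsigned int n;

	/* revoke tables use power-of-two hash sizes, e.g. 256 buckets */
	for (n = 1; n <= 1024; n <<= 1)
		assert(shift_log2(n) == ilog2_approx(n));
	return 0;
}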
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 1bcffeab713c..dc198bc64c61 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -50,8 +50,10 @@ static inline void get_bit_address(struct super_block *s,
 				   unsigned int *bmap_nr,
 				   unsigned int *offset)
 {
-	/* It is in the bitmap block number equal to the block
-	 * number divided by the number of bits in a block. */
+	/*
+	 * It is in the bitmap block number equal to the block
+	 * number divided by the number of bits in a block.
+	 */
 	*bmap_nr = block >> (s->s_blocksize_bits + 3);
 	/* Within that bitmap block it is located at bit offset *offset. */
 	*offset = block & ((s->s_blocksize << 3) - 1);
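The arithmetic works because one bitmap block of s_blocksize bytes holds s_blocksize * 8 bits, i.e. it maps s_blocksize << 3 filesystem blocks; shifting by s_blocksize_bits + 3 divides by that figure and the mask takes the remainder. A self-contained sketch of the same arithmetic, assuming 4 KiB blocks (s_blocksize_bits = 12):

#include <stdio.h>

/* user-space model of get_bit_address(): each bitmap block of
 * (1 << blocksize_bits) bytes covers (1 << (blocksize_bits + 3)) blocks */
static void get_bit_address(unsigned int blocksize_bits, unsigned long block,
			    unsigned int *bmap_nr, unsigned int *offset)
{
	*bmap_nr = block >> (blocksize_bits + 3);		/* divide */
	*offset = block & ((1UL << (blocksize_bits + 3)) - 1);	/* remainder */
}

int main(void)
{
	unsigned int bmap, off;

	/* 4 KiB blocks: one bitmap block covers 32768 blocks, so block
	 * 100000 lands in bitmap 3 (3 * 32768 = 98304) at bit 1696 */
	get_bit_address(12, 100000, &bmap, &off);
	printf("block 100000 -> bitmap %u, bit %u\n", bmap, off);
	return 0;
}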
@@ -71,10 +73,12 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 
 	get_bit_address(s, block, &bmap, &offset);
 
-	/* Old format filesystem? Unlikely, but the bitmaps are all up front so
-	 * we need to account for it. */
+	/*
+	 * Old format filesystem? Unlikely, but the bitmaps are all
+	 * up front so we need to account for it.
+	 */
 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
-			      &(REISERFS_SB(s)->s_properties)))) {
+			      &REISERFS_SB(s)->s_properties))) {
 		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
 		if (block >= bmap1 &&
 		    block <= bmap1 + bmap_count) {
@@ -108,8 +112,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 	return 1;
 }
 
-/* searches in journal structures for a given block number (bmap, off). If block
-   is found in reiserfs journal it suggests next free block candidate to test. */
+/*
+ * Searches in journal structures for a given block number (bmap, off).
+ * If block is found in reiserfs journal it suggests next free block
+ * candidate to test.
+ */
 static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 				      int off, int *next)
 {
@@ -120,7 +127,7 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 		*next = tmp;
 		PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
 	} else {
-		(*next) = off + 1;  /* inc offset to avoid looping. */
+		(*next) = off + 1;	/* inc offset to avoid looping. */
 		PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
 	}
 	PROC_INFO_INC(s, scan_bitmap.retry);
@@ -129,8 +136,10 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
 	return 0;
 }
 
-/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
- * block; */
+/*
+ * Searches for a window of zero bits with given minimum and maximum
+ * lengths in one bitmap block
+ */
 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 			     unsigned int bmap_n, int *beg, int boundary,
 			     int min, int max, int unfm)
@@ -145,10 +154,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
 	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 	PROC_INFO_INC(s, scan_bitmap.bmap);
-	/* this is unclear and lacks comments, explain how journal bitmaps
-	   work here for the reader.  Convey a sense of the design here. What
-	   is a window? */
-	/* - I mean `a window of zero bits' as in description of this function - Zam. */
 
 	if (!bi) {
 		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
@@ -161,18 +166,21 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 		return 0;
 
 	while (1) {
-	      cont:
+cont:
 		if (bi->free_count < min) {
 			brelse(bh);
-			return 0;	// No free blocks in this bitmap
+			return 0;	/* No free blocks in this bitmap */
 		}
 
 		/* search for a first zero bit -- beginning of a window */
 		*beg = reiserfs_find_next_zero_le_bit
 		    ((unsigned long *)(bh->b_data), boundary, *beg);
 
-		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
-						 * cannot contain a zero window of minimum size */
+		/*
+		 * search for a zero bit fails or the rest of bitmap block
+		 * cannot contain a zero window of minimum size
+		 */
+		if (*beg + min > boundary) {
 			brelse(bh);
 			return 0;
 		}
@@ -186,49 +194,75 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 			next = end;
 			break;
 		}
-		/* finding the other end of zero bit window requires looking into journal structures (in
-		 * case of searching for free blocks for unformatted nodes) */
+
+		/*
+		 * finding the other end of zero bit window requires
+		 * looking into journal structures (in case of
+		 * searching for free blocks for unformatted nodes)
+		 */
 		if (unfm && is_block_in_journal(s, bmap_n, end, &next))
 			break;
 	}
 
-	/* now (*beg) points to beginning of zero bits window,
-	 * (end) points to one bit after the window end */
-	if (end - *beg >= min) {	/* it seems we have found window of proper size */
+	/*
+	 * now (*beg) points to beginning of zero bits window,
+	 * (end) points to one bit after the window end
+	 */
+
+	/* found window of proper size */
+	if (end - *beg >= min) {
 		int i;
 		reiserfs_prepare_for_journal(s, bh, 1);
-		/* try to set all blocks used checking are they still free */
+		/*
+		 * try to set all blocks used checking are
+		 * they still free
+		 */
 		for (i = *beg; i < end; i++) {
-			/* It seems that we should not check in journal again. */
+			/* Don't check in journal again. */
 			if (reiserfs_test_and_set_le_bit
 			    (i, bh->b_data)) {
-				/* bit was set by another process
-				 * while we slept in prepare_for_journal() */
+				/*
+				 * bit was set by another process while
+				 * we slept in prepare_for_journal()
+				 */
 				PROC_INFO_INC(s, scan_bitmap.stolen);
-				if (i >= *beg + min) {	/* we can continue with smaller set of allocated blocks,
-							 * if length of this set is more or equal to `min' */
+
+				/*
+				 * we can continue with smaller set
+				 * of allocated blocks, if length of
+				 * this set is more or equal to `min'
+				 */
+				if (i >= *beg + min) {
 					end = i;
 					break;
 				}
-				/* otherwise we clear all bit were set ... */
+
+				/*
+				 * otherwise we clear all bit
+				 * were set ...
+				 */
 				while (--i >= *beg)
 					reiserfs_clear_le_bit
 					    (i, bh->b_data);
 				reiserfs_restore_prepared_buffer(s, bh);
 				*beg = org;
-				/* ... and search again in current block from beginning */
+
+				/*
+				 * Search again in current block
+				 * from beginning
+				 */
 				goto cont;
 			}
 		}
 		bi->free_count -= (end - *beg);
-		journal_mark_dirty(th, s, bh);
+		journal_mark_dirty(th, bh);
 		brelse(bh);
 
 		/* free block count calculation */
 		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
 					     1);
 		PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
-		journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
+		journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
 
 		return end - (*beg);
 	} else {
@@ -267,11 +301,13 @@ static inline int block_group_used(struct super_block *s, u32 id)
 	int bm = bmap_hash_id(s, id);
 	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 
-	/* If we don't have cached information on this bitmap block, we're
+	/*
+	 * If we don't have cached information on this bitmap block, we're
 	 * going to have to load it later anyway. Loading it here allows us
 	 * to make a better decision. This favors long-term performance gain
 	 * with a better on-disk layout vs. a short term gain of skipping the
-	 * read and potentially having a bad placement. */
+	 * read and potentially having a bad placement.
+	 */
 	if (info->free_count == UINT_MAX) {
 		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 		brelse(bh);
@@ -304,25 +340,26 @@ __le32 reiserfs_choose_packing(struct inode * dir)
 	return packing;
 }
 
-/* Tries to find contiguous zero bit window (given size) in given region of
- * bitmap and place new blocks there. Returns number of allocated blocks. */
+/*
+ * Tries to find contiguous zero bit window (given size) in given region of
+ * bitmap and place new blocks there. Returns number of allocated blocks.
+ */
 static int scan_bitmap(struct reiserfs_transaction_handle *th,
 		       b_blocknr_t * start, b_blocknr_t finish,
 		       int min, int max, int unfm, sector_t file_block)
 {
 	int nr_allocated = 0;
 	struct super_block *s = th->t_super;
-	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
-	 * - Hans, it is not a block number - Zam. */
-
 	unsigned int bm, off;
 	unsigned int end_bm, end_off;
 	unsigned int off_max = s->s_blocksize << 3;
 
 	BUG_ON(!th->t_trans_id);
 	PROC_INFO_INC(s, scan_bitmap.call);
+
+	/* No point in looking for more free blocks */
 	if (SB_FREE_BLOCKS(s) <= 0)
-		return 0;	// No point in looking for more free blocks
+		return 0;
 
 	get_bit_address(s, *start, &bm, &off);
 	get_bit_address(s, finish, &end_bm, &end_off);
@@ -331,7 +368,8 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
 	if (end_bm > reiserfs_bmap_count(s))
 		end_bm = reiserfs_bmap_count(s);
 
-	/* When the bitmap is more than 10% free, anyone can allocate.
+	/*
+	 * When the bitmap is more than 10% free, anyone can allocate.
 	 * When it's less than 10% free, only files that already use the
 	 * bitmap are allowed. Once we pass 80% full, this restriction
 	 * is lifted.
@@ -369,7 +407,7 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
 	nr_allocated =
 	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
 
-      ret:
+ret:
 	*start = bm * off_max + off;
 	return nr_allocated;
 
@@ -411,14 +449,14 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 			       "block %lu: bit already cleared", block);
 	}
 	apbi[nr].free_count++;
-	journal_mark_dirty(th, s, bmbh);
+	journal_mark_dirty(th, bmbh);
 	brelse(bmbh);
 
 	reiserfs_prepare_for_journal(s, sbh, 1);
 	/* update super block */
 	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
 
-	journal_mark_dirty(th, s, sbh);
+	journal_mark_dirty(th, sbh);
 	if (for_unformatted) {
 		int depth = reiserfs_write_unlock_nested(s);
 		dquot_free_block_nodirty(inode, 1);
@@ -483,7 +521,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 	if (dirty)
 		reiserfs_update_sd(th, inode);
 	ei->i_prealloc_block = save;
-	list_del_init(&(ei->i_prealloc_list));
+	list_del_init(&ei->i_prealloc_list);
 }
 
 /* FIXME: It should be inline function */
@@ -529,7 +567,8 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 {
 	char *this_char, *value;
 
-	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */
+	/* clear default settings */
+	REISERFS_SB(s)->s_alloc_options.bits = 0;
 
 	while ((this_char = strsep(&options, ":")) != NULL) {
 		if ((value = strchr(this_char, '=')) != NULL)
@@ -731,7 +770,7 @@ static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 		hash_in = (char *)&hint->key.k_dir_id;
 	} else {
 		if (!hint->inode) {
-			//hint->search_start = hint->beg;
+			/*hint->search_start = hint->beg;*/
 			hash_in = (char *)&hint->key.k_dir_id;
 		} else
 		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
@@ -785,7 +824,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
 
 		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 
-		/* keep the root dir and it's first set of subdirs close to
+		/*
+		 * keep the root dir and it's first set of subdirs close to
 		 * the start of the disk
 		 */
 		if (dirid <= 2)
@@ -799,7 +839,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
 	}
 }
 
-/* returns 1 if it finds an indirect item and gets valid hint info
+/*
+ * returns 1 if it finds an indirect item and gets valid hint info
  * from it, otherwise 0
  */
 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
@@ -811,25 +852,29 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 	__le32 *item;
 	int ret = 0;
 
-	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
-				 * structure supplied; then we rely on supplied search_start */
+	/*
+	 * reiserfs code can call this function w/o pointer to path
+	 * structure supplied; then we rely on supplied search_start
+	 */
+	if (!hint->path)
 		return 0;
 
 	path = hint->path;
 	bh = get_last_bh(path);
 	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
-	ih = get_ih(path);
+	ih = tp_item_head(path);
 	pos_in_item = path->pos_in_item;
-	item = get_item(path);
+	item = tp_item_body(path);
 
 	hint->search_start = bh->b_blocknr;
 
+	/*
+	 * for indirect item: go to left and look for the first non-hole entry
+	 * in the indirect item
+	 */
 	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
-		/* for indirect item: go to left and look for the first non-hole entry
-		   in the indirect item */
 		if (pos_in_item == I_UNFM_NUM(ih))
 			pos_in_item--;
-//              pos_in_item = I_UNFM_NUM (ih) - 1;
 		while (pos_in_item >= 0) {
 			int t = get_block_num(item, pos_in_item);
 			if (t) {
@@ -845,10 +890,12 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 	return ret;
 }
 
-/* should be, if formatted node, then try to put on first part of the device
-   specified as number of percent with mount option device, else try to put
-   on last of device.  This is not to say it is good code to do so,
-   but the effect should be measured. */
+/*
+ * should be, if formatted node, then try to put on first part of the device
+ * specified as number of percent with mount option device, else try to put
+ * on last of device.  This is not to say it is good code to do so,
+ * but the effect should be measured.
+ */
 static inline void set_border_in_hint(struct super_block *s,
 				      reiserfs_blocknr_hint_t * hint)
 {
@@ -974,21 +1021,27 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 	set_border_in_hint(s, hint);
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
-	/* whenever we create a new directory, we displace it.  At first we will
-	   hash for location, later we might look for a moderately empty place for
-	   it */
+	/*
+	 * whenever we create a new directory, we displace it.  At first
+	 * we will hash for location, later we might look for a moderately
+	 * empty place for it
+	 */
 	if (displacing_new_packing_localities(s)
 	    && hint->th->displace_new_blocks) {
 		displace_new_packing_locality(hint);
 
-		/* we do not continue determine_search_start,
-		 * if new packing locality is being displaced */
+		/*
+		 * we do not continue determine_search_start,
+		 * if new packing locality is being displaced
+		 */
 		return;
 	}
 #endif
 
-	/* all persons should feel encouraged to add more special cases here and
-	 * test them */
+	/*
+	 * all persons should feel encouraged to add more special cases
+	 * here and test them
+	 */
 
 	if (displacing_large_files(s) && !hint->formatted_node
 	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
@@ -996,8 +1049,10 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 		return;
 	}
 
-	/* if none of our special cases is relevant, use the left neighbor in the
-	   tree order of the new node we are allocating for */
+	/*
+	 * if none of our special cases is relevant, use the left
+	 * neighbor in the tree order of the new node we are allocating for
+	 */
 	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
 		hash_formatted_node(hint);
 		return;
@@ -1005,10 +1060,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 
 	unfm_hint = get_left_neighbor(hint);
 
-	/* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
-	   new blocks are displaced based on directory ID. Also, if suggested search_start
-	   is less than last preallocated block, we start searching from it, assuming that
-	   HDD dataflow is faster in forward direction */
+	/*
+	 * Mimic old block allocator behaviour, that is if VFS allowed for
+	 * preallocation, new blocks are displaced based on directory ID.
+	 * Also, if suggested search_start is less than last preallocated
+	 * block, we start searching from it, assuming that HDD dataflow
+	 * is faster in forward direction
+	 */
 	if (TEST_OPTION(old_way, s)) {
 		if (!hint->formatted_node) {
 			if (!reiserfs_hashed_relocation(s))
@@ -1037,11 +1095,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 	    TEST_OPTION(old_hashed_relocation, s)) {
 		old_hashed_relocation(hint);
 	}
+
 	/* new_hashed_relocation works with both formatted/unformatted nodes */
 	if ((!unfm_hint || hint->formatted_node) &&
 	    TEST_OPTION(new_hashed_relocation, s)) {
 		new_hashed_relocation(hint);
 	}
+
 	/* dirid grouping works only on unformatted nodes */
 	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
 		dirid_groups(hint);
@@ -1079,8 +1139,6 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
 	return CARRY_ON;
 }
 
-/* XXX I know it could be merged with upper-level function;
-   but may be result function would be too complex. */
 static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
 						 b_blocknr_t * new_blocknrs,
 						 b_blocknr_t start,
@@ -1108,7 +1166,10 @@ static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
 
 	/* do we have something to fill prealloc. array also ? */
 	if (nr_allocated > 0) {
-		/* it means prealloc_size was greater that 0 and we do preallocation */
+		/*
+		 * it means prealloc_size was greater that 0 and
+		 * we do preallocation
+		 */
 		list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
 			 &SB_JOURNAL(hint->th->t_super)->
 			 j_prealloc_list);
@@ -1176,7 +1237,8 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
 		start = 0;
 		finish = hint->beg;
 		break;
-	default:	/* We've tried searching everywhere, not enough space */
+	default:
+		/* We've tried searching everywhere, not enough space */
 		/* Free the blocks */
 		if (!hint->formatted_node) {
 #ifdef REISERQUOTA_DEBUG
@@ -1261,8 +1323,11 @@ static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
 	return amount_needed;
 }
 
-int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us	/* Amount of blocks we have
-										   already reserved */ )
+int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
+			       b_blocknr_t *new_blocknrs,
+			       int amount_needed,
+			       /* Amount of blocks we have already reserved */
+			       int reserved_by_us)
 {
 	int initial_amount_needed = amount_needed;
 	int ret;
@@ -1274,15 +1339,21 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new
 		return NO_DISK_SPACE;
 	/* should this be if !hint->inode &&  hint->preallocate? */
 	/* do you mean hint->formatted_node can be removed ? - Zam */
-	/* hint->formatted_node cannot be removed because we try to access
-	   inode information here, and there is often no inode assotiated with
-	   metadata allocations - green */
+	/*
+	 * hint->formatted_node cannot be removed because we try to access
+	 * inode information here, and there is often no inode associated with
+	 * metadata allocations - green
+	 */
 
 	if (!hint->formatted_node && hint->preallocate) {
 		amount_needed = use_preallocated_list_if_available
 		    (hint, new_blocknrs, amount_needed);
-		if (amount_needed == 0)	/* all blocknrs we need we got from
-					   prealloc. list */
+
+		/*
+		 * We have all the block numbers we need from the
+		 * prealloc list
+		 */
+		if (amount_needed == 0)
 			return CARRY_ON;
 		new_blocknrs += (initial_amount_needed - amount_needed);
 	}
@@ -1296,10 +1367,12 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new
 	ret = blocknrs_and_prealloc_arrays_from_search_start
 	    (hint, new_blocknrs, amount_needed);
 
-	/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
-	 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
-	 * variant) */
-
+	/*
+	 * We used prealloc. list to fill (partially) new_blocknrs array.
+	 * If final allocation fails we need to return blocks back to
+	 * prealloc. list or just free them. -- Zam (I chose second
+	 * variant)
+	 */
 	if (ret != CARRY_ON) {
 		while (amount_needed++ < initial_amount_needed) {
 			reiserfs_free_block(hint->th, hint->inode,
@@ -1338,10 +1411,12 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
 	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
 	struct buffer_head *bh;
 
-	/* Way old format filesystems had the bitmaps packed up front.
-	 * I doubt there are any of these left, but just in case... */
+	/*
+	 * Way old format filesystems had the bitmaps packed up front.
+	 * I doubt there are any of these left, but just in case...
+	 */
 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
-			      &(REISERFS_SB(sb)->s_properties))))
+			      &REISERFS_SB(sb)->s_properties)))
 		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
 	else if (bitmap == 0)
 		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
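The last hunk shows only the old-format and bitmap-0 cases of reiserfs_read_bitmap_block(). The sketch below fills in the overall placement rule; the branch for later bitmaps (bitmap * (s_blocksize << 3)) is not part of the hunk above and is reconstructed from memory of the surrounding function, so treat it as an assumption. Constants are illustrative: 4 KiB blocks, superblock at 64 KiB:

#include <stdio.h>

#define BLOCKSIZE_BITS	12
#define SB_BLOCK	16	/* REISERFS_DISK_OFFSET_IN_BYTES >> 12 */

static unsigned long bitmap_block(int old_format, unsigned int bitmap)
{
	if (old_format)		/* all bitmaps packed right after the sb */
		return SB_BLOCK + 1 + bitmap;
	if (bitmap == 0)	/* the first bitmap still follows the sb */
		return SB_BLOCK + 1;
	/* assumed: later bitmaps sit at the start of the group they map */
	return (unsigned long)bitmap * (1UL << (BLOCKSIZE_BITS + 3));
}

int main(void)
{
	printf("old format, bitmap 2: block %lu\n", bitmap_block(1, 2)); /* 19 */
	printf("new format, bitmap 2: block %lu\n", bitmap_block(0, 2)); /* 65536 */
	return 0;
}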
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index af677353a3f5..d9f5a60dd59b 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -59,7 +59,10 @@ static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *d
 
 int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 {
-	struct cpu_key pos_key;	/* key of current position in the directory (key of directory entry) */
+
+	/* key of current position in the directory (key of directory entry) */
+	struct cpu_key pos_key;
+
 	INITIALIZE_PATH(path_to_entry);
 	struct buffer_head *bh;
 	int item_num, entry_num;
@@ -77,21 +80,28 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 
 	reiserfs_check_lock_depth(inode->i_sb, "readdir");
 
-	/* form key for search the next directory entry using f_pos field of
-	   file structure */
+	/*
+	 * form key for search the next directory entry using
+	 * f_pos field of file structure
+	 */
 	make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
 	next_pos = cpu_key_k_offset(&pos_key);
 
 	path_to_entry.reada = PATH_READA;
 	while (1) {
-	      research:
-		/* search the directory item, containing entry with specified key */
+research:
+		/*
+		 * search the directory item, containing entry with
+		 * specified key
+		 */
 		search_res =
 		    search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
 					&de);
 		if (search_res == IO_ERROR) {
-			// FIXME: we could just skip part of directory which could
-			// not be read
+			/*
+			 * FIXME: we could just skip part of directory
+			 * which could not be read
+			 */
 			ret = -EIO;
 			goto out;
 		}
@@ -102,41 +112,49 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 		store_ih(&tmp_ih, ih);
 
 		/* we must have found item, that is item of this directory, */
-		RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
+		RFALSE(COMP_SHORT_KEYS(&ih->ih_key, &pos_key),
 		       "vs-9000: found item %h does not match to dir we readdir %K",
 		       ih, &pos_key);
 		RFALSE(item_num > B_NR_ITEMS(bh) - 1,
 		       "vs-9005 item_num == %d, item amount == %d",
 		       item_num, B_NR_ITEMS(bh));
 
-		/* and entry must be not more than number of entries in the item */
-		RFALSE(I_ENTRY_COUNT(ih) < entry_num,
+		/*
+		 * and entry must be not more than number of entries
+		 * in the item
+		 */
+		RFALSE(ih_entry_count(ih) < entry_num,
 		       "vs-9010: entry number is too big %d (%d)",
-		       entry_num, I_ENTRY_COUNT(ih));
+		       entry_num, ih_entry_count(ih));
 
+		/*
+		 * go through all entries in the directory item beginning
+		 * from the entry, that has been found
+		 */
 		if (search_res == POSITION_FOUND
-		    || entry_num < I_ENTRY_COUNT(ih)) {
-			/* go through all entries in the directory item beginning from the entry, that has been found */
+		    || entry_num < ih_entry_count(ih)) {
 			struct reiserfs_de_head *deh =
 			    B_I_DEH(bh, ih) + entry_num;
 
-			for (; entry_num < I_ENTRY_COUNT(ih);
+			for (; entry_num < ih_entry_count(ih);
 			     entry_num++, deh++) {
 				int d_reclen;
 				char *d_name;
 				ino_t d_ino;
 				loff_t cur_pos = deh_offset(deh);
 
+				/* it is hidden entry */
 				if (!de_visible(deh))
-					/* it is hidden entry */
 					continue;
 				d_reclen = entry_length(bh, ih, entry_num);
 				d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
 
 				if (d_reclen <= 0 ||
 				    d_name + d_reclen > bh->b_data + bh->b_size) {
-					/* There is corrupted data in entry,
-					 * We'd better stop here */
+					/*
+					 * There is corrupted data in entry,
+					 * We'd better stop here
+					 */
 					pathrelse(&path_to_entry);
 					ret = -EIO;
 					goto out;
@@ -145,10 +163,10 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 				if (!d_name[d_reclen - 1])
 					d_reclen = strlen(d_name);
 
+				/* too big to send back to VFS */
 				if (d_reclen >
 				    REISERFS_MAX_NAME(inode->i_sb->
 						      s_blocksize)) {
-					/* too big to send back to VFS */
 					continue;
 				}
 
@@ -173,10 +191,14 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 					goto research;
 				}
 			}
-			// Note, that we copy name to user space via temporary
-			// buffer (local_buf) because filldir will block if
-			// user space buffer is swapped out. At that time
-			// entry can move to somewhere else
+
+			/*
+			 * Note, that we copy name to user space via
+			 * temporary buffer (local_buf) because
+			 * filldir will block if user space buffer is
+			 * swapped out. At that time entry can move to
+			 * somewhere else
+			 */
 			memcpy(local_buf, d_name, d_reclen);
 
 			/*
@@ -209,22 +231,26 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 		}	/* for */
 	}
 
+	/* end of directory has been reached */
 	if (item_num != B_NR_ITEMS(bh) - 1)
-		// end of directory has been reached
 		goto end;
 
-	/* item we went through is last item of node. Using right
-	   delimiting key check is it directory end */
+	/*
+	 * item we went through is last item of node. Using right
+	 * delimiting key check is it directory end
+	 */
 	rkey = get_rkey(&path_to_entry, inode->i_sb);
 	if (!comp_le_keys(rkey, &MIN_KEY)) {
-		/* set pos_key to key, that is the smallest and greater
-		   that key of the last entry in the item */
+		/*
+		 * set pos_key to key, that is the smallest and greater
+		 * that key of the last entry in the item
+		 */
 		set_cpu_key_k_offset(&pos_key, next_pos);
 		continue;
 	}
 
+	/* end of directory has been reached */
 	if (COMP_SHORT_KEYS(rkey, &pos_key)) {
-		// end of directory has been reached
 		goto end;
 	}
 
@@ -248,71 +274,73 @@ static int reiserfs_readdir(struct file *file, struct dir_context *ctx)
 	return reiserfs_readdir_inode(file_inode(file), ctx);
 }
 
-/* compose directory item containing "." and ".." entries (entries are
-   not aligned to 4 byte boundary) */
-/* the last four params are LE */
+/*
+ * compose directory item containing "." and ".." entries (entries are
+ * not aligned to 4 byte boundary)
+ */
 void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
 			    __le32 par_dirid, __le32 par_objid)
 {
-	struct reiserfs_de_head *deh;
+	struct reiserfs_de_head *dot, *dotdot;
 
 	memset(body, 0, EMPTY_DIR_SIZE_V1);
-	deh = (struct reiserfs_de_head *)body;
+	dot = (struct reiserfs_de_head *)body;
+	dotdot = dot + 1;
 
 	/* direntry header of "." */
-	put_deh_offset(&(deh[0]), DOT_OFFSET);
+	put_deh_offset(dot, DOT_OFFSET);
 	/* these two are from make_le_item_head, and are are LE */
-	deh[0].deh_dir_id = dirid;
-	deh[0].deh_objectid = objid;
-	deh[0].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen("."));
-	mark_de_visible(&(deh[0]));
+	dot->deh_dir_id = dirid;
+	dot->deh_objectid = objid;
+	dot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dot, EMPTY_DIR_SIZE_V1 - strlen("."));
+	mark_de_visible(dot);
 
 	/* direntry header of ".." */
-	put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
+	put_deh_offset(dotdot, DOT_DOT_OFFSET);
 	/* key of ".." for the root directory */
 	/* these two are from the inode, and are are LE */
-	deh[1].deh_dir_id = par_dirid;
-	deh[1].deh_objectid = par_objid;
-	deh[1].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen(".."));
-	mark_de_visible(&(deh[1]));
+	dotdot->deh_dir_id = par_dirid;
+	dotdot->deh_objectid = par_objid;
+	dotdot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dotdot, deh_location(dot) - strlen(".."));
+	mark_de_visible(dotdot);
 
 	/* copy ".." and "." */
-	memcpy(body + deh_location(&(deh[0])), ".", 1);
-	memcpy(body + deh_location(&(deh[1])), "..", 2);
+	memcpy(body + deh_location(dot), ".", 1);
+	memcpy(body + deh_location(dotdot), "..", 2);
 }
 
 /* compose directory item containing "." and ".." entries */
 void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
 			 __le32 par_dirid, __le32 par_objid)
 {
-	struct reiserfs_de_head *deh;
+	struct reiserfs_de_head *dot, *dotdot;
 
 	memset(body, 0, EMPTY_DIR_SIZE);
-	deh = (struct reiserfs_de_head *)body;
+	dot = (struct reiserfs_de_head *)body;
+	dotdot = dot + 1;
 
 	/* direntry header of "." */
-	put_deh_offset(&(deh[0]), DOT_OFFSET);
+	put_deh_offset(dot, DOT_OFFSET);
 	/* these two are from make_le_item_head, and are are LE */
-	deh[0].deh_dir_id = dirid;
-	deh[0].deh_objectid = objid;
-	deh[0].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
-	mark_de_visible(&(deh[0]));
+	dot->deh_dir_id = dirid;
+	dot->deh_objectid = objid;
+	dot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dot, EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
+	mark_de_visible(dot);
 
 	/* direntry header of ".." */
-	put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
+	put_deh_offset(dotdot, DOT_DOT_OFFSET);
 	/* key of ".." for the root directory */
 	/* these two are from the inode, and are are LE */
-	deh[1].deh_dir_id = par_dirid;
-	deh[1].deh_objectid = par_objid;
-	deh[1].deh_state = 0;	/* Endian safe if 0 */
-	put_deh_location(&(deh[1]),
-			 deh_location(&(deh[0])) - ROUND_UP(strlen("..")));
-	mark_de_visible(&(deh[1]));
+	dotdot->deh_dir_id = par_dirid;
+	dotdot->deh_objectid = par_objid;
+	dotdot->deh_state = 0;	/* Endian safe if 0 */
+	put_deh_location(dotdot, deh_location(dot) - ROUND_UP(strlen("..")));
+	mark_de_visible(dotdot);
 
 	/* copy ".." and "." */
-	memcpy(body + deh_location(&(deh[0])), ".", 1);
-	memcpy(body + deh_location(&(deh[1])), "..", 2);
+	memcpy(body + deh_location(dot), ".", 1);
+	memcpy(body + deh_location(dotdot), "..", 2);
 }
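The dot/dotdot rename leaves the on-disk layout untouched: the entry heads sit at the front of the item, the names are packed at its tail, and deh_location records each name's byte offset, which is why "." lands in the very last byte and ".." just before it. A worked sketch of the v1 arithmetic, assuming the 16-byte reiserfs_de_head from the on-disk format:

#include <stdio.h>
#include <string.h>

#define DEH_SIZE		16	/* sizeof(struct reiserfs_de_head) */
#define EMPTY_DIR_SIZE_V1	(DEH_SIZE * 2 + strlen(".") + strlen(".."))

int main(void)
{
	/* same arithmetic as make_empty_dir_item_v1() above */
	size_t dot_loc = EMPTY_DIR_SIZE_V1 - strlen(".");
	size_t dotdot_loc = dot_loc - strlen("..");

	/* 35-byte item: "." occupies byte 34, ".." occupies bytes 32-33 */
	printf("item %zu bytes, '.' at %zu, '..' at %zu\n",
	       (size_t)EMPTY_DIR_SIZE_V1, dot_loc, dotdot_loc);
	return 0;
}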
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 9a3c68cf6026..54fdf196bfb2 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c | |||
@@ -2,18 +2,13 @@ | |||
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /* Now we have all buffers that must be used in balancing of the tree */ | 5 | /* |
6 | /* Further calculations can not cause schedule(), and thus the buffer */ | 6 | * Now we have all buffers that must be used in balancing of the tree |
7 | /* tree will be stable until the balancing will be finished */ | 7 | * Further calculations can not cause schedule(), and thus the buffer |
8 | /* balance the tree according to the analysis made before, */ | 8 | * tree will be stable until the balancing will be finished |
9 | /* and using buffers obtained after all above. */ | 9 | * balance the tree according to the analysis made before, |
10 | 10 | * and using buffers obtained after all above. | |
11 | /** | 11 | */ |
12 | ** balance_leaf_when_delete | ||
13 | ** balance_leaf | ||
14 | ** do_balance | ||
15 | ** | ||
16 | **/ | ||
17 | 12 | ||
18 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
19 | #include <linux/time.h> | 14 | #include <linux/time.h> |
@@ -61,48 +56,190 @@ static inline void buffer_info_init_bh(struct tree_balance *tb, | |||
61 | inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, | 56 | inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, |
62 | struct buffer_head *bh, int flag) | 57 | struct buffer_head *bh, int flag) |
63 | { | 58 | { |
64 | journal_mark_dirty(tb->transaction_handle, | 59 | journal_mark_dirty(tb->transaction_handle, bh); |
65 | tb->transaction_handle->t_super, bh); | ||
66 | } | 60 | } |
67 | 61 | ||
68 | #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty | 62 | #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty |
69 | #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty | 63 | #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty |
70 | 64 | ||
71 | /* summary: | 65 | /* |
72 | if deleting something ( tb->insert_size[0] < 0 ) | 66 | * summary: |
73 | return(balance_leaf_when_delete()); (flag d handled here) | 67 | * if deleting something ( tb->insert_size[0] < 0 ) |
74 | else | 68 | * return(balance_leaf_when_delete()); (flag d handled here) |
75 | if lnum is larger than 0 we put items into the left node | 69 | * else |
76 | if rnum is larger than 0 we put items into the right node | 70 | * if lnum is larger than 0 we put items into the left node |
77 | if snum1 is larger than 0 we put items into the new node s1 | 71 | * if rnum is larger than 0 we put items into the right node |
78 | if snum2 is larger than 0 we put items into the new node s2 | 72 | * if snum1 is larger than 0 we put items into the new node s1 |
79 | Note that all *num* count new items being created. | 73 | * if snum2 is larger than 0 we put items into the new node s2 |
80 | 74 | * Note that all *num* count new items being created. | |
81 | It would be easier to read balance_leaf() if each of these summary | 75 | */ |
82 | lines was a separate procedure rather than being inlined. I think | 76 | |
83 | that there are many passages here and in balance_leaf_when_delete() in | 77 | static void balance_leaf_when_delete_del(struct tree_balance *tb) |
84 | which two calls to one procedure can replace two passages, and it | 78 | { |
85 | might save cache space and improve software maintenance costs to do so. | 79 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
86 | 80 | int item_pos = PATH_LAST_POSITION(tb->tb_path); | |
87 | Vladimir made the perceptive comment that we should offload most of | 81 | struct buffer_info bi; |
88 | the decision making in this function into fix_nodes/check_balance, and | 82 | #ifdef CONFIG_REISERFS_CHECK |
89 | then create some sort of structure in tb that says what actions should | 83 | struct item_head *ih = item_head(tbS0, item_pos); |
90 | be performed by do_balance. | 84 | #endif |
91 | 85 | ||
92 | -Hans */ | 86 | RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], |
93 | 87 | "vs-12013: mode Delete, insert size %d, ih to be deleted %h", | |
94 | /* Balance leaf node in case of delete or cut: insert_size[0] < 0 | 88 | -tb->insert_size[0], ih); |
89 | |||
90 | buffer_info_init_tbS0(tb, &bi); | ||
91 | leaf_delete_items(&bi, 0, item_pos, 1, -1); | ||
92 | |||
93 | if (!item_pos && tb->CFL[0]) { | ||
94 | if (B_NR_ITEMS(tbS0)) { | ||
95 | replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); | ||
96 | } else { | ||
97 | if (!PATH_H_POSITION(tb->tb_path, 1)) | ||
98 | replace_key(tb, tb->CFL[0], tb->lkey[0], | ||
99 | PATH_H_PPARENT(tb->tb_path, 0), 0); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | RFALSE(!item_pos && !tb->CFL[0], | ||
104 | "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], | ||
105 | tb->L[0]); | ||
106 | } | ||
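
balance_leaf_when_delete_del() removes one item and, when the leftmost item of S[0] disappears, refreshes the delimiting key held in the common parent via replace_key(). A minimal array-based model of that invariant follows; the toy leaf of sorted integer keys is purely illustrative, not the reiserfs API:

#include <stdio.h>

/* toy leaf: sorted keys only */
struct leaf { int key[8]; int nr; };

/* delete the item at pos; return the new first key so the caller can
 * fix the delimiting key in the parent, mirroring replace_key() */
static int leaf_delete(struct leaf *l, int pos)
{
	for (int i = pos; i < l->nr - 1; i++)
		l->key[i] = l->key[i + 1];
	l->nr--;
	return l->nr ? l->key[0] : -1;
}

int main(void)
{
	struct leaf l = { { 10, 20, 30 }, 3 };
	int delim = l.key[0];           /* parent's copy of the first key */

	int first = leaf_delete(&l, 0); /* delete the leftmost item */
	if (first >= 0)
		delim = first;          /* replace_key() analogue */

	printf("delimiting key is now %d\n", delim);    /* 20 */
	return 0;
}
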
107 | |||
108 | /* cut item in S[0] */ | ||
109 | static void balance_leaf_when_delete_cut(struct tree_balance *tb) | ||
110 | { | ||
111 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
112 | int item_pos = PATH_LAST_POSITION(tb->tb_path); | ||
113 | struct item_head *ih = item_head(tbS0, item_pos); | ||
114 | int pos_in_item = tb->tb_path->pos_in_item; | ||
115 | struct buffer_info bi; | ||
116 | buffer_info_init_tbS0(tb, &bi); | ||
117 | |||
118 | if (is_direntry_le_ih(ih)) { | ||
119 | /* | ||
120 | * UFS unlink semantics are such that you can only | ||
121 | * delete one directory entry at a time. | ||
122 | * | ||
123 | * when we cut a directory tb->insert_size[0] means | ||
124 | * number of entries to be cut (always 1) | ||
125 | */ | ||
126 | tb->insert_size[0] = -1; | ||
127 | leaf_cut_from_buffer(&bi, item_pos, pos_in_item, | ||
128 | -tb->insert_size[0]); | ||
129 | |||
130 | RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], | ||
131 | "PAP-12030: can not change delimiting key. CFL[0]=%p", | ||
132 | tb->CFL[0]); | ||
133 | |||
134 | if (!item_pos && !pos_in_item && tb->CFL[0]) | ||
135 | replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); | ||
136 | } else { | ||
137 | leaf_cut_from_buffer(&bi, item_pos, pos_in_item, | ||
138 | -tb->insert_size[0]); | ||
139 | |||
140 | RFALSE(!ih_item_len(ih), | ||
141 | "PAP-12035: cut must leave non-zero dynamic " | ||
142 | "length of item"); | ||
143 | } | ||
144 | } | ||
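
For the M_CUT path, leaf_cut_from_buffer() trims -insert_size[0] bytes (or, for a directory, exactly one entry) out of the item. The byte case is plain buffer surgery; a standalone sketch of cutting cut bytes at offset pos from an item body is shown below. This is illustrative only: the real helper also fixes up the item header and the node's free space.

#include <stdio.h>
#include <string.h>

/* cut 'cut' bytes starting at 'pos' out of an item of length *len */
static void item_cut(char *item, int *len, int pos, int cut)
{
	memmove(item + pos, item + pos + cut, *len - pos - cut);
	*len -= cut;
}

int main(void)
{
	char item[16] = "abcdefgh";
	int len = 8;

	item_cut(item, &len, 3, 2);     /* drop "de" */
	item[len] = '\0';
	printf("%s (len=%d)\n", item, len);     /* abcfgh (len=6) */
	return 0;
}
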
145 | |||
146 | static int balance_leaf_when_delete_left(struct tree_balance *tb) | ||
147 | { | ||
148 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
149 | int n = B_NR_ITEMS(tbS0); | ||
150 | |||
151 | /* L[0] must be joined with S[0] */ | ||
152 | if (tb->lnum[0] == -1) { | ||
153 | /* R[0] must be also joined with S[0] */ | ||
154 | if (tb->rnum[0] == -1) { | ||
155 | if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { | ||
156 | /* | ||
157 | * all contents of all the | ||
158 | * 3 buffers will be in L[0] | ||
159 | */ | ||
160 | if (PATH_H_POSITION(tb->tb_path, 1) == 0 && | ||
161 | 1 < B_NR_ITEMS(tb->FR[0])) | ||
162 | replace_key(tb, tb->CFL[0], | ||
163 | tb->lkey[0], tb->FR[0], 1); | ||
164 | |||
165 | leaf_move_items(LEAF_FROM_S_TO_L, tb, n, -1, | ||
166 | NULL); | ||
167 | leaf_move_items(LEAF_FROM_R_TO_L, tb, | ||
168 | B_NR_ITEMS(tb->R[0]), -1, | ||
169 | NULL); | ||
170 | |||
171 | reiserfs_invalidate_buffer(tb, tbS0); | ||
172 | reiserfs_invalidate_buffer(tb, tb->R[0]); | ||
173 | |||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | /* all contents of all the 3 buffers will be in R[0] */ | ||
178 | leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, NULL); | ||
179 | leaf_move_items(LEAF_FROM_L_TO_R, tb, | ||
180 | B_NR_ITEMS(tb->L[0]), -1, NULL); | ||
181 | |||
182 | /* right_delimiting_key is correct in R[0] */ | ||
183 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); | ||
184 | |||
185 | reiserfs_invalidate_buffer(tb, tbS0); | ||
186 | reiserfs_invalidate_buffer(tb, tb->L[0]); | ||
187 | |||
188 | return -1; | ||
189 | } | ||
190 | |||
191 | RFALSE(tb->rnum[0] != 0, | ||
192 | "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); | ||
193 | /* all contents of L[0] and S[0] will be in L[0] */ | ||
194 | leaf_shift_left(tb, n, -1); | ||
195 | |||
196 | reiserfs_invalidate_buffer(tb, tbS0); | ||
197 | |||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * a part of contents of S[0] will be in L[0] and | ||
203 | * the rest part of S[0] will be in R[0] | ||
204 | */ | ||
205 | |||
206 | RFALSE((tb->lnum[0] + tb->rnum[0] < n) || | ||
207 | (tb->lnum[0] + tb->rnum[0] > n + 1), | ||
208 | "PAP-12050: rnum(%d) and lnum(%d) and item " | ||
209 | "number(%d) in S[0] are not consistent", | ||
210 | tb->rnum[0], tb->lnum[0], n); | ||
211 | RFALSE((tb->lnum[0] + tb->rnum[0] == n) && | ||
212 | (tb->lbytes != -1 || tb->rbytes != -1), | ||
213 | "PAP-12055: bad rbytes (%d)/lbytes (%d) " | ||
214 | "parameters when items are not split", | ||
215 | tb->rbytes, tb->lbytes); | ||
216 | RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && | ||
217 | (tb->lbytes < 1 || tb->rbytes != -1), | ||
218 | "PAP-12060: bad rbytes (%d)/lbytes (%d) " | ||
219 | "parameters when items are split", | ||
220 | tb->rbytes, tb->lbytes); | ||
221 | |||
222 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
223 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
224 | |||
225 | reiserfs_invalidate_buffer(tb, tbS0); | ||
226 | |||
227 | return 0; | ||
228 | } | ||
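
When both neighbors must absorb an emptied S[0] (lnum[0] == rnum[0] == -1), balance_leaf_when_delete_left() picks a direction: everything moves into L[0] if FR[0] is S[0]'s own parent, otherwise everything moves into R[0] and the right delimiting key is refreshed. A compact model of just that decision, with an illustrative enum rather than kernel types:

#include <stdio.h>

enum merge_dir { MERGE_ALL_LEFT, MERGE_ALL_RIGHT, MERGE_LEFT_ONLY };

/* lnum/rnum == -1 means "join that neighbor with S[0]" */
static enum merge_dir pick_direction(int lnum, int rnum, int fr_is_parent)
{
	if (lnum == -1 && rnum == -1)
		return fr_is_parent ? MERGE_ALL_LEFT : MERGE_ALL_RIGHT;
	return MERGE_LEFT_ONLY;         /* lnum == -1, rnum == 0 */
}

int main(void)
{
	printf("%d\n", pick_direction(-1, -1, 1));  /* 0: all into L[0] */
	printf("%d\n", pick_direction(-1, -1, 0));  /* 1: all into R[0] */
	printf("%d\n", pick_direction(-1,  0, 0));  /* 2: L[0] + S[0] */
	return 0;
}
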
229 | |||
230 | /* | ||
231 | * Balance leaf node in case of delete or cut: insert_size[0] < 0 | ||
95 | * | 232 | * |
96 | * lnum, rnum can have values >= -1 | 233 | * lnum, rnum can have values >= -1 |
97 | * -1 means that the neighbor must be joined with S | 234 | * -1 means that the neighbor must be joined with S |
98 | * 0 means that nothing should be done with the neighbor | 235 | * 0 means that nothing should be done with the neighbor |
99 | * >0 means to shift entirely or partly the specified number of items to the neighbor | 236 | * >0 means to shift entirely or partly the specified number of items |
237 | * to the neighbor | ||
100 | */ | 238 | */ |
101 | static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | 239 | static int balance_leaf_when_delete(struct tree_balance *tb, int flag) |
102 | { | 240 | { |
103 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | 241 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
104 | int item_pos = PATH_LAST_POSITION(tb->tb_path); | 242 | int item_pos = PATH_LAST_POSITION(tb->tb_path); |
105 | int pos_in_item = tb->tb_path->pos_in_item; | ||
106 | struct buffer_info bi; | 243 | struct buffer_info bi; |
107 | int n; | 244 | int n; |
108 | struct item_head *ih; | 245 | struct item_head *ih; |
@@ -114,1022 +251,1202 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | |||
114 | RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), | 251 | RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), |
115 | "PAP-12010: tree can not be empty"); | 252 | "PAP-12010: tree can not be empty"); |
116 | 253 | ||
117 | ih = B_N_PITEM_HEAD(tbS0, item_pos); | 254 | ih = item_head(tbS0, item_pos); |
118 | buffer_info_init_tbS0(tb, &bi); | 255 | buffer_info_init_tbS0(tb, &bi); |
119 | 256 | ||
120 | /* Delete or truncate the item */ | 257 | /* Delete or truncate the item */ |
121 | 258 | ||
122 | switch (flag) { | 259 | BUG_ON(flag != M_DELETE && flag != M_CUT); |
123 | case M_DELETE: /* delete item in S[0] */ | 260 | if (flag == M_DELETE) |
261 | balance_leaf_when_delete_del(tb); | ||
262 | else /* M_CUT */ | ||
263 | balance_leaf_when_delete_cut(tb); | ||
124 | 264 | ||
125 | RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], | ||
126 | "vs-12013: mode Delete, insert size %d, ih to be deleted %h", | ||
127 | -tb->insert_size[0], ih); | ||
128 | 265 | ||
129 | leaf_delete_items(&bi, 0, item_pos, 1, -1); | 266 | /* |
267 | * the rule is that no shifting occurs unless by shifting | ||
268 | * a node can be freed | ||
269 | */ | ||
270 | n = B_NR_ITEMS(tbS0); | ||
130 | 271 | ||
131 | if (!item_pos && tb->CFL[0]) { | ||
132 | if (B_NR_ITEMS(tbS0)) { | ||
133 | replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, | ||
134 | 0); | ||
135 | } else { | ||
136 | if (!PATH_H_POSITION(tb->tb_path, 1)) | ||
137 | replace_key(tb, tb->CFL[0], tb->lkey[0], | ||
138 | PATH_H_PPARENT(tb->tb_path, | ||
139 | 0), 0); | ||
140 | } | ||
141 | } | ||
142 | 272 | ||
143 | RFALSE(!item_pos && !tb->CFL[0], | 273 | /* L[0] takes part in balancing */ |
144 | "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], | 274 | if (tb->lnum[0]) |
145 | tb->L[0]); | 275 | return balance_leaf_when_delete_left(tb); |
146 | 276 | ||
147 | break; | 277 | if (tb->rnum[0] == -1) { |
278 | /* all contents of R[0] and S[0] will be in R[0] */ | ||
279 | leaf_shift_right(tb, n, -1); | ||
280 | reiserfs_invalidate_buffer(tb, tbS0); | ||
281 | return 0; | ||
282 | } | ||
148 | 283 | ||
149 | case M_CUT:{ /* cut item in S[0] */ | 284 | RFALSE(tb->rnum[0], |
150 | if (is_direntry_le_ih(ih)) { | 285 | "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); |
286 | return 0; | ||
287 | } | ||
151 | 288 | ||
152 | /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ | 289 | static void balance_leaf_insert_left(struct tree_balance *tb, |
153 | /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ | 290 | struct item_head *ih, const char *body) |
154 | tb->insert_size[0] = -1; | 291 | { |
155 | leaf_cut_from_buffer(&bi, item_pos, pos_in_item, | 292 | int ret; |
156 | -tb->insert_size[0]); | 293 | struct buffer_info bi; |
294 | int n = B_NR_ITEMS(tb->L[0]); | ||
295 | |||
296 | if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { | ||
297 | /* part of new item falls into L[0] */ | ||
298 | int new_item_len, shift; | ||
299 | int version; | ||
300 | |||
301 | ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1); | ||
302 | |||
303 | /* Calculate item length to insert to S[0] */ | ||
304 | new_item_len = ih_item_len(ih) - tb->lbytes; | ||
305 | |||
306 | /* Calculate and check item length to insert to L[0] */ | ||
307 | put_ih_item_len(ih, ih_item_len(ih) - new_item_len); | ||
308 | |||
309 | RFALSE(ih_item_len(ih) <= 0, | ||
310 | "PAP-12080: there is nothing to insert into L[0]: " | ||
311 | "ih_item_len=%d", ih_item_len(ih)); | ||
312 | |||
313 | /* Insert new item into L[0] */ | ||
314 | buffer_info_init_left(tb, &bi); | ||
315 | leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body, | ||
316 | min_t(int, tb->zeroes_num, ih_item_len(ih))); | ||
317 | |||
318 | version = ih_version(ih); | ||
319 | |||
320 | /* | ||
321 | * Calculate key component, item length and body to | ||
322 | * insert into S[0] | ||
323 | */ | ||
324 | shift = 0; | ||
325 | if (is_indirect_le_ih(ih)) | ||
326 | shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; | ||
327 | |||
328 | add_le_ih_k_offset(ih, tb->lbytes << shift); | ||
329 | |||
330 | put_ih_item_len(ih, new_item_len); | ||
331 | if (tb->lbytes > tb->zeroes_num) { | ||
332 | body += (tb->lbytes - tb->zeroes_num); | ||
333 | tb->zeroes_num = 0; | ||
334 | } else | ||
335 | tb->zeroes_num -= tb->lbytes; | ||
336 | |||
337 | RFALSE(ih_item_len(ih) <= 0, | ||
338 | "PAP-12085: there is nothing to insert into S[0]: " | ||
339 | "ih_item_len=%d", ih_item_len(ih)); | ||
340 | } else { | ||
341 | /* new item in whole falls into L[0] */ | ||
342 | /* Shift lnum[0]-1 items to L[0] */ | ||
343 | ret = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); | ||
344 | |||
345 | /* Insert new item into L[0] */ | ||
346 | buffer_info_init_left(tb, &bi); | ||
347 | leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body, | ||
348 | tb->zeroes_num); | ||
349 | tb->insert_size[0] = 0; | ||
350 | tb->zeroes_num = 0; | ||
351 | } | ||
352 | } | ||
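
balance_leaf_insert_left() splits the new item when only lbytes of it fit into L[0]: the left fragment keeps the original key, and the fragment left for S[0] gets its key offset advanced by lbytes, scaled from item bytes to file bytes for indirect items. A standalone sketch of that arithmetic, assuming a 4 KiB block and 4-byte unformatted-node pointers (so shift = 12 - 2 = 10); nothing here is the kernel API:

#include <stdio.h>

/* Split an item of 'len' item-bytes at 'lbytes': the left part goes to
 * L[0], the right part stays in S[0] with its file offset advanced.
 * 'shift' is 0 for direct items; for indirect items it converts
 * item bytes (block pointers) into file bytes. */
static void split_item(long offset, int len, int lbytes, int shift)
{
	int left_len = lbytes;
	int right_len = len - lbytes;
	long right_offset = offset + ((long)lbytes << shift);

	printf("L[0]: offset %ld, len %d; S[0]: offset %ld, len %d\n",
	       offset, left_len, right_offset, right_len);
}

int main(void)
{
	split_item(1, 100, 40, 0);      /* direct item: offsets in bytes */
	split_item(1, 100, 40, 10);     /* indirect: 4KiB blocks, 4-byte ptrs */
	return 0;
}
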
157 | 353 | ||
158 | RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], | 354 | static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb, |
159 | "PAP-12030: can not change delimiting key. CFL[0]=%p", | 355 | struct item_head *ih, |
160 | tb->CFL[0]); | 356 | const char *body) |
357 | { | ||
358 | int n = B_NR_ITEMS(tb->L[0]); | ||
359 | struct buffer_info bi; | ||
161 | 360 | ||
162 | if (!item_pos && !pos_in_item && tb->CFL[0]) { | 361 | RFALSE(tb->zeroes_num, |
163 | replace_key(tb, tb->CFL[0], tb->lkey[0], | 362 | "PAP-12090: invalid parameter in case of a directory"); |
164 | tbS0, 0); | 363 | |
165 | } | 364 | /* directory item */ |
166 | } else { | 365 | if (tb->lbytes > tb->pos_in_item) { |
167 | leaf_cut_from_buffer(&bi, item_pos, pos_in_item, | 366 | /* new directory entry falls into L[0] */ |
168 | -tb->insert_size[0]); | 367 | struct item_head *pasted; |
368 | int ret, l_pos_in_item = tb->pos_in_item; | ||
369 | |||
370 | /* | ||
371 | * Shift lnum[0] - 1 items in whole. | ||
372 | * Shift lbytes - 1 entries from given directory item | ||
373 | */ | ||
374 | ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); | ||
375 | if (ret && !tb->item_pos) { | ||
376 | pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); | ||
377 | l_pos_in_item += ih_entry_count(pasted) - | ||
378 | (tb->lbytes - 1); | ||
379 | } | ||
169 | 380 | ||
170 | RFALSE(!ih_item_len(ih), | 381 | /* Append given directory entry to directory item */ |
171 | "PAP-12035: cut must leave non-zero dynamic length of item"); | 382 | buffer_info_init_left(tb, &bi); |
172 | } | 383 | leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, |
173 | break; | 384 | l_pos_in_item, tb->insert_size[0], |
385 | body, tb->zeroes_num); | ||
386 | |||
387 | /* | ||
388 | * the previous call prepared space for pasting the new entry; | ||
389 | * the following call pastes the entry itself | ||
390 | */ | ||
391 | |||
392 | /* | ||
393 | * when the directory item has been merged, pos_in_item | ||
394 | * has been adjusted as well | ||
395 | */ | ||
396 | |||
397 | /* paste new directory entry. 1 is entry number */ | ||
398 | leaf_paste_entries(&bi, n + tb->item_pos - ret, | ||
399 | l_pos_in_item, 1, | ||
400 | (struct reiserfs_de_head *) body, | ||
401 | body + DEH_SIZE, tb->insert_size[0]); | ||
402 | tb->insert_size[0] = 0; | ||
403 | } else { | ||
404 | /* new directory item doesn't fall into L[0] */ | ||
405 | /* | ||
406 | * Shift lnum[0]-1 items in whole. Shift lbytes | ||
407 | * directory entries from directory item number lnum[0] | ||
408 | */ | ||
409 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
410 | } | ||
411 | |||
412 | /* Calculate new position to append in item body */ | ||
413 | tb->pos_in_item -= tb->lbytes; | ||
414 | } | ||
415 | |||
416 | static void balance_leaf_paste_left_shift(struct tree_balance *tb, | ||
417 | struct item_head *ih, | ||
418 | const char *body) | ||
419 | { | ||
420 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
421 | int n = B_NR_ITEMS(tb->L[0]); | ||
422 | struct buffer_info bi; | ||
423 | |||
424 | if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { | ||
425 | balance_leaf_paste_left_shift_dirent(tb, ih, body); | ||
426 | return; | ||
427 | } | ||
428 | |||
429 | RFALSE(tb->lbytes <= 0, | ||
430 | "PAP-12095: there is nothing to shift to L[0]. " | ||
431 | "lbytes=%d", tb->lbytes); | ||
432 | RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)), | ||
433 | "PAP-12100: incorrect position to paste: " | ||
434 | "item_len=%d, pos_in_item=%d", | ||
435 | ih_item_len(item_head(tbS0, tb->item_pos)), tb->pos_in_item); | ||
436 | |||
437 | /* appended item will be in L[0] in whole */ | ||
438 | if (tb->lbytes >= tb->pos_in_item) { | ||
439 | struct item_head *tbS0_pos_ih, *tbL0_ih; | ||
440 | struct item_head *tbS0_0_ih; | ||
441 | struct reiserfs_key *left_delim_key; | ||
442 | int ret, l_n, version, temp_l; | ||
443 | |||
444 | tbS0_pos_ih = item_head(tbS0, tb->item_pos); | ||
445 | tbS0_0_ih = item_head(tbS0, 0); | ||
446 | |||
447 | /* | ||
448 | * this bytes number must be appended | ||
449 | * to the last item of L[h] | ||
450 | */ | ||
451 | l_n = tb->lbytes - tb->pos_in_item; | ||
452 | |||
453 | /* Calculate new insert_size[0] */ | ||
454 | tb->insert_size[0] -= l_n; | ||
455 | |||
456 | RFALSE(tb->insert_size[0] <= 0, | ||
457 | "PAP-12105: there is nothing to paste into " | ||
458 | "L[0]. insert_size=%d", tb->insert_size[0]); | ||
459 | |||
460 | ret = leaf_shift_left(tb, tb->lnum[0], | ||
461 | ih_item_len(tbS0_pos_ih)); | ||
462 | |||
463 | tbL0_ih = item_head(tb->L[0], n + tb->item_pos - ret); | ||
464 | |||
465 | /* Append to body of item in L[0] */ | ||
466 | buffer_info_init_left(tb, &bi); | ||
467 | leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, | ||
468 | ih_item_len(tbL0_ih), l_n, body, | ||
469 | min_t(int, l_n, tb->zeroes_num)); | ||
470 | |||
471 | /* | ||
472 | * 0-th item in S0 can be only of DIRECT type | ||
473 | * when l_n != 0 | ||
474 | */ | ||
475 | temp_l = l_n; | ||
476 | |||
477 | RFALSE(ih_item_len(tbS0_0_ih), | ||
478 | "PAP-12106: item length must be 0"); | ||
479 | RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key, | ||
480 | leaf_key(tb->L[0], n + tb->item_pos - ret)), | ||
481 | "PAP-12107: items must be of the same file"); | ||
482 | |||
483 | if (is_indirect_le_ih(tbL0_ih)) { | ||
484 | int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; | ||
485 | temp_l = l_n << shift; | ||
174 | } | 486 | } |
487 | /* update key of first item in S0 */ | ||
488 | version = ih_version(tbS0_0_ih); | ||
489 | add_le_key_k_offset(version, &tbS0_0_ih->ih_key, temp_l); | ||
490 | |||
491 | /* update left delimiting key */ | ||
492 | left_delim_key = internal_key(tb->CFL[0], tb->lkey[0]); | ||
493 | add_le_key_k_offset(version, left_delim_key, temp_l); | ||
494 | |||
495 | /* | ||
496 | * Calculate new body, position in item and | ||
497 | * insert_size[0] | ||
498 | */ | ||
499 | if (l_n > tb->zeroes_num) { | ||
500 | body += (l_n - tb->zeroes_num); | ||
501 | tb->zeroes_num = 0; | ||
502 | } else | ||
503 | tb->zeroes_num -= l_n; | ||
504 | tb->pos_in_item = 0; | ||
505 | |||
506 | RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key, | ||
507 | leaf_key(tb->L[0], | ||
508 | B_NR_ITEMS(tb->L[0]) - 1)) || | ||
509 | !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size) || | ||
510 | !op_is_left_mergeable(left_delim_key, tbS0->b_size), | ||
511 | "PAP-12120: item must be merge-able with left " | ||
512 | "neighboring item"); | ||
513 | } else { | ||
514 | /* only part of the appended item will be in L[0] */ | ||
515 | |||
516 | /* Calculate position in item for append in S[0] */ | ||
517 | tb->pos_in_item -= tb->lbytes; | ||
518 | |||
519 | RFALSE(tb->pos_in_item <= 0, | ||
520 | "PAP-12125: no place for paste. pos_in_item=%d", | ||
521 | tb->pos_in_item); | ||
522 | |||
523 | /* | ||
524 | * Shift lnum[0] - 1 items in whole. | ||
525 | * Shift lbytes - 1 byte from item number lnum[0] | ||
526 | */ | ||
527 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
528 | } | ||
529 | } | ||
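
Several paste paths above share one piece of bookkeeping: the logical body begins with zeroes_num implicit zero bytes, and the body pointer addresses only the non-zero tail, so consuming l_n leading bytes either shrinks the zero count or advances the pointer. A minimal standalone model of that rule (the kernel keeps this state in tb->zeroes_num; the names here are illustrative):

#include <stdio.h>

/* consume l_n leading bytes of a logical buffer whose first
 * *zeroes bytes are implicit zeroes and whose remainder is 'body' */
static const char *consume(const char *body, int *zeroes, int l_n)
{
	if (l_n > *zeroes) {
		body += l_n - *zeroes;  /* ate all zeroes plus real bytes */
		*zeroes = 0;
	} else {
		*zeroes -= l_n;         /* still inside the zero prefix */
	}
	return body;
}

int main(void)
{
	const char *body = "payload";
	int zeroes = 4;         /* logical data: 4 zero bytes + "payload" */

	body = consume(body, &zeroes, 2);
	printf("zeroes=%d body=%s\n", zeroes, body);    /* zeroes=2, payload */
	body = consume(body, &zeroes, 5);
	printf("zeroes=%d body=%s\n", zeroes, body);    /* zeroes=0, load */
	return 0;
}
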
175 | 530 | ||
176 | default: | 531 | |
177 | print_cur_tb("12040"); | 532 | /* appended item will be in L[0] in whole */ |
178 | reiserfs_panic(tb->tb_sb, "PAP-12040", | 533 | static void balance_leaf_paste_left_whole(struct tree_balance *tb, |
179 | "unexpected mode: %s(%d)", | 534 | struct item_head *ih, |
180 | (flag == | 535 | const char *body) |
181 | M_PASTE) ? "PASTE" : ((flag == | 536 | { |
182 | M_INSERT) ? "INSERT" : | 537 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
183 | "UNKNOWN"), flag); | 538 | int n = B_NR_ITEMS(tb->L[0]); |
539 | struct buffer_info bi; | ||
540 | struct item_head *pasted; | ||
541 | int ret; | ||
542 | |||
543 | /* if we paste into first item of S[0] and it is left mergeable */ | ||
544 | if (!tb->item_pos && | ||
545 | op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) { | ||
546 | /* | ||
547 | * then increment pos_in_item by the size of the | ||
548 | * last item in L[0] | ||
549 | */ | ||
550 | pasted = item_head(tb->L[0], n - 1); | ||
551 | if (is_direntry_le_ih(pasted)) | ||
552 | tb->pos_in_item += ih_entry_count(pasted); | ||
553 | else | ||
554 | tb->pos_in_item += ih_item_len(pasted); | ||
184 | } | 555 | } |
185 | 556 | ||
186 | /* the rule is that no shifting occurs unless by shifting a node can be freed */ | 557 | /* |
187 | n = B_NR_ITEMS(tbS0); | 558 | * Shift lnum[0] - 1 items in whole. |
188 | if (tb->lnum[0]) { /* L[0] takes part in balancing */ | 559 | * Shift lbytes - 1 byte from item number lnum[0] |
189 | if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ | 560 | */ |
190 | if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ | 561 | ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); |
191 | if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { | 562 | |
192 | /* all contents of all the 3 buffers will be in L[0] */ | 563 | /* Append to body of item in L[0] */ |
193 | if (PATH_H_POSITION(tb->tb_path, 1) == 0 | 564 | buffer_info_init_left(tb, &bi); |
194 | && 1 < B_NR_ITEMS(tb->FR[0])) | 565 | leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, tb->pos_in_item, |
195 | replace_key(tb, tb->CFL[0], | 566 | tb->insert_size[0], body, tb->zeroes_num); |
196 | tb->lkey[0], | 567 | |
197 | tb->FR[0], 1); | 568 | /* if appended item is directory, paste entry */ |
198 | 569 | pasted = item_head(tb->L[0], n + tb->item_pos - ret); | |
199 | leaf_move_items(LEAF_FROM_S_TO_L, tb, n, | 570 | if (is_direntry_le_ih(pasted)) |
200 | -1, NULL); | 571 | leaf_paste_entries(&bi, n + tb->item_pos - ret, |
201 | leaf_move_items(LEAF_FROM_R_TO_L, tb, | 572 | tb->pos_in_item, 1, |
202 | B_NR_ITEMS(tb->R[0]), | 573 | (struct reiserfs_de_head *)body, |
203 | -1, NULL); | 574 | body + DEH_SIZE, tb->insert_size[0]); |
204 | 575 | ||
205 | reiserfs_invalidate_buffer(tb, tbS0); | 576 | /* |
206 | reiserfs_invalidate_buffer(tb, | 577 | * if appended item is indirect item, put unformatted node |
207 | tb->R[0]); | 578 | * into un list |
208 | 579 | */ | |
209 | return 0; | 580 | if (is_indirect_le_ih(pasted)) |
210 | } | 581 | set_ih_free_space(pasted, 0); |
211 | /* all contents of all the 3 buffers will be in R[0] */ | ||
212 | leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, | ||
213 | NULL); | ||
214 | leaf_move_items(LEAF_FROM_L_TO_R, tb, | ||
215 | B_NR_ITEMS(tb->L[0]), -1, NULL); | ||
216 | 582 | ||
217 | /* right_delimiting_key is correct in R[0] */ | 583 | tb->insert_size[0] = 0; |
218 | replace_key(tb, tb->CFR[0], tb->rkey[0], | 584 | tb->zeroes_num = 0; |
219 | tb->R[0], 0); | 585 | } |
220 | 586 | ||
221 | reiserfs_invalidate_buffer(tb, tbS0); | 587 | static void balance_leaf_paste_left(struct tree_balance *tb, |
222 | reiserfs_invalidate_buffer(tb, tb->L[0]); | 588 | struct item_head *ih, const char *body) |
589 | { | ||
590 | /* we must shift the part of the appended item */ | ||
591 | if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) | ||
592 | balance_leaf_paste_left_shift(tb, ih, body); | ||
593 | else | ||
594 | balance_leaf_paste_left_whole(tb, ih, body); | ||
595 | } | ||
223 | 596 | ||
224 | return -1; | 597 | /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ |
225 | } | 598 | static void balance_leaf_left(struct tree_balance *tb, struct item_head *ih, |
599 | const char *body, int flag) | ||
600 | { | ||
601 | if (tb->lnum[0] <= 0) | ||
602 | return; | ||
226 | 603 | ||
227 | RFALSE(tb->rnum[0] != 0, | 604 | /* new item or part of it falls into L[0], shift it too */ |
228 | "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); | 605 | if (tb->item_pos < tb->lnum[0]) { |
229 | /* all contents of L[0] and S[0] will be in L[0] */ | 606 | BUG_ON(flag != M_INSERT && flag != M_PASTE); |
230 | leaf_shift_left(tb, n, -1); | 607 | |
608 | if (flag == M_INSERT) | ||
609 | balance_leaf_insert_left(tb, ih, body); | ||
610 | else /* M_PASTE */ | ||
611 | balance_leaf_paste_left(tb, ih, body); | ||
612 | } else | ||
613 | /* new item doesn't fall into L[0] */ | ||
614 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
615 | } | ||
231 | 616 | ||
232 | reiserfs_invalidate_buffer(tb, tbS0); | ||
233 | 617 | ||
234 | return 0; | 618 | static void balance_leaf_insert_right(struct tree_balance *tb, |
619 | struct item_head *ih, const char *body) | ||
620 | { | ||
621 | |||
622 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
623 | int n = B_NR_ITEMS(tbS0); | ||
624 | struct buffer_info bi; | ||
625 | int ret; | ||
626 | |||
627 | /* new item or part of it doesn't fall into R[0] */ | ||
628 | if (n - tb->rnum[0] >= tb->item_pos) { | ||
629 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
630 | return; | ||
631 | } | ||
632 | |||
633 | /* new item or its part falls to R[0] */ | ||
634 | |||
635 | /* part of new item falls into R[0] */ | ||
636 | if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { | ||
637 | loff_t old_key_comp, old_len, r_zeroes_number; | ||
638 | const char *r_body; | ||
639 | int version, shift; | ||
640 | loff_t offset; | ||
641 | |||
642 | leaf_shift_right(tb, tb->rnum[0] - 1, -1); | ||
643 | |||
644 | version = ih_version(ih); | ||
645 | |||
646 | /* Remember key component and item length */ | ||
647 | old_key_comp = le_ih_k_offset(ih); | ||
648 | old_len = ih_item_len(ih); | ||
649 | |||
650 | /* | ||
651 | * Calculate key component and item length to insert | ||
652 | * into R[0] | ||
653 | */ | ||
654 | shift = 0; | ||
655 | if (is_indirect_le_ih(ih)) | ||
656 | shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; | ||
657 | offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << shift); | ||
658 | set_le_ih_k_offset(ih, offset); | ||
659 | put_ih_item_len(ih, tb->rbytes); | ||
660 | |||
661 | /* Insert part of the item into R[0] */ | ||
662 | buffer_info_init_right(tb, &bi); | ||
663 | if ((old_len - tb->rbytes) > tb->zeroes_num) { | ||
664 | r_zeroes_number = 0; | ||
665 | r_body = body + (old_len - tb->rbytes) - tb->zeroes_num; | ||
666 | } else { | ||
667 | r_body = body; | ||
668 | r_zeroes_number = tb->zeroes_num - | ||
669 | (old_len - tb->rbytes); | ||
670 | tb->zeroes_num -= r_zeroes_number; | ||
235 | } | 671 | } |
236 | /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ | ||
237 | |||
238 | RFALSE((tb->lnum[0] + tb->rnum[0] < n) || | ||
239 | (tb->lnum[0] + tb->rnum[0] > n + 1), | ||
240 | "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", | ||
241 | tb->rnum[0], tb->lnum[0], n); | ||
242 | RFALSE((tb->lnum[0] + tb->rnum[0] == n) && | ||
243 | (tb->lbytes != -1 || tb->rbytes != -1), | ||
244 | "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", | ||
245 | tb->rbytes, tb->lbytes); | ||
246 | RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && | ||
247 | (tb->lbytes < 1 || tb->rbytes != -1), | ||
248 | "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", | ||
249 | tb->rbytes, tb->lbytes); | ||
250 | 672 | ||
251 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | 673 | leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number); |
674 | |||
675 | /* Replace right delimiting key by first key in R[0] */ | ||
676 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); | ||
677 | |||
678 | /* | ||
679 | * Calculate key component and item length to | ||
680 | * insert into S[0] | ||
681 | */ | ||
682 | set_le_ih_k_offset(ih, old_key_comp); | ||
683 | put_ih_item_len(ih, old_len - tb->rbytes); | ||
684 | |||
685 | tb->insert_size[0] -= tb->rbytes; | ||
686 | |||
687 | } else { | ||
688 | /* whole new item falls into R[0] */ | ||
689 | |||
690 | /* Shift rnum[0]-1 items to R[0] */ | ||
691 | ret = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); | ||
692 | |||
693 | /* Insert new item into R[0] */ | ||
694 | buffer_info_init_right(tb, &bi); | ||
695 | leaf_insert_into_buf(&bi, tb->item_pos - n + tb->rnum[0] - 1, | ||
696 | ih, body, tb->zeroes_num); | ||
697 | |||
698 | if (tb->item_pos - n + tb->rnum[0] - 1 == 0) | ||
699 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); | ||
700 | |||
701 | tb->zeroes_num = tb->insert_size[0] = 0; | ||
702 | } | ||
703 | } | ||
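
balance_leaf_insert_right() mirrors the left-hand split: here the tail fragment of rbytes item-bytes moves into R[0], so it is the right fragment whose key offset advances, by old_len - rbytes scaled as before. The same kind of illustrative sketch, adapted; the 4 KiB block and 4-byte pointer figures for the indirect case are assumptions, not taken from this diff:

#include <stdio.h>

/* When only the tail of the new item fits into R[0], the tail fragment
 * of rbytes item-bytes gets its key offset advanced past the part that
 * stays in S[0]; shift converts item bytes to file bytes for indirect
 * items (0 for direct ones). */
static void split_tail_right(long offset, int len, int rbytes, int shift)
{
	long r_offset = offset + ((long)(len - rbytes) << shift);

	printf("S[0]: offset %ld, len %d; R[0]: offset %ld, len %d\n",
	       offset, len - rbytes, r_offset, rbytes);
}

int main(void)
{
	split_tail_right(1, 100, 30, 0);    /* direct item */
	split_tail_right(1, 100, 30, 10);   /* indirect, 4KiB blocks */
	return 0;
}
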
704 | |||
705 | |||
706 | static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb, | ||
707 | struct item_head *ih, const char *body) | ||
708 | { | ||
709 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
710 | struct buffer_info bi; | ||
711 | int entry_count; | ||
712 | |||
713 | RFALSE(tb->zeroes_num, | ||
714 | "PAP-12145: invalid parameter in case of a directory"); | ||
715 | entry_count = ih_entry_count(item_head(tbS0, tb->item_pos)); | ||
716 | |||
717 | /* new directory entry falls into R[0] */ | ||
718 | if (entry_count - tb->rbytes < tb->pos_in_item) { | ||
719 | int paste_entry_position; | ||
720 | |||
721 | RFALSE(tb->rbytes - 1 >= entry_count || !tb->insert_size[0], | ||
722 | "PAP-12150: no enough of entries to shift to R[0]: " | ||
723 | "rbytes=%d, entry_count=%d", tb->rbytes, entry_count); | ||
724 | |||
725 | /* | ||
726 | * Shift rnum[0]-1 items in whole. | ||
727 | * Shift rbytes-1 directory entries from directory | ||
728 | * item number rnum[0] | ||
729 | */ | ||
730 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); | ||
731 | |||
732 | /* Paste given directory entry to directory item */ | ||
733 | paste_entry_position = tb->pos_in_item - entry_count + | ||
734 | tb->rbytes - 1; | ||
735 | buffer_info_init_right(tb, &bi); | ||
736 | leaf_paste_in_buffer(&bi, 0, paste_entry_position, | ||
737 | tb->insert_size[0], body, tb->zeroes_num); | ||
738 | |||
739 | /* paste entry */ | ||
740 | leaf_paste_entries(&bi, 0, paste_entry_position, 1, | ||
741 | (struct reiserfs_de_head *) body, | ||
742 | body + DEH_SIZE, tb->insert_size[0]); | ||
743 | |||
744 | /* change delimiting keys */ | ||
745 | if (paste_entry_position == 0) | ||
746 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); | ||
747 | |||
748 | tb->insert_size[0] = 0; | ||
749 | tb->pos_in_item++; | ||
750 | } else { | ||
751 | /* new directory entry doesn't fall into R[0] */ | ||
252 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | 752 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); |
753 | } | ||
754 | } | ||
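
The dirent branch above recomputes where the new entry lands once the last rbytes - 1 entries of the item have moved into R[0]: pos_in_item - entry_count + rbytes - 1. A standalone check of that index arithmetic with made-up numbers:

#include <stdio.h>

/* After shifting the last (rbytes - 1) entries of an item that has
 * entry_count entries into R[0], an entry that was headed for slot
 * pos_in_item in S[0] lands at this index in R[0]'s first item. */
static int paste_position(int pos_in_item, int entry_count, int rbytes)
{
	return pos_in_item - entry_count + rbytes - 1;
}

int main(void)
{
	/* 10 entries; the last 3 move right (slots 7..9); a new entry
	 * headed for slot 8 lands at index 8 - 7 = 1 among them */
	printf("%d\n", paste_position(8, 10, 4));   /* 1 */
	return 0;
}
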
253 | 755 | ||
254 | reiserfs_invalidate_buffer(tb, tbS0); | 756 | static void balance_leaf_paste_right_shift(struct tree_balance *tb, |
757 | struct item_head *ih, const char *body) | ||
758 | { | ||
759 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
760 | int n_shift, n_rem, r_zeroes_number, version; | ||
761 | unsigned long temp_rem; | ||
762 | const char *r_body; | ||
763 | struct buffer_info bi; | ||
255 | 764 | ||
256 | return 0; | 765 | /* we append to directory item */ |
766 | if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { | ||
767 | balance_leaf_paste_right_shift_dirent(tb, ih, body); | ||
768 | return; | ||
257 | } | 769 | } |
258 | 770 | ||
259 | if (tb->rnum[0] == -1) { | 771 | /* regular object */ |
260 | /* all contents of R[0] and S[0] will be in R[0] */ | 772 | |
261 | leaf_shift_right(tb, n, -1); | 773 | /* |
262 | reiserfs_invalidate_buffer(tb, tbS0); | 774 | * Calculate number of bytes which must be shifted |
263 | return 0; | 775 | * from appended item |
776 | */ | ||
777 | n_shift = tb->rbytes - tb->insert_size[0]; | ||
778 | if (n_shift < 0) | ||
779 | n_shift = 0; | ||
780 | |||
781 | RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)), | ||
782 | "PAP-12155: invalid position to paste. ih_item_len=%d, " | ||
783 | "pos_in_item=%d", tb->pos_in_item, | ||
784 | ih_item_len(item_head(tbS0, tb->item_pos))); | ||
785 | |||
786 | leaf_shift_right(tb, tb->rnum[0], n_shift); | ||
787 | |||
788 | /* | ||
789 | * Calculate number of bytes which must remain in body | ||
790 | * after appending to R[0] | ||
791 | */ | ||
792 | n_rem = tb->insert_size[0] - tb->rbytes; | ||
793 | if (n_rem < 0) | ||
794 | n_rem = 0; | ||
795 | |||
796 | temp_rem = n_rem; | ||
797 | |||
798 | version = ih_version(item_head(tb->R[0], 0)); | ||
799 | |||
800 | if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) { | ||
801 | int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; | ||
802 | temp_rem = n_rem << shift; | ||
264 | } | 803 | } |
265 | 804 | ||
266 | RFALSE(tb->rnum[0], | 805 | add_le_key_k_offset(version, leaf_key(tb->R[0], 0), temp_rem); |
267 | "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); | 806 | add_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]), |
268 | return 0; | 807 | temp_rem); |
808 | |||
809 | do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); | ||
810 | |||
811 | /* Append part of body into R[0] */ | ||
812 | buffer_info_init_right(tb, &bi); | ||
813 | if (n_rem > tb->zeroes_num) { | ||
814 | r_zeroes_number = 0; | ||
815 | r_body = body + n_rem - tb->zeroes_num; | ||
816 | } else { | ||
817 | r_body = body; | ||
818 | r_zeroes_number = tb->zeroes_num - n_rem; | ||
819 | tb->zeroes_num -= r_zeroes_number; | ||
820 | } | ||
821 | |||
822 | leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, | ||
823 | r_body, r_zeroes_number); | ||
824 | |||
825 | if (is_indirect_le_ih(item_head(tb->R[0], 0))) | ||
826 | set_ih_free_space(item_head(tb->R[0], 0), 0); | ||
827 | |||
828 | tb->insert_size[0] = n_rem; | ||
829 | if (!n_rem) | ||
830 | tb->pos_in_item++; | ||
269 | } | 831 | } |
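
For a regular object pasted rightwards, rbytes counts bytes of the combined old-tail-plus-appended data destined for R[0]; n_shift is how much of the existing item tail moves, and n_rem is how much of the appended body still stays behind for S[0]. A standalone sketch of the two clamped differences, with illustrative values:

#include <stdio.h>

/* rbytes: bytes of (existing tail + appended data) destined for R[0];
 * insert_size: bytes being appended. */
static void split_append(int rbytes, int insert_size)
{
	int n_shift = rbytes - insert_size; /* existing bytes moved right */
	int n_rem = insert_size - rbytes;   /* appended bytes left for S[0] */

	if (n_shift < 0)
		n_shift = 0;
	if (n_rem < 0)
		n_rem = 0;

	printf("shift %d existing bytes, paste %d into R[0], %d stay behind\n",
	       n_shift, insert_size - n_rem, n_rem);
}

int main(void)
{
	split_append(10, 6);    /* 4 existing bytes move, all 6 new go right */
	split_append(4, 6);     /* nothing existing moves, 4 go right, 2 stay */
	return 0;
}
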
270 | 832 | ||
271 | static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ | 833 | static void balance_leaf_paste_right_whole(struct tree_balance *tb, |
272 | const char *body, /* body of inserted item or bytes to paste */ | 834 | struct item_head *ih, const char *body) |
273 | int flag, /* i - insert, d - delete, c - cut, p - paste | ||
274 | (see comment to do_balance) */ | ||
275 | struct item_head *insert_key, /* in our processing of one level we sometimes determine what | ||
276 | must be inserted into the next higher level. This insertion | ||
277 | consists of a key or two keys and their corresponding | ||
278 | pointers */ | ||
279 | struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ | ||
280 | ) | ||
281 | { | 835 | { |
282 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | 836 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
283 | int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] | 837 | int n = B_NR_ITEMS(tbS0); |
284 | of the affected item */ | 838 | struct item_head *pasted; |
285 | struct buffer_info bi; | 839 | struct buffer_info bi; |
286 | struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ | ||
287 | int snum[2]; /* number of items that will be placed | ||
288 | into S_new (includes partially shifted | ||
289 | items) */ | ||
290 | int sbytes[2]; /* if an item is partially shifted into S_new then | ||
291 | if it is a directory item | ||
292 | it is the number of entries from the item that are shifted into S_new | ||
293 | else | ||
294 | it is the number of bytes from the item that are shifted into S_new | ||
295 | */ | ||
296 | int n, i; | ||
297 | int ret_val; | ||
298 | int pos_in_item; | ||
299 | int zeros_num; | ||
300 | 840 | ||
301 | PROC_INFO_INC(tb->tb_sb, balance_at[0]); | 841 | buffer_info_init_right(tb, &bi); |
842 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
843 | |||
844 | /* append item in R[0] */ | ||
845 | if (tb->pos_in_item >= 0) { | ||
846 | buffer_info_init_right(tb, &bi); | ||
847 | leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->rnum[0], | ||
848 | tb->pos_in_item, tb->insert_size[0], body, | ||
849 | tb->zeroes_num); | ||
850 | } | ||
302 | 851 | ||
303 | /* Make balance in case insert_size[0] < 0 */ | 852 | /* paste new entry, if item is directory item */ |
304 | if (tb->insert_size[0] < 0) | 853 | pasted = item_head(tb->R[0], tb->item_pos - n + tb->rnum[0]); |
305 | return balance_leaf_when_delete(tb, flag); | 854 | if (is_direntry_le_ih(pasted) && tb->pos_in_item >= 0) { |
855 | leaf_paste_entries(&bi, tb->item_pos - n + tb->rnum[0], | ||
856 | tb->pos_in_item, 1, | ||
857 | (struct reiserfs_de_head *)body, | ||
858 | body + DEH_SIZE, tb->insert_size[0]); | ||
306 | 859 | ||
307 | zeros_num = 0; | 860 | if (!tb->pos_in_item) { |
308 | if (flag == M_INSERT && !body) | ||
309 | zeros_num = ih_item_len(ih); | ||
310 | 861 | ||
311 | pos_in_item = tb->tb_path->pos_in_item; | 862 | RFALSE(tb->item_pos - n + tb->rnum[0], |
312 | /* for indirect item pos_in_item is measured in unformatted node | 863 | "PAP-12165: directory item must be first " |
313 | pointers. Recalculate to bytes */ | 864 | "item of node when pasting is in 0th position"); |
314 | if (flag != M_INSERT | 865 | |
315 | && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) | 866 | /* update delimiting keys */ |
316 | pos_in_item *= UNFM_P_SIZE; | 867 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); |
317 | |||
318 | if (tb->lnum[0] > 0) { | ||
319 | /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ | ||
320 | if (item_pos < tb->lnum[0]) { | ||
321 | /* new item or it part falls to L[0], shift it too */ | ||
322 | n = B_NR_ITEMS(tb->L[0]); | ||
323 | |||
324 | switch (flag) { | ||
325 | case M_INSERT: /* insert item into L[0] */ | ||
326 | |||
327 | if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { | ||
328 | /* part of new item falls into L[0] */ | ||
329 | int new_item_len; | ||
330 | int version; | ||
331 | |||
332 | ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1); | ||
333 | |||
334 | /* Calculate item length to insert to S[0] */ | ||
335 | new_item_len = ih_item_len(ih) - tb->lbytes; | ||
336 | /* Calculate and check item length to insert to L[0] */ | ||
337 | put_ih_item_len(ih, ih_item_len(ih) - new_item_len); | ||
338 | |||
339 | RFALSE(ih_item_len(ih) <= 0, | ||
340 | "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", | ||
341 | ih_item_len(ih)); | ||
342 | |||
343 | /* Insert new item into L[0] */ | ||
344 | buffer_info_init_left(tb, &bi); | ||
345 | leaf_insert_into_buf(&bi, | ||
346 | n + item_pos - ret_val, ih, body, | ||
347 | zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num); | ||
348 | |||
349 | version = ih_version(ih); | ||
350 | |||
351 | /* Calculate key component, item length and body to insert into S[0] */ | ||
352 | set_le_ih_k_offset(ih, le_ih_k_offset(ih) + | ||
353 | (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); | ||
354 | |||
355 | put_ih_item_len(ih, new_item_len); | ||
356 | if (tb->lbytes > zeros_num) { | ||
357 | body += (tb->lbytes - zeros_num); | ||
358 | zeros_num = 0; | ||
359 | } else | ||
360 | zeros_num -= tb->lbytes; | ||
361 | |||
362 | RFALSE(ih_item_len(ih) <= 0, | ||
363 | "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", | ||
364 | ih_item_len(ih)); | ||
365 | } else { | ||
366 | /* new item in whole falls into L[0] */ | ||
367 | /* Shift lnum[0]-1 items to L[0] */ | ||
368 | ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); | ||
369 | /* Insert new item into L[0] */ | ||
370 | buffer_info_init_left(tb, &bi); | ||
371 | leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num); | ||
372 | tb->insert_size[0] = 0; | ||
373 | zeros_num = 0; | ||
374 | } | ||
375 | break; | ||
376 | |||
377 | case M_PASTE: /* append item in L[0] */ | ||
378 | |||
379 | if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { | ||
380 | /* we must shift the part of the appended item */ | ||
381 | if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { | ||
382 | |||
383 | RFALSE(zeros_num, | ||
384 | "PAP-12090: invalid parameter in case of a directory"); | ||
385 | /* directory item */ | ||
386 | if (tb->lbytes > pos_in_item) { | ||
387 | /* new directory entry falls into L[0] */ | ||
388 | struct item_head *pasted; | ||
389 | int l_pos_in_item = pos_in_item; | ||
390 | |||
391 | /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ | ||
392 | ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1); | ||
393 | if (ret_val && !item_pos) { | ||
394 | pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); | ||
395 | l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1); | ||
396 | } | ||
397 | |||
398 | /* Append given directory entry to directory item */ | ||
399 | buffer_info_init_left(tb, &bi); | ||
400 | leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num); | ||
401 | |||
402 | /* previous string prepared space for pasting new entry, following string pastes this entry */ | ||
403 | |||
404 | /* when we have merge directory item, pos_in_item has been changed too */ | ||
405 | |||
406 | /* paste new directory entry. 1 is entry number */ | ||
407 | leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item, | ||
408 | 1, (struct reiserfs_de_head *) body, | ||
409 | body + DEH_SIZE, tb->insert_size[0]); | ||
410 | tb->insert_size[0] = 0; | ||
411 | } else { | ||
412 | /* new directory item doesn't fall into L[0] */ | ||
413 | /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ | ||
414 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
415 | } | ||
416 | /* Calculate new position to append in item body */ | ||
417 | pos_in_item -= tb->lbytes; | ||
418 | } else { | ||
419 | /* regular object */ | ||
420 | RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes); | ||
421 | RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)), | ||
422 | "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", | ||
423 | ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item); | ||
424 | |||
425 | if (tb->lbytes >= pos_in_item) { | ||
426 | /* appended item will be in L[0] in whole */ | ||
427 | int l_n; | ||
428 | |||
429 | /* this bytes number must be appended to the last item of L[h] */ | ||
430 | l_n = tb->lbytes - pos_in_item; | ||
431 | |||
432 | /* Calculate new insert_size[0] */ | ||
433 | tb->insert_size[0] -= l_n; | ||
434 | |||
435 | RFALSE(tb->insert_size[0] <= 0, | ||
436 | "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", | ||
437 | tb->insert_size[0]); | ||
438 | ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len | ||
439 | (B_N_PITEM_HEAD(tbS0, item_pos))); | ||
440 | /* Append to body of item in L[0] */ | ||
441 | buffer_info_init_left(tb, &bi); | ||
442 | leaf_paste_in_buffer | ||
443 | (&bi, n + item_pos - ret_val, ih_item_len | ||
444 | (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)), | ||
445 | l_n, body, | ||
446 | zeros_num > l_n ? l_n : zeros_num); | ||
447 | /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ | ||
448 | { | ||
449 | int version; | ||
450 | int temp_l = l_n; | ||
451 | |||
452 | RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)), | ||
453 | "PAP-12106: item length must be 0"); | ||
454 | RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY | ||
455 | (tb->L[0], n + item_pos - ret_val)), | ||
456 | "PAP-12107: items must be of the same file"); | ||
457 | if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { | ||
458 | temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT); | ||
459 | } | ||
460 | /* update key of first item in S0 */ | ||
461 | version = ih_version(B_N_PITEM_HEAD(tbS0, 0)); | ||
462 | set_le_key_k_offset(version, B_N_PKEY(tbS0, 0), | ||
463 | le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l); | ||
464 | /* update left delimiting key */ | ||
465 | set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), | ||
466 | le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l); | ||
467 | } | ||
468 | |||
469 | /* Calculate new body, position in item and insert_size[0] */ | ||
470 | if (l_n > zeros_num) { | ||
471 | body += (l_n - zeros_num); | ||
472 | zeros_num = 0; | ||
473 | } else | ||
474 | zeros_num -= l_n; | ||
475 | pos_in_item = 0; | ||
476 | |||
477 | RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1)) | ||
478 | || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size) | ||
479 | || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size), | ||
480 | "PAP-12120: item must be merge-able with left neighboring item"); | ||
481 | } else { /* only part of the appended item will be in L[0] */ | ||
482 | |||
483 | /* Calculate position in item for append in S[0] */ | ||
484 | pos_in_item -= tb->lbytes; | ||
485 | |||
486 | RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); | ||
487 | |||
488 | /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ | ||
489 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
490 | } | ||
491 | } | ||
492 | } else { /* appended item will be in L[0] in whole */ | ||
493 | |||
494 | struct item_head *pasted; | ||
495 | |||
496 | if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergeable */ | ||
497 | /* then increment pos_in_item by the size of the last item in L[0] */ | ||
498 | pasted = B_N_PITEM_HEAD(tb->L[0], n - 1); | ||
499 | if (is_direntry_le_ih(pasted)) | ||
500 | pos_in_item += ih_entry_count(pasted); | ||
501 | else | ||
502 | pos_in_item += ih_item_len(pasted); | ||
503 | } | ||
504 | |||
505 | /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ | ||
506 | ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
507 | /* Append to body of item in L[0] */ | ||
508 | buffer_info_init_left(tb, &bi); | ||
509 | leaf_paste_in_buffer(&bi, n + item_pos - ret_val, | ||
510 | pos_in_item, | ||
511 | tb->insert_size[0], | ||
512 | body, zeros_num); | ||
513 | |||
514 | /* if appended item is directory, paste entry */ | ||
515 | pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val); | ||
516 | if (is_direntry_le_ih(pasted)) | ||
517 | leaf_paste_entries(&bi, n + item_pos - ret_val, | ||
518 | pos_in_item, 1, | ||
519 | (struct reiserfs_de_head *) body, | ||
520 | body + DEH_SIZE, | ||
521 | tb->insert_size[0]); | ||
522 | /* if appended item is indirect item, put unformatted node into un list */ | ||
523 | if (is_indirect_le_ih(pasted)) | ||
524 | set_ih_free_space(pasted, 0); | ||
525 | tb->insert_size[0] = 0; | ||
526 | zeros_num = 0; | ||
527 | } | ||
528 | break; | ||
529 | default: /* cases d and t */ | ||
530 | reiserfs_panic(tb->tb_sb, "PAP-12130", | ||
531 | "lnum > 0: unexpected mode: " | ||
532 | " %s(%d)", | ||
533 | (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); | ||
534 | } | ||
535 | } else { | ||
536 | /* new item doesn't fall into L[0] */ | ||
537 | leaf_shift_left(tb, tb->lnum[0], tb->lbytes); | ||
538 | } | 868 | } |
539 | } | 869 | } |
540 | 870 | ||
541 | /* tb->lnum[0] > 0 */ | 871 | if (is_indirect_le_ih(pasted)) |
542 | /* Calculate new item position */ | 872 | set_ih_free_space(pasted, 0); |
543 | item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); | 873 | tb->zeroes_num = tb->insert_size[0] = 0; |
544 | 874 | } | |
545 | if (tb->rnum[0] > 0) { | ||
546 | /* shift rnum[0] items from S[0] to the right neighbor R[0] */ | ||
547 | n = B_NR_ITEMS(tbS0); | ||
548 | switch (flag) { | ||
549 | |||
550 | case M_INSERT: /* insert item */ | ||
551 | if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ | ||
552 | if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ | ||
553 | loff_t old_key_comp, old_len, r_zeros_number; | ||
554 | const char *r_body; | ||
555 | int version; | ||
556 | loff_t offset; | ||
557 | |||
558 | leaf_shift_right(tb, tb->rnum[0] - 1, -1); | ||
559 | |||
560 | version = ih_version(ih); | ||
561 | /* Remember key component and item length */ | ||
562 | old_key_comp = le_ih_k_offset(ih); | ||
563 | old_len = ih_item_len(ih); | ||
564 | |||
565 | /* Calculate key component and item length to insert into R[0] */ | ||
566 | offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)); | ||
567 | set_le_ih_k_offset(ih, offset); | ||
568 | put_ih_item_len(ih, tb->rbytes); | ||
569 | /* Insert part of the item into R[0] */ | ||
570 | buffer_info_init_right(tb, &bi); | ||
571 | if ((old_len - tb->rbytes) > zeros_num) { | ||
572 | r_zeros_number = 0; | ||
573 | r_body = body + (old_len - tb->rbytes) - zeros_num; | ||
574 | } else { | ||
575 | r_body = body; | ||
576 | r_zeros_number = zeros_num - (old_len - tb->rbytes); | ||
577 | zeros_num -= r_zeros_number; | ||
578 | } | ||
579 | |||
580 | leaf_insert_into_buf(&bi, 0, ih, r_body, | ||
581 | r_zeros_number); | ||
582 | |||
583 | /* Replace right delimiting key by first key in R[0] */ | ||
584 | replace_key(tb, tb->CFR[0], tb->rkey[0], | ||
585 | tb->R[0], 0); | ||
586 | |||
587 | /* Calculate key component and item length to insert into S[0] */ | ||
588 | set_le_ih_k_offset(ih, old_key_comp); | ||
589 | put_ih_item_len(ih, old_len - tb->rbytes); | ||
590 | |||
591 | tb->insert_size[0] -= tb->rbytes; | ||
592 | |||
593 | } else { /* whole new item falls into R[0] */ | ||
594 | |||
595 | /* Shift rnum[0]-1 items to R[0] */ | ||
596 | ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); | ||
597 | /* Insert new item into R[0] */ | ||
598 | buffer_info_init_right(tb, &bi); | ||
599 | leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1, | ||
600 | ih, body, zeros_num); | ||
601 | |||
602 | if (item_pos - n + tb->rnum[0] - 1 == 0) { | ||
603 | replace_key(tb, tb->CFR[0], | ||
604 | tb->rkey[0], | ||
605 | tb->R[0], 0); | ||
606 | |||
607 | } | ||
608 | zeros_num = tb->insert_size[0] = 0; | ||
609 | } | ||
610 | } else { /* new item or part of it doesn't fall into R[0] */ | ||
611 | |||
612 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
613 | } | ||
614 | break; | ||
615 | 875 | ||
616 | case M_PASTE: /* append item */ | 876 | static void balance_leaf_paste_right(struct tree_balance *tb, |
617 | 877 | struct item_head *ih, const char *body) | |
618 | if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ | 878 | { |
619 | if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ | 879 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
620 | if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ | 880 | int n = B_NR_ITEMS(tbS0); |
621 | int entry_count; | ||
622 | |||
623 | RFALSE(zeros_num, | ||
624 | "PAP-12145: invalid parameter in case of a directory"); | ||
625 | entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD | ||
626 | (tbS0, item_pos)); | ||
627 | if (entry_count - tb->rbytes < | ||
628 | pos_in_item) | ||
629 | /* new directory entry falls into R[0] */ | ||
630 | { | ||
631 | int paste_entry_position; | ||
632 | |||
633 | RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0], | ||
634 | "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", | ||
635 | tb->rbytes, entry_count); | ||
636 | /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ | ||
637 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); | ||
638 | /* Paste given directory entry to directory item */ | ||
639 | paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; | ||
640 | buffer_info_init_right(tb, &bi); | ||
641 | leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num); | ||
642 | /* paste entry */ | ||
643 | leaf_paste_entries(&bi, 0, paste_entry_position, 1, | ||
644 | (struct reiserfs_de_head *) body, | ||
645 | body + DEH_SIZE, tb->insert_size[0]); | ||
646 | |||
647 | if (paste_entry_position == 0) { | ||
648 | /* change delimiting keys */ | ||
649 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0); | ||
650 | } | ||
651 | |||
652 | tb->insert_size[0] = 0; | ||
653 | pos_in_item++; | ||
654 | } else { /* new directory entry doesn't fall into R[0] */ | ||
655 | |||
656 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
657 | } | ||
658 | } else { /* regular object */ | ||
659 | |||
660 | int n_shift, n_rem, r_zeros_number; | ||
661 | const char *r_body; | ||
662 | |||
663 | /* Calculate number of bytes which must be shifted from appended item */ | ||
664 | if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0) | ||
665 | n_shift = 0; | ||
666 | |||
667 | RFALSE(pos_in_item != ih_item_len | ||
668 | (B_N_PITEM_HEAD(tbS0, item_pos)), | ||
669 | "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d", | ||
670 | pos_in_item, ih_item_len | ||
671 | (B_N_PITEM_HEAD(tbS0, item_pos))); | ||
672 | |||
673 | leaf_shift_right(tb, tb->rnum[0], n_shift); | ||
674 | /* Calculate number of bytes which must remain in body after appending to R[0] */ | ||
675 | if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0) | ||
676 | n_rem = 0; | ||
677 | |||
678 | { | ||
679 | int version; | ||
680 | unsigned long temp_rem = n_rem; | ||
681 | |||
682 | version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0)); | ||
683 | if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) { | ||
684 | temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT); | ||
685 | } | ||
686 | set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0), | ||
687 | le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem); | ||
688 | set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]), | ||
689 | le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem); | ||
690 | } | ||
691 | /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; | ||
692 | k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ | ||
693 | do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); | ||
694 | |||
695 | /* Append part of body into R[0] */ | ||
696 | buffer_info_init_right(tb, &bi); | ||
697 | if (n_rem > zeros_num) { | ||
698 | r_zeros_number = 0; | ||
699 | r_body = body + n_rem - zeros_num; | ||
700 | } else { | ||
701 | r_body = body; | ||
702 | r_zeros_number = zeros_num - n_rem; | ||
703 | zeros_num -= r_zeros_number; | ||
704 | } | ||
705 | |||
706 | leaf_paste_in_buffer(&bi, 0, n_shift, | ||
707 | tb->insert_size[0] - n_rem, | ||
708 | r_body, r_zeros_number); | ||
709 | |||
710 | if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) { | ||
711 | #if 0 | ||
712 | RFALSE(n_rem, | ||
713 | "PAP-12160: paste more than one unformatted node pointer"); | ||
714 | #endif | ||
715 | set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0); | ||
716 | } | ||
717 | tb->insert_size[0] = n_rem; | ||
718 | if (!n_rem) | ||
719 | pos_in_item++; | ||
720 | } | ||
721 | } else { /* pasted item in whole falls into R[0] */ | ||
722 | |||
723 | struct item_head *pasted; | ||
724 | |||
725 | ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
726 | /* append item in R[0] */ | ||
727 | if (pos_in_item >= 0) { | ||
728 | buffer_info_init_right(tb, &bi); | ||
729 | leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item, | ||
730 | tb->insert_size[0], body, zeros_num); | ||
731 | } | ||
732 | |||
733 | /* paste new entry, if item is directory item */ | ||
734 | pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]); | ||
735 | if (is_direntry_le_ih(pasted) && pos_in_item >= 0) { | ||
736 | leaf_paste_entries(&bi, item_pos - n + tb->rnum[0], | ||
737 | pos_in_item, 1, | ||
738 | (struct reiserfs_de_head *) body, | ||
739 | body + DEH_SIZE, tb->insert_size[0]); | ||
740 | if (!pos_in_item) { | ||
741 | |||
742 | RFALSE(item_pos - n + tb->rnum[0], | ||
743 | "PAP-12165: directory item must be first item of node when pasting is in 0th position"); | ||
744 | |||
745 | /* update delimiting keys */ | ||
746 | replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); | ||
747 | } | ||
748 | } | ||
749 | |||
750 | if (is_indirect_le_ih(pasted)) | ||
751 | set_ih_free_space(pasted, 0); | ||
752 | zeros_num = tb->insert_size[0] = 0; | ||
753 | } | ||
754 | } else { /* new item doesn't fall into R[0] */ | ||
755 | |||
756 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
757 | } | ||
758 | break; | ||
759 | default: /* cases d and t */ | ||
760 | reiserfs_panic(tb->tb_sb, "PAP-12175", | ||
761 | "rnum > 0: unexpected mode: %s(%d)", | ||
762 | (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); | ||
763 | } | ||
764 | 881 | ||
882 | /* pasted item doesn't fall into R[0] */ | ||
883 | if (n - tb->rnum[0] > tb->item_pos) { | ||
884 | leaf_shift_right(tb, tb->rnum[0], tb->rbytes); | ||
885 | return; | ||
765 | } | 886 | } |
766 | 887 | ||
767 | /* tb->rnum[0] > 0 */ | 888 | /* pasted item or part of it falls to R[0] */ |
768 | RFALSE(tb->blknum[0] > 3, | ||
769 | "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); | ||
770 | RFALSE(tb->blknum[0] < 0, | ||
771 | "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]); | ||
772 | 889 | ||
773 | /* if while adding to a node we discover that it is possible to split | 890 | if (tb->item_pos == n - tb->rnum[0] && tb->rbytes != -1) |
774 | it in two, and merge the left part into the left neighbor and the | 891 | /* we must shift part of the appended item */
775 | right part into the right neighbor, eliminating the node */ | 892 | balance_leaf_paste_right_shift(tb, ih, body); |
776 | if (tb->blknum[0] == 0) { /* node S[0] is empty now */ | 893 | else |
894 | /* pasted item in whole falls into R[0] */ | ||
895 | balance_leaf_paste_right_whole(tb, ih, body); | ||
896 | } | ||
777 | 897 | ||
778 | RFALSE(!tb->lnum[0] || !tb->rnum[0], | 898 | /* shift rnum[0] items from S[0] to the right neighbor R[0] */ |
779 | "PAP-12190: lnum and rnum must not be zero"); | 899 | static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih, |
780 | /* if insertion was done before 0-th position in R[0], right | 900 | const char *body, int flag) |
781 | delimiting key of the tb->L[0]'s and left delimiting key are | 901 | { |
782 | not set correctly */ | 902 | if (tb->rnum[0] <= 0) |
783 | if (tb->CFL[0]) { | 903 | return; |
784 | if (!tb->CFR[0]) | 904 | |
785 | reiserfs_panic(tb->tb_sb, "vs-12195", | 905 | BUG_ON(flag != M_INSERT && flag != M_PASTE); |
786 | "CFR not initialized"); | 906 | |
787 | copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), | 907 | if (flag == M_INSERT) |
788 | B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); | 908 | balance_leaf_insert_right(tb, ih, body); |
789 | do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); | 909 | else /* M_PASTE */ |
910 | balance_leaf_paste_right(tb, ih, body); | ||
911 | } | ||
912 | |||
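A note on the r_body / r_zeroes_number idiom that recurs in the shifted-paste paths on both sides of this hunk: a paste logically consists of zeroes_num implicit zero bytes followed by the real bytes at body, and when only part of it fits in the neighbor the code must apportion zeroes and real bytes between the two nodes. The following is a minimal standalone model of that split, not kernel code; the names mirror the kernel's for readability:

#include <stdio.h>

/*
 * A paste of logical bytes = zeroes_num implicit zeroes, then real bytes
 * at body. The first n_rem logical bytes stay behind in S[0]; compute
 * what the neighbor receives.
 */
static void split_zeroes(const char *body, int zeroes_num, int n_rem,
			 const char **r_body, int *r_zeroes_number)
{
	if (n_rem > zeroes_num) {
		/* all zeroes stay behind; skip the consumed real bytes */
		*r_zeroes_number = 0;
		*r_body = body + n_rem - zeroes_num;
	} else {
		/* some zeroes spill over; the real bytes go untouched */
		*r_body = body;
		*r_zeroes_number = zeroes_num - n_rem;
	}
}

int main(void)
{
	const char *r_body;
	int r_zeroes;

	split_zeroes("REALDATA", 3, 5, &r_body, &r_zeroes);
	printf("%s %d\n", r_body, r_zeroes);	/* ALDATA 0 */
	split_zeroes("REALDATA", 3, 1, &r_body, &r_zeroes);
	printf("%s %d\n", r_body, r_zeroes);	/* REALDATA 2 */
	return 0;
}

The kernel additionally shrinks zeroes_num to the count kept behind (the "zeros_num -= r_zeros_number" line in the removed code above); the model omits that bookkeeping.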
913 | static void balance_leaf_new_nodes_insert(struct tree_balance *tb, | ||
914 | struct item_head *ih, | ||
915 | const char *body, | ||
916 | struct item_head *insert_key, | ||
917 | struct buffer_head **insert_ptr, | ||
918 | int i) | ||
919 | { | ||
920 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
921 | int n = B_NR_ITEMS(tbS0); | ||
922 | struct buffer_info bi; | ||
923 | int shift; | ||
924 | |||
925 | /* new item or its part doesn't fall into S_new[i] */ | ||
926 | if (n - tb->snum[i] >= tb->item_pos) { | ||
927 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | ||
928 | tb->snum[i], tb->sbytes[i], tb->S_new[i]); | ||
929 | return; | ||
930 | } | ||
931 | |||
932 | /* new item or its part falls into the first new node S_new[i] */ | ||
933 | |||
934 | /* part of new item falls into S_new[i] */ | ||
935 | if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) { | ||
936 | int old_key_comp, old_len, r_zeroes_number; | ||
937 | const char *r_body; | ||
938 | int version; | ||
939 | |||
940 | /* Move snum[i]-1 items from S[0] to S_new[i] */ | ||
941 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1, | ||
942 | tb->S_new[i]); | ||
943 | |||
944 | /* Remember key component and item length */ | ||
945 | version = ih_version(ih); | ||
946 | old_key_comp = le_ih_k_offset(ih); | ||
947 | old_len = ih_item_len(ih); | ||
948 | |||
949 | /* | ||
950 | * Calculate key component and item length to insert | ||
951 | * into S_new[i] | ||
952 | */ | ||
953 | shift = 0; | ||
954 | if (is_indirect_le_ih(ih)) | ||
955 | shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; | ||
956 | set_le_ih_k_offset(ih, | ||
957 | le_ih_k_offset(ih) + | ||
958 | ((old_len - tb->sbytes[i]) << shift)); | ||
959 | |||
960 | put_ih_item_len(ih, tb->sbytes[i]); | ||
961 | |||
962 | /* Insert part of the item into S_new[i] before 0-th item */ | ||
963 | buffer_info_init_bh(tb, &bi, tb->S_new[i]); | ||
964 | |||
965 | if ((old_len - tb->sbytes[i]) > tb->zeroes_num) { | ||
966 | r_zeroes_number = 0; | ||
967 | r_body = body + (old_len - tb->sbytes[i]) - | ||
968 | tb->zeroes_num; | ||
969 | } else { | ||
970 | r_body = body; | ||
971 | r_zeroes_number = tb->zeroes_num - (old_len - | ||
972 | tb->sbytes[i]); | ||
973 | tb->zeroes_num -= r_zeroes_number; | ||
790 | } | 974 | } |
791 | 975 | ||
792 | reiserfs_invalidate_buffer(tb, tbS0); | 976 | leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number); |
793 | return 0; | 977 | |
978 | /* | ||
979 | * Calculate key component and item length to | ||
980 | * insert into S[i] | ||
981 | */ | ||
982 | set_le_ih_k_offset(ih, old_key_comp); | ||
983 | put_ih_item_len(ih, old_len - tb->sbytes[i]); | ||
984 | tb->insert_size[0] -= tb->sbytes[i]; | ||
985 | } else { | ||
986 | /* whole new item falls into S_new[i] */ | ||
987 | |||
988 | /* | ||
989 | * Shift snum[0] - 1 items to S_new[i] | ||
990 | * (sbytes[i] of split item) | ||
991 | */ | ||
992 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | ||
993 | tb->snum[i] - 1, tb->sbytes[i], tb->S_new[i]); | ||
994 | |||
995 | /* Insert new item into S_new[i] */ | ||
996 | buffer_info_init_bh(tb, &bi, tb->S_new[i]); | ||
997 | leaf_insert_into_buf(&bi, tb->item_pos - n + tb->snum[i] - 1, | ||
998 | ih, body, tb->zeroes_num); | ||
999 | |||
1000 | tb->zeroes_num = tb->insert_size[0] = 0; | ||
794 | } | 1001 | } |
1002 | } | ||
795 | 1003 | ||
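The key arithmetic in the split branch above is the subtle part: when the tail of the new item goes to S_new[i], its key offset must advance past the bytes that stay in S[0], and for indirect items the item body holds 4-byte unformatted-node pointers, each standing for one full block of file data, hence the extra shift. A standalone sketch of the calculation follows (illustrative C, not reiserfs code; the helper name is invented and the constants assume 4KiB blocks):

#include <stdio.h>

#define UNFM_P_SHIFT 2	/* an unformatted-node pointer is 4 bytes */

/* key offset of the split-off tail inserted at the front of S_new[i] */
static unsigned long split_key_offset(unsigned long old_offset, int old_len,
				      int sbytes, int blocksize_bits,
				      int indirect)
{
	/* each pointer byte left behind covers 2^shift bytes of data */
	int shift = indirect ? blocksize_bits - UNFM_P_SHIFT : 0;

	return old_offset + ((unsigned long)(old_len - sbytes) << shift);
}

int main(void)
{
	/*
	 * 48 bytes of pointers, 16 of them split off, 4KiB blocks: the
	 * 32 remaining pointer bytes cover (32 >> 2) * 4096 = 32768 bytes.
	 */
	printf("%lu\n", split_key_offset(1, 48, 16, 12, 1));	/* 32769 */
	return 0;
}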
796 | /* Fill new nodes that appear in place of S[0] */ | 1004 | /* we append to directory item */ |
1005 | static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb, | ||
1006 | struct item_head *ih, | ||
1007 | const char *body, | ||
1008 | struct item_head *insert_key, | ||
1009 | struct buffer_head **insert_ptr, | ||
1010 | int i) | ||
1011 | { | ||
1012 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1013 | struct item_head *aux_ih = item_head(tbS0, tb->item_pos); | ||
1014 | int entry_count = ih_entry_count(aux_ih); | ||
1015 | struct buffer_info bi; | ||
1016 | |||
1017 | if (entry_count - tb->sbytes[i] < tb->pos_in_item && | ||
1018 | tb->pos_in_item <= entry_count) { | ||
1019 | /* new directory entry falls into S_new[i] */ | ||
1020 | |||
1021 | RFALSE(!tb->insert_size[0], | ||
1022 | "PAP-12215: insert_size is already 0"); | ||
1023 | RFALSE(tb->sbytes[i] - 1 >= entry_count, | ||
1024 | "PAP-12220: there are no so much entries (%d), only %d", | ||
1025 | tb->sbytes[i] - 1, entry_count); | ||
1026 | |||
1027 | /* | ||
1028 | * Shift snum[i]-1 items in whole. | ||
1029 | * Shift sbytes[i] directory entries | ||
1030 | * from directory item number snum[i] | ||
1031 | */ | ||
1032 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], | ||
1033 | tb->sbytes[i] - 1, tb->S_new[i]); | ||
1034 | |||
1035 | /* | ||
1036 | * Paste given directory entry to | ||
1037 | * directory item | ||
1038 | */ | ||
1039 | buffer_info_init_bh(tb, &bi, tb->S_new[i]); | ||
1040 | leaf_paste_in_buffer(&bi, 0, tb->pos_in_item - entry_count + | ||
1041 | tb->sbytes[i] - 1, tb->insert_size[0], | ||
1042 | body, tb->zeroes_num); | ||
1043 | |||
1044 | /* paste new directory entry */ | ||
1045 | leaf_paste_entries(&bi, 0, tb->pos_in_item - entry_count + | ||
1046 | tb->sbytes[i] - 1, 1, | ||
1047 | (struct reiserfs_de_head *) body, | ||
1048 | body + DEH_SIZE, tb->insert_size[0]); | ||
1049 | |||
1050 | tb->insert_size[0] = 0; | ||
1051 | tb->pos_in_item++; | ||
1052 | } else { | ||
1053 | /* new directory entry doesn't fall into S_new[i] */ | ||
1054 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], | ||
1055 | tb->sbytes[i], tb->S_new[i]); | ||
1056 | } | ||
1058 | } | ||
797 | 1059 | ||
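Whether the new directory entry travels to S_new[i] is a pure counting question: sbytes[i] entries end up there in total, one of which is the entry being inserted, so the test is entry_count - sbytes[i] < pos_in_item <= entry_count, and the pasted position becomes pos_in_item - (entry_count - sbytes[i] + 1). A toy model of that predicate (standalone C; the helper name is invented):

#include <stdio.h>

/*
 * sbytes entries land in S_new, counting the new one, i.e. sbytes - 1
 * existing entries are moved. Return the entry's position inside S_new,
 * or -1 if it stays behind in S[0].
 */
static int snew_entry_pos(int entry_count, int sbytes, int pos_in_item)
{
	if (entry_count - sbytes < pos_in_item && pos_in_item <= entry_count)
		return pos_in_item - (entry_count - sbytes + 1);
	return -1;
}

int main(void)
{
	/* 10 entries, 4 go to S_new (3 moved plus the new one) */
	printf("%d\n", snew_entry_pos(10, 4, 8));	/* 1 */
	printf("%d\n", snew_entry_pos(10, 4, 5));	/* -1, stays in S[0] */
	return 0;
}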
798 | /* I am told that this copying is because we need an array to enable | 1060 | static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb, |
799 | the looping code. -Hans */ | 1061 | struct item_head *ih, |
800 | snum[0] = tb->s1num, snum[1] = tb->s2num; | 1062 | const char *body, |
801 | sbytes[0] = tb->s1bytes; | 1063 | struct item_head *insert_key, |
802 | sbytes[1] = tb->s2bytes; | 1064 | struct buffer_head **insert_ptr, |
1065 | int i) | ||
1066 | { | ||
1067 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1068 | struct item_head *aux_ih = item_head(tbS0, tb->item_pos); | ||
1069 | int n_shift, n_rem, r_zeroes_number, shift; | ||
1070 | const char *r_body; | ||
1071 | struct item_head *tmp; | ||
1072 | struct buffer_info bi; | ||
1073 | |||
1074 | RFALSE(ih, "PAP-12210: ih must be 0"); | ||
1075 | |||
1076 | if (is_direntry_le_ih(aux_ih)) { | ||
1077 | balance_leaf_new_nodes_paste_dirent(tb, ih, body, insert_key, | ||
1078 | insert_ptr, i); | ||
1079 | return; | ||
1080 | } | ||
1081 | |||
1082 | /* regular object */ | ||
1083 | |||
1085 | RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)) || | ||
1086 | tb->insert_size[0] <= 0, | ||
1087 | "PAP-12225: item too short or insert_size <= 0"); | ||
1088 | |||
1089 | /* | ||
1090 | * Calculate number of bytes which must be shifted from appended item | ||
1091 | */ | ||
1092 | n_shift = tb->sbytes[i] - tb->insert_size[0]; | ||
1093 | if (n_shift < 0) | ||
1094 | n_shift = 0; | ||
1095 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], n_shift, | ||
1096 | tb->S_new[i]); | ||
1097 | |||
1098 | /* | ||
1099 | * Calculate number of bytes which must remain in body after | ||
1100 | * append to S_new[i] | ||
1101 | */ | ||
1102 | n_rem = tb->insert_size[0] - tb->sbytes[i]; | ||
1103 | if (n_rem < 0) | ||
1104 | n_rem = 0; | ||
1105 | |||
1106 | /* Append part of body into S_new[i] */ | ||
1107 | buffer_info_init_bh(tb, &bi, tb->S_new[i]); | ||
1108 | if (n_rem > tb->zeroes_num) { | ||
1109 | r_zeroes_number = 0; | ||
1110 | r_body = body + n_rem - tb->zeroes_num; | ||
1111 | } else { | ||
1112 | r_body = body; | ||
1113 | r_zeroes_number = tb->zeroes_num - n_rem; | ||
1114 | tb->zeroes_num -= r_zeroes_number; | ||
1115 | } | ||
1116 | |||
1117 | leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, | ||
1118 | r_body, r_zeroes_number); | ||
1119 | |||
1120 | tmp = item_head(tb->S_new[i], 0); | ||
1121 | shift = 0; | ||
1122 | if (is_indirect_le_ih(tmp)) { | ||
1123 | set_ih_free_space(tmp, 0); | ||
1124 | shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT; | ||
1125 | } | ||
1126 | add_le_ih_k_offset(tmp, n_rem << shift); | ||
1127 | |||
1128 | tb->insert_size[0] = n_rem; | ||
1129 | if (!n_rem) | ||
1130 | tb->pos_in_item++; | ||
1131 | } | ||
1132 | |||
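The clamped n_shift / n_rem pair above partitions the work: n_shift bytes of the existing item move so that sbytes[i] bytes in total land in S_new[i], while n_rem pasted bytes remain owed to S[0]; the same arithmetic appears in the R[0] path earlier. A compact model under those definitions (standalone C, illustrative only):

#include <stdio.h>

struct split {
	int n_shift;	/* existing item bytes to move across */
	int n_rem;	/* pasted bytes still pending after this node */
};

/* sbytes bytes must end up in the new node; insert_size is being pasted */
static struct split split_paste(int sbytes, int insert_size)
{
	struct split s;

	s.n_shift = sbytes - insert_size;
	if (s.n_shift < 0)
		s.n_shift = 0;
	s.n_rem = insert_size - sbytes;
	if (s.n_rem < 0)
		s.n_rem = 0;
	return s;
}

int main(void)
{
	struct split s = split_paste(100, 30);	/* old bytes fill the gap */

	printf("%d %d\n", s.n_shift, s.n_rem);	/* 70 0 */
	s = split_paste(20, 30);		/* paste overflows the node */
	printf("%d %d\n", s.n_shift, s.n_rem);	/* 0 10 */
	return 0;
}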
1133 | static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb, | ||
1134 | struct item_head *ih, | ||
1135 | const char *body, | ||
1136 | struct item_head *insert_key, | ||
1137 | struct buffer_head **insert_ptr, | ||
1138 | int i) | ||
1140 | { | ||
1141 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1142 | int n = B_NR_ITEMS(tbS0); | ||
1143 | int leaf_mi; | ||
1144 | struct item_head *pasted; | ||
1145 | struct buffer_info bi; | ||
1146 | |||
1147 | #ifdef CONFIG_REISERFS_CHECK | ||
1148 | struct item_head *ih_check = item_head(tbS0, tb->item_pos); | ||
1149 | |||
1150 | if (!is_direntry_le_ih(ih_check) && | ||
1151 | (tb->pos_in_item != ih_item_len(ih_check) || | ||
1152 | tb->insert_size[0] <= 0)) | ||
1153 | reiserfs_panic(tb->tb_sb, | ||
1154 | "PAP-12235", | ||
1155 | "pos_in_item must be equal to ih_item_len"); | ||
1156 | #endif | ||
1157 | |||
1158 | leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], | ||
1159 | tb->sbytes[i], tb->S_new[i]); | ||
1160 | |||
1161 | RFALSE(leaf_mi, | ||
1162 | "PAP-12240: unexpected value returned by leaf_move_items (%d)", | ||
1163 | leaf_mi); | ||
1164 | |||
1165 | /* paste into item */ | ||
1166 | buffer_info_init_bh(tb, &bi, tb->S_new[i]); | ||
1167 | leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->snum[i], | ||
1168 | tb->pos_in_item, tb->insert_size[0], | ||
1169 | body, tb->zeroes_num); | ||
1170 | |||
1171 | pasted = item_head(tb->S_new[i], tb->item_pos - n + | ||
1172 | tb->snum[i]); | ||
1173 | if (is_direntry_le_ih(pasted)) | ||
1174 | leaf_paste_entries(&bi, tb->item_pos - n + tb->snum[i], | ||
1175 | tb->pos_in_item, 1, | ||
1176 | (struct reiserfs_de_head *)body, | ||
1177 | body + DEH_SIZE, tb->insert_size[0]); | ||
1178 | |||
1179 | /* if we paste to indirect item update ih_free_space */ | ||
1180 | if (is_indirect_le_ih(pasted)) | ||
1181 | set_ih_free_space(pasted, 0); | ||
1182 | |||
1183 | tb->zeroes_num = tb->insert_size[0] = 0; | ||
1184 | } | ||
1185 | |||
1186 | static void balance_leaf_new_nodes_paste(struct tree_balance *tb, | ||
1187 | struct item_head *ih, | ||
1188 | const char *body, | ||
1189 | struct item_head *insert_key, | ||
1190 | struct buffer_head **insert_ptr, | ||
1191 | int i) | ||
1192 | { | ||
1193 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1194 | int n = B_NR_ITEMS(tbS0); | ||
1195 | |||
1196 | /* pasted item doesn't fall into S_new[i] */ | ||
1197 | if (n - tb->snum[i] > tb->item_pos) { | ||
1198 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | ||
1199 | tb->snum[i], tb->sbytes[i], tb->S_new[i]); | ||
1200 | return; | ||
1201 | } | ||
1202 | |||
1203 | /* pasted item or part of it falls into S_new[i] */ | ||
1204 | |||
1205 | if (tb->item_pos == n - tb->snum[i] && tb->sbytes[i] != -1) | ||
1206 | /* we must shift part of the appended item */ | ||
1207 | balance_leaf_new_nodes_paste_shift(tb, ih, body, insert_key, | ||
1208 | insert_ptr, i); | ||
1209 | else | ||
1210 | /* item falls wholly into S_new[i] */ | ||
1211 | balance_leaf_new_nodes_paste_whole(tb, ih, body, insert_key, | ||
1212 | insert_ptr, i); | ||
1213 | } | ||
1214 | |||
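The three-way dispatch just above reduces to two comparisons against the boundary item index n - snum[i]. A sketch of the decision table (standalone C; the enum and helper names are invented, the comparisons mirror the code):

#include <stdio.h>

enum paste_dest { STAYS_IN_S0, SPLIT_ACROSS, WHOLE_IN_SNEW };

/* n items in S[0]; the last snum move out; sbytes == -1 means no split */
static enum paste_dest paste_dest(int n, int snum, int sbytes, int item_pos)
{
	if (n - snum > item_pos)
		return STAYS_IN_S0;
	if (item_pos == n - snum && sbytes != -1)
		return SPLIT_ACROSS;
	return WHOLE_IN_SNEW;
}

int main(void)
{
	printf("%d\n", paste_dest(10, 3, -1, 5));	/* 0: stays in S[0] */
	printf("%d\n", paste_dest(10, 3, 40, 7));	/* 1: split across */
	printf("%d\n", paste_dest(10, 3, -1, 8));	/* 2: whole in S_new */
	return 0;
}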
1215 | /* Fill new nodes that appear in place of S[0] */ | ||
1216 | static void balance_leaf_new_nodes(struct tree_balance *tb, | ||
1217 | struct item_head *ih, | ||
1218 | const char *body, | ||
1219 | struct item_head *insert_key, | ||
1220 | struct buffer_head **insert_ptr, | ||
1221 | int flag) | ||
1222 | { | ||
1223 | int i; | ||
803 | for (i = tb->blknum[0] - 2; i >= 0; i--) { | 1224 | for (i = tb->blknum[0] - 2; i >= 0; i--) { |
1225 | BUG_ON(flag != M_INSERT && flag != M_PASTE); | ||
804 | 1226 | ||
805 | RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, | 1227 | RFALSE(!tb->snum[i], |
806 | snum[i]); | 1228 | "PAP-12200: snum[%d] == %d. Must be > 0", i, |
1229 | tb->snum[i]); | ||
807 | 1230 | ||
808 | /* here we shift from S to S_new nodes */ | 1231 | /* here we shift from S to S_new nodes */ |
809 | 1232 | ||
810 | S_new[i] = get_FEB(tb); | 1233 | tb->S_new[i] = get_FEB(tb); |
811 | 1234 | ||
812 | /* initialize block type and tree level */ | 1235 | /* initialize block type and tree level */
813 | set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); | 1236 | set_blkh_level(B_BLK_HEAD(tb->S_new[i]), DISK_LEAF_NODE_LEVEL); |
814 | 1237 | ||
815 | n = B_NR_ITEMS(tbS0); | 1238 | if (flag == M_INSERT) |
816 | 1239 | balance_leaf_new_nodes_insert(tb, ih, body, insert_key, | |
817 | switch (flag) { | 1240 | insert_ptr, i); |
818 | case M_INSERT: /* insert item */ | 1241 | else /* M_PASTE */ |
819 | 1242 | balance_leaf_new_nodes_paste(tb, ih, body, insert_key, | |
820 | if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ | 1243 | insert_ptr, i); |
821 | if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ | 1244 | |
822 | int old_key_comp, old_len, r_zeros_number; | 1245 | memcpy(insert_key + i, leaf_key(tb->S_new[i], 0), KEY_SIZE); |
823 | const char *r_body; | 1246 | insert_ptr[i] = tb->S_new[i]; |
824 | int version; | 1247 | |
825 | 1248 | RFALSE(!buffer_journaled(tb->S_new[i]) | |
826 | /* Move snum[i]-1 items from S[0] to S_new[i] */ | 1249 | || buffer_journal_dirty(tb->S_new[i]) |
827 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | 1250 | || buffer_dirty(tb->S_new[i]), |
828 | snum[i] - 1, -1, | 1251 | "PAP-12247: S_new[%d] : (%b)", |
829 | S_new[i]); | 1252 | i, tb->S_new[i]); |
830 | /* Remember key component and item length */ | 1253 | } |
831 | version = ih_version(ih); | 1254 | } |
832 | old_key_comp = le_ih_k_offset(ih); | ||
833 | old_len = ih_item_len(ih); | ||
834 | |||
835 | /* Calculate key component and item length to insert into S_new[i] */ | ||
836 | set_le_ih_k_offset(ih, le_ih_k_offset(ih) + | ||
837 | ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); | ||
838 | |||
839 | put_ih_item_len(ih, sbytes[i]); | ||
840 | |||
841 | /* Insert part of the item into S_new[i] before 0-th item */ | ||
842 | buffer_info_init_bh(tb, &bi, S_new[i]); | ||
843 | |||
844 | if ((old_len - sbytes[i]) > zeros_num) { | ||
845 | r_zeros_number = 0; | ||
846 | r_body = body + (old_len - sbytes[i]) - zeros_num; | ||
847 | } else { | ||
848 | r_body = body; | ||
849 | r_zeros_number = zeros_num - (old_len - sbytes[i]); | ||
850 | zeros_num -= r_zeros_number; | ||
851 | } | ||
852 | |||
853 | leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number); | ||
854 | |||
855 | /* Calculate key component and item length to insert into S[i] */ | ||
856 | set_le_ih_k_offset(ih, old_key_comp); | ||
857 | put_ih_item_len(ih, old_len - sbytes[i]); | ||
858 | tb->insert_size[0] -= sbytes[i]; | ||
859 | } else { /* whole new item falls into S_new[i] */ | ||
860 | |||
861 | /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ | ||
862 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | ||
863 | snum[i] - 1, sbytes[i], S_new[i]); | ||
864 | |||
865 | /* Insert new item into S_new[i] */ | ||
866 | buffer_info_init_bh(tb, &bi, S_new[i]); | ||
867 | leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1, | ||
868 | ih, body, zeros_num); | ||
869 | |||
870 | zeros_num = tb->insert_size[0] = 0; | ||
871 | } | ||
872 | } | ||
873 | |||
874 | else { /* new item or it part don't falls into S_new[i] */ | ||
875 | 1255 | ||
876 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | 1256 | static void balance_leaf_finish_node_insert(struct tree_balance *tb, |
877 | snum[i], sbytes[i], S_new[i]); | 1257 | struct item_head *ih, |
878 | } | 1258 | const char *body) |
879 | break; | 1259 | { |
1260 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1261 | struct buffer_info bi; | ||
1262 | buffer_info_init_tbS0(tb, &bi); | ||
1263 | leaf_insert_into_buf(&bi, tb->item_pos, ih, body, tb->zeroes_num); | ||
880 | 1264 | ||
881 | case M_PASTE: /* append item */ | 1265 | /* If we insert the first key change the delimiting key */ |
882 | 1266 | if (tb->item_pos == 0) { | |
883 | if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ | 1267 | if (tb->CFL[0]) /* can be 0 in reiserfsck */ |
884 | if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ | 1268 | replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); |
885 | struct item_head *aux_ih; | ||
886 | |||
887 | RFALSE(ih, "PAP-12210: ih must be 0"); | ||
888 | |||
889 | aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); | ||
890 | if (is_direntry_le_ih(aux_ih)) { | ||
891 | /* we append to directory item */ | ||
892 | |||
893 | int entry_count; | ||
894 | |||
895 | entry_count = ih_entry_count(aux_ih); | ||
896 | |||
897 | if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) { | ||
898 | /* new directory entry falls into S_new[i] */ | ||
899 | |||
900 | RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0"); | ||
901 | RFALSE(sbytes[i] - 1 >= entry_count, | ||
902 | "PAP-12220: there are no so much entries (%d), only %d", | ||
903 | sbytes[i] - 1, entry_count); | ||
904 | |||
905 | /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ | ||
906 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]); | ||
907 | /* Paste given directory entry to directory item */ | ||
908 | buffer_info_init_bh(tb, &bi, S_new[i]); | ||
909 | leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, | ||
910 | tb->insert_size[0], body, zeros_num); | ||
911 | /* paste new directory entry */ | ||
912 | leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1, | ||
913 | (struct reiserfs_de_head *) body, | ||
914 | body + DEH_SIZE, tb->insert_size[0]); | ||
915 | tb->insert_size[0] = 0; | ||
916 | pos_in_item++; | ||
917 | } else { /* new directory entry doesn't fall into S_new[i] */ | ||
918 | leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]); | ||
919 | } | ||
920 | } else { /* regular object */ | ||
921 | |||
922 | int n_shift, n_rem, r_zeros_number; | ||
923 | const char *r_body; | ||
924 | |||
925 | RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0, | ||
926 | "PAP-12225: item too short or insert_size <= 0"); | ||
927 | |||
928 | /* Calculate number of bytes which must be shifted from appended item */ | ||
929 | n_shift = sbytes[i] - tb->insert_size[0]; | ||
930 | if (n_shift < 0) | ||
931 | n_shift = 0; | ||
932 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); | ||
933 | |||
934 | /* Calculate number of bytes which must remain in body after append to S_new[i] */ | ||
935 | n_rem = tb->insert_size[0] - sbytes[i]; | ||
936 | if (n_rem < 0) | ||
937 | n_rem = 0; | ||
938 | /* Append part of body into S_new[0] */ | ||
939 | buffer_info_init_bh(tb, &bi, S_new[i]); | ||
940 | if (n_rem > zeros_num) { | ||
941 | r_zeros_number = 0; | ||
942 | r_body = body + n_rem - zeros_num; | ||
943 | } else { | ||
944 | r_body = body; | ||
945 | r_zeros_number = zeros_num - n_rem; | ||
946 | zeros_num -= r_zeros_number; | ||
947 | } | ||
948 | |||
949 | leaf_paste_in_buffer(&bi, 0, n_shift, | ||
950 | tb->insert_size[0] - n_rem, | ||
951 | r_body, r_zeros_number); | ||
952 | { | ||
953 | struct item_head *tmp; | ||
954 | |||
955 | tmp = B_N_PITEM_HEAD(S_new[i], 0); | ||
956 | if (is_indirect_le_ih | ||
957 | (tmp)) { | ||
958 | set_ih_free_space(tmp, 0); | ||
959 | set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT))); | ||
960 | } else { | ||
961 | set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem); | ||
962 | } | ||
963 | } | ||
964 | |||
965 | tb->insert_size[0] = n_rem; | ||
966 | if (!n_rem) | ||
967 | pos_in_item++; | ||
968 | } | ||
969 | } else | ||
970 | /* item falls wholly into S_new[i] */ | ||
971 | { | ||
972 | int leaf_mi; | ||
973 | struct item_head *pasted; | ||
974 | 1269 | ||
975 | #ifdef CONFIG_REISERFS_CHECK | 1270 | } |
976 | struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos); | 1271 | } |
977 | |||
978 | if (!is_direntry_le_ih(ih_check) | ||
979 | && (pos_in_item != ih_item_len(ih_check) | ||
980 | || tb->insert_size[0] <= 0)) | ||
981 | reiserfs_panic(tb->tb_sb, | ||
982 | "PAP-12235", | ||
983 | "pos_in_item " | ||
984 | "must be equal " | ||
985 | "to ih_item_len"); | ||
986 | #endif /* CONFIG_REISERFS_CHECK */ | ||
987 | |||
988 | leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, | ||
989 | tb, snum[i], | ||
990 | sbytes[i], | ||
991 | S_new[i]); | ||
992 | |||
993 | RFALSE(leaf_mi, | ||
994 | "PAP-12240: unexpected value returned by leaf_move_items (%d)", | ||
995 | leaf_mi); | ||
996 | |||
997 | /* paste into item */ | ||
998 | buffer_info_init_bh(tb, &bi, S_new[i]); | ||
999 | leaf_paste_in_buffer(&bi, | ||
1000 | item_pos - n + snum[i], | ||
1001 | pos_in_item, | ||
1002 | tb->insert_size[0], | ||
1003 | body, zeros_num); | ||
1004 | |||
1005 | pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]); | ||
1006 | if (is_direntry_le_ih(pasted)) { | ||
1007 | leaf_paste_entries(&bi, | ||
1008 | item_pos - n + snum[i], | ||
1009 | pos_in_item, 1, | ||
1010 | (struct reiserfs_de_head *)body, | ||
1011 | body + DEH_SIZE, | ||
1012 | tb->insert_size[0] | ||
1013 | ); | ||
1014 | } | ||
1015 | |||
1016 | /* if we paste to indirect item update ih_free_space */ | ||
1017 | if (is_indirect_le_ih(pasted)) | ||
1018 | set_ih_free_space(pasted, 0); | ||
1019 | zeros_num = tb->insert_size[0] = 0; | ||
1020 | } | ||
1021 | } | ||
1022 | 1272 | ||
1023 | else { /* pasted item doesn't fall into S_new[i] */ | 1273 | static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb, |
1274 | struct item_head *ih, | ||
1275 | const char *body) | ||
1276 | { | ||
1277 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1278 | struct item_head *pasted = item_head(tbS0, tb->item_pos); | ||
1279 | struct buffer_info bi; | ||
1024 | 1280 | ||
1025 | leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, | 1281 | if (tb->pos_in_item >= 0 && tb->pos_in_item <= ih_entry_count(pasted)) { |
1026 | snum[i], sbytes[i], S_new[i]); | 1282 | RFALSE(!tb->insert_size[0], |
1027 | } | 1283 | "PAP-12260: insert_size is 0 already"); |
1028 | break; | 1284 | |
1029 | default: /* cases d and t */ | 1285 | /* prepare space */ |
1030 | reiserfs_panic(tb->tb_sb, "PAP-12245", | 1286 | buffer_info_init_tbS0(tb, &bi); |
1031 | "blknum > 2: unexpected mode: %s(%d)", | 1287 | leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item, |
1032 | (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); | 1288 | tb->insert_size[0], body, tb->zeroes_num); |
1289 | |||
1290 | /* paste entry */ | ||
1291 | leaf_paste_entries(&bi, tb->item_pos, tb->pos_in_item, 1, | ||
1292 | (struct reiserfs_de_head *)body, | ||
1293 | body + DEH_SIZE, tb->insert_size[0]); | ||
1294 | |||
1295 | if (!tb->item_pos && !tb->pos_in_item) { | ||
1296 | RFALSE(!tb->CFL[0] || !tb->L[0], | ||
1297 | "PAP-12270: CFL[0]/L[0] must be specified"); | ||
1298 | if (tb->CFL[0]) | ||
1299 | replace_key(tb, tb->CFL[0], tb->lkey[0], | ||
1300 | tbS0, 0); | ||
1033 | } | 1301 | } |
1034 | 1302 | ||
1035 | memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); | 1303 | tb->insert_size[0] = 0; |
1036 | insert_ptr[i] = S_new[i]; | 1304 | } |
1305 | } | ||
1306 | |||
1307 | static void balance_leaf_finish_node_paste(struct tree_balance *tb, | ||
1308 | struct item_head *ih, | ||
1309 | const char *body) | ||
1310 | { | ||
1311 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1312 | struct buffer_info bi; | ||
1313 | struct item_head *pasted = item_head(tbS0, tb->item_pos); | ||
1037 | 1314 | ||
1038 | RFALSE(!buffer_journaled(S_new[i]) | 1315 | /* when directory, may be new entry already pasted */ |
1039 | || buffer_journal_dirty(S_new[i]) | 1316 | if (is_direntry_le_ih(pasted)) { |
1040 | || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", | 1317 | balance_leaf_finish_node_paste_dirent(tb, ih, body); |
1041 | i, S_new[i]); | 1318 | return; |
1042 | } | 1319 | } |
1043 | 1320 | ||
1044 | /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the | 1321 | /* regular object */ |
1045 | affected item which remains in S */ | ||
1046 | if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ | ||
1047 | 1322 | ||
1048 | switch (flag) { | 1323 | if (tb->pos_in_item == ih_item_len(pasted)) { |
1049 | case M_INSERT: /* insert item into S[0] */ | 1324 | RFALSE(tb->insert_size[0] <= 0, |
1050 | buffer_info_init_tbS0(tb, &bi); | 1325 | "PAP-12275: insert size must not be %d", |
1051 | leaf_insert_into_buf(&bi, item_pos, ih, body, | 1326 | tb->insert_size[0]); |
1052 | zeros_num); | 1327 | buffer_info_init_tbS0(tb, &bi); |
1328 | leaf_paste_in_buffer(&bi, tb->item_pos, | ||
1329 | tb->pos_in_item, tb->insert_size[0], body, | ||
1330 | tb->zeroes_num); | ||
1053 | 1331 | ||
1054 | /* If we insert the first key change the delimiting key */ | 1332 | if (is_indirect_le_ih(pasted)) |
1055 | if (item_pos == 0) { | 1333 | set_ih_free_space(pasted, 0); |
1056 | if (tb->CFL[0]) /* can be 0 in reiserfsck */ | ||
1057 | replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); | ||
1058 | } | ||
1059 | break; | ||
1060 | 1334 | ||
1061 | case M_PASTE:{ /* append item in S[0] */ | 1335 | tb->insert_size[0] = 0; |
1062 | struct item_head *pasted; | 1336 | } |
1063 | |||
1064 | pasted = B_N_PITEM_HEAD(tbS0, item_pos); | ||
1065 | /* when directory, may be new entry already pasted */ | ||
1066 | if (is_direntry_le_ih(pasted)) { | ||
1067 | if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) { | ||
1068 | |||
1069 | RFALSE(!tb->insert_size[0], | ||
1070 | "PAP-12260: insert_size is 0 already"); | ||
1071 | |||
1072 | /* prepare space */ | ||
1073 | buffer_info_init_tbS0(tb, &bi); | ||
1074 | leaf_paste_in_buffer(&bi, item_pos, pos_in_item, | ||
1075 | tb->insert_size[0], body, | ||
1076 | zeros_num); | ||
1077 | |||
1078 | /* paste entry */ | ||
1079 | leaf_paste_entries(&bi, item_pos, pos_in_item, 1, | ||
1080 | (struct reiserfs_de_head *)body, | ||
1081 | body + DEH_SIZE, | ||
1082 | tb->insert_size[0]); | ||
1083 | if (!item_pos && !pos_in_item) { | ||
1084 | RFALSE(!tb->CFL[0] || !tb->L[0], | ||
1085 | "PAP-12270: CFL[0]/L[0] must be specified"); | ||
1086 | if (tb->CFL[0]) | ||
1087 | replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0); | ||
1088 | } | ||
1089 | tb->insert_size[0] = 0; | ||
1090 | } | ||
1091 | } else { /* regular object */ | ||
1092 | if (pos_in_item == ih_item_len(pasted)) { | ||
1093 | |||
1094 | RFALSE(tb->insert_size[0] <= 0, | ||
1095 | "PAP-12275: insert size must not be %d", | ||
1096 | tb->insert_size[0]); | ||
1097 | buffer_info_init_tbS0(tb, &bi); | ||
1098 | leaf_paste_in_buffer(&bi, item_pos, pos_in_item, | ||
1099 | tb->insert_size[0], body, zeros_num); | ||
1100 | |||
1101 | if (is_indirect_le_ih(pasted)) { | ||
1102 | #if 0 | ||
1103 | RFALSE(tb-> | ||
1104 | insert_size[0] != | ||
1105 | UNFM_P_SIZE, | ||
1106 | "PAP-12280: insert_size for indirect item must be %d, not %d", | ||
1107 | UNFM_P_SIZE, | ||
1108 | tb-> | ||
1109 | insert_size[0]); | ||
1110 | #endif | ||
1111 | set_ih_free_space(pasted, 0); | ||
1112 | } | ||
1113 | tb->insert_size[0] = 0; | ||
1114 | } | ||
1115 | #ifdef CONFIG_REISERFS_CHECK | 1337 | #ifdef CONFIG_REISERFS_CHECK |
1116 | else { | 1338 | else if (tb->insert_size[0]) { |
1117 | if (tb->insert_size[0]) { | 1339 | print_cur_tb("12285"); |
1118 | print_cur_tb("12285"); | 1340 | reiserfs_panic(tb->tb_sb, "PAP-12285", |
1119 | reiserfs_panic(tb->tb_sb, | 1341 | "insert_size must be 0 (%d)", tb->insert_size[0]); |
1120 | "PAP-12285", | 1342 | } |
1121 | "insert_size " | 1343 | #endif |
1122 | "must be 0 " | 1344 | } |
1123 | "(%d)", | 1345 | |
1124 | tb->insert_size[0]); | 1346 | /* |
1125 | } | 1347 | * if the affected item was not wholly shifted then we |
1126 | } | 1348 | * perform all necessary operations on that part or whole |
1127 | #endif /* CONFIG_REISERFS_CHECK */ | 1349 | * of the affected item which remains in S |
1128 | 1350 | */ | |
1129 | } | 1351 | static void balance_leaf_finish_node(struct tree_balance *tb, |
1130 | } /* case M_PASTE: */ | 1352 | struct item_head *ih, |
1353 | const char *body, int flag) | ||
1354 | { | ||
1355 | /* if we must insert or append into buffer S[0] */ | ||
1356 | if (0 <= tb->item_pos && tb->item_pos < tb->s0num) { | ||
1357 | if (flag == M_INSERT) | ||
1358 | balance_leaf_finish_node_insert(tb, ih, body); | ||
1359 | else /* M_PASTE */ | ||
1360 | balance_leaf_finish_node_paste(tb, ih, body); | ||
1361 | } | ||
1362 | } | ||
1363 | |||
1364 | /** | ||
1365 | * balance_leaf - reiserfs tree balancing algorithm | ||
1366 | * @tb: tree balance state | ||
1367 | * @ih: item header of inserted item (little endian) | ||
1368 | * @body: body of inserted item or bytes to paste | ||
1369 | * @flag: i - insert, d - delete, c - cut, p - paste (see do_balance) | ||
1370 | * passed back: | ||
1371 | * @insert_key: key to insert new nodes | ||
1372 | * @insert_ptr: array of nodes to insert at the next level | ||
1373 | * | ||
1374 | * In our processing of one level we sometimes determine what must be | ||
1375 | * inserted into the next higher level. This insertion consists of a | ||
1376 | * key or two keys and their corresponding pointers. | ||
1377 | */ | ||
1378 | static int balance_leaf(struct tree_balance *tb, struct item_head *ih, | ||
1379 | const char *body, int flag, | ||
1380 | struct item_head *insert_key, | ||
1381 | struct buffer_head **insert_ptr) | ||
1382 | { | ||
1383 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | ||
1384 | |||
1385 | PROC_INFO_INC(tb->tb_sb, balance_at[0]); | ||
1386 | |||
1387 | /* Make balance in case insert_size[0] < 0 */ | ||
1388 | if (tb->insert_size[0] < 0) | ||
1389 | return balance_leaf_when_delete(tb, flag); | ||
1390 | |||
1391 | tb->item_pos = PATH_LAST_POSITION(tb->tb_path), | ||
1392 | tb->pos_in_item = tb->tb_path->pos_in_item, | ||
1393 | tb->zeroes_num = 0; | ||
1394 | if (flag == M_INSERT && !body) | ||
1395 | tb->zeroes_num = ih_item_len(ih); | ||
1396 | |||
1397 | /* | ||
1398 | * for indirect item pos_in_item is measured in unformatted node | ||
1399 | * pointers. Recalculate to bytes | ||
1400 | */ | ||
1401 | if (flag != M_INSERT | ||
1402 | && is_indirect_le_ih(item_head(tbS0, tb->item_pos))) | ||
1403 | tb->pos_in_item *= UNFM_P_SIZE; | ||
1404 | |||
1405 | balance_leaf_left(tb, ih, body, flag); | ||
1406 | |||
1407 | /* tb->lnum[0] > 0 */ | ||
1408 | /* Calculate new item position */ | ||
1409 | tb->item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); | ||
1410 | |||
1411 | balance_leaf_right(tb, ih, body, flag); | ||
1412 | |||
1413 | /* tb->rnum[0] > 0 */ | ||
1414 | RFALSE(tb->blknum[0] > 3, | ||
1415 | "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); | ||
1416 | RFALSE(tb->blknum[0] < 0, | ||
1417 | "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]); | ||
1418 | |||
1419 | /* | ||
1420 | * if while adding to a node we discover that it is possible to split | ||
1421 | * it in two, and merge the left part into the left neighbor and the | ||
1422 | * right part into the right neighbor, eliminating the node | ||
1423 | */ | ||
1424 | if (tb->blknum[0] == 0) { /* node S[0] is empty now */ | ||
1425 | |||
1426 | RFALSE(!tb->lnum[0] || !tb->rnum[0], | ||
1427 | "PAP-12190: lnum and rnum must not be zero"); | ||
1428 | /* | ||
1429 | * if insertion was done before 0-th position in R[0], right | ||
1430 | * delimiting key of the tb->L[0]'s and left delimiting key are | ||
1431 | * not set correctly | ||
1432 | */ | ||
1433 | if (tb->CFL[0]) { | ||
1434 | if (!tb->CFR[0]) | ||
1435 | reiserfs_panic(tb->tb_sb, "vs-12195", | ||
1436 | "CFR not initialized"); | ||
1437 | copy_key(internal_key(tb->CFL[0], tb->lkey[0]), | ||
1438 | internal_key(tb->CFR[0], tb->rkey[0])); | ||
1439 | do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); | ||
1131 | } | 1440 | } |
1441 | |||
1442 | reiserfs_invalidate_buffer(tb, tbS0); | ||
1443 | return 0; | ||
1132 | } | 1444 | } |
1445 | |||
1446 | balance_leaf_new_nodes(tb, ih, body, insert_key, insert_ptr, flag); | ||
1447 | |||
1448 | balance_leaf_finish_node(tb, ih, body, flag); | ||
1449 | |||
1133 | #ifdef CONFIG_REISERFS_CHECK | 1450 | #ifdef CONFIG_REISERFS_CHECK |
1134 | if (flag == M_PASTE && tb->insert_size[0]) { | 1451 | if (flag == M_PASTE && tb->insert_size[0]) { |
1135 | print_cur_tb("12290"); | 1452 | print_cur_tb("12290"); |
@@ -1137,9 +1454,11 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h | |||
1137 | "PAP-12290", "insert_size is still not 0 (%d)", | 1454 | "PAP-12290", "insert_size is still not 0 (%d)", |
1138 | tb->insert_size[0]); | 1455 | tb->insert_size[0]); |
1139 | } | 1456 | } |
1140 | #endif /* CONFIG_REISERFS_CHECK */ | 1457 | #endif |
1458 | |||
1459 | /* Leaf level of the tree is balanced (end of balance_leaf) */ | ||
1141 | return 0; | 1460 | return 0; |
1142 | } /* Leaf level of the tree is balanced (end of balance_leaf) */ | 1461 | } |
1143 | 1462 | ||
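Two fixups inside balance_leaf are worth spelling out with numbers: pos_in_item arrives in unformatted-node-pointer units for indirect items and must be rescaled to bytes, and once lnum[0] items have gone left, item_pos drops by lnum[0] unless lbytes != -1, in which case the boundary item was only partially shifted and still occupies a slot in S[0]. A toy check of both rules (standalone C; illustrative, not kernel code):

#include <stdio.h>

#define UNFM_P_SIZE 4	/* bytes per unformatted-node pointer */

/* indirect items address their body in pointers; rescale to bytes */
static int pos_in_item_bytes(int pos, int is_indirect)
{
	return is_indirect ? pos * UNFM_P_SIZE : pos;
}

/* lbytes != -1 means the boundary item split, so one fewer item left */
static int new_item_pos(int item_pos, int lnum, int lbytes)
{
	return item_pos - (lnum - (lbytes != -1 ? 1 : 0));
}

int main(void)
{
	printf("%d\n", pos_in_item_bytes(12, 1));	/* 48 */
	printf("%d\n", new_item_pos(5, 3, -1));		/* 2: 3 whole items moved */
	printf("%d\n", new_item_pos(5, 3, 200));	/* 3: boundary item split */
	return 0;
}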
1144 | /* Make empty node */ | 1463 | /* Make empty node */ |
1145 | void make_empty_node(struct buffer_info *bi) | 1464 | void make_empty_node(struct buffer_info *bi) |
@@ -1178,9 +1497,7 @@ struct buffer_head *get_FEB(struct tree_balance *tb) | |||
1178 | return tb->used[i]; | 1497 | return tb->used[i]; |
1179 | } | 1498 | } |
1180 | 1499 | ||
1181 | /* This is now used because reiserfs_free_block has to be able to | 1500 | /* This is now used because reiserfs_free_block has to be able to schedule. */ |
1182 | ** schedule. | ||
1183 | */ | ||
1184 | static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) | 1501 | static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) |
1185 | { | 1502 | { |
1186 | int i; | 1503 | int i; |
@@ -1246,10 +1563,10 @@ void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, | |||
1246 | 1563 | ||
1247 | if (B_IS_ITEMS_LEVEL(src)) | 1564 | if (B_IS_ITEMS_LEVEL(src)) |
1248 | /* source buffer contains leaf node */ | 1565 | /* source buffer contains leaf node */ |
1249 | memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src), | 1566 | memcpy(internal_key(dest, n_dest), item_head(src, n_src), |
1250 | KEY_SIZE); | 1567 | KEY_SIZE); |
1251 | else | 1568 | else |
1252 | memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src), | 1569 | memcpy(internal_key(dest, n_dest), internal_key(src, n_src), |
1253 | KEY_SIZE); | 1570 | KEY_SIZE); |
1254 | 1571 | ||
1255 | do_balance_mark_internal_dirty(tb, dest, 0); | 1572 | do_balance_mark_internal_dirty(tb, dest, 0); |
@@ -1335,8 +1652,10 @@ static int check_before_balancing(struct tree_balance *tb) | |||
1335 | "mount point."); | 1652 | "mount point."); |
1336 | } | 1653 | } |
1337 | 1654 | ||
1338 | /* double check that buffers that we will modify are unlocked. (fix_nodes should already have | 1655 | /* |
1339 | prepped all of these for us). */ | 1656 | * double check that buffers that we will modify are unlocked. |
1657 | * (fix_nodes should already have prepped all of these for us). | ||
1658 | */ | ||
1340 | if (tb->lnum[0]) { | 1659 | if (tb->lnum[0]) { |
1341 | retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); | 1660 | retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); |
1342 | retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); | 1661 | retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); |
@@ -1429,49 +1748,51 @@ static void check_internal_levels(struct tree_balance *tb) | |||
1429 | 1748 | ||
1430 | #endif | 1749 | #endif |
1431 | 1750 | ||
1432 | /* Now we have all of the buffers that must be used in balancing of | 1751 | /* |
1433 | the tree. We rely on the assumption that schedule() will not occur | 1752 | * Now we have all of the buffers that must be used in balancing of |
1434 | while do_balance works. ( Only interrupt handlers are acceptable.) | 1753 | * the tree. We rely on the assumption that schedule() will not occur |
1435 | We balance the tree according to the analysis made before this, | 1754 | * while do_balance works. ( Only interrupt handlers are acceptable.) |
1436 | using buffers already obtained. For SMP support it will someday be | 1755 | * We balance the tree according to the analysis made before this, |
1437 | necessary to add ordered locking of tb. */ | 1756 | * using buffers already obtained. For SMP support it will someday be |
1438 | 1757 | * necessary to add ordered locking of tb. | |
1439 | /* Some interesting rules of balancing: | 1758 | */ |
1440 | |||
1441 | we delete a maximum of two nodes per level per balancing: we never | ||
1442 | delete R, when we delete two of three nodes L, S, R then we move | ||
1443 | them into R. | ||
1444 | |||
1445 | we only delete L if we are deleting two nodes, if we delete only | ||
1446 | one node we delete S | ||
1447 | |||
1448 | if we shift leaves then we shift as much as we can: this is a | ||
1449 | deliberate policy of extremism in node packing which results in | ||
1450 | higher average utilization after repeated random balance operations | ||
1451 | at the cost of more memory copies and more balancing as a result of | ||
1452 | small insertions to full nodes. | ||
1453 | |||
1454 | if we shift internal nodes we try to evenly balance the node | ||
1455 | utilization, with consequent less balancing at the cost of lower | ||
1456 | utilization. | ||
1457 | |||
1458 | one could argue that the policy for directories in leaves should be | ||
1459 | that of internal nodes, but we will wait until another day to | ||
1460 | evaluate this.... It would be nice to someday measure and prove | ||
1461 | these assumptions as to what is optimal.... | ||
1462 | 1759 | ||
1463 | */ | 1760 | /* |
1761 | * Some interesting rules of balancing: | ||
1762 | * we delete a maximum of two nodes per level per balancing: we never | ||
1763 | * delete R, when we delete two of three nodes L, S, R then we move | ||
1764 | * them into R. | ||
1765 | * | ||
1766 | * we only delete L if we are deleting two nodes, if we delete only | ||
1767 | * one node we delete S | ||
1768 | * | ||
1769 | * if we shift leaves then we shift as much as we can: this is a | ||
1770 | * deliberate policy of extremism in node packing which results in | ||
1771 | * higher average utilization after repeated random balance operations | ||
1772 | * at the cost of more memory copies and more balancing as a result of | ||
1773 | * small insertions to full nodes. | ||
1774 | * | ||
1775 | * if we shift internal nodes we try to evenly balance the node | ||
1776 | * utilization, with consequent less balancing at the cost of lower | ||
1777 | * utilization. | ||
1778 | * | ||
1779 | * one could argue that the policy for directories in leaves should be | ||
1780 | * that of internal nodes, but we will wait until another day to | ||
1781 | * evaluate this.... It would be nice to someday measure and prove | ||
1782 | * these assumptions as to what is optimal.... | ||
1783 | */ | ||
1464 | 1784 | ||
1465 | static inline void do_balance_starts(struct tree_balance *tb) | 1785 | static inline void do_balance_starts(struct tree_balance *tb) |
1466 | { | 1786 | { |
1467 | /* use print_cur_tb() to see initial state of struct | 1787 | /* use print_cur_tb() to see initial state of struct tree_balance */ |
1468 | tree_balance */ | ||
1469 | 1788 | ||
1470 | /* store_print_tb (tb); */ | 1789 | /* store_print_tb (tb); */ |
1471 | 1790 | ||
1472 | /* do not delete, just comment it out */ | 1791 | /* do not delete, just comment it out */ |
1473 | /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, | 1792 | /* |
1474 | "check");*/ | 1793 | print_tb(flag, PATH_LAST_POSITION(tb->tb_path), |
1794 | tb->tb_path->pos_in_item, tb, "check"); | ||
1795 | */ | ||
1475 | RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); | 1796 | RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); |
1476 | #ifdef CONFIG_REISERFS_CHECK | 1797 | #ifdef CONFIG_REISERFS_CHECK |
1477 | REISERFS_SB(tb->tb_sb)->cur_tb = tb; | 1798 | REISERFS_SB(tb->tb_sb)->cur_tb = tb; |
@@ -1487,9 +1808,10 @@ static inline void do_balance_completed(struct tree_balance *tb) | |||
1487 | REISERFS_SB(tb->tb_sb)->cur_tb = NULL; | 1808 | REISERFS_SB(tb->tb_sb)->cur_tb = NULL; |
1488 | #endif | 1809 | #endif |
1489 | 1810 | ||
1490 | /* reiserfs_free_block is no longer schedule safe. So, we need to | 1811 | /* |
1491 | ** put the buffers we want freed on the thrown list during do_balance, | 1812 | * reiserfs_free_block is no longer schedule safe. So, we need to |
1492 | ** and then free them now | 1813 | * put the buffers we want freed on the thrown list during do_balance, |
1814 | * and then free them now | ||
1493 | */ | 1815 | */ |
1494 | 1816 | ||
1495 | REISERFS_SB(tb->tb_sb)->s_do_balance++; | 1817 | REISERFS_SB(tb->tb_sb)->s_do_balance++; |
@@ -1500,36 +1822,40 @@ static inline void do_balance_completed(struct tree_balance *tb) | |||
1500 | free_thrown(tb); | 1822 | free_thrown(tb); |
1501 | } | 1823 | } |
1502 | 1824 | ||
1503 | void do_balance(struct tree_balance *tb, /* tree_balance structure */ | 1825 | /* |
1504 | struct item_head *ih, /* item header of inserted item */ | 1826 | * do_balance - balance the tree |
1505 | const char *body, /* body of inserted item or bytes to paste */ | 1827 | * |
1506 | int flag) | 1828 | * @tb: tree_balance structure |
1507 | { /* i - insert, d - delete | 1829 | * @ih: item header of inserted item |
1508 | c - cut, p - paste | 1830 | * @body: body of inserted item or bytes to paste |
1509 | 1831 | * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' - paste |
1510 | Cut means delete part of an item | 1832 | * |
1511 | (includes removing an entry from a | 1833 | * Cut means delete part of an item (includes removing an entry from a |
1512 | directory). | 1834 | * directory). |
1513 | 1835 | * | |
1514 | Delete means delete whole item. | 1836 | * Delete means delete whole item. |
1515 | 1837 | * | |
1516 | Insert means add a new item into the | 1838 | * Insert means add a new item into the tree. |
1517 | tree. | 1839 | * |
1518 | 1840 | * Paste means to append to the end of an existing file or to | |
1519 | Paste means to append to the end of an | 1841 | * insert a directory entry. |
1520 | existing file or to insert a directory | 1842 | */ |
1521 | entry. */ | 1843 | void do_balance(struct tree_balance *tb, struct item_head *ih, |
1522 | int child_pos, /* position of a child node in its parent */ | 1844 | const char *body, int flag) |
1523 | h; /* level of the tree being processed */ | 1845 | { |
1524 | struct item_head insert_key[2]; /* in our processing of one level | 1846 | int child_pos; /* position of a child node in its parent */ |
1525 | we sometimes determine what | 1847 | int h; /* level of the tree being processed */ |
1526 | must be inserted into the next | 1848 | |
1527 | higher level. This insertion | 1849 | /* |
1528 | consists of a key or two keys | 1850 | * in our processing of one level we sometimes determine what |
1529 | and their corresponding | 1851 | * must be inserted into the next higher level. This insertion |
1530 | pointers */ | 1852 | * consists of a key or two keys and their corresponding |
1531 | struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next | 1853 | * pointers |
1532 | level */ | 1854 | */ |
1855 | struct item_head insert_key[2]; | ||
1856 | |||
1857 | /* inserted node-ptrs for the next level */ | ||
1858 | struct buffer_head *insert_ptr[2]; | ||
1533 | 1859 | ||
1534 | tb->tb_mode = flag; | 1860 | tb->tb_mode = flag; |
1535 | tb->need_balance_dirty = 0; | 1861 | tb->need_balance_dirty = 0; |
@@ -1546,12 +1872,14 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */ | |||
1546 | return; | 1872 | return; |
1547 | } | 1873 | } |
1548 | 1874 | ||
1549 | atomic_inc(&(fs_generation(tb->tb_sb))); | 1875 | atomic_inc(&fs_generation(tb->tb_sb)); |
1550 | do_balance_starts(tb); | 1876 | do_balance_starts(tb); |
1551 | 1877 | ||
1552 | /* balance leaf returns 0 except if combining L R and S into | 1878 | /* |
1553 | one node. see balance_internal() for explanation of this | 1879 | * balance_leaf returns 0 except if combining L R and S into |
1554 | line of code. */ | 1880 | * one node. see balance_internal() for explanation of this |
1881 | * line of code. | ||
1882 | */ | ||
1555 | child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + | 1883 | child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + |
1556 | balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); | 1884 | balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); |
1557 | 1885 | ||
@@ -1561,9 +1889,8 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */ | |||
1561 | 1889 | ||
1562 | /* Balance internal level of the tree. */ | 1890 | /* Balance internal level of the tree. */ |
1563 | for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) | 1891 | for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) |
1564 | child_pos = | 1892 | child_pos = balance_internal(tb, h, child_pos, insert_key, |
1565 | balance_internal(tb, h, child_pos, insert_key, insert_ptr); | 1893 | insert_ptr); |
1566 | 1894 | ||
1567 | do_balance_completed(tb); | 1895 | do_balance_completed(tb); |
1568 | |||
1569 | } | 1896 | } |
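For reference, the four mode characters the kernel-doc lists are used throughout these paths as the M_* constants; only insert and paste carry payloads, and in the paste paths ih must be NULL, as the PAP-12210 assertion above enforces. A tiny legend, offered as a sketch of the convention described in the comments rather than a copy of the kernel's definitions:

#include <stdio.h>

#define M_INSERT 'i'	/* add a whole new item (ih + body) */
#define M_PASTE  'p'	/* append bytes / a dir entry (body only, ih NULL) */
#define M_DELETE 'd'	/* remove a whole item */
#define M_CUT    'c'	/* remove part of an item, e.g. one dir entry */

/* insert and paste grow the tree; delete and cut only shrink it */
static int mode_carries_payload(int flag)
{
	return flag == M_INSERT || flag == M_PASTE;
}

int main(void)
{
	const int modes[] = { M_INSERT, M_PASTE, M_DELETE, M_CUT };
	int i;

	for (i = 0; i < 4; i++)
		printf("%c: payload=%d\n", modes[i],
		       mode_carries_payload(modes[i]));
	return 0;
}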
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index ed58d843d578..5f6c32c668b6 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -15,20 +15,20 @@ | |||
15 | #include <linux/quotaops.h> | 15 | #include <linux/quotaops.h> |
16 | 16 | ||
17 | /* | 17 | /* |
18 | ** We pack the tails of files on file close, not at the time they are written. | 18 | * We pack the tails of files on file close, not at the time they are written. |
19 | ** This implies an unnecessary copy of the tail and an unnecessary indirect item | 19 | * This implies an unnecessary copy of the tail and an unnecessary indirect item |
20 | ** insertion/balancing, for files that are written in one write. | 20 | * insertion/balancing, for files that are written in one write. |
21 | ** It avoids unnecessary tail packings (balances) for files that are written in | 21 | * It avoids unnecessary tail packings (balances) for files that are written in |
22 | ** multiple writes and are small enough to have tails. | 22 | * multiple writes and are small enough to have tails. |
23 | ** | 23 | * |
24 | ** file_release is called by the VFS layer when the file is closed. If | 24 | * file_release is called by the VFS layer when the file is closed. If |
25 | ** this is the last open file descriptor, and the file | 25 | * this is the last open file descriptor, and the file is |
26 | ** small enough to have a tail, and the tail is currently in an | 26 | * small enough to have a tail, and the tail is currently in an |
27 | ** unformatted node, the tail is converted back into a direct item. | 27 | * unformatted node, the tail is converted back into a direct item. |
28 | ** | 28 | * |
29 | ** We use reiserfs_truncate_file to pack the tail, since it already has | 29 | * We use reiserfs_truncate_file to pack the tail, since it already has |
30 | ** all the conditions coded. | 30 | * all the conditions coded. |
31 | */ | 31 | */ |
32 | static int reiserfs_file_release(struct inode *inode, struct file *filp) | 32 | static int reiserfs_file_release(struct inode *inode, struct file *filp) |
33 | { | 33 | { |
34 | 34 | ||
@@ -41,10 +41,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
41 | if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) | 41 | if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) |
42 | return 0; | 42 | return 0; |
43 | 43 | ||
44 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | 44 | mutex_lock(&REISERFS_I(inode)->tailpack); |
45 | 45 | ||
46 | if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { | 46 | if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { |
47 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | 47 | mutex_unlock(&REISERFS_I(inode)->tailpack); |
48 | return 0; | 48 | return 0; |
49 | } | 49 | } |
50 | 50 | ||
@@ -52,31 +52,35 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
52 | if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || | 52 | if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || |
53 | !tail_has_to_be_packed(inode)) && | 53 | !tail_has_to_be_packed(inode)) && |
54 | REISERFS_I(inode)->i_prealloc_count <= 0) { | 54 | REISERFS_I(inode)->i_prealloc_count <= 0) { |
55 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | 55 | mutex_unlock(&REISERFS_I(inode)->tailpack); |
56 | return 0; | 56 | return 0; |
57 | } | 57 | } |
58 | 58 | ||
59 | reiserfs_write_lock(inode->i_sb); | 59 | reiserfs_write_lock(inode->i_sb); |
60 | /* freeing preallocation only involves relogging blocks that | 60 | /* |
61 | * freeing preallocation only involves relogging blocks that | ||
61 | * are already in the current transaction. preallocation gets | 62 | * are already in the current transaction. preallocation gets |
62 | * freed at the end of each transaction, so it is impossible for | 63 | * freed at the end of each transaction, so it is impossible for |
63 | * us to log any additional blocks (including quota blocks) | 64 | * us to log any additional blocks (including quota blocks) |
64 | */ | 65 | */ |
65 | err = journal_begin(&th, inode->i_sb, 1); | 66 | err = journal_begin(&th, inode->i_sb, 1); |
66 | if (err) { | 67 | if (err) { |
67 | /* uh oh, we can't allow the inode to go away while there | 68 | /* |
69 | * uh oh, we can't allow the inode to go away while there | ||
68 | * is still preallocation blocks pending. Try to join the | 70 | * are still preallocation blocks pending. Try to join the |
69 | * aborted transaction | 71 | * aborted transaction |
70 | */ | 72 | */ |
71 | jbegin_failure = err; | 73 | jbegin_failure = err; |
72 | err = journal_join_abort(&th, inode->i_sb, 1); | 74 | err = journal_join_abort(&th, inode->i_sb); |
73 | 75 | ||
74 | if (err) { | 76 | if (err) { |
75 | /* hmpf, our choices here aren't good. We can pin the inode | 77 | /* |
76 | * which will disallow unmount from every happening, we can | 78 | * hmpf, our choices here aren't good. We can pin |
77 | * do nothing, which will corrupt random memory on unmount, | 79 | * the inode which will disallow unmount from ever |
78 | * or we can forcibly remove the file from the preallocation | 80 | * happening, we can do nothing, which will corrupt |
79 | * list, which will leak blocks on disk. Lets pin the inode | 81 | * random memory on unmount, or we can forcibly |
82 | * remove the file from the preallocation list, which | ||
83 | * will leak blocks on disk. Let's pin the inode | ||
80 | * and let the admin know what is going on. | 84 | * and let the admin know what is going on. |
81 | */ | 85 | */ |
82 | igrab(inode); | 86 | igrab(inode); |
@@ -92,7 +96,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
92 | #ifdef REISERFS_PREALLOCATE | 96 | #ifdef REISERFS_PREALLOCATE |
93 | reiserfs_discard_prealloc(&th, inode); | 97 | reiserfs_discard_prealloc(&th, inode); |
94 | #endif | 98 | #endif |
95 | err = journal_end(&th, inode->i_sb, 1); | 99 | err = journal_end(&th); |
96 | 100 | ||
97 | /* copy back the error code from journal_begin */ | 101 | /* copy back the error code from journal_begin */ |
98 | if (!err) | 102 | if (!err) |
@@ -102,35 +106,38 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
102 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && | 106 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && |
103 | tail_has_to_be_packed(inode)) { | 107 | tail_has_to_be_packed(inode)) { |
104 | 108 | ||
105 | /* if regular file is released by last holder and it has been | 109 | /* |
106 | appended (we append by unformatted node only) or its direct | 110 | * if regular file is released by last holder and it has been |
107 | item(s) had to be converted, then it may have to be | 111 | * appended (we append by unformatted node only) or its direct |
108 | indirect2direct converted */ | 112 | * item(s) had to be converted, then it may have to be |
113 | * indirect2direct converted | ||
114 | */ | ||
109 | err = reiserfs_truncate_file(inode, 0); | 115 | err = reiserfs_truncate_file(inode, 0); |
110 | } | 116 | } |
111 | out: | 117 | out: |
112 | reiserfs_write_unlock(inode->i_sb); | 118 | reiserfs_write_unlock(inode->i_sb); |
113 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | 119 | mutex_unlock(&REISERFS_I(inode)->tailpack); |
114 | return err; | 120 | return err; |
115 | } | 121 | } |
116 | 122 | ||
117 | static int reiserfs_file_open(struct inode *inode, struct file *file) | 123 | static int reiserfs_file_open(struct inode *inode, struct file *file) |
118 | { | 124 | { |
119 | int err = dquot_file_open(inode, file); | 125 | int err = dquot_file_open(inode, file); |
126 | |||
127 | /* somebody might be tailpacking on final close; wait for it */ | ||
120 | if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { | 128 | if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { |
121 | /* somebody might be tailpacking on final close; wait for it */ | 129 | mutex_lock(&REISERFS_I(inode)->tailpack); |
122 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | ||
123 | atomic_inc(&REISERFS_I(inode)->openers); | 130 | atomic_inc(&REISERFS_I(inode)->openers); |
124 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | 131 | mutex_unlock(&REISERFS_I(inode)->tailpack); |
125 | } | 132 | } |
126 | return err; | 133 | return err; |
127 | } | 134 | } |
128 | 135 | ||
129 | void reiserfs_vfs_truncate_file(struct inode *inode) | 136 | void reiserfs_vfs_truncate_file(struct inode *inode) |
130 | { | 137 | { |
131 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | 138 | mutex_lock(&REISERFS_I(inode)->tailpack); |
132 | reiserfs_truncate_file(inode, 1); | 139 | reiserfs_truncate_file(inode, 1); |
133 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | 140 | mutex_unlock(&REISERFS_I(inode)->tailpack); |
134 | } | 141 | } |
135 | 142 | ||
136 | /* Sync a reiserfs file. */ | 143 | /* Sync a reiserfs file. */ |
@@ -205,10 +212,11 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, | |||
205 | set_buffer_uptodate(bh); | 212 | set_buffer_uptodate(bh); |
206 | if (logit) { | 213 | if (logit) { |
207 | reiserfs_prepare_for_journal(s, bh, 1); | 214 | reiserfs_prepare_for_journal(s, bh, 1); |
208 | journal_mark_dirty(&th, s, bh); | 215 | journal_mark_dirty(&th, bh); |
209 | } else if (!buffer_dirty(bh)) { | 216 | } else if (!buffer_dirty(bh)) { |
210 | mark_buffer_dirty(bh); | 217 | mark_buffer_dirty(bh); |
211 | /* do data=ordered on any page past the end | 218 | /* |
219 | * do data=ordered on any page past the end | ||
212 | * of file and any buffer marked BH_New. | 220 | * of file and any buffer marked BH_New. |
213 | */ | 221 | */ |
214 | if (reiserfs_data_ordered(inode->i_sb) && | 222 | if (reiserfs_data_ordered(inode->i_sb) && |
@@ -219,8 +227,8 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, | |||
219 | } | 227 | } |
220 | } | 228 | } |
221 | if (logit) { | 229 | if (logit) { |
222 | ret = journal_end(&th, s, bh_per_page + 1); | 230 | ret = journal_end(&th); |
223 | drop_write_lock: | 231 | drop_write_lock: |
224 | reiserfs_write_unlock(s); | 232 | reiserfs_write_unlock(s); |
225 | } | 233 | } |
226 | /* | 234 | /* |
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index dc4d41530316..6b0ddb2a9091 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2,59 +2,32 @@ | |||
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /** | ||
6 | ** old_item_num | ||
7 | ** old_entry_num | ||
8 | ** set_entry_sizes | ||
9 | ** create_virtual_node | ||
10 | ** check_left | ||
11 | ** check_right | ||
12 | ** directory_part_size | ||
13 | ** get_num_ver | ||
14 | ** set_parameters | ||
15 | ** is_leaf_removable | ||
16 | ** are_leaves_removable | ||
17 | ** get_empty_nodes | ||
18 | ** get_lfree | ||
19 | ** get_rfree | ||
20 | ** is_left_neighbor_in_cache | ||
21 | ** decrement_key | ||
22 | ** get_far_parent | ||
23 | ** get_parents | ||
24 | ** can_node_be_removed | ||
25 | ** ip_check_balance | ||
26 | ** dc_check_balance_internal | ||
27 | ** dc_check_balance_leaf | ||
28 | ** dc_check_balance | ||
29 | ** check_balance | ||
30 | ** get_direct_parent | ||
31 | ** get_neighbors | ||
32 | ** fix_nodes | ||
33 | ** | ||
34 | ** | ||
35 | **/ | ||
36 | |||
37 | #include <linux/time.h> | 5 | #include <linux/time.h> |
38 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
39 | #include <linux/string.h> | 7 | #include <linux/string.h> |
40 | #include "reiserfs.h" | 8 | #include "reiserfs.h" |
41 | #include <linux/buffer_head.h> | 9 | #include <linux/buffer_head.h> |
42 | 10 | ||
43 | /* To make any changes in the tree we find a node, that contains item | 11 | /* |
44 | to be changed/deleted or position in the node we insert a new item | 12 | * To make any changes in the tree we find a node that contains item |
45 | to. We call this node S. To do balancing we need to decide what we | 13 | * to be changed/deleted or position in the node we insert a new item |
46 | will shift to left/right neighbor, or to a new node, where new item | 14 | * to. We call this node S. To do balancing we need to decide what we |
47 | will be etc. To make this analysis simpler we build virtual | 15 | * will shift to left/right neighbor, or to a new node, where new item |
48 | node. Virtual node is an array of items, that will replace items of | 16 | * will be etc. To make this analysis simpler we build virtual |
49 | node S. (For instance if we are going to delete an item, virtual | 17 | * node. Virtual node is an array of items, that will replace items of |
50 | node does not contain it). Virtual node keeps information about | 18 | * node S. (For instance if we are going to delete an item, virtual |
51 | item sizes and types, mergeability of first and last items, sizes | 19 | * node does not contain it). Virtual node keeps information about |
52 | of all entries in directory item. We use this array of items when | 20 | * item sizes and types, mergeability of first and last items, sizes |
53 | calculating what we can shift to neighbors and how many nodes we | 21 | * of all entries in directory item. We use this array of items when |
54 | have to have if we do not any shiftings, if we shift to left/right | 22 | * calculating what we can shift to neighbors and how many nodes we |
55 | neighbor or to both. */ | 23 | * have to have if we do not do any shifting, if we shift to left/right |
56 | 24 | * neighbor or to both. | |
57 | /* taking item number in virtual node, returns number of item, that it has in source buffer */ | 25 | */ |
26 | |||
27 | /* | ||
28 | * Takes item number in virtual node, returns number of item | ||
29 | * that it has in source buffer | ||
30 | */ | ||
58 | static inline int old_item_num(int new_num, int affected_item_num, int mode) | 31 | static inline int old_item_num(int new_num, int affected_item_num, int mode) |
59 | { | 32 | { |
60 | if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) | 33 | if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) |
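
The shown condition is the whole trick of old_item_num(): indices before the affected item never move, an insert shifts later slots back by one, a delete shifts them forward. A standalone sketch (M_* reduced to an enum; the delete branch is inferred from the insert branch):

#include <assert.h>

enum mode { M_INSERT, M_PASTE, M_CUT, M_DELETE };

static int old_num(int new_num, int affected, enum mode mode)
{
	if (mode == M_PASTE || mode == M_CUT || new_num < affected)
		return new_num;
	if (mode == M_INSERT)
		return new_num - 1;	/* slot was created by the insert */
	return new_num + 1;		/* M_DELETE: a source slot vanished */
}

int main(void)
{
	/* inserting at slot 2: virtual slot 3 holds what was source slot 2 */
	assert(old_num(3, 2, M_INSERT) == 2);
	/* deleting slot 2: virtual slot 2 holds what was source slot 3 */
	assert(old_num(2, 2, M_DELETE) == 3);
	return 0;
}
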
@@ -105,14 +78,17 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
105 | vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); | 78 | vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); |
106 | 79 | ||
107 | /* first item in the node */ | 80 | /* first item in the node */ |
108 | ih = B_N_PITEM_HEAD(Sh, 0); | 81 | ih = item_head(Sh, 0); |
109 | 82 | ||
110 | /* define the mergeability for 0-th item (if it is not being deleted) */ | 83 | /* define the mergeability for 0-th item (if it is not being deleted) */ |
111 | if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size) | 84 | if (op_is_left_mergeable(&ih->ih_key, Sh->b_size) |
112 | && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) | 85 | && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) |
113 | vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; | 86 | vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; |
114 | 87 | ||
115 | /* go through all items those remain in the virtual node (except for the new (inserted) one) */ | 88 | /* |
89 | * go through all items that remain in the virtual | ||
90 | * node (except for the new (inserted) one) | ||
91 | */ | ||
116 | for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { | 92 | for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { |
117 | int j; | 93 | int j; |
118 | struct virtual_item *vi = vn->vn_vi + new_num; | 94 | struct virtual_item *vi = vn->vn_vi + new_num; |
@@ -128,11 +104,13 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
128 | 104 | ||
129 | vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; | 105 | vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; |
130 | vi->vi_ih = ih + j; | 106 | vi->vi_ih = ih + j; |
131 | vi->vi_item = B_I_PITEM(Sh, ih + j); | 107 | vi->vi_item = ih_item_body(Sh, ih + j); |
132 | vi->vi_uarea = vn->vn_free_ptr; | 108 | vi->vi_uarea = vn->vn_free_ptr; |
133 | 109 | ||
134 | // FIXME: there is no check, that item operation did not | 110 | /* |
135 | // consume too much memory | 111 | * FIXME: there is no check that item operation did not |
112 | * consume too much memory | ||
113 | */ | ||
136 | vn->vn_free_ptr += | 114 | vn->vn_free_ptr += |
137 | op_create_vi(vn, vi, is_affected, tb->insert_size[0]); | 115 | op_create_vi(vn, vi, is_affected, tb->insert_size[0]); |
138 | if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) | 116 | if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) |
@@ -145,7 +123,8 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
145 | 123 | ||
146 | if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { | 124 | if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { |
147 | vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; | 125 | vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; |
148 | vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted | 126 | /* pointer to data which is going to be pasted */ |
127 | vi->vi_new_data = vn->vn_data; | ||
149 | } | 128 | } |
150 | } | 129 | } |
151 | 130 | ||
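
As a rough model of what create_virtual_node() assembles: the virtual node is, at its core, just the per-item sizes of S[0] as it would look after the planned change. The types and numbers below are invented for illustration; the real struct virtual_item also carries type flags and per-entry sizes for directory items.

#include <stdio.h>

#define IH_SIZE 24	/* reiserfs on-disk item header size */

struct toy_item { int body_len; };

/* bytes the first n items of the virtual node would occupy */
static int used_space(const struct toy_item *item, int n)
{
	int total = 0;

	for (int i = 0; i < n; i++)
		total += item[i].body_len + IH_SIZE;
	return total;
}

int main(void)
{
	/* S[0] as it would look after inserting a 128-byte item */
	struct toy_item vn[] = { { 48 }, { 128 }, { 1000 } };

	printf("virtual node payload: %d bytes\n", used_space(vn, 3));
	return 0;
}
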
@@ -164,11 +143,14 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
164 | tb->insert_size[0]); | 143 | tb->insert_size[0]); |
165 | } | 144 | } |
166 | 145 | ||
167 | /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ | 146 | /* |
147 | * set right merge flag: we take right delimiting key and | ||
148 | * check whether it is a mergeable item | ||
149 | */ | ||
168 | if (tb->CFR[0]) { | 150 | if (tb->CFR[0]) { |
169 | struct reiserfs_key *key; | 151 | struct reiserfs_key *key; |
170 | 152 | ||
171 | key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]); | 153 | key = internal_key(tb->CFR[0], tb->rkey[0]); |
172 | if (op_is_left_mergeable(key, Sh->b_size) | 154 | if (op_is_left_mergeable(key, Sh->b_size) |
173 | && (vn->vn_mode != M_DELETE | 155 | && (vn->vn_mode != M_DELETE |
174 | || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) | 156 | || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) |
@@ -179,12 +161,19 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
179 | if (op_is_left_mergeable(key, Sh->b_size) && | 161 | if (op_is_left_mergeable(key, Sh->b_size) && |
180 | !(vn->vn_mode != M_DELETE | 162 | !(vn->vn_mode != M_DELETE |
181 | || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { | 163 | || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { |
182 | /* we delete last item and it could be merged with right neighbor's first item */ | 164 | /* |
165 | * we delete last item and it could be merged | ||
166 | * with right neighbor's first item | ||
167 | */ | ||
183 | if (! | 168 | if (! |
184 | (B_NR_ITEMS(Sh) == 1 | 169 | (B_NR_ITEMS(Sh) == 1 |
185 | && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0)) | 170 | && is_direntry_le_ih(item_head(Sh, 0)) |
186 | && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) { | 171 | && ih_entry_count(item_head(Sh, 0)) == 1)) { |
187 | /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ | 172 | /* |
173 | * node contains more than 1 item, or item | ||
174 | * is not directory item, or this item | ||
175 | * contains more than 1 entry | ||
176 | */ | ||
188 | print_block(Sh, 0, -1, -1); | 177 | print_block(Sh, 0, -1, -1); |
189 | reiserfs_panic(tb->tb_sb, "vs-8045", | 178 | reiserfs_panic(tb->tb_sb, "vs-8045", |
190 | "rdkey %k, affected item==%d " | 179 | "rdkey %k, affected item==%d " |
@@ -198,8 +187,10 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
198 | } | 187 | } |
199 | } | 188 | } |
200 | 189 | ||
201 | /* using virtual node check, how many items can be shifted to left | 190 | /* |
202 | neighbor */ | 191 | * Using the virtual node, check how many items can be |
192 | * shifted to the left neighbor | ||
193 | */ | ||
203 | static void check_left(struct tree_balance *tb, int h, int cur_free) | 194 | static void check_left(struct tree_balance *tb, int h, int cur_free) |
204 | { | 195 | { |
205 | int i; | 196 | int i; |
@@ -259,9 +250,13 @@ static void check_left(struct tree_balance *tb, int h, int cur_free) | |||
259 | } | 250 | } |
260 | 251 | ||
261 | /* the item cannot be shifted entirely, try to split it */ | 252 | /* the item cannot be shifted entirely, try to split it */ |
262 | /* check whether L[0] can hold ih and at least one byte of the item body */ | 253 | /* |
254 | * check whether L[0] can hold ih and at least one byte | ||
255 | * of the item body | ||
256 | */ | ||
257 | |||
258 | /* cannot shift even a part of the current item */ | ||
263 | if (cur_free <= ih_size) { | 259 | if (cur_free <= ih_size) { |
264 | /* cannot shift even a part of the current item */ | ||
265 | tb->lbytes = -1; | 260 | tb->lbytes = -1; |
266 | return; | 261 | return; |
267 | } | 262 | } |
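
check_right() below is the mirror image of this accounting: shift whole items while item body plus header fits, then try to split the boundary item. A compressed sketch, assuming plain byte-run items where the real code defers to op_check_left()/op_check_right():

#include <stdio.h>

#define IH_SIZE 24

/*
 * How many whole items (sizes in item_len[]) fit into a neighbor with
 * cur_free bytes; *bytes gets the body bytes of the item split across
 * the boundary, or -1 when not even part of it can move.
 */
static int shift_left(const int *item_len, int nr, int cur_free, int *bytes)
{
	int num = 0;

	while (num < nr && cur_free >= item_len[num] + IH_SIZE) {
		cur_free -= item_len[num] + IH_SIZE;
		num++;
	}
	if (num == nr || cur_free <= IH_SIZE)
		*bytes = -1;			/* no partial shift */
	else
		*bytes = cur_free - IH_SIZE;	/* header fits, body partly */
	return num;
}

int main(void)
{
	int lens[] = { 100, 200, 4000 };
	int lbytes;
	int lnum = shift_left(lens, 3, 400, &lbytes);

	printf("shift %d whole items, then %d bytes\n", lnum, lbytes);
	return 0;
}
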
@@ -278,8 +273,10 @@ static void check_left(struct tree_balance *tb, int h, int cur_free) | |||
278 | return; | 273 | return; |
279 | } | 274 | } |
280 | 275 | ||
281 | /* using virtual node check, how many items can be shifted to right | 276 | /* |
282 | neighbor */ | 277 | * Using the virtual node, check how many items can be |
278 | * shifted to the right neighbor | ||
279 | */ | ||
283 | static void check_right(struct tree_balance *tb, int h, int cur_free) | 280 | static void check_right(struct tree_balance *tb, int h, int cur_free) |
284 | { | 281 | { |
285 | int i; | 282 | int i; |
@@ -338,13 +335,21 @@ static void check_right(struct tree_balance *tb, int h, int cur_free) | |||
338 | continue; | 335 | continue; |
339 | } | 336 | } |
340 | 337 | ||
341 | /* check whether R[0] can hold ih and at least one byte of the item body */ | 338 | /* |
342 | if (cur_free <= ih_size) { /* cannot shift even a part of the current item */ | 339 | * check whether R[0] can hold ih and at least one |
340 | * byte of the item body | ||
341 | */ | ||
342 | |||
343 | /* cannot shift even a part of the current item */ | ||
344 | if (cur_free <= ih_size) { | ||
343 | tb->rbytes = -1; | 345 | tb->rbytes = -1; |
344 | return; | 346 | return; |
345 | } | 347 | } |
346 | 348 | ||
347 | /* R[0] can hold the header of the item and at least one byte of its body */ | 349 | /* |
350 | * R[0] can hold the header of the item and at least | ||
351 | * one byte of its body | ||
352 | */ | ||
348 | cur_free -= ih_size; /* cur_free is still > 0 */ | 353 | cur_free -= ih_size; /* cur_free is still > 0 */ |
349 | 354 | ||
350 | tb->rbytes = op_check_right(vi, cur_free); | 355 | tb->rbytes = op_check_right(vi, cur_free); |
@@ -361,45 +366,64 @@ static void check_right(struct tree_balance *tb, int h, int cur_free) | |||
361 | /* | 366 | /* |
362 | * from - number of items, which are shifted to left neighbor entirely | 367 | * from - number of items, which are shifted to left neighbor entirely |
363 | * to - number of item, which are shifted to right neighbor entirely | 368 | * to - number of item, which are shifted to right neighbor entirely |
364 | * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor | 369 | * from_bytes - number of bytes of boundary item (or directory entries) |
365 | * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ | 370 | * which are shifted to left neighbor |
371 | * to_bytes - number of bytes of boundary item (or directory entries) | ||
372 | * which are shifted to right neighbor | ||
373 | */ | ||
366 | static int get_num_ver(int mode, struct tree_balance *tb, int h, | 374 | static int get_num_ver(int mode, struct tree_balance *tb, int h, |
367 | int from, int from_bytes, | 375 | int from, int from_bytes, |
368 | int to, int to_bytes, short *snum012, int flow) | 376 | int to, int to_bytes, short *snum012, int flow) |
369 | { | 377 | { |
370 | int i; | 378 | int i; |
371 | int cur_free; | 379 | int cur_free; |
372 | // int bytes; | ||
373 | int units; | 380 | int units; |
374 | struct virtual_node *vn = tb->tb_vn; | 381 | struct virtual_node *vn = tb->tb_vn; |
375 | // struct virtual_item * vi; | ||
376 | |||
377 | int total_node_size, max_node_size, current_item_size; | 382 | int total_node_size, max_node_size, current_item_size; |
378 | int needed_nodes; | 383 | int needed_nodes; |
379 | int start_item, /* position of item we start filling node from */ | 384 | |
380 | end_item, /* position of item we finish filling node by */ | 385 | /* position of item we start filling node from */ |
381 | start_bytes, /* number of first bytes (entries for directory) of start_item-th item | 386 | int start_item; |
382 | we do not include into node that is being filled */ | 387 | |
383 | end_bytes; /* number of last bytes (entries for directory) of end_item-th item | 388 | /* position of item we finish filling node by */ |
384 | we do node include into node that is being filled */ | 389 | int end_item; |
385 | int split_item_positions[2]; /* these are positions in virtual item of | 390 | |
386 | items, that are split between S[0] and | 391 | /* |
387 | S1new and S1new and S2new */ | 392 | * number of first bytes (entries for directory) of start_item-th item |
393 | * we do not include into node that is being filled | ||
394 | */ | ||
395 | int start_bytes; | ||
396 | |||
397 | /* | ||
398 | * number of last bytes (entries for directory) of end_item-th item | ||
399 | * we do not include into node that is being filled | ||
400 | */ | ||
401 | int end_bytes; | ||
402 | |||
403 | /* | ||
404 | * these are positions in virtual item of items, that are split | ||
405 | * between S[0] and S1new and S1new and S2new | ||
406 | */ | ||
407 | int split_item_positions[2]; | ||
388 | 408 | ||
389 | split_item_positions[0] = -1; | 409 | split_item_positions[0] = -1; |
390 | split_item_positions[1] = -1; | 410 | split_item_positions[1] = -1; |
391 | 411 | ||
392 | /* We only create additional nodes if we are in insert or paste mode | 412 | /* |
393 | or we are in replace mode at the internal level. If h is 0 and | 413 | * We only create additional nodes if we are in insert or paste mode |
394 | the mode is M_REPLACE then in fix_nodes we change the mode to | 414 | * or we are in replace mode at the internal level. If h is 0 and |
395 | paste or insert before we get here in the code. */ | 415 | * the mode is M_REPLACE then in fix_nodes we change the mode to |
416 | * paste or insert before we get here in the code. | ||
417 | */ | ||
396 | RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), | 418 | RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), |
397 | "vs-8100: insert_size < 0 in overflow"); | 419 | "vs-8100: insert_size < 0 in overflow"); |
398 | 420 | ||
399 | max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); | 421 | max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); |
400 | 422 | ||
401 | /* snum012 [0-2] - number of items, that lay | 423 | /* |
402 | to S[0], first new node and second new node */ | 424 | * snum012 [0-2] - number of items, that lay |
425 | * to S[0], first new node and second new node | ||
426 | */ | ||
403 | snum012[3] = -1; /* s1bytes */ | 427 | snum012[3] = -1; /* s1bytes */ |
404 | snum012[4] = -1; /* s2bytes */ | 428 | snum012[4] = -1; /* s2bytes */ |
405 | 429 | ||
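
Stripped of item types and the split bookkeeping, the walk that fills snum012[] is a greedy bin pack over node-sized bins. A toy version that only counts nodes, under the assumption that items are plain byte runs and header overhead is ignored:

#include <stdio.h>

/* greedy fill of fixed-size nodes, with or without item splitting */
static int nodes_needed(const int *len, int nr, int node_size, int flow)
{
	int nodes = 1, used = 0;

	for (int i = 0; i < nr; i++) {
		int rest = len[i];

		if (used + rest <= node_size) {
			used += rest;
			continue;
		}
		if (!flow) {			/* no splitting allowed */
			nodes++;
			used = rest;
			continue;
		}
		rest -= node_size - used;	/* top up the current node */
		while (rest > node_size) {	/* whole middle nodes */
			nodes++;
			rest -= node_size;
		}
		nodes++;			/* node holding the remainder */
		used = rest;
	}
	return nodes;
}

int main(void)
{
	int items[] = { 2500, 2500, 2500 };

	printf("%d nodes with flow, %d without\n",
	       nodes_needed(items, 3, 4096, 1),
	       nodes_needed(items, 3, 4096, 0));
	return 0;
}
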
@@ -416,20 +440,22 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
416 | total_node_size = 0; | 440 | total_node_size = 0; |
417 | cur_free = max_node_size; | 441 | cur_free = max_node_size; |
418 | 442 | ||
419 | // start from 'from'-th item | 443 | /* start from 'from'-th item */ |
420 | start_item = from; | 444 | start_item = from; |
421 | // skip its first 'start_bytes' units | 445 | /* skip its first 'start_bytes' units */ |
422 | start_bytes = ((from_bytes != -1) ? from_bytes : 0); | 446 | start_bytes = ((from_bytes != -1) ? from_bytes : 0); |
423 | 447 | ||
424 | // last included item is the 'end_item'-th one | 448 | /* last included item is the 'end_item'-th one */ |
425 | end_item = vn->vn_nr_item - to - 1; | 449 | end_item = vn->vn_nr_item - to - 1; |
426 | // do not count last 'end_bytes' units of 'end_item'-th item | 450 | /* do not count last 'end_bytes' units of 'end_item'-th item */ |
427 | end_bytes = (to_bytes != -1) ? to_bytes : 0; | 451 | end_bytes = (to_bytes != -1) ? to_bytes : 0; |
428 | 452 | ||
429 | /* go through all item beginning from the start_item-th item and ending by | 453 | /* |
430 | the end_item-th item. Do not count first 'start_bytes' units of | 454 | * go through all item beginning from the start_item-th item |
431 | 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ | 455 | * and ending by the end_item-th item. Do not count first |
432 | 456 | * 'start_bytes' units of 'start_item'-th item and last | |
457 | * 'end_bytes' of 'end_item'-th item | ||
458 | */ | ||
433 | for (i = start_item; i <= end_item; i++) { | 459 | for (i = start_item; i <= end_item; i++) { |
434 | struct virtual_item *vi = vn->vn_vi + i; | 460 | struct virtual_item *vi = vn->vn_vi + i; |
435 | int skip_from_end = ((i == end_item) ? end_bytes : 0); | 461 | int skip_from_end = ((i == end_item) ? end_bytes : 0); |
@@ -439,7 +465,10 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
439 | /* get size of current item */ | 465 | /* get size of current item */ |
440 | current_item_size = vi->vi_item_len; | 466 | current_item_size = vi->vi_item_len; |
441 | 467 | ||
442 | /* do not take in calculation head part (from_bytes) of from-th item */ | 468 | /* |
469 | * do not take in calculation head part (from_bytes) | ||
470 | * of from-th item | ||
471 | */ | ||
443 | current_item_size -= | 472 | current_item_size -= |
444 | op_part_size(vi, 0 /*from start */ , start_bytes); | 473 | op_part_size(vi, 0 /*from start */ , start_bytes); |
445 | 474 | ||
@@ -455,9 +484,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
455 | continue; | 484 | continue; |
456 | } | 485 | } |
457 | 486 | ||
487 | /* | ||
488 | * virtual item length is longer than max size of item in | ||
489 | * a node. This is impossible for a direct item | ||
490 | */ | ||
458 | if (current_item_size > max_node_size) { | 491 | if (current_item_size > max_node_size) { |
459 | /* virtual item length is longer, than max size of item in | ||
460 | a node. It is impossible for direct item */ | ||
461 | RFALSE(is_direct_le_ih(vi->vi_ih), | 492 | RFALSE(is_direct_le_ih(vi->vi_ih), |
462 | "vs-8110: " | 493 | "vs-8110: " |
463 | "direct item length is %d. It can not be longer than %d", | 494 | "direct item length is %d. It can not be longer than %d", |
@@ -466,15 +497,18 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
466 | flow = 1; | 497 | flow = 1; |
467 | } | 498 | } |
468 | 499 | ||
500 | /* as we do not split items, take new node and continue */ | ||
469 | if (!flow) { | 501 | if (!flow) { |
470 | /* as we do not split items, take new node and continue */ | ||
471 | needed_nodes++; | 502 | needed_nodes++; |
472 | i--; | 503 | i--; |
473 | total_node_size = 0; | 504 | total_node_size = 0; |
474 | continue; | 505 | continue; |
475 | } | 506 | } |
476 | // calculate number of item units which fit into node being | 507 | |
477 | // filled | 508 | /* |
509 | * calculate number of item units which fit into node being | ||
510 | * filled | ||
511 | */ | ||
478 | { | 512 | { |
479 | int free_space; | 513 | int free_space; |
480 | 514 | ||
@@ -482,17 +516,17 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
482 | units = | 516 | units = |
483 | op_check_left(vi, free_space, start_bytes, | 517 | op_check_left(vi, free_space, start_bytes, |
484 | skip_from_end); | 518 | skip_from_end); |
519 | /* | ||
520 | * nothing fits into current node, take new | ||
521 | * node and continue | ||
522 | */ | ||
485 | if (units == -1) { | 523 | if (units == -1) { |
486 | /* nothing fits into current node, take new node and continue */ | ||
487 | needed_nodes++, i--, total_node_size = 0; | 524 | needed_nodes++, i--, total_node_size = 0; |
488 | continue; | 525 | continue; |
489 | } | 526 | } |
490 | } | 527 | } |
491 | 528 | ||
492 | /* something fits into the current node */ | 529 | /* something fits into the current node */ |
493 | //if (snum012[3] != -1 || needed_nodes != 1) | ||
494 | // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); | ||
495 | //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; | ||
496 | start_bytes += units; | 530 | start_bytes += units; |
497 | snum012[needed_nodes - 1 + 3] = units; | 531 | snum012[needed_nodes - 1 + 3] = units; |
498 | 532 | ||
@@ -508,9 +542,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
508 | total_node_size = 0; | 542 | total_node_size = 0; |
509 | } | 543 | } |
510 | 544 | ||
511 | // sum012[4] (if it is not -1) contains number of units of which | 545 | /* |
512 | // are to be in S1new, snum012[3] - to be in S0. They are supposed | 546 | * snum012[4] (if it is not -1) contains the number of units which |
513 | // to be S1bytes and S2bytes correspondingly, so recalculate | 547 | * are to be in S1new, snum012[3] - to be in S0. They are supposed |
548 | * to be S1bytes and S2bytes correspondingly, so recalculate | ||
549 | */ | ||
514 | if (snum012[4] > 0) { | 550 | if (snum012[4] > 0) { |
515 | int split_item_num; | 551 | int split_item_num; |
516 | int bytes_to_r, bytes_to_l; | 552 | int bytes_to_r, bytes_to_l; |
@@ -527,7 +563,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
527 | ((split_item_positions[0] == | 563 | ((split_item_positions[0] == |
528 | split_item_positions[1]) ? snum012[3] : 0); | 564 | split_item_positions[1]) ? snum012[3] : 0); |
529 | 565 | ||
530 | // s2bytes | 566 | /* s2bytes */ |
531 | snum012[4] = | 567 | snum012[4] = |
532 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - | 568 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - |
533 | bytes_to_r - bytes_to_l - bytes_to_S1new; | 569 | bytes_to_r - bytes_to_l - bytes_to_S1new; |
@@ -555,7 +591,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
555 | ((split_item_positions[0] == split_item_positions[1] | 591 | ((split_item_positions[0] == split_item_positions[1] |
556 | && snum012[4] != -1) ? snum012[4] : 0); | 592 | && snum012[4] != -1) ? snum012[4] : 0); |
557 | 593 | ||
558 | // s1bytes | 594 | /* s1bytes */ |
559 | snum012[3] = | 595 | snum012[3] = |
560 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - | 596 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - |
561 | bytes_to_r - bytes_to_l - bytes_to_S2new; | 597 | bytes_to_r - bytes_to_l - bytes_to_S2new; |
@@ -565,7 +601,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
565 | } | 601 | } |
566 | 602 | ||
567 | 603 | ||
568 | /* Set parameters for balancing. | 604 | /* |
605 | * Set parameters for balancing. | ||
569 | * Performs write of results of analysis of balancing into structure tb, | 606 | * Performs write of results of analysis of balancing into structure tb, |
570 | * where it will later be used by the functions that actually do the balancing. | 607 | * where it will later be used by the functions that actually do the balancing. |
571 | * Parameters: | 608 | * Parameters: |
@@ -575,11 +612,12 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
575 | * rnum number of items from S[h] that must be shifted to R[h]; | 612 | * rnum number of items from S[h] that must be shifted to R[h]; |
576 | * blk_num number of blocks that S[h] will be splitted into; | 613 | * blk_num number of blocks that S[h] will be splitted into; |
577 | * s012 number of items that fall into splitted nodes. | 614 | * s012 number of items that fall into splitted nodes. |
578 | * lbytes number of bytes which flow to the left neighbor from the item that is not | 615 | * lbytes number of bytes which flow to the left neighbor from the |
579 | * not shifted entirely | 616 | * item that is not shifted entirely |
580 | * rbytes number of bytes which flow to the right neighbor from the item that is not | 617 | * rbytes number of bytes which flow to the right neighbor from the |
581 | * not shifted entirely | 618 | * item that is not shifted entirely |
582 | * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) | 619 | * s1bytes number of bytes which flow to the first new node when |
620 | * S[0] splits (this number is contained in s012 array) | ||
583 | */ | 621 | */ |
584 | 622 | ||
585 | static void set_parameters(struct tree_balance *tb, int h, int lnum, | 623 | static void set_parameters(struct tree_balance *tb, int h, int lnum, |
@@ -590,12 +628,14 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum, | |||
590 | tb->rnum[h] = rnum; | 628 | tb->rnum[h] = rnum; |
591 | tb->blknum[h] = blk_num; | 629 | tb->blknum[h] = blk_num; |
592 | 630 | ||
593 | if (h == 0) { /* only for leaf level */ | 631 | /* only for leaf level */ |
632 | if (h == 0) { | ||
594 | if (s012 != NULL) { | 633 | if (s012 != NULL) { |
595 | tb->s0num = *s012++, | 634 | tb->s0num = *s012++; |
596 | tb->s1num = *s012++, tb->s2num = *s012++; | 635 | tb->snum[0] = *s012++; |
597 | tb->s1bytes = *s012++; | 636 | tb->snum[1] = *s012++; |
598 | tb->s2bytes = *s012; | 637 | tb->sbytes[0] = *s012++; |
638 | tb->sbytes[1] = *s012; | ||
599 | } | 639 | } |
600 | tb->lbytes = lb; | 640 | tb->lbytes = lb; |
601 | tb->rbytes = rb; | 641 | tb->rbytes = rb; |
@@ -607,8 +647,10 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum, | |||
607 | PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); | 647 | PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); |
608 | } | 648 | } |
609 | 649 | ||
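
The hunk above swaps the s1num/s2num and s1bytes/s2bytes fields for snum[2]/sbytes[2] arrays. A compile-checkable sketch of the new unpacking, with a pared-down stand-in for struct tree_balance:

#include <stdio.h>

struct toy_balance {
	short s0num;     /* items staying in S[0] */
	short snum[2];   /* items going to the two possible new nodes */
	short sbytes[2]; /* units of the items split off into them */
};

static void unpack_s012(struct toy_balance *tb, const short *s012)
{
	tb->s0num = *s012++;
	tb->snum[0] = *s012++;
	tb->snum[1] = *s012++;
	tb->sbytes[0] = *s012++;
	tb->sbytes[1] = *s012;
}

int main(void)
{
	const short plan[5] = { 10, 6, 0, 300, -1 };
	struct toy_balance tb;

	unpack_s012(&tb, plan);
	printf("S0=%d S1new=%d S2new=%d s1bytes=%d s2bytes=%d\n",
	       tb.s0num, tb.snum[0], tb.snum[1],
	       tb.sbytes[0], tb.sbytes[1]);
	return 0;
}

Besides being indexable, the array form removes the comma-expression chains the old assignments needed.
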
610 | /* check, does node disappear if we shift tb->lnum[0] items to left | 650 | /* |
611 | neighbor and tb->rnum[0] to the right one. */ | 651 | * check if node disappears if we shift tb->lnum[0] items to left |
652 | * neighbor and tb->rnum[0] to the right one. | ||
653 | */ | ||
612 | static int is_leaf_removable(struct tree_balance *tb) | 654 | static int is_leaf_removable(struct tree_balance *tb) |
613 | { | 655 | { |
614 | struct virtual_node *vn = tb->tb_vn; | 656 | struct virtual_node *vn = tb->tb_vn; |
@@ -616,8 +658,10 @@ static int is_leaf_removable(struct tree_balance *tb) | |||
616 | int size; | 658 | int size; |
617 | int remain_items; | 659 | int remain_items; |
618 | 660 | ||
619 | /* number of items, that will be shifted to left (right) neighbor | 661 | /* |
620 | entirely */ | 662 | * number of items that will be shifted to left (right) neighbor |
663 | * entirely | ||
664 | */ | ||
621 | to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); | 665 | to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); |
622 | to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); | 666 | to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); |
623 | remain_items = vn->vn_nr_item; | 667 | remain_items = vn->vn_nr_item; |
@@ -625,21 +669,21 @@ static int is_leaf_removable(struct tree_balance *tb) | |||
625 | /* how many items remain in S[0] after shiftings to neighbors */ | 669 | /* how many items remain in S[0] after shiftings to neighbors */ |
626 | remain_items -= (to_left + to_right); | 670 | remain_items -= (to_left + to_right); |
627 | 671 | ||
672 | /* all content of node can be shifted to neighbors */ | ||
628 | if (remain_items < 1) { | 673 | if (remain_items < 1) { |
629 | /* all content of node can be shifted to neighbors */ | ||
630 | set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, | 674 | set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, |
631 | NULL, -1, -1); | 675 | NULL, -1, -1); |
632 | return 1; | 676 | return 1; |
633 | } | 677 | } |
634 | 678 | ||
679 | /* S[0] is not removable */ | ||
635 | if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) | 680 | if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) |
636 | /* S[0] is not removable */ | ||
637 | return 0; | 681 | return 0; |
638 | 682 | ||
639 | /* check, whether we can divide 1 remaining item between neighbors */ | 683 | /* check whether we can divide 1 remaining item between neighbors */ |
640 | 684 | ||
641 | /* get size of remaining item (in item units) */ | 685 | /* get size of remaining item (in item units) */ |
642 | size = op_unit_num(&(vn->vn_vi[to_left])); | 686 | size = op_unit_num(&vn->vn_vi[to_left]); |
643 | 687 | ||
644 | if (tb->lbytes + tb->rbytes >= size) { | 688 | if (tb->lbytes + tb->rbytes >= size) { |
645 | set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, | 689 | set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, |
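
The test made in this hunk can be restated in isolation. Simplified sketch: the one leftover item has a known unit count, which the real code obtains from op_unit_num():

#include <stdio.h>

/*
 * S[0] is removable if everything shifts out whole, or if exactly one
 * item remains and its units can be divided between the two neighbors.
 */
static int leaf_removable(int nr_items, int to_left, int to_right,
			  int lbytes, int rbytes, int last_item_units)
{
	int remain = nr_items - (to_left + to_right);

	if (remain < 1)
		return 1;	/* all content fits in the neighbors */
	if (remain > 1 || lbytes == -1 || rbytes == -1)
		return 0;	/* leftovers, or no partial shift possible */
	return lbytes + rbytes >= last_item_units;
}

int main(void)
{
	/* 5 items; 2 go left, 2 go right; 80 + 60 units of room remain
	 * for the 120-unit leftover item, so the leaf can be emptied */
	printf("removable: %d\n", leaf_removable(5, 2, 2, 80, 60, 120));
	return 0;
}
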
@@ -675,23 +719,28 @@ static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) | |||
675 | "vs-8125: item number must be 1: it is %d", | 719 | "vs-8125: item number must be 1: it is %d", |
676 | B_NR_ITEMS(S0)); | 720 | B_NR_ITEMS(S0)); |
677 | 721 | ||
678 | ih = B_N_PITEM_HEAD(S0, 0); | 722 | ih = item_head(S0, 0); |
679 | if (tb->CFR[0] | 723 | if (tb->CFR[0] |
680 | && !comp_short_le_keys(&(ih->ih_key), | 724 | && !comp_short_le_keys(&ih->ih_key, |
681 | B_N_PDELIM_KEY(tb->CFR[0], | 725 | internal_key(tb->CFR[0], |
682 | tb->rkey[0]))) | 726 | tb->rkey[0]))) |
727 | /* | ||
728 | * Directory must be in correct state here: that is | ||
729 | * somewhere at the left side should exist first | ||
730 | * directory item. But the item being deleted can | ||
731 | * not be that first one because its right neighbor | ||
732 | * is item of the same directory. (But first item | ||
733 | * always gets deleted in last turn). So, neighbors | ||
734 | * of deleted item can be merged, so we can save | ||
735 | * ih_size | ||
736 | */ | ||
683 | if (is_direntry_le_ih(ih)) { | 737 | if (is_direntry_le_ih(ih)) { |
684 | /* Directory must be in correct state here: that is | ||
685 | somewhere at the left side should exist first directory | ||
686 | item. But the item being deleted can not be that first | ||
687 | one because its right neighbor is item of the same | ||
688 | directory. (But first item always gets deleted in last | ||
689 | turn). So, neighbors of deleted item can be merged, so | ||
690 | we can save ih_size */ | ||
691 | ih_size = IH_SIZE; | 738 | ih_size = IH_SIZE; |
692 | 739 | ||
693 | /* we might check that left neighbor exists and is of the | 740 | /* |
694 | same directory */ | 741 | * we might check that left neighbor exists |
742 | * and is of the same directory | ||
743 | */ | ||
695 | RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, | 744 | RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, |
696 | "vs-8130: first directory item can not be removed until directory is not empty"); | 745 | "vs-8130: first directory item can not be removed until directory is not empty"); |
697 | } | 746 | } |
@@ -770,7 +819,8 @@ static void free_buffers_in_tb(struct tree_balance *tb) | |||
770 | } | 819 | } |
771 | } | 820 | } |
772 | 821 | ||
773 | /* Get new buffers for storing new nodes that are created while balancing. | 822 | /* |
823 | * Get new buffers for storing new nodes that are created while balancing. | ||
774 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; | 824 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; |
775 | * CARRY_ON - schedule didn't occur while the function worked; | 825 | * CARRY_ON - schedule didn't occur while the function worked; |
776 | * NO_DISK_SPACE - no disk space. | 826 | * NO_DISK_SPACE - no disk space. |
@@ -778,28 +828,33 @@ static void free_buffers_in_tb(struct tree_balance *tb) | |||
778 | /* The function is NOT SCHEDULE-SAFE! */ | 828 | /* The function is NOT SCHEDULE-SAFE! */ |
779 | static int get_empty_nodes(struct tree_balance *tb, int h) | 829 | static int get_empty_nodes(struct tree_balance *tb, int h) |
780 | { | 830 | { |
781 | struct buffer_head *new_bh, | 831 | struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h); |
782 | *Sh = PATH_H_PBUFFER(tb->tb_path, h); | ||
783 | b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; | 832 | b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; |
784 | int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */ | 833 | int counter, number_of_freeblk; |
785 | retval = CARRY_ON; | 834 | int amount_needed; /* number of needed empty blocks */ |
835 | int retval = CARRY_ON; | ||
786 | struct super_block *sb = tb->tb_sb; | 836 | struct super_block *sb = tb->tb_sb; |
787 | 837 | ||
788 | /* number_of_freeblk is the number of empty blocks which have been | 838 | /* |
789 | acquired for use by the balancing algorithm minus the number of | 839 | * number_of_freeblk is the number of empty blocks which have been |
790 | empty blocks used in the previous levels of the analysis, | 840 | * acquired for use by the balancing algorithm minus the number of |
791 | number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs | 841 | * empty blocks used in the previous levels of the analysis, |
792 | after empty blocks are acquired, and the balancing analysis is | 842 | * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule |
793 | then restarted, amount_needed is the number needed by this level | 843 | * occurs after empty blocks are acquired, and the balancing analysis |
794 | (h) of the balancing analysis. | 844 | * is then restarted, amount_needed is the number needed by this |
795 | 845 | * level (h) of the balancing analysis. | |
796 | Note that for systems with many processes writing, it would be | 846 | * |
797 | more layout optimal to calculate the total number needed by all | 847 | * Note that for systems with many processes writing, it would be |
798 | levels and then to run reiserfs_new_blocks to get all of them at once. */ | 848 | * more layout optimal to calculate the total number needed by all |
799 | 849 | * levels and then to run reiserfs_new_blocks to get all of them at | |
800 | /* Initiate number_of_freeblk to the amount acquired prior to the restart of | 850 | * once. |
801 | the analysis or 0 if not restarted, then subtract the amount needed | 851 | */ |
802 | by all of the levels of the tree below h. */ | 852 | |
853 | /* | ||
854 | * Initiate number_of_freeblk to the amount acquired prior to the | ||
855 | * restart of the analysis or 0 if not restarted, then subtract the | ||
856 | * amount needed by all of the levels of the tree below h. | ||
857 | */ | ||
803 | /* blknum includes S[h], so we subtract 1 in this calculation */ | 858 | /* blknum includes S[h], so we subtract 1 in this calculation */ |
804 | for (counter = 0, number_of_freeblk = tb->cur_blknum; | 859 | for (counter = 0, number_of_freeblk = tb->cur_blknum; |
805 | counter < h; counter++) | 860 | counter < h; counter++) |
@@ -810,13 +865,19 @@ static int get_empty_nodes(struct tree_balance *tb, int h) | |||
810 | /* Allocate missing empty blocks. */ | 865 | /* Allocate missing empty blocks. */ |
811 | /* if Sh == 0 then we are getting a new root */ | 866 | /* if Sh == 0 then we are getting a new root */ |
812 | amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; | 867 | amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; |
813 | /* Amount_needed = the amount that we need more than the amount that we have. */ | 868 | /* |
869 | * Amount_needed = the amount that we need more than the | ||
870 | * amount that we have. | ||
871 | */ | ||
814 | if (amount_needed > number_of_freeblk) | 872 | if (amount_needed > number_of_freeblk) |
815 | amount_needed -= number_of_freeblk; | 873 | amount_needed -= number_of_freeblk; |
816 | else /* If we have enough already then there is nothing to do. */ | 874 | else /* If we have enough already then there is nothing to do. */ |
817 | return CARRY_ON; | 875 | return CARRY_ON; |
818 | 876 | ||
819 | /* No need to check quota - is not allocated for blocks used for formatted nodes */ | 877 | /* |
878 | * No need to check quota - it is not allocated for blocks used | ||
879 | * for formatted nodes | ||
880 | */ | ||
820 | if (reiserfs_new_form_blocknrs(tb, blocknrs, | 881 | if (reiserfs_new_form_blocknrs(tb, blocknrs, |
821 | amount_needed) == NO_DISK_SPACE) | 882 | amount_needed) == NO_DISK_SPACE) |
822 | return NO_DISK_SPACE; | 883 | return NO_DISK_SPACE; |
@@ -849,8 +910,10 @@ static int get_empty_nodes(struct tree_balance *tb, int h) | |||
849 | return retval; | 910 | return retval; |
850 | } | 911 | } |
851 | 912 | ||
852 | /* Get free space of the left neighbor, which is stored in the parent | 913 | /* |
853 | * node of the left neighbor. */ | 914 | * Get free space of the left neighbor, which is stored in the parent |
915 | * node of the left neighbor. | ||
916 | */ | ||
854 | static int get_lfree(struct tree_balance *tb, int h) | 917 | static int get_lfree(struct tree_balance *tb, int h) |
855 | { | 918 | { |
856 | struct buffer_head *l, *f; | 919 | struct buffer_head *l, *f; |
@@ -870,7 +933,8 @@ static int get_lfree(struct tree_balance *tb, int h) | |||
870 | return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); | 933 | return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); |
871 | } | 934 | } |
872 | 935 | ||
873 | /* Get free space of the right neighbor, | 936 | /* |
937 | * Get free space of the right neighbor, | ||
874 | * which is stored in the parent node of the right neighbor. | 938 | * which is stored in the parent node of the right neighbor. |
875 | */ | 939 | */ |
876 | static int get_rfree(struct tree_balance *tb, int h) | 940 | static int get_rfree(struct tree_balance *tb, int h) |
@@ -916,7 +980,10 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) | |||
916 | "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", | 980 | "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", |
917 | father, tb->FL[h]); | 981 | father, tb->FL[h]); |
918 | 982 | ||
919 | /* Get position of the pointer to the left neighbor into the left father. */ | 983 | /* |
984 | * Get position of the pointer to the left neighbor | ||
985 | * into the left father. | ||
986 | */ | ||
920 | left_neighbor_position = (father == tb->FL[h]) ? | 987 | left_neighbor_position = (father == tb->FL[h]) ? |
921 | tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); | 988 | tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); |
922 | /* Get left neighbor block number. */ | 989 | /* Get left neighbor block number. */ |
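
Note that get_lfree()/get_rfree() above never read the neighbor node itself: the parent's child pointer already records how full the child is. A sketch with invented types; BLKH_SIZE 24 matches the reiserfs block head, and dc_size stands in for the disk-child field:

#include <stdio.h>

#define BLOCK_SIZE 4096
#define BLKH_SIZE 24	/* block head at the front of every node */

struct toy_child_ptr { int dc_size; /* bytes used inside the child */ };

static int neighbor_free(const struct toy_child_ptr *dc)
{
	/* MAX_CHILD_SIZE(bh): blocksize minus the block head */
	return (BLOCK_SIZE - BLKH_SIZE) - dc->dc_size;
}

int main(void)
{
	struct toy_child_ptr left = { .dc_size = 3000 };

	printf("left neighbor free: %d bytes\n", neighbor_free(&left));
	return 0;
}
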
@@ -940,17 +1007,20 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) | |||
940 | 1007 | ||
941 | static void decrement_key(struct cpu_key *key) | 1008 | static void decrement_key(struct cpu_key *key) |
942 | { | 1009 | { |
943 | // call item specific function for this key | 1010 | /* call item specific function for this key */ |
944 | item_ops[cpu_key_k_type(key)]->decrement_key(key); | 1011 | item_ops[cpu_key_k_type(key)]->decrement_key(key); |
945 | } | 1012 | } |
946 | 1013 | ||
947 | /* Calculate far left/right parent of the left/right neighbor of the current node, that | 1014 | /* |
948 | * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. | 1015 | * Calculate far left/right parent of the left/right neighbor of the |
1016 | * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor | ||
1017 | * of the parent F[h]. | ||
949 | * Calculate left/right common parent of the current node and L[h]/R[h]. | 1018 | * Calculate left/right common parent of the current node and L[h]/R[h]. |
950 | * Calculate left/right delimiting key position. | 1019 | * Calculate left/right delimiting key position. |
951 | * Returns: PATH_INCORRECT - path in the tree is not correct; | 1020 | * Returns: PATH_INCORRECT - path in the tree is not correct |
952 | SCHEDULE_OCCURRED - schedule occurred while the function worked; | 1021 | * SCHEDULE_OCCURRED - schedule occurred while the function worked |
953 | * CARRY_ON - schedule didn't occur while the function worked; | 1022 | * CARRY_ON - schedule didn't occur while the function |
1023 | * worked | ||
954 | */ | 1024 | */ |
955 | static int get_far_parent(struct tree_balance *tb, | 1025 | static int get_far_parent(struct tree_balance *tb, |
956 | int h, | 1026 | int h, |
@@ -966,8 +1036,10 @@ static int get_far_parent(struct tree_balance *tb, | |||
966 | first_last_position = 0, | 1036 | first_last_position = 0, |
967 | path_offset = PATH_H_PATH_OFFSET(path, h); | 1037 | path_offset = PATH_H_PATH_OFFSET(path, h); |
968 | 1038 | ||
969 | /* Starting from F[h] go upwards in the tree, and look for the common | 1039 | /* |
970 | ancestor of F[h], and its neighbor l/r, that should be obtained. */ | 1040 | * Starting from F[h] go upwards in the tree, and look for the common |
1041 | * ancestor of F[h], and its neighbor l/r, that should be obtained. | ||
1042 | */ | ||
971 | 1043 | ||
972 | counter = path_offset; | 1044 | counter = path_offset; |
973 | 1045 | ||
@@ -975,21 +1047,33 @@ static int get_far_parent(struct tree_balance *tb, | |||
975 | "PAP-8180: invalid path length"); | 1047 | "PAP-8180: invalid path length"); |
976 | 1048 | ||
977 | for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { | 1049 | for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { |
978 | /* Check whether parent of the current buffer in the path is really parent in the tree. */ | 1050 | /* |
1051 | * Check whether parent of the current buffer in the path | ||
1052 | * is really parent in the tree. | ||
1053 | */ | ||
979 | if (!B_IS_IN_TREE | 1054 | if (!B_IS_IN_TREE |
980 | (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) | 1055 | (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) |
981 | return REPEAT_SEARCH; | 1056 | return REPEAT_SEARCH; |
1057 | |||
982 | /* Check whether position in the parent is correct. */ | 1058 | /* Check whether position in the parent is correct. */ |
983 | if ((position = | 1059 | if ((position = |
984 | PATH_OFFSET_POSITION(path, | 1060 | PATH_OFFSET_POSITION(path, |
985 | counter - 1)) > | 1061 | counter - 1)) > |
986 | B_NR_ITEMS(parent)) | 1062 | B_NR_ITEMS(parent)) |
987 | return REPEAT_SEARCH; | 1063 | return REPEAT_SEARCH; |
988 | /* Check whether parent at the path really points to the child. */ | 1064 | |
1065 | /* | ||
1066 | * Check whether parent at the path really points | ||
1067 | * to the child. | ||
1068 | */ | ||
989 | if (B_N_CHILD_NUM(parent, position) != | 1069 | if (B_N_CHILD_NUM(parent, position) != |
990 | PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) | 1070 | PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) |
991 | return REPEAT_SEARCH; | 1071 | return REPEAT_SEARCH; |
992 | /* Return delimiting key if position in the parent is not equal to first/last one. */ | 1072 | |
1073 | /* | ||
1074 | * Return delimiting key if position in the parent is not | ||
1075 | * equal to first/last one. | ||
1076 | */ | ||
993 | if (c_lr_par == RIGHT_PARENTS) | 1077 | if (c_lr_par == RIGHT_PARENTS) |
994 | first_last_position = B_NR_ITEMS(parent); | 1078 | first_last_position = B_NR_ITEMS(parent); |
995 | if (position != first_last_position) { | 1079 | if (position != first_last_position) { |
@@ -1002,7 +1086,10 @@ static int get_far_parent(struct tree_balance *tb, | |||
1002 | 1086 | ||
1003 | /* if we are in the root of the tree, then there is no common father */ | 1087 | /* if we are in the root of the tree, then there is no common father */ |
1004 | if (counter == FIRST_PATH_ELEMENT_OFFSET) { | 1088 | if (counter == FIRST_PATH_ELEMENT_OFFSET) { |
1005 | /* Check whether first buffer in the path is the root of the tree. */ | 1089 | /* |
1090 | * Check whether first buffer in the path is the | ||
1091 | * root of the tree. | ||
1092 | */ | ||
1006 | if (PATH_OFFSET_PBUFFER | 1093 | if (PATH_OFFSET_PBUFFER |
1007 | (tb->tb_path, | 1094 | (tb->tb_path, |
1008 | FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == | 1095 | FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == |
@@ -1031,12 +1118,15 @@ static int get_far_parent(struct tree_balance *tb, | |||
1031 | } | 1118 | } |
1032 | } | 1119 | } |
1033 | 1120 | ||
1034 | /* So, we got common parent of the current node and its left/right neighbor. | 1121 | /* |
1035 | Now we are geting the parent of the left/right neighbor. */ | 1122 | * So, we got common parent of the current node and its |
1123 | * left/right neighbor. Now we are getting the parent of the | ||
1124 | * left/right neighbor. | ||
1125 | */ | ||
1036 | 1126 | ||
1037 | /* Form key to get parent of the left/right neighbor. */ | 1127 | /* Form key to get parent of the left/right neighbor. */ |
1038 | le_key2cpu_key(&s_lr_father_key, | 1128 | le_key2cpu_key(&s_lr_father_key, |
1039 | B_N_PDELIM_KEY(*pcom_father, | 1129 | internal_key(*pcom_father, |
1040 | (c_lr_par == | 1130 | (c_lr_par == |
1041 | LEFT_PARENTS) ? (tb->lkey[h - 1] = | 1131 | LEFT_PARENTS) ? (tb->lkey[h - 1] = |
1042 | position - | 1132 | position - |
@@ -1050,7 +1140,7 @@ static int get_far_parent(struct tree_balance *tb, | |||
1050 | if (search_by_key | 1140 | if (search_by_key |
1051 | (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, | 1141 | (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, |
1052 | h + 1) == IO_ERROR) | 1142 | h + 1) == IO_ERROR) |
1053 | // path is released | 1143 | /* path is released */ |
1054 | return IO_ERROR; | 1144 | return IO_ERROR; |
1055 | 1145 | ||
1056 | if (FILESYSTEM_CHANGED_TB(tb)) { | 1146 | if (FILESYSTEM_CHANGED_TB(tb)) { |
@@ -1071,12 +1161,15 @@ static int get_far_parent(struct tree_balance *tb, | |||
1071 | return CARRY_ON; | 1161 | return CARRY_ON; |
1072 | } | 1162 | } |
1073 | 1163 | ||
1074 | /* Get parents of neighbors of node in the path(S[path_offset]) and common parents of | 1164 | /* |
1075 | * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset], | 1165 | * Get parents of neighbors of node in the path(S[path_offset]) and |
1076 | * FR[path_offset], CFL[path_offset], CFR[path_offset]. | 1166 | * common parents of S[path_offset] and L[path_offset]/R[path_offset]: |
1077 | * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset]. | 1167 | * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset], |
1078 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; | 1168 | * CFR[path_offset]. |
1079 | * CARRY_ON - schedule didn't occur while the function worked; | 1169 | * Calculate numbers of left and right delimiting keys position: |
1170 | * lkey[path_offset], rkey[path_offset]. | ||
1171 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked | ||
1172 | * CARRY_ON - schedule didn't occur while the function worked | ||
1080 | */ | 1173 | */ |
1081 | static int get_parents(struct tree_balance *tb, int h) | 1174 | static int get_parents(struct tree_balance *tb, int h) |
1082 | { | 1175 | { |
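
Reduced to path positions, the climb in get_far_parent() looks for the first level whose slot is not the first one (for a left neighbor; the right-neighbor case tests against the last slot instead). A hypothetical array model:

#include <stdio.h>

/* level whose slot yields the left neighbor's subtree, or -1 if none */
static int far_parent_level(const int *pos, int levels)
{
	for (int lvl = levels - 1; lvl >= 0; lvl--)
		if (pos[lvl] != 0)	/* not leftmost: common ancestor */
			return lvl;
	return -1;			/* leftmost path: no left neighbor */
}

int main(void)
{
	/* positions from root (level 0) down: leftmost at the two
	 * lower levels, so the climb reaches the root */
	int pos[] = { 3, 0, 0 };

	printf("common ancestor at level %d\n", far_parent_level(pos, 3));
	return 0;
}
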
@@ -1088,8 +1181,11 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1088 | 1181 | ||
1089 | /* Current node is the root of the tree or will be root of the tree */ | 1182 | /* Current node is the root of the tree or will be root of the tree */ |
1090 | if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { | 1183 | if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { |
1091 | /* The root can not have parents. | 1184 | /* |
1092 | Release nodes which previously were obtained as parents of the current node neighbors. */ | 1185 | * The root can not have parents. |
1186 | * Release nodes which previously were obtained as | ||
1187 | * parents of the current node neighbors. | ||
1188 | */ | ||
1093 | brelse(tb->FL[h]); | 1189 | brelse(tb->FL[h]); |
1094 | brelse(tb->CFL[h]); | 1190 | brelse(tb->CFL[h]); |
1095 | brelse(tb->FR[h]); | 1191 | brelse(tb->FR[h]); |
@@ -1111,10 +1207,14 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1111 | get_bh(curf); | 1207 | get_bh(curf); |
1112 | tb->lkey[h] = position - 1; | 1208 | tb->lkey[h] = position - 1; |
1113 | } else { | 1209 | } else { |
1114 | /* Calculate current parent of L[path_offset], which is the left neighbor of the current node. | 1210 | /* |
1115 | Calculate current common parent of L[path_offset] and the current node. Note that | 1211 | * Calculate current parent of L[path_offset], which is the |
1116 | CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset]. | 1212 | * left neighbor of the current node. Calculate current |
1117 | Calculate lkey[path_offset]. */ | 1213 | * common parent of L[path_offset] and the current node. |
1214 | * Note that CFL[path_offset] not equal FL[path_offset] and | ||
1215 | * CFL[path_offset] not equal F[path_offset]. | ||
1216 | * Calculate lkey[path_offset]. | ||
1217 | */ | ||
1118 | if ((ret = get_far_parent(tb, h + 1, &curf, | 1218 | if ((ret = get_far_parent(tb, h + 1, &curf, |
1119 | &curcf, | 1219 | &curcf, |
1120 | LEFT_PARENTS)) != CARRY_ON) | 1220 | LEFT_PARENTS)) != CARRY_ON) |
@@ -1130,19 +1230,22 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1130 | (curcf && !B_IS_IN_TREE(curcf)), | 1230 | (curcf && !B_IS_IN_TREE(curcf)), |
1131 | "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); | 1231 | "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); |
1132 | 1232 | ||
1133 | /* Get parent FR[h] of R[h]. */ | 1233 | /* Get parent FR[h] of R[h]. */ |
1134 | 1234 | ||
1135 | /* Current node is the last child of F[h]. FR[h] != F[h]. */ | 1235 | /* Current node is the last child of F[h]. FR[h] != F[h]. */ |
1136 | if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { | 1236 | if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { |
1137 | /* Calculate current parent of R[h], which is the right neighbor of F[h]. | 1237 | /* |
1138 | Calculate current common parent of R[h] and current node. Note that CFR[h] | 1238 | * Calculate current parent of R[h], which is the right |
1139 | not equal FR[path_offset] and CFR[h] not equal F[h]. */ | 1239 | * neighbor of F[h]. Calculate current common parent of |
1240 | * R[h] and current node. Note that CFR[h] not equal | ||
1241 | * FR[path_offset] and CFR[h] not equal F[h]. | ||
1242 | */ | ||
1140 | if ((ret = | 1243 | if ((ret = |
1141 | get_far_parent(tb, h + 1, &curf, &curcf, | 1244 | get_far_parent(tb, h + 1, &curf, &curcf, |
1142 | RIGHT_PARENTS)) != CARRY_ON) | 1245 | RIGHT_PARENTS)) != CARRY_ON) |
1143 | return ret; | 1246 | return ret; |
1144 | } else { | 1247 | } else { |
1145 | /* Current node is not the last child of its parent F[h]. */ | 1248 | /* Current node is not the last child of its parent F[h]. */ |
1146 | curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); | 1249 | curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); |
1147 | curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); | 1250 | curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); |
1148 | get_bh(curf); | 1251 | get_bh(curf); |
@@ -1165,8 +1268,10 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1165 | return CARRY_ON; | 1268 | return CARRY_ON; |
1166 | } | 1269 | } |
1167 | 1270 | ||
1168 | /* it is possible to remove node as result of shiftings to | 1271 | /* |
1169 | neighbors even when we insert or paste item. */ | 1272 | * it is possible to remove node as result of shiftings to |
1273 | * neighbors even when we insert or paste item. | ||
1274 | */ | ||
1170 | static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | 1275 | static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, |
1171 | struct tree_balance *tb, int h) | 1276 | struct tree_balance *tb, int h) |
1172 | { | 1277 | { |
@@ -1175,21 +1280,22 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | |||
1175 | struct item_head *ih; | 1280 | struct item_head *ih; |
1176 | struct reiserfs_key *r_key = NULL; | 1281 | struct reiserfs_key *r_key = NULL; |
1177 | 1282 | ||
1178 | ih = B_N_PITEM_HEAD(Sh, 0); | 1283 | ih = item_head(Sh, 0); |
1179 | if (tb->CFR[h]) | 1284 | if (tb->CFR[h]) |
1180 | r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]); | 1285 | r_key = internal_key(tb->CFR[h], tb->rkey[h]); |
1181 | 1286 | ||
1182 | if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes | 1287 | if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes |
1183 | /* shifting may merge items which might save space */ | 1288 | /* shifting may merge items which might save space */ |
1184 | - | 1289 | - |
1185 | ((!h | 1290 | ((!h |
1186 | && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0) | 1291 | && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0) |
1187 | - | 1292 | - |
1188 | ((!h && r_key | 1293 | ((!h && r_key |
1189 | && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) | 1294 | && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) |
1190 | + ((h) ? KEY_SIZE : 0)) { | 1295 | + ((h) ? KEY_SIZE : 0)) { |
1191 | /* node can not be removed */ | 1296 | /* node can not be removed */ |
1192 | if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ | 1297 | if (sfree >= levbytes) { |
1298 | /* new item fits into node S[h] without any shifting */ | ||
1193 | if (!h) | 1299 | if (!h) |
1194 | tb->s0num = | 1300 | tb->s0num = |
1195 | B_NR_ITEMS(Sh) + | 1301 | B_NR_ITEMS(Sh) + |
@@ -1202,7 +1308,8 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | |||
1202 | return !NO_BALANCING_NEEDED; | 1308 | return !NO_BALANCING_NEEDED; |
1203 | } | 1309 | } |
1204 | 1310 | ||
1205 | /* Check whether current node S[h] is balanced when increasing its size by | 1311 | /* |
1312 | * Check whether current node S[h] is balanced when increasing its size by | ||
1206 | * Inserting or Pasting. | 1313 | * Inserting or Pasting. |
1207 | * Calculate parameters for balancing for current level h. | 1314 | * Calculate parameters for balancing for current level h. |
1208 | * Parameters: | 1315 | * Parameters: |
@@ -1219,39 +1326,48 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | |||
1219 | static int ip_check_balance(struct tree_balance *tb, int h) | 1326 | static int ip_check_balance(struct tree_balance *tb, int h) |
1220 | { | 1327 | { |
1221 | struct virtual_node *vn = tb->tb_vn; | 1328 | struct virtual_node *vn = tb->tb_vn; |
1222 | int levbytes, /* Number of bytes that must be inserted into (value | 1329 | /* |
1223 | is negative if bytes are deleted) buffer which | 1330 | * Number of bytes that must be inserted into (value is negative |
1224 | contains node being balanced. The mnemonic is | 1331 | * if bytes are deleted) buffer which contains node being balanced. |
1225 | that the attempted change in node space used level | 1332 | * The mnemonic is that the attempted change in node space used |
1226 | is levbytes bytes. */ | 1333 | * level is levbytes bytes. |
1227 | ret; | 1334 | */ |
1335 | int levbytes; | ||
1336 | int ret; | ||
1228 | 1337 | ||
1229 | int lfree, sfree, rfree /* free space in L, S and R */ ; | 1338 | int lfree, sfree, rfree /* free space in L, S and R */ ; |
1230 | 1339 | ||
1231 | /* nver is short for number of vertices, and lnver is the number if | 1340 | /*
1232 | we shift to the left, rnver is the number if we shift to the | 1341 | * nver is short for number of vertices, and lnver is the number if
1233 | right, and lrnver is the number if we shift in both directions. | 1342 | * we shift to the left, rnver is the number if we shift to the
1234 | The goal is to minimize first the number of vertices, and second, | 1343 | * right, and lrnver is the number if we shift in both directions.
1235 | the number of vertices whose contents are changed by shifting, | 1344 | * The goal is to minimize first the number of vertices, and second,
1236 | and third the number of uncached vertices whose contents are | 1345 | * the number of vertices whose contents are changed by shifting,
1237 | changed by shifting and must be read from disk. */ | 1346 | * and third the number of uncached vertices whose contents are
1347 | * changed by shifting and must be read from disk. | ||
1348 | */ | ||
1238 | int nver, lnver, rnver, lrnver; | 1349 | int nver, lnver, rnver, lrnver; |
1239 | 1350 | ||
1240 | /* used at leaf level only, S0 = S[0] is the node being balanced, | 1351 | /* |
1241 | sInum [ I = 0,1,2 ] is the number of items that will | 1352 | * used at leaf level only, S0 = S[0] is the node being balanced, |
1242 | remain in node SI after balancing. S1 and S2 are new | 1353 | * sInum [ I = 0,1,2 ] is the number of items that will |
1243 | nodes that might be created. */ | 1354 | * remain in node SI after balancing. S1 and S2 are new |
1355 | * nodes that might be created. | ||
1356 | */ | ||
1244 | 1357 | ||
1245 | /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters, | 1358 | /*
1246 | where the 4th parameter is s1bytes and the 5th is s2bytes | 1359 | * we perform 8 calls to get_num_ver(). For each call we
1360 | * calculate five parameters, where the 4th parameter is s1bytes
1361 | * and the 5th is s2bytes
1362 | * | ||
1363 | * s0num, s1num, s2num for 8 cases | ||
1364 | * 0,1 - do not shift and do not shift but bottle | ||
1365 | * 2 - shift only whole item to left | ||
1366 | * 3 - shift to left and bottle as much as possible | ||
1367 | * 4,5 - shift to right (whole items and as much as possible) | ||
1368 | * 6,7 - shift to both directions (whole items and as much as possible) | ||
1247 | */ | 1369 | */ |
1248 | short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases | 1370 | short snum012[40] = { 0, }; |
1249 | 0,1 - do not shift and do not shift but bottle | ||
1250 | 2 - shift only whole item to left | ||
1251 | 3 - shift to left and bottle as much as possible | ||
1252 | 4,5 - shift to right (whole items and as much as possible) | ||
1253 | 6,7 - shift to both directions (whole items and as much as possible) | ||
1254 | */ | ||
1255 | 1371 | ||
1256 | /* Sh is the node whose balance is currently being checked */ | 1372 | /* Sh is the node whose balance is currently being checked */ |
1257 | struct buffer_head *Sh; | 1373 | struct buffer_head *Sh; |
@@ -1265,9 +1381,10 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1265 | reiserfs_panic(tb->tb_sb, "vs-8210", | 1381 | reiserfs_panic(tb->tb_sb, "vs-8210", |
1266 | "S[0] can not be 0"); | 1382 | "S[0] can not be 0"); |
1267 | switch (ret = get_empty_nodes(tb, h)) { | 1383 | switch (ret = get_empty_nodes(tb, h)) { |
1384 | /* no balancing for higher levels needed */ | ||
1268 | case CARRY_ON: | 1385 | case CARRY_ON: |
1269 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 1386 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); |
1270 | return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ | 1387 | return NO_BALANCING_NEEDED; |
1271 | 1388 | ||
1272 | case NO_DISK_SPACE: | 1389 | case NO_DISK_SPACE: |
1273 | case REPEAT_SEARCH: | 1390 | case REPEAT_SEARCH: |
@@ -1278,7 +1395,9 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1278 | } | 1395 | } |
1279 | } | 1396 | } |
1280 | 1397 | ||
1281 | if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ | 1398 | /* get parents of S[h] neighbors. */ |
1399 | ret = get_parents(tb, h); | ||
1400 | if (ret != CARRY_ON) | ||
1282 | return ret; | 1401 | return ret; |
1283 | 1402 | ||
1284 | sfree = B_FREE_SPACE(Sh); | 1403 | sfree = B_FREE_SPACE(Sh); |
@@ -1287,38 +1406,44 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1287 | rfree = get_rfree(tb, h); | 1406 | rfree = get_rfree(tb, h); |
1288 | lfree = get_lfree(tb, h); | 1407 | lfree = get_lfree(tb, h); |
1289 | 1408 | ||
1409 | /* and new item fits into node S[h] without any shifting */ | ||
1290 | if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == | 1410 | if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == |
1291 | NO_BALANCING_NEEDED) | 1411 | NO_BALANCING_NEEDED) |
1292 | /* and new item fits into node S[h] without any shifting */ | ||
1293 | return NO_BALANCING_NEEDED; | 1412 | return NO_BALANCING_NEEDED; |
1294 | 1413 | ||
1295 | create_virtual_node(tb, h); | 1414 | create_virtual_node(tb, h); |
1296 | 1415 | ||
1297 | /* | 1416 | /* |
1298 | determine maximal number of items we can shift to the left neighbor (in tb structure) | 1417 | * determine maximal number of items we can shift to the left |
1299 | and the maximal number of bytes that can flow to the left neighbor | 1418 | * neighbor (in tb structure) and the maximal number of bytes |
1300 | from the left most liquid item that cannot be shifted from S[0] entirely (returned value) | 1419 | * that can flow to the left neighbor from the left most liquid |
1420 | * item that cannot be shifted from S[0] entirely (returned value) | ||
1301 | */ | 1421 | */ |
1302 | check_left(tb, h, lfree); | 1422 | check_left(tb, h, lfree); |
1303 | 1423 | ||
1304 | /* | 1424 | /* |
1305 | determine maximal number of items we can shift to the right neighbor (in tb structure) | 1425 | * determine maximal number of items we can shift to the right |
1306 | and the maximal number of bytes that can flow to the right neighbor | 1426 | * neighbor (in tb structure) and the maximal number of bytes |
1307 | from the right most liquid item that cannot be shifted from S[0] entirely (returned value) | 1427 | * that can flow to the right neighbor from the right most liquid |
1428 | * item that cannot be shifted from S[0] entirely (returned value) | ||
1308 | */ | 1429 | */ |
1309 | check_right(tb, h, rfree); | 1430 | check_right(tb, h, rfree); |
1310 | 1431 | ||
1311 | /* all contents of internal node S[h] can be moved into its | 1432 | /* |
1312 | neighbors, S[h] will be removed after balancing */ | 1433 | * all contents of internal node S[h] can be moved into its |
1434 | * neighbors, S[h] will be removed after balancing | ||
1435 | */ | ||
1313 | if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { | 1436 | if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { |
1314 | int to_r; | 1437 | int to_r; |
1315 | 1438 | ||
1316 | /* Since we are working on internal nodes, and our internal | 1439 | /* |
1317 | nodes have fixed size entries, then we can balance by the | 1440 | * Since we are working on internal nodes, and our internal |
1318 | number of items rather than the space they consume. In this | 1441 | * nodes have fixed size entries, then we can balance by the |
1319 | routine we set the left node equal to the right node, | 1442 | * number of items rather than the space they consume. In this |
1320 | allowing a difference of less than or equal to 1 child | 1443 | * routine we set the left node equal to the right node, |
1321 | pointer. */ | 1444 | * allowing a difference of less than or equal to 1 child |
1445 | * pointer. | ||
1446 | */ | ||
1322 | to_r = | 1447 | to_r = |
1323 | ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + | 1448 | ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + |
1324 | vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - | 1449 | vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - |
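The to_r expression this hunk opens is easier to follow with numbers; a hypothetical example (my arithmetic, reading lnum[h]/rnum[h] as the free item capacity of L[h] and R[h]):

	/*
	 * k = MAX_NR_KEY(Sh) = 10, lnum = 4, rnum = 6, vn_nr_item = 7,
	 * so S[h] has 8 child pointers to hand out:
	 *
	 *   to_r = ((10 << 1) + 2 - 4 - 6 + 7 + 1) / 2 - (10 + 1 - 6)
	 *        = 20 / 2 - 5
	 *        = 5
	 *
	 * R[h] (currently 5 children) takes 5 -> 10; L[h] (currently 7)
	 * takes the remaining 3 -> 10. The neighbors end up equal, within
	 * the one-child-pointer tolerance the comment above allows.
	 */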
@@ -1328,7 +1453,10 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1328 | return CARRY_ON; | 1453 | return CARRY_ON; |
1329 | } | 1454 | } |
1330 | 1455 | ||
1331 | /* this checks balance condition, that any two neighboring nodes can not fit in one node */ | 1456 | /* |
1457 | * this checks balance condition, that any two neighboring nodes | ||
1458 | * can not fit in one node | ||
1459 | */ | ||
1332 | RFALSE(h && | 1460 | RFALSE(h && |
1333 | (tb->lnum[h] >= vn->vn_nr_item + 1 || | 1461 | (tb->lnum[h] >= vn->vn_nr_item + 1 || |
1334 | tb->rnum[h] >= vn->vn_nr_item + 1), | 1462 | tb->rnum[h] >= vn->vn_nr_item + 1), |
@@ -1337,16 +1465,22 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1337 | (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), | 1465 | (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), |
1338 | "vs-8225: tree is not balanced on leaf level"); | 1466 | "vs-8225: tree is not balanced on leaf level"); |
1339 | 1467 | ||
1340 | /* all contents of S[0] can be moved into its neighbors | 1468 | /* |
1341 | S[0] will be removed after balancing. */ | 1469 | * all contents of S[0] can be moved into its neighbors |
1470 | * S[0] will be removed after balancing. | ||
1471 | */ | ||
1342 | if (!h && is_leaf_removable(tb)) | 1472 | if (!h && is_leaf_removable(tb)) |
1343 | return CARRY_ON; | 1473 | return CARRY_ON; |
1344 | 1474 | ||
1345 | /* why do we perform this check here rather than earlier?? | 1475 | /* |
1346 | Answer: we can win 1 node in some cases above. Moreover we | 1476 | * why do we perform this check here rather than earlier?? |
1347 | checked it above, when we checked, that S[0] is not removable | 1477 | * Answer: we can win 1 node in some cases above. Moreover we |
1348 | in principle */ | 1478 | * checked it above, when we checked, that S[0] is not removable |
1349 | if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ | 1479 | * in principle |
1480 | */ | ||
1481 | |||
1482 | /* new item fits into node S[h] without any shifting */ | ||
1483 | if (sfree >= levbytes) { | ||
1350 | if (!h) | 1484 | if (!h) |
1351 | tb->s0num = vn->vn_nr_item; | 1485 | tb->s0num = vn->vn_nr_item; |
1352 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 1486 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); |
@@ -1355,18 +1489,19 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1355 | 1489 | ||
1356 | { | 1490 | { |
1357 | int lpar, rpar, nset, lset, rset, lrset; | 1491 | int lpar, rpar, nset, lset, rset, lrset; |
1358 | /* | 1492 | /* regular overflowing of the node */ |
1359 | * regular overflowing of the node | ||
1360 | */ | ||
1361 | 1493 | ||
1362 | /* get_num_ver works in 2 modes (FLOW & NO_FLOW) | 1494 | /* |
1363 | lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) | 1495 | * get_num_ver works in 2 modes (FLOW & NO_FLOW) |
1364 | nset, lset, rset, lrset - shows, whether flowing items give better packing | 1496 | * lpar, rpar - number of items we can shift to left/right |
1497 | * neighbor (including splitting item) | ||
1498 | * nset, lset, rset, lrset - shows, whether flowing items | ||
1499 | * give better packing | ||
1365 | */ | 1500 | */ |
1366 | #define FLOW 1 | 1501 | #define FLOW 1 |
1367 | #define NO_FLOW 0 /* do not do any splitting */ | 1502 | #define NO_FLOW 0 /* do not do any splitting */
1368 | 1503 | ||
1369 | /* we choose one the following */ | 1504 | /* we choose one of the following */ |
1370 | #define NOTHING_SHIFT_NO_FLOW 0 | 1505 | #define NOTHING_SHIFT_NO_FLOW 0 |
1371 | #define NOTHING_SHIFT_FLOW 5 | 1506 | #define NOTHING_SHIFT_FLOW 5 |
1372 | #define LEFT_SHIFT_NO_FLOW 10 | 1507 | #define LEFT_SHIFT_NO_FLOW 10 |
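Read together with the 8-case list earlier in the function, these constants appear to index snum012[40] in strides of five; a sketch of the layout as I read it:

	/*
	 * snum012[set + 0]   s0num   - items that stay in S[0]
	 * snum012[set + 1]   s1num   - items placed in new node S1
	 * snum012[set + 2]   s2num   - items placed in new node S2
	 * snum012[set + 3]   s1bytes - bytes of a split item going to S1
	 * snum012[set + 4]   s2bytes - bytes of a split item going to S2
	 *
	 * where set is one of the *_SHIFT_*FLOW constants (0, 5, 10, ...):
	 * 8 variants x 5 slots = 40 entries.
	 */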
@@ -1379,10 +1514,13 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1379 | lpar = tb->lnum[h]; | 1514 | lpar = tb->lnum[h]; |
1380 | rpar = tb->rnum[h]; | 1515 | rpar = tb->rnum[h]; |
1381 | 1516 | ||
1382 | /* calculate number of blocks S[h] must be split into when | 1517 | /* |
1383 | nothing is shifted to the neighbors, | 1518 | * calculate number of blocks S[h] must be split into when |
1384 | as well as number of items in each part of the split node (s012 numbers), | 1519 | * nothing is shifted to the neighbors, as well as number of |
1385 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ | 1520 | * items in each part of the split node (s012 numbers), |
1521 | * and number of bytes (s1bytes) of the shared drop which | ||
1522 | * flow to S1 if any | ||
1523 | */ | ||
1386 | nset = NOTHING_SHIFT_NO_FLOW; | 1524 | nset = NOTHING_SHIFT_NO_FLOW; |
1387 | nver = get_num_ver(vn->vn_mode, tb, h, | 1525 | nver = get_num_ver(vn->vn_mode, tb, h, |
1388 | 0, -1, h ? vn->vn_nr_item : 0, -1, | 1526 | 0, -1, h ? vn->vn_nr_item : 0, -1, |
@@ -1391,7 +1529,10 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1391 | if (!h) { | 1529 | if (!h) { |
1392 | int nver1; | 1530 | int nver1; |
1393 | 1531 | ||
1394 | /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ | 1532 | /* |
1533 | * note, that in this case we try to bottle | ||
1534 | * between S[0] and S1 (S1 - the first new node) | ||
1535 | */ | ||
1395 | nver1 = get_num_ver(vn->vn_mode, tb, h, | 1536 | nver1 = get_num_ver(vn->vn_mode, tb, h, |
1396 | 0, -1, 0, -1, | 1537 | 0, -1, 0, -1, |
1397 | snum012 + NOTHING_SHIFT_FLOW, FLOW); | 1538 | snum012 + NOTHING_SHIFT_FLOW, FLOW); |
@@ -1399,11 +1540,13 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1399 | nset = NOTHING_SHIFT_FLOW, nver = nver1; | 1540 | nset = NOTHING_SHIFT_FLOW, nver = nver1; |
1400 | } | 1541 | } |
1401 | 1542 | ||
1402 | /* calculate number of blocks S[h] must be split into when | 1543 | /* |
1403 | l_shift_num first items and l_shift_bytes of the right most | 1544 | * calculate number of blocks S[h] must be split into when |
1404 | liquid item to be shifted are shifted to the left neighbor, | 1545 | * l_shift_num first items and l_shift_bytes of the right |
1405 | as well as number of items in each part of the splitted node (s012 numbers), | 1546 | * most liquid item to be shifted are shifted to the left |
1406 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any | 1547 | * neighbor, as well as number of items in each part of the |
1548 | * splitted node (s012 numbers), and number of bytes | ||
1549 | * (s1bytes) of the shared drop which flow to S1 if any | ||
1407 | */ | 1550 | */ |
1408 | lset = LEFT_SHIFT_NO_FLOW; | 1551 | lset = LEFT_SHIFT_NO_FLOW; |
1409 | lnver = get_num_ver(vn->vn_mode, tb, h, | 1552 | lnver = get_num_ver(vn->vn_mode, tb, h, |
@@ -1422,11 +1565,13 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1422 | lset = LEFT_SHIFT_FLOW, lnver = lnver1; | 1565 | lset = LEFT_SHIFT_FLOW, lnver = lnver1; |
1423 | } | 1566 | } |
1424 | 1567 | ||
1425 | /* calculate number of blocks S[h] must be split into when | 1568 | /* |
1426 | r_shift_num first items and r_shift_bytes of the left most | 1569 | * calculate number of blocks S[h] must be split into when |
1427 | liquid item to be shifted are shifted to the right neighbor, | 1570 | * r_shift_num first items and r_shift_bytes of the left most |
1428 | as well as number of items in each part of the split node (s012 numbers), | 1572 | * as well as number of items in each part of the split
1429 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any | 1573 | * node (s012 numbers), and number of bytes (s1bytes) of the
1573 | * node (s012 numbers), and number of bytes (s1bytes) of the | ||
1574 | * shared drop which flow to S1 if any | ||
1430 | */ | 1575 | */ |
1431 | rset = RIGHT_SHIFT_NO_FLOW; | 1576 | rset = RIGHT_SHIFT_NO_FLOW; |
1432 | rnver = get_num_ver(vn->vn_mode, tb, h, | 1577 | rnver = get_num_ver(vn->vn_mode, tb, h, |
@@ -1451,10 +1596,12 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1451 | rset = RIGHT_SHIFT_FLOW, rnver = rnver1; | 1596 | rset = RIGHT_SHIFT_FLOW, rnver = rnver1; |
1452 | } | 1597 | } |
1453 | 1598 | ||
1454 | /* calculate number of blocks S[h] must be split into when | 1599 | /* |
1455 | items are shifted in both directions, | 1600 | * calculate number of blocks S[h] must be split into when |
1456 | as well as number of items in each part of the split node (s012 numbers), | 1602 | * of items in each part of the split node (s012 numbers),
1457 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any | 1602 | * of items in each part of the splitted node (s012 numbers), |
1603 | * and number of bytes (s1bytes) of the shared drop which | ||
1604 | * flow to S1 if any | ||
1458 | */ | 1605 | */ |
1459 | lrset = LR_SHIFT_NO_FLOW; | 1606 | lrset = LR_SHIFT_NO_FLOW; |
1460 | lrnver = get_num_ver(vn->vn_mode, tb, h, | 1607 | lrnver = get_num_ver(vn->vn_mode, tb, h, |
@@ -1481,10 +1628,12 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1481 | lrset = LR_SHIFT_FLOW, lrnver = lrnver1; | 1628 | lrset = LR_SHIFT_FLOW, lrnver = lrnver1; |
1482 | } | 1629 | } |
1483 | 1630 | ||
1484 | /* Our general shifting strategy is: | 1631 | /*
1485 | 1) to minimize the number of new nodes; | 1632 | * Our general shifting strategy is:
1486 | 2) to minimize the number of neighbors involved in shifting; | 1633 | * 1) to minimize the number of new nodes;
1487 | 3) to minimize the number of disk reads; */ | 1634 | * 2) to minimize the number of neighbors involved in shifting;
1635 | * 3) to minimize the number of disk reads;
1636 | */ | ||
1488 | 1637 | ||
1489 | /* we can win TWO or ONE nodes by shifting in both directions */ | 1638 | /* we can win TWO or ONE nodes by shifting in both directions */ |
1490 | if (lrnver < lnver && lrnver < rnver) { | 1639 | if (lrnver < lnver && lrnver < rnver) { |
@@ -1508,42 +1657,59 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1508 | return CARRY_ON; | 1657 | return CARRY_ON; |
1509 | } | 1658 | } |
1510 | 1659 | ||
1511 | /* if shifting doesn't lead to better packing then don't shift */ | 1660 | /* |
1661 | * if shifting doesn't lead to better packing | ||
1662 | * then don't shift | ||
1663 | */ | ||
1512 | if (nver == lrnver) { | 1664 | if (nver == lrnver) { |
1513 | set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, | 1665 | set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, |
1514 | -1); | 1666 | -1); |
1515 | return CARRY_ON; | 1667 | return CARRY_ON; |
1516 | } | 1668 | } |
1517 | 1669 | ||
1518 | /* now we know that for better packing shifting in only one | 1670 | /* |
1519 | direction either to the left or to the right is required */ | 1671 | * now we know that for better packing shifting in only one |
1672 | * direction either to the left or to the right is required | ||
1673 | */ | ||
1520 | 1674 | ||
1521 | /* if shifting to the left is better than shifting to the right */ | 1675 | /* |
1676 | * if shifting to the left is better than | ||
1677 | * shifting to the right | ||
1678 | */ | ||
1522 | if (lnver < rnver) { | 1679 | if (lnver < rnver) { |
1523 | SET_PAR_SHIFT_LEFT; | 1680 | SET_PAR_SHIFT_LEFT; |
1524 | return CARRY_ON; | 1681 | return CARRY_ON; |
1525 | } | 1682 | } |
1526 | 1683 | ||
1527 | /* if shifting to the right is better than shifting to the left */ | 1684 | /* |
1685 | * if shifting to the right is better than | ||
1686 | * shifting to the left | ||
1687 | */ | ||
1528 | if (lnver > rnver) { | 1688 | if (lnver > rnver) { |
1529 | SET_PAR_SHIFT_RIGHT; | 1689 | SET_PAR_SHIFT_RIGHT; |
1530 | return CARRY_ON; | 1690 | return CARRY_ON; |
1531 | } | 1691 | } |
1532 | 1692 | ||
1533 | /* now shifting in either direction gives the same number | 1693 | /* |
1534 | of nodes and we can make use of the cached neighbors */ | 1694 | * now shifting in either direction gives the same number |
1695 | * of nodes and we can make use of the cached neighbors | ||
1696 | */ | ||
1535 | if (is_left_neighbor_in_cache(tb, h)) { | 1697 | if (is_left_neighbor_in_cache(tb, h)) { |
1536 | SET_PAR_SHIFT_LEFT; | 1698 | SET_PAR_SHIFT_LEFT; |
1537 | return CARRY_ON; | 1699 | return CARRY_ON; |
1538 | } | 1700 | } |
1539 | 1701 | ||
1540 | /* shift to the right regardless of whether the right neighbor is in cache or not */ | 1702 | /*
1703 | * shift to the right regardless of whether the
1704 | * right neighbor is in cache or not
1705 | */ | ||
1541 | SET_PAR_SHIFT_RIGHT; | 1706 | SET_PAR_SHIFT_RIGHT; |
1542 | return CARRY_ON; | 1707 | return CARRY_ON; |
1543 | } | 1708 | } |
1544 | } | 1709 | } |
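Condensed, the fall-through sequence that just ended picks a strategy in this priority order (a paraphrase, not a substitute for the code):

	/*
	 *   if (lrnver < lnver && lrnver < rnver)  shift both ways;
	 *   else if (nver == lrnver)               split, shift nothing;
	 *   else if (lnver < rnver)                shift left only;
	 *   else if (lnver > rnver)                shift right only;
	 *   else if (left neighbor is cached)      shift left;
	 *   else                                   shift right;
	 *
	 * i.e. fewest resulting nodes first, then fewest neighbors
	 * touched, then fewest disk reads - exactly the three-point
	 * strategy comment above the cascade.
	 */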
1545 | 1710 | ||
1546 | /* Check whether current node S[h] is balanced when Decreasing its size by | 1711 | /* |
1712 | * Check whether current node S[h] is balanced when Decreasing its size by | ||
1547 | * Deleting or Cutting for INTERNAL node of S+tree. | 1713 | * Deleting or Cutting for INTERNAL node of S+tree. |
1548 | * Calculate parameters for balancing for current level h. | 1714 | * Calculate parameters for balancing for current level h. |
1549 | * Parameters: | 1715 | * Parameters: |
@@ -1563,8 +1729,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1563 | { | 1729 | { |
1564 | struct virtual_node *vn = tb->tb_vn; | 1730 | struct virtual_node *vn = tb->tb_vn; |
1565 | 1731 | ||
1566 | /* Sh is the node whose balance is currently being checked, | 1732 | /* |
1567 | and Fh is its father. */ | 1733 | * Sh is the node whose balance is currently being checked, |
1734 | * and Fh is its father. | ||
1735 | */ | ||
1568 | struct buffer_head *Sh, *Fh; | 1736 | struct buffer_head *Sh, *Fh; |
1569 | int maxsize, ret; | 1737 | int maxsize, ret; |
1570 | int lfree, rfree /* free space in L and R */ ; | 1738 | int lfree, rfree /* free space in L and R */ ; |
@@ -1574,19 +1742,25 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1574 | 1742 | ||
1575 | maxsize = MAX_CHILD_SIZE(Sh); | 1743 | maxsize = MAX_CHILD_SIZE(Sh); |
1576 | 1744 | ||
1577 | /* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ | 1745 | /* |
1578 | /* new_nr_item = number of items node would have if operation is */ | 1746 | * using tb->insert_size[h], which is negative in this case, |
1579 | /* performed without balancing (new_nr_item); */ | 1747 | * create_virtual_node calculates: |
1748 | * new_nr_item = number of items node would have if operation is | ||
1749 | * performed without balancing (new_nr_item); | ||
1750 | */ | ||
1580 | create_virtual_node(tb, h); | 1751 | create_virtual_node(tb, h); |
1581 | 1752 | ||
1582 | if (!Fh) { /* S[h] is the root. */ | 1753 | if (!Fh) { /* S[h] is the root. */ |
1754 | /* no balancing for higher levels needed */ | ||
1583 | if (vn->vn_nr_item > 0) { | 1755 | if (vn->vn_nr_item > 0) { |
1584 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 1756 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); |
1585 | return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ | 1757 | return NO_BALANCING_NEEDED; |
1586 | } | 1758 | } |
1587 | /* new_nr_item == 0. | 1759 | /* |
1760 | * new_nr_item == 0. | ||
1588 | * Current root will be deleted resulting in | 1761 | * Current root will be deleted resulting in |
1589 | * decrementing the tree height. */ | 1762 | * decrementing the tree height. |
1763 | */ | ||
1590 | set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); | 1764 | set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); |
1591 | return CARRY_ON; | 1765 | return CARRY_ON; |
1592 | } | 1766 | } |
@@ -1602,12 +1776,18 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1602 | check_left(tb, h, lfree); | 1776 | check_left(tb, h, lfree); |
1603 | check_right(tb, h, rfree); | 1777 | check_right(tb, h, rfree); |
1604 | 1778 | ||
1605 | if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. | 1779 | /* |
1606 | * In this case we balance only if it leads to better packing. */ | 1780 | * Balance condition for the internal node is valid. |
1607 | if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, | 1781 | * In this case we balance only if it leads to better packing. |
1608 | * which is impossible with greater values of new_nr_item. */ | 1782 | */ |
1783 | if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { | ||
1784 | /* | ||
1785 | * Here we join S[h] with one of its neighbors, | ||
1786 | * which is impossible with greater values of new_nr_item. | ||
1787 | */ | ||
1788 | if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { | ||
1789 | /* All contents of S[h] can be moved to L[h]. */ | ||
1609 | if (tb->lnum[h] >= vn->vn_nr_item + 1) { | 1790 | if (tb->lnum[h] >= vn->vn_nr_item + 1) { |
1610 | /* All contents of S[h] can be moved to L[h]. */ | ||
1611 | int n; | 1791 | int n; |
1612 | int order_L; | 1792 | int order_L; |
1613 | 1793 | ||
@@ -1623,8 +1803,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1623 | return CARRY_ON; | 1803 | return CARRY_ON; |
1624 | } | 1804 | } |
1625 | 1805 | ||
1806 | /* All contents of S[h] can be moved to R[h]. */ | ||
1626 | if (tb->rnum[h] >= vn->vn_nr_item + 1) { | 1807 | if (tb->rnum[h] >= vn->vn_nr_item + 1) { |
1627 | /* All contents of S[h] can be moved to R[h]. */ | ||
1628 | int n; | 1808 | int n; |
1629 | int order_R; | 1809 | int order_R; |
1630 | 1810 | ||
@@ -1641,8 +1821,11 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1641 | } | 1821 | } |
1642 | } | 1822 | } |
1643 | 1823 | ||
1824 | /* | ||
1825 | * All contents of S[h] can be moved to the neighbors | ||
1826 | * (L[h] & R[h]). | ||
1827 | */ | ||
1644 | if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { | 1828 | if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { |
1645 | /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ | ||
1646 | int to_r; | 1829 | int to_r; |
1647 | 1830 | ||
1648 | to_r = | 1831 | to_r = |
@@ -1659,7 +1842,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1659 | return NO_BALANCING_NEEDED; | 1842 | return NO_BALANCING_NEEDED; |
1660 | } | 1843 | } |
1661 | 1844 | ||
1662 | /* Current node contains an insufficient number of items. Balancing is required. */ | 1845 | /*
1846 | * Current node contains an insufficient number of items.
1847 | * Balancing is required. | ||
1848 | */ | ||
1663 | /* Check whether we can merge S[h] with left neighbor. */ | 1849 | /* Check whether we can merge S[h] with left neighbor. */ |
1664 | if (tb->lnum[h] >= vn->vn_nr_item + 1) | 1850 | if (tb->lnum[h] >= vn->vn_nr_item + 1) |
1665 | if (is_left_neighbor_in_cache(tb, h) | 1851 | if (is_left_neighbor_in_cache(tb, h) |
@@ -1726,7 +1912,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1726 | return CARRY_ON; | 1912 | return CARRY_ON; |
1727 | } | 1913 | } |
1728 | 1914 | ||
1729 | /* Check whether current node S[h] is balanced when Decreasing its size by | 1915 | /* |
1916 | * Check whether current node S[h] is balanced when Decreasing its size by | ||
1730 | * Deleting or Truncating for LEAF node of S+tree. | 1917 | * Deleting or Truncating for LEAF node of S+tree. |
1731 | * Calculate parameters for balancing for current level h. | 1918 | * Calculate parameters for balancing for current level h. |
1732 | * Parameters: | 1919 | * Parameters: |
@@ -1743,15 +1930,21 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1743 | { | 1930 | { |
1744 | struct virtual_node *vn = tb->tb_vn; | 1931 | struct virtual_node *vn = tb->tb_vn; |
1745 | 1932 | ||
1746 | /* Number of bytes that must be deleted from | 1933 | /* |
1747 | (value is negative if bytes are deleted) buffer which | 1934 | * Number of bytes that must be deleted from |
1748 | contains node being balanced. The mnemonic is that the | 1935 | * (value is negative if bytes are deleted) buffer which |
1749 | attempted change in node space used level is levbytes bytes. */ | 1936 | * contains node being balanced. The mnemonic is that the |
1937 | * attempted change in node space used level is levbytes bytes. | ||
1938 | */ | ||
1750 | int levbytes; | 1939 | int levbytes; |
1940 | |||
1751 | /* the maximal item size */ | 1941 | /* the maximal item size */ |
1752 | int maxsize, ret; | 1942 | int maxsize, ret; |
1753 | /* S0 is the node whose balance is currently being checked, | 1943 | |
1754 | and F0 is its father. */ | 1944 | /* |
1945 | * S0 is the node whose balance is currently being checked, | ||
1946 | * and F0 is its father. | ||
1947 | */ | ||
1755 | struct buffer_head *S0, *F0; | 1948 | struct buffer_head *S0, *F0; |
1756 | int lfree, rfree /* free space in L and R */ ; | 1949 | int lfree, rfree /* free space in L and R */ ; |
1757 | 1950 | ||
@@ -1784,9 +1977,11 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1784 | if (are_leaves_removable(tb, lfree, rfree)) | 1977 | if (are_leaves_removable(tb, lfree, rfree)) |
1785 | return CARRY_ON; | 1978 | return CARRY_ON; |
1786 | 1979 | ||
1787 | /* determine maximal number of items we can shift to the left/right neighbor | 1980 | /* |
1788 | and the maximal number of bytes that can flow to the left/right neighbor | 1981 | * determine maximal number of items we can shift to the left/right |
1789 | from the left/right most liquid item that cannot be shifted from S[0] entirely | 1982 | * neighbor and the maximal number of bytes that can flow to the |
1983 | * left/right neighbor from the left/right most liquid item that | ||
1984 | * cannot be shifted from S[0] entirely | ||
1790 | */ | 1985 | */ |
1791 | check_left(tb, h, lfree); | 1986 | check_left(tb, h, lfree); |
1792 | check_right(tb, h, rfree); | 1987 | check_right(tb, h, rfree); |
@@ -1810,7 +2005,10 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1810 | return CARRY_ON; | 2005 | return CARRY_ON; |
1811 | } | 2006 | } |
1812 | 2007 | ||
1813 | /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ | 2008 | /* |
2009 | * All contents of S[0] can be moved to the neighbors (L[0] & R[0]). | ||
2010 | * Set parameters and return | ||
2011 | */ | ||
1814 | if (is_leaf_removable(tb)) | 2012 | if (is_leaf_removable(tb)) |
1815 | return CARRY_ON; | 2013 | return CARRY_ON; |
1816 | 2014 | ||
@@ -1820,7 +2018,8 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1820 | return NO_BALANCING_NEEDED; | 2018 | return NO_BALANCING_NEEDED; |
1821 | } | 2019 | } |
1822 | 2020 | ||
1823 | /* Check whether current node S[h] is balanced when Decreasing its size by | 2021 | /* |
2022 | * Check whether current node S[h] is balanced when Decreasing its size by | ||
1824 | * Deleting or Cutting. | 2023 | * Deleting or Cutting. |
1825 | * Calculate parameters for balancing for current level h. | 2024 | * Calculate parameters for balancing for current level h. |
1826 | * Parameters: | 2025 | * Parameters: |
@@ -1844,15 +2043,16 @@ static int dc_check_balance(struct tree_balance *tb, int h) | |||
1844 | return dc_check_balance_leaf(tb, h); | 2043 | return dc_check_balance_leaf(tb, h); |
1845 | } | 2044 | } |
1846 | 2045 | ||
1847 | /* Check whether current node S[h] is balanced. | 2046 | /* |
2047 | * Check whether current node S[h] is balanced. | ||
1848 | * Calculate parameters for balancing for current level h. | 2048 | * Calculate parameters for balancing for current level h. |
1849 | * Parameters: | 2049 | * Parameters: |
1850 | * | 2050 | * |
1851 | * tb tree_balance structure: | 2051 | * tb tree_balance structure: |
1852 | * | 2052 | * |
1853 | * tb is a large structure that must be read about in the header file | 2053 | * tb is a large structure that must be read about in the header |
1854 | * at the same time as this procedure if the reader is to successfully | 2054 | * file at the same time as this procedure if the reader is |
1855 | * understand this procedure | 2055 | * to successfully understand this procedure |
1856 | * | 2056 | * |
1857 | * h current level of the node; | 2057 | * h current level of the node; |
1858 | * inum item number in S[h]; | 2058 | * inum item number in S[h]; |
@@ -1882,8 +2082,8 @@ static int check_balance(int mode, | |||
1882 | RFALSE(mode == M_INSERT && !vn->vn_ins_ih, | 2082 | RFALSE(mode == M_INSERT && !vn->vn_ins_ih, |
1883 | "vs-8255: ins_ih can not be 0 in insert mode"); | 2083 | "vs-8255: ins_ih can not be 0 in insert mode"); |
1884 | 2084 | ||
2085 | /* Calculate balance parameters when size of node is increasing. */ | ||
1885 | if (tb->insert_size[h] > 0) | 2086 | if (tb->insert_size[h] > 0) |
1886 | /* Calculate balance parameters when size of node is increasing. */ | ||
1887 | return ip_check_balance(tb, h); | 2087 | return ip_check_balance(tb, h); |
1888 | 2088 | ||
1889 | /* Calculate balance parameters when size of node is decreasing. */ | 2089 | /* Calculate balance parameters when size of node is decreasing. */ |
@@ -1911,21 +2111,23 @@ static int get_direct_parent(struct tree_balance *tb, int h) | |||
1911 | PATH_OFFSET_POSITION(path, path_offset - 1) = 0; | 2111 | PATH_OFFSET_POSITION(path, path_offset - 1) = 0; |
1912 | return CARRY_ON; | 2112 | return CARRY_ON; |
1913 | } | 2113 | } |
1914 | return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ | 2114 | /* Root is changed and we must recalculate the path. */ |
2115 | return REPEAT_SEARCH; | ||
1915 | } | 2116 | } |
1916 | 2117 | ||
2118 | /* Parent in the path is not in the tree. */ | ||
1917 | if (!B_IS_IN_TREE | 2119 | if (!B_IS_IN_TREE |
1918 | (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) | 2120 | (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) |
1919 | return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ | 2121 | return REPEAT_SEARCH; |
1920 | 2122 | ||
1921 | if ((position = | 2123 | if ((position = |
1922 | PATH_OFFSET_POSITION(path, | 2124 | PATH_OFFSET_POSITION(path, |
1923 | path_offset - 1)) > B_NR_ITEMS(bh)) | 2125 | path_offset - 1)) > B_NR_ITEMS(bh)) |
1924 | return REPEAT_SEARCH; | 2126 | return REPEAT_SEARCH; |
1925 | 2127 | ||
2128 | /* Parent in the path is not parent of the current node in the tree. */ | ||
1926 | if (B_N_CHILD_NUM(bh, position) != | 2129 | if (B_N_CHILD_NUM(bh, position) != |
1927 | PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) | 2130 | PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) |
1928 | /* Parent in the path is not parent of the current node in the tree. */ | ||
1929 | return REPEAT_SEARCH; | 2131 | return REPEAT_SEARCH; |
1930 | 2132 | ||
1931 | if (buffer_locked(bh)) { | 2133 | if (buffer_locked(bh)) { |
@@ -1936,10 +2138,15 @@ static int get_direct_parent(struct tree_balance *tb, int h) | |||
1936 | return REPEAT_SEARCH; | 2138 | return REPEAT_SEARCH; |
1937 | } | 2139 | } |
1938 | 2140 | ||
1939 | return CARRY_ON; /* Parent in the path is unlocked and really the parent of the current node. */ | 2141 | /*
2142 | * Parent in the path is unlocked and really the parent
2143 | * of the current node.
2144 | */ | ||
2145 | return CARRY_ON; | ||
1940 | } | 2146 | } |
1941 | 2147 | ||
1942 | /* Using lnum[h] and rnum[h] we should determine what neighbors | 2148 | /* |
2149 | * Using lnum[h] and rnum[h] we should determine what neighbors | ||
1943 | * of S[h] we | 2150 | * of S[h] we |
1944 | * need in order to balance S[h], and get them if necessary. | 2151 | * need in order to balance S[h], and get them if necessary. |
1945 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; | 2152 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; |
@@ -1997,7 +2204,7 @@ static int get_neighbors(struct tree_balance *tb, int h) | |||
1997 | } | 2204 | } |
1998 | 2205 | ||
1999 | /* We need right neighbor to balance S[path_offset]. */ | 2206 | /* We need right neighbor to balance S[path_offset]. */ |
2000 | if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. */ | 2207 | if (tb->rnum[h]) { |
2001 | PROC_INFO_INC(sb, need_r_neighbor[h]); | 2208 | PROC_INFO_INC(sb, need_r_neighbor[h]); |
2002 | bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); | 2209 | bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); |
2003 | 2210 | ||
@@ -2053,9 +2260,11 @@ static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) | |||
2053 | (max_num_of_entries - 1) * sizeof(__u16)); | 2260 | (max_num_of_entries - 1) * sizeof(__u16)); |
2054 | } | 2261 | } |
2055 | 2262 | ||
2056 | /* maybe we should fail balancing we are going to perform when kmalloc | 2263 | /* |
2057 | fails several times. But now it will loop until kmalloc gets | 2264 | * maybe we should fail balancing we are going to perform when kmalloc |
2058 | required memory */ | 2265 | * fails several times. But now it will loop until kmalloc gets |
2266 | * required memory | ||
2267 | */ | ||
2059 | static int get_mem_for_virtual_node(struct tree_balance *tb) | 2268 | static int get_mem_for_virtual_node(struct tree_balance *tb) |
2060 | { | 2269 | { |
2061 | int check_fs = 0; | 2270 | int check_fs = 0; |
@@ -2064,8 +2273,8 @@ static int get_mem_for_virtual_node(struct tree_balance *tb) | |||
2064 | 2273 | ||
2065 | size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); | 2274 | size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); |
2066 | 2275 | ||
2276 | /* we have to allocate more memory for virtual node */ | ||
2067 | if (size > tb->vn_buf_size) { | 2277 | if (size > tb->vn_buf_size) { |
2068 | /* we have to allocate more memory for virtual node */ | ||
2069 | if (tb->vn_buf) { | 2278 | if (tb->vn_buf) { |
2070 | /* free memory allocated before */ | 2279 | /* free memory allocated before */ |
2071 | kfree(tb->vn_buf); | 2280 | kfree(tb->vn_buf); |
@@ -2079,10 +2288,12 @@ static int get_mem_for_virtual_node(struct tree_balance *tb) | |||
2079 | /* get memory for virtual item */ | 2288 | /* get memory for virtual item */ |
2080 | buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); | 2289 | buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); |
2081 | if (!buf) { | 2290 | if (!buf) { |
2082 | /* getting memory with GFP_KERNEL priority may involve | 2291 | /* |
2083 | balancing now (due to indirect_to_direct conversion on | 2292 | * getting memory with GFP_KERNEL priority may involve |
2084 | dcache shrinking). So, release path and collected | 2293 | * balancing now (due to indirect_to_direct conversion |
2085 | resources here */ | 2294 | * on dcache shrinking). So, release path and collected |
2295 | * resources here | ||
2296 | */ | ||
2086 | free_buffers_in_tb(tb); | 2297 | free_buffers_in_tb(tb); |
2087 | buf = kmalloc(size, GFP_NOFS); | 2298 | buf = kmalloc(size, GFP_NOFS); |
2088 | if (!buf) { | 2299 | if (!buf) { |
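The allocation in this hunk is a two-stage fallback worth spelling out; a minimal generic sketch (release_resources() is a hypothetical stand-in for free_buffers_in_tb()):

	buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); /* opportunistic try */
	if (!buf) {
		/* drop anything a blocking allocation could deadlock on */
		release_resources();
		/* may block and enter reclaim, but won't recurse into the fs */
		buf = kmalloc(size, GFP_NOFS);
	}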
@@ -2168,8 +2379,10 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2168 | for (i = tb->tb_path->path_length; | 2379 | for (i = tb->tb_path->path_length; |
2169 | !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { | 2380 | !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { |
2170 | if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { | 2381 | if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { |
2171 | /* if I understand correctly, we can only be sure the last buffer | 2382 | /* |
2172 | ** in the path is in the tree --clm | 2383 | * if I understand correctly, we can only |
2384 | * be sure the last buffer in the path is | ||
2385 | * in the tree --clm | ||
2173 | */ | 2386 | */ |
2174 | #ifdef CONFIG_REISERFS_CHECK | 2387 | #ifdef CONFIG_REISERFS_CHECK |
2175 | if (PATH_PLAST_BUFFER(tb->tb_path) == | 2388 | if (PATH_PLAST_BUFFER(tb->tb_path) == |
@@ -2256,13 +2469,15 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2256 | } | 2469 | } |
2257 | } | 2470 | } |
2258 | } | 2471 | } |
2259 | /* as far as I can tell, this is not required. The FEB list seems | 2472 | |
2260 | ** to be full of newly allocated nodes, which will never be locked, | 2473 | /* |
2261 | ** dirty, or anything else. | 2474 | * as far as I can tell, this is not required. The FEB list |
2262 | ** To be safe, I'm putting in the checks and waits in. For the moment, | 2475 | * seems to be full of newly allocated nodes, which will |
2263 | ** they are needed to keep the code in journal.c from complaining | 2476 | * never be locked, dirty, or anything else. |
2264 | ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. | 2477 | * To be safe, I'm putting in the checks and waits in. |
2265 | ** --clm | 2478 | * For the moment, they are needed to keep the code in |
2479 | * journal.c from complaining about the buffer. | ||
2480 | * That code is inside CONFIG_REISERFS_CHECK as well. --clm | ||
2266 | */ | 2481 | */ |
2267 | for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { | 2482 | for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { |
2268 | if (tb->FEB[i]) { | 2483 | if (tb->FEB[i]) { |
@@ -2300,7 +2515,8 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2300 | return CARRY_ON; | 2515 | return CARRY_ON; |
2301 | } | 2516 | } |
2302 | 2517 | ||
2303 | /* Prepare for balancing, that is | 2518 | /* |
2519 | * Prepare for balancing, that is | ||
2304 | * get all necessary parents, and neighbors; | 2520 | * get all necessary parents, and neighbors; |
2305 | * analyze what and where should be moved; | 2521 | * analyze what and where should be moved; |
2306 | * get sufficient number of new nodes; | 2522 | * get sufficient number of new nodes; |
@@ -2309,13 +2525,14 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2309 | * When ported to SMP kernels, only at the last moment after all needed nodes | 2525 | * When ported to SMP kernels, only at the last moment after all needed nodes |
2310 | * are collected in cache, will the resources be locked using the usual | 2526 | * are collected in cache, will the resources be locked using the usual |
2311 | * textbook ordered lock acquisition algorithms. Note that ensuring that | 2527 | * textbook ordered lock acquisition algorithms. Note that ensuring that |
2312 | * this code neither write locks what it does not need to write lock nor locks out of order | 2528 | * this code neither write locks what it does not need to write lock nor locks |
2313 | * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans | 2529 | * out of order will be a pain in the butt that could have been avoided. |
2530 | * Grumble grumble. -Hans | ||
2314 | * | 2531 | * |
2315 | * fix is meant in the sense of render unchanging | 2532 | * fix is meant in the sense of render unchanging |
2316 | * | 2533 | * |
2317 | * Latency might be improved by first gathering a list of what buffers are needed | 2534 | * Latency might be improved by first gathering a list of what buffers |
2318 | * and then getting as many of them in parallel as possible? -Hans | 2535 | * are needed and then getting as many of them in parallel as possible? -Hans |
2319 | * | 2536 | * |
2320 | * Parameters: | 2537 | * Parameters: |
2321 | * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) | 2538 | * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) |
@@ -2335,8 +2552,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2335 | int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); | 2552 | int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); |
2336 | int pos_in_item; | 2553 | int pos_in_item; |
2337 | 2554 | ||
2338 | /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared | 2555 | /* |
2339 | ** during wait_tb_buffers_run | 2556 | * we set wait_tb_buffers_run when we have to restore any dirty |
2557 | * bits cleared during wait_tb_buffers_run | ||
2340 | */ | 2558 | */ |
2341 | int wait_tb_buffers_run = 0; | 2559 | int wait_tb_buffers_run = 0; |
2342 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | 2560 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
@@ -2347,14 +2565,15 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2347 | 2565 | ||
2348 | tb->fs_gen = get_generation(tb->tb_sb); | 2566 | tb->fs_gen = get_generation(tb->tb_sb); |
2349 | 2567 | ||
2350 | /* we prepare and log the super here so it will already be in the | 2568 | /* |
2351 | ** transaction when do_balance needs to change it. | 2569 | * we prepare and log the super here so it will already be in the |
2352 | ** This way do_balance won't have to schedule when trying to prepare | 2570 | * transaction when do_balance needs to change it. |
2353 | ** the super for logging | 2571 | * This way do_balance won't have to schedule when trying to prepare |
2572 | * the super for logging | ||
2354 | */ | 2573 | */ |
2355 | reiserfs_prepare_for_journal(tb->tb_sb, | 2574 | reiserfs_prepare_for_journal(tb->tb_sb, |
2356 | SB_BUFFER_WITH_SB(tb->tb_sb), 1); | 2575 | SB_BUFFER_WITH_SB(tb->tb_sb), 1); |
2357 | journal_mark_dirty(tb->transaction_handle, tb->tb_sb, | 2576 | journal_mark_dirty(tb->transaction_handle, |
2358 | SB_BUFFER_WITH_SB(tb->tb_sb)); | 2577 | SB_BUFFER_WITH_SB(tb->tb_sb)); |
2359 | if (FILESYSTEM_CHANGED_TB(tb)) | 2578 | if (FILESYSTEM_CHANGED_TB(tb)) |
2360 | return REPEAT_SEARCH; | 2579 | return REPEAT_SEARCH; |
@@ -2408,7 +2627,7 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2408 | #endif | 2627 | #endif |
2409 | 2628 | ||
2410 | if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) | 2629 | if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) |
2411 | // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat | 2630 | /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */ |
2412 | return REPEAT_SEARCH; | 2631 | return REPEAT_SEARCH; |
2413 | 2632 | ||
2414 | /* Starting from the leaf level; for all levels h of the tree. */ | 2633 | /* Starting from the leaf level; for all levels h of the tree. */ |
@@ -2427,7 +2646,10 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2427 | goto repeat; | 2646 | goto repeat; |
2428 | if (h != MAX_HEIGHT - 1) | 2647 | if (h != MAX_HEIGHT - 1) |
2429 | tb->insert_size[h + 1] = 0; | 2648 | tb->insert_size[h + 1] = 0; |
2430 | /* ok, analysis and resource gathering are complete */ | 2649 | /* |
2650 | * ok, analysis and resource gathering | ||
2651 | * are complete | ||
2652 | */ | ||
2431 | break; | 2653 | break; |
2432 | } | 2654 | } |
2433 | goto repeat; | 2655 | goto repeat; |
@@ -2437,15 +2659,19 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2437 | if (ret != CARRY_ON) | 2659 | if (ret != CARRY_ON) |
2438 | goto repeat; | 2660 | goto repeat; |
2439 | 2661 | ||
2440 | /* No disk space, or schedule occurred and analysis may be | 2662 | /* |
2441 | * invalid and needs to be redone. */ | 2663 | * No disk space, or schedule occurred and analysis may be |
2664 | * invalid and needs to be redone. | ||
2665 | */ | ||
2442 | ret = get_empty_nodes(tb, h); | 2666 | ret = get_empty_nodes(tb, h); |
2443 | if (ret != CARRY_ON) | 2667 | if (ret != CARRY_ON) |
2444 | goto repeat; | 2668 | goto repeat; |
2445 | 2669 | ||
2670 | /* | ||
2671 | * We have a positive insert size but no nodes exist on this | ||
2672 | * level, this means that we are creating a new root. | ||
2673 | */ | ||
2446 | if (!PATH_H_PBUFFER(tb->tb_path, h)) { | 2674 | if (!PATH_H_PBUFFER(tb->tb_path, h)) { |
2447 | /* We have a positive insert size but no nodes exist on this | ||
2448 | level, this means that we are creating a new root. */ | ||
2449 | 2675 | ||
2450 | RFALSE(tb->blknum[h] != 1, | 2676 | RFALSE(tb->blknum[h] != 1, |
2451 | "PAP-8350: creating new empty root"); | 2677 | "PAP-8350: creating new empty root"); |
@@ -2453,11 +2679,13 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2453 | if (h < MAX_HEIGHT - 1) | 2679 | if (h < MAX_HEIGHT - 1) |
2454 | tb->insert_size[h + 1] = 0; | 2680 | tb->insert_size[h + 1] = 0; |
2455 | } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { | 2681 | } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { |
2682 | /* | ||
2683 | * The tree needs to be grown, so this node S[h] | ||
2684 | * which is the root node is split into two nodes, | ||
2685 | * and a new node (S[h+1]) will be created to | ||
2686 | * become the root node. | ||
2687 | */ | ||
2456 | if (tb->blknum[h] > 1) { | 2688 | if (tb->blknum[h] > 1) { |
2457 | /* The tree needs to be grown, so this node S[h] | ||
2458 | which is the root node is split into two nodes, | ||
2459 | and a new node (S[h+1]) will be created to | ||
2460 | become the root node. */ | ||
2461 | 2689 | ||
2462 | RFALSE(h == MAX_HEIGHT - 1, | 2690 | RFALSE(h == MAX_HEIGHT - 1, |
2463 | "PAP-8355: attempt to create too high of a tree"); | 2691 | "PAP-8355: attempt to create too high of a tree"); |
@@ -2487,12 +2715,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2487 | goto repeat; | 2715 | goto repeat; |
2488 | } | 2716 | } |
2489 | 2717 | ||
2490 | repeat: | 2718 | repeat: |
2491 | // fix_nodes was unable to perform its calculation because the | 2719 | /*
2492 | // filesystem changed under us, we ran out of free disk space, or an | 2720 | * fix_nodes was unable to perform its calculation because the
2493 | // i/o failure occurred. If the first is the case, the search will be | 2721 | * filesystem changed under us, we ran out of free disk space, or an
2494 | // repeated. For now, free all resources acquired so far except | 2722 | * i/o failure occurred. If the first is the case, the search will be
2495 | // for the newly allocated nodes | 2723 | * repeated. For now, free all resources acquired so far except
2724 | * for the newly allocated nodes
2725 | */ | ||
2496 | { | 2726 | { |
2497 | int i; | 2727 | int i; |
2498 | 2728 | ||
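REPEAT_SEARCH is only meaningful against the calling convention; a simplified sketch of a typical fix_nodes() caller (hypothetical shape, details vary by operation):

	do {
		/* (re)run the tree search to rebuild tb->tb_path */
		research_path(tb);                /* hypothetical helper */
		ret = fix_nodes(op_mode, tb, ih, body);
	} while (ret == REPEAT_SEARCH);

	if (ret == CARRY_ON)
		do_balance(tb, ih, body, op_mode); /* consumes the resources */
	else
		unfix_nodes(tb);  /* NO_DISK_SPACE or I/O error: release all */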
@@ -2548,8 +2778,6 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2548 | 2778 | ||
2549 | } | 2779 | } |
2550 | 2780 | ||
2551 | /* Anatoly will probably forgive me renaming tb to tb. I just | ||
2552 | wanted to make lines shorter */ | ||
2553 | void unfix_nodes(struct tree_balance *tb) | 2781 | void unfix_nodes(struct tree_balance *tb) |
2554 | { | 2782 | { |
2555 | int i; | 2783 | int i; |
@@ -2578,8 +2806,10 @@ void unfix_nodes(struct tree_balance *tb) | |||
2578 | for (i = 0; i < MAX_FEB_SIZE; i++) { | 2806 | for (i = 0; i < MAX_FEB_SIZE; i++) { |
2579 | if (tb->FEB[i]) { | 2807 | if (tb->FEB[i]) { |
2580 | b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; | 2808 | b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; |
2581 | /* de-allocate the block that was not used by balancing and | 2809 | /*
2582 | bforget the buffer for it */ | 2810 | * de-allocate the block that was not used by
2811 | * balancing and bforget the buffer for it
2812 | */ | ||
2583 | brelse(tb->FEB[i]); | 2813 | brelse(tb->FEB[i]); |
2584 | reiserfs_free_block(tb->transaction_handle, NULL, | 2814 | reiserfs_free_block(tb->transaction_handle, NULL, |
2585 | blocknr, 0); | 2815 | blocknr, 0); |
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c index 91b0cc1242a2..7a26c4fe6c46 100644 --- a/fs/reiserfs/hashes.c +++ b/fs/reiserfs/hashes.c | |||
@@ -12,12 +12,6 @@ | |||
12 | * Yura's function is added (04/07/2000) | 12 | * Yura's function is added (04/07/2000) |
13 | */ | 13 | */ |
14 | 14 | ||
15 | // | ||
16 | // keyed_hash | ||
17 | // yura_hash | ||
18 | // r5_hash | ||
19 | // | ||
20 | |||
21 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
22 | #include "reiserfs.h" | 16 | #include "reiserfs.h" |
23 | #include <asm/types.h> | 17 | #include <asm/types.h> |
@@ -56,7 +50,7 @@ u32 keyed_hash(const signed char *msg, int len) | |||
56 | u32 pad; | 50 | u32 pad; |
57 | int i; | 51 | int i; |
58 | 52 | ||
59 | // assert(len >= 0 && len < 256); | 53 | /* assert(len >= 0 && len < 256); */ |
60 | 54 | ||
61 | pad = (u32) len | ((u32) len << 8); | 55 | pad = (u32) len | ((u32) len << 8); |
62 | pad |= pad << 16; | 56 | pad |= pad << 16; |
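The two pad lines simply replicate the length byte into all four byte lanes; for example, with len = 5 (the commented-out assert suggests len is expected to fit in a byte):

	/* pad = 0x05 | 0x0500 = 0x0505;  pad |= pad << 16  ->  0x05050505 */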
@@ -127,9 +121,10 @@ u32 keyed_hash(const signed char *msg, int len) | |||
127 | return h0 ^ h1; | 121 | return h0 ^ h1; |
128 | } | 122 | } |
129 | 123 | ||
130 | /* What follows in this file is copyright 2000 by Hans Reiser, and the | 124 | /* |
131 | * licensing of what follows is governed by reiserfs/README */ | 125 | * What follows in this file is copyright 2000 by Hans Reiser, and the |
132 | 126 | * licensing of what follows is governed by reiserfs/README | |
127 | */ | ||
133 | u32 yura_hash(const signed char *msg, int len) | 128 | u32 yura_hash(const signed char *msg, int len) |
134 | { | 129 | { |
135 | int j, pow; | 130 | int j, pow; |
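For context alongside keyed_hash and yura_hash, the third function named in the deleted comment block is r5_hash; a sketch of its commonly seen form (quoted from memory, so treat as illustrative - note it walks to the NUL and ignores len):

	u32 r5_hash(const signed char *msg, int len)
	{
		u32 a = 0;

		while (*msg) {
			a += *msg << 4;   /* fold in high and low nibbles */
			a += *msg >> 4;
			a *= 11;
			msg++;
		}
		return a;
	}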
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c index e1978fd895f5..73231b1ebdbe 100644 --- a/fs/reiserfs/ibalance.c +++ b/fs/reiserfs/ibalance.c | |||
@@ -12,7 +12,10 @@ | |||
12 | int balance_internal(struct tree_balance *, | 12 | int balance_internal(struct tree_balance *, |
13 | int, int, struct item_head *, struct buffer_head **); | 13 | int, int, struct item_head *, struct buffer_head **); |
14 | 14 | ||
15 | /* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ | 15 | /* |
16 | * modes of internal_shift_left, internal_shift_right and | ||
17 | * internal_insert_childs | ||
18 | */ | ||
16 | #define INTERNAL_SHIFT_FROM_S_TO_L 0 | 19 | #define INTERNAL_SHIFT_FROM_S_TO_L 0 |
17 | #define INTERNAL_SHIFT_FROM_R_TO_S 1 | 20 | #define INTERNAL_SHIFT_FROM_R_TO_S 1 |
18 | #define INTERNAL_SHIFT_FROM_L_TO_S 2 | 21 | #define INTERNAL_SHIFT_FROM_L_TO_S 2 |
@@ -32,7 +35,9 @@ static void internal_define_dest_src_infos(int shift_mode, | |||
32 | memset(src_bi, 0, sizeof(struct buffer_info)); | 35 | memset(src_bi, 0, sizeof(struct buffer_info)); |
33 | /* define dest, src, dest parent, dest position */ | 36 | /* define dest, src, dest parent, dest position */ |
34 | switch (shift_mode) { | 37 | switch (shift_mode) { |
35 | case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ | 38 | |
39 | /* used in internal_shift_left */ | ||
40 | case INTERNAL_SHIFT_FROM_S_TO_L: | ||
36 | src_bi->tb = tb; | 41 | src_bi->tb = tb; |
37 | src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); | 42 | src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); |
38 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); | 43 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); |
@@ -52,12 +57,14 @@ static void internal_define_dest_src_infos(int shift_mode, | |||
52 | dest_bi->tb = tb; | 57 | dest_bi->tb = tb; |
53 | dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); | 58 | dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); |
54 | dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); | 59 | dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); |
55 | dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ | 60 | /* dest position is analog of dest->b_item_order */ |
61 | dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); | ||
56 | *d_key = tb->lkey[h]; | 62 | *d_key = tb->lkey[h]; |
57 | *cf = tb->CFL[h]; | 63 | *cf = tb->CFL[h]; |
58 | break; | 64 | break; |
59 | 65 | ||
60 | case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ | 66 | /* used in internal_shift_left */ |
67 | case INTERNAL_SHIFT_FROM_R_TO_S: | ||
61 | src_bi->tb = tb; | 68 | src_bi->tb = tb; |
62 | src_bi->bi_bh = tb->R[h]; | 69 | src_bi->bi_bh = tb->R[h]; |
63 | src_bi->bi_parent = tb->FR[h]; | 70 | src_bi->bi_parent = tb->FR[h]; |
@@ -111,7 +118,8 @@ static void internal_define_dest_src_infos(int shift_mode, | |||
111 | } | 118 | } |
112 | } | 119 | } |
113 | 120 | ||
114 | /* Insert count node pointers into buffer cur before position to + 1. | 121 | /* |
122 | * Insert count node pointers into buffer cur before position to + 1. | ||
115 | * Insert count items into buffer cur before position to. | 123 | * Insert count items into buffer cur before position to. |
116 | * Items and node pointers are specified by inserted and bh respectively. | 124 | * Items and node pointers are specified by inserted and bh respectively. |
117 | */ | 125 | */ |
@@ -146,14 +154,14 @@ static void internal_insert_childs(struct buffer_info *cur_bi, | |||
146 | 154 | ||
147 | /* copy to_be_insert disk children */ | 155 | /* copy to_be_insert disk children */ |
148 | for (i = 0; i < count; i++) { | 156 | for (i = 0; i < count; i++) { |
149 | put_dc_size(&(new_dc[i]), | 157 | put_dc_size(&new_dc[i], |
150 | MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); | 158 | MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); |
151 | put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr); | 159 | put_dc_block_number(&new_dc[i], bh[i]->b_blocknr); |
152 | } | 160 | } |
153 | memcpy(dc, new_dc, DC_SIZE * count); | 161 | memcpy(dc, new_dc, DC_SIZE * count); |
154 | 162 | ||
155 | /* prepare space for count items */ | 163 | /* prepare space for count items */ |
156 | ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to)); | 164 | ih = internal_key(cur, ((to == -1) ? 0 : to)); |
157 | 165 | ||
158 | memmove(ih + count, ih, | 166 | memmove(ih + count, ih, |
159 | (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); | 167 | (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); |
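Note that the memmove above moves more than the tail keys: an internal node lays out nr keys followed by nr + 1 disk_child pointers, and the child array was already grown by count a few lines earlier, so the enlarged dc array has to ride along behind the shifted keys. A worked sketch of the span, assuming KEY_SIZE == 16 and DC_SIZE == 8 as defined in reiserfs.h:

#include <stdio.h>

#define KEY_SIZE 16	/* assumed size of struct reiserfs_key */
#define DC_SIZE   8	/* assumed size of struct disk_child */

static long span(int nr, int to, int count)
{
	return (long)(nr - to) * KEY_SIZE	   /* keys after position to */
	     + (long)(nr + 1 + count) * DC_SIZE;   /* already-grown child array */
}

int main(void)
{
	/* e.g. 20 keys, one new key/pointer pair inserted before position 5 */
	printf("%ld bytes\n", span(20, 5, 1));	/* 416 bytes */
	return 0;
}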
@@ -190,8 +198,10 @@ static void internal_insert_childs(struct buffer_info *cur_bi, | |||
190 | 198 | ||
191 | } | 199 | } |
192 | 200 | ||
193 | /* Delete del_num items and node pointers from buffer cur starting from * | 201 | /* |
194 | * the first_i'th item and first_p'th pointers respectively. */ | 202 | * Delete del_num items and node pointers from buffer cur starting from |
203 | * the first_i'th item and first_p'th pointers respectively. | ||
204 | */ | ||
195 | static void internal_delete_pointers_items(struct buffer_info *cur_bi, | 205 | static void internal_delete_pointers_items(struct buffer_info *cur_bi, |
196 | int first_p, | 206 | int first_p, |
197 | int first_i, int del_num) | 207 | int first_i, int del_num) |
@@ -233,7 +243,7 @@ static void internal_delete_pointers_items(struct buffer_info *cur_bi, | |||
233 | dc = B_N_CHILD(cur, first_p); | 243 | dc = B_N_CHILD(cur, first_p); |
234 | 244 | ||
235 | memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); | 245 | memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); |
236 | key = B_N_PDELIM_KEY(cur, first_i); | 246 | key = internal_key(cur, first_i); |
237 | memmove(key, key + del_num, | 247 | memmove(key, key + del_num, |
238 | (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - | 248 | (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - |
239 | del_num) * DC_SIZE); | 249 | del_num) * DC_SIZE); |
@@ -270,22 +280,30 @@ static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n) | |||
270 | 280 | ||
271 | i_from = (from == 0) ? from : from - 1; | 281 | i_from = (from == 0) ? from : from - 1; |
272 | 282 | ||
273 | /* delete n pointers starting from `from' position in CUR; | 283 | /* |
274 | delete n keys starting from 'i_from' position in CUR; | 284 | * delete n pointers starting from `from' position in CUR; |
285 | * delete n keys starting from 'i_from' position in CUR; | ||
275 | */ | 286 | */ |
276 | internal_delete_pointers_items(cur_bi, from, i_from, n); | 287 | internal_delete_pointers_items(cur_bi, from, i_from, n); |
277 | } | 288 | } |
278 | 289 | ||
279 | /* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest | 290 | /* |
280 | * last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest | 291 | * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer |
281 | * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest | 292 | * dest |
293 | * last_first == FIRST_TO_LAST means that we copy first items | ||
294 | * from src to tail of dest | ||
295 | * last_first == LAST_TO_FIRST means that we copy last items | ||
296 | * from src to head of dest | ||
282 | */ | 297 | */ |
283 | static void internal_copy_pointers_items(struct buffer_info *dest_bi, | 298 | static void internal_copy_pointers_items(struct buffer_info *dest_bi, |
284 | struct buffer_head *src, | 299 | struct buffer_head *src, |
285 | int last_first, int cpy_num) | 300 | int last_first, int cpy_num) |
286 | { | 301 | { |
287 | /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * | 302 | /* |
288 | * as delimiting key have already inserted to buffer dest.*/ | 303 | * ATTENTION! The number of node pointers in DEST equals the number |
304 | * of items in DEST, as the delimiting key has already been | ||
305 | * inserted into buffer dest. | ||
306 | */ | ||
289 | struct buffer_head *dest = dest_bi->bi_bh; | 307 | struct buffer_head *dest = dest_bi->bi_bh; |
290 | int nr_dest, nr_src; | 308 | int nr_dest, nr_src; |
291 | int dest_order, src_order; | 309 | int dest_order, src_order; |
@@ -330,13 +348,13 @@ static void internal_copy_pointers_items(struct buffer_info *dest_bi, | |||
330 | memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num); | 348 | memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num); |
331 | 349 | ||
332 | /* prepare space for cpy_num - 1 item headers */ | 350 | /* prepare space for cpy_num - 1 item headers */ |
333 | key = B_N_PDELIM_KEY(dest, dest_order); | 351 | key = internal_key(dest, dest_order); |
334 | memmove(key + cpy_num - 1, key, | 352 | memmove(key + cpy_num - 1, key, |
335 | KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + | 353 | KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + |
336 | cpy_num)); | 354 | cpy_num)); |
337 | 355 | ||
338 | /* insert headers */ | 356 | /* insert headers */ |
339 | memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1)); | 357 | memcpy(key, internal_key(src, src_order), KEY_SIZE * (cpy_num - 1)); |
340 | 358 | ||
341 | /* sizes, item number */ | 359 | /* sizes, item number */ |
342 | set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1)); | 360 | set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1)); |
@@ -366,7 +384,9 @@ static void internal_copy_pointers_items(struct buffer_info *dest_bi, | |||
366 | 384 | ||
367 | } | 385 | } |
368 | 386 | ||
369 | /* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. | 387 | /* |
388 | * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to | ||
389 | * buffer dest. | ||
370 | * Delete cpy_num - del_par items and node pointers from buffer src. | 390 | * Delete cpy_num - del_par items and node pointers from buffer src. |
371 | * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. | 391 | * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. |
372 | * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. | 392 | * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. |
@@ -385,8 +405,10 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi, | |||
385 | if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ | 405 | if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ |
386 | first_pointer = 0; | 406 | first_pointer = 0; |
387 | first_item = 0; | 407 | first_item = 0; |
388 | /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, | 408 | /* |
389 | for key - with first_item */ | 409 | * delete cpy_num - del_par pointers and keys, starting at |
410 | * first_pointer for pointers and at first_item for keys | ||
411 | */ | ||
390 | internal_delete_pointers_items(src_bi, first_pointer, | 412 | internal_delete_pointers_items(src_bi, first_pointer, |
391 | first_item, cpy_num - del_par); | 413 | first_item, cpy_num - del_par); |
392 | } else { /* shift_right occurs */ | 414 | } else { /* shift_right occurs */ |
@@ -404,7 +426,9 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi, | |||
404 | } | 426 | } |
405 | 427 | ||
406 | /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ | 428 | /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ |
407 | static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */ | 429 | static void internal_insert_key(struct buffer_info *dest_bi, |
430 | /* insert key before key with n_dest number */ | ||
431 | int dest_position_before, | ||
408 | struct buffer_head *src, int src_position) | 432 | struct buffer_head *src, int src_position) |
409 | { | 433 | { |
410 | struct buffer_head *dest = dest_bi->bi_bh; | 434 | struct buffer_head *dest = dest_bi->bi_bh; |
@@ -429,12 +453,12 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b | |||
429 | nr = blkh_nr_item(blkh); | 453 | nr = blkh_nr_item(blkh); |
430 | 454 | ||
431 | /* prepare space for inserting key */ | 455 | /* prepare space for inserting key */ |
432 | key = B_N_PDELIM_KEY(dest, dest_position_before); | 456 | key = internal_key(dest, dest_position_before); |
433 | memmove(key + 1, key, | 457 | memmove(key + 1, key, |
434 | (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); | 458 | (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); |
435 | 459 | ||
436 | /* insert key */ | 460 | /* insert key */ |
437 | memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); | 461 | memcpy(key, internal_key(src, src_position), KEY_SIZE); |
438 | 462 | ||
439 | /* Change dirt, free space, item number fields. */ | 463 | /* Change dirt, free space, item number fields. */ |
440 | 464 | ||
@@ -453,13 +477,19 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b | |||
453 | } | 477 | } |
454 | } | 478 | } |
455 | 479 | ||
456 | /* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. | 480 | /* |
457 | * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. | 481 | * Insert d_key'th (delimiting) key from buffer cfl to tail of dest. |
482 | * Copy pointer_amount node pointers and pointer_amount - 1 items from | ||
483 | * buffer src to buffer dest. | ||
458 | * Replace d_key'th key in buffer cfl. | 484 | * Replace d_key'th key in buffer cfl. |
459 | * Delete pointer_amount items and node pointers from buffer src. | 485 | * Delete pointer_amount items and node pointers from buffer src. |
460 | */ | 486 | */ |
461 | /* this can be invoked both to shift from S to L and from R to S */ | 487 | /* this can be invoked both to shift from S to L and from R to S */ |
462 | static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ | 488 | static void internal_shift_left( |
489 | /* | ||
490 | * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S | ||
491 | */ | ||
492 | int mode, | ||
463 | struct tree_balance *tb, | 493 | struct tree_balance *tb, |
464 | int h, int pointer_amount) | 494 | int h, int pointer_amount) |
465 | { | 495 | { |
@@ -473,7 +503,10 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO | |||
473 | /*printk("pointer_amount = %d\n",pointer_amount); */ | 503 | /*printk("pointer_amount = %d\n",pointer_amount); */ |
474 | 504 | ||
475 | if (pointer_amount) { | 505 | if (pointer_amount) { |
476 | /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ | 506 | /* |
507 | * insert delimiting key from common father of dest and | ||
508 | * src to node dest into position B_NR_ITEM(dest) | ||
509 | */ | ||
477 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, | 510 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, |
478 | d_key_position); | 511 | d_key_position); |
479 | 512 | ||
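The shift itself is the classic B-tree rotation: the delimiting key from the common parent drops into the receiving node, and a key from the donor replaces it in the parent. A deliberately simplified toy model (single key, no child pointers; hypothetical helper, not reiserfs code):

#include <stdio.h>

static void toy_shift_left_one(int *left, int *n_left, int *delim,
			       int *right, int *n_right)
{
	int i;

	left[(*n_left)++] = *delim;	/* parent's delimiter drops down-left */
	*delim = right[0];		/* donor's first key moves up */
	for (i = 0; i < *n_right - 1; i++)
		right[i] = right[i + 1];	/* compact the donor */
	(*n_right)--;
}

int main(void)
{
	int left[8] = { 10, 20 }, right[8] = { 40, 50, 60 };
	int nl = 2, nr = 3, delim = 30;

	toy_shift_left_one(left, &nl, &delim, right, &nr);
	printf("delim=%d nl=%d nr=%d left[2]=%d\n", delim, nl, nr, left[2]);
	/* delim=40 nl=3 nr=2 left[2]=30 */
	return 0;
}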
@@ -492,7 +525,8 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO | |||
492 | 525 | ||
493 | } | 526 | } |
494 | 527 | ||
495 | /* Insert delimiting key to L[h]. | 528 | /* |
529 | * Insert delimiting key to L[h]. | ||
496 | * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. | 530 | * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. |
497 | * Delete n - 1 items and node pointers from buffer S[h]. | 531 | * Delete n - 1 items and node pointers from buffer S[h]. |
498 | */ | 532 | */ |
@@ -507,23 +541,27 @@ static void internal_shift1_left(struct tree_balance *tb, | |||
507 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, | 541 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, |
508 | &dest_bi, &src_bi, &d_key_position, &cf); | 542 | &dest_bi, &src_bi, &d_key_position, &cf); |
509 | 543 | ||
510 | if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ | 544 | /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ |
545 | if (pointer_amount > 0) | ||
511 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, | 546 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, |
512 | d_key_position); | 547 | d_key_position); |
513 | /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */ | ||
514 | 548 | ||
515 | /* last parameter is del_parameter */ | 549 | /* last parameter is del_parameter */ |
516 | internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, | 550 | internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, |
517 | pointer_amount, 1); | 551 | pointer_amount, 1); |
518 | /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ | ||
519 | } | 552 | } |
520 | 553 | ||
521 | /* Insert d_key'th (delimiting) key from buffer cfr to head of dest. | 554 | /* |
555 | * Insert d_key'th (delimiting) key from buffer cfr to head of dest. | ||
522 | * Copy n node pointers and n - 1 items from buffer src to buffer dest. | 556 | * Copy n node pointers and n - 1 items from buffer src to buffer dest. |
523 | * Replace d_key'th key in buffer cfr. | 557 | * Replace d_key'th key in buffer cfr. |
524 | * Delete n items and node pointers from buffer src. | 558 | * Delete n items and node pointers from buffer src. |
525 | */ | 559 | */ |
526 | static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ | 560 | static void internal_shift_right( |
561 | /* | ||
562 | * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S | ||
563 | */ | ||
564 | int mode, | ||
527 | struct tree_balance *tb, | 565 | struct tree_balance *tb, |
528 | int h, int pointer_amount) | 566 | int h, int pointer_amount) |
529 | { | 567 | { |
@@ -538,7 +576,10 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR | |||
538 | nr = B_NR_ITEMS(src_bi.bi_bh); | 576 | nr = B_NR_ITEMS(src_bi.bi_bh); |
539 | 577 | ||
540 | if (pointer_amount > 0) { | 578 | if (pointer_amount > 0) { |
541 | /* insert delimiting key from common father of dest and src to dest node into position 0 */ | 579 | /* |
580 | * insert delimiting key from common father of dest | ||
581 | * and src to dest node into position 0 | ||
582 | */ | ||
542 | internal_insert_key(&dest_bi, 0, cf, d_key_position); | 583 | internal_insert_key(&dest_bi, 0, cf, d_key_position); |
543 | if (nr == pointer_amount - 1) { | 584 | if (nr == pointer_amount - 1) { |
544 | RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || | 585 | RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || |
@@ -559,7 +600,8 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR | |||
559 | pointer_amount, 0); | 600 | pointer_amount, 0); |
560 | } | 601 | } |
561 | 602 | ||
562 | /* Insert delimiting key to R[h]. | 603 | /* |
604 | * Insert delimiting key to R[h]. | ||
563 | * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. | 605 | * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. |
564 | * Delete n - 1 items and node pointers from buffer S[h]. | 606 | * Delete n - 1 items and node pointers from buffer S[h]. |
565 | */ | 607 | */ |
@@ -574,18 +616,19 @@ static void internal_shift1_right(struct tree_balance *tb, | |||
574 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, | 616 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, |
575 | &dest_bi, &src_bi, &d_key_position, &cf); | 617 | &dest_bi, &src_bi, &d_key_position, &cf); |
576 | 618 | ||
577 | if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ | 619 | /* insert rkey from CFR[h] to right neighbor R[h] */ |
620 | if (pointer_amount > 0) | ||
578 | internal_insert_key(&dest_bi, 0, cf, d_key_position); | 621 | internal_insert_key(&dest_bi, 0, cf, d_key_position); |
579 | /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */ | ||
580 | 622 | ||
581 | /* last parameter is del_parameter */ | 623 | /* last parameter is del_parameter */ |
582 | internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, | 624 | internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, |
583 | pointer_amount, 1); | 625 | pointer_amount, 1); |
584 | /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */ | ||
585 | } | 626 | } |
586 | 627 | ||
587 | /* Delete insert_num node pointers together with their left items | 628 | /* |
588 | * and balance current node.*/ | 629 | * Delete insert_num node pointers together with their left items |
630 | * and balance current node. | ||
631 | */ | ||
589 | static void balance_internal_when_delete(struct tree_balance *tb, | 632 | static void balance_internal_when_delete(struct tree_balance *tb, |
590 | int h, int child_pos) | 633 | int h, int child_pos) |
591 | { | 634 | { |
@@ -626,9 +669,11 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
626 | new_root = tb->R[h - 1]; | 669 | new_root = tb->R[h - 1]; |
627 | else | 670 | else |
628 | new_root = tb->L[h - 1]; | 671 | new_root = tb->L[h - 1]; |
629 | /* switch super block's tree root block number to the new value */ | 672 | /* |
673 | * switch super block's tree root block | ||
674 | * number to the new value */ | ||
630 | PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); | 675 | PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); |
631 | //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; | 676 | /*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */ |
632 | PUT_SB_TREE_HEIGHT(tb->tb_sb, | 677 | PUT_SB_TREE_HEIGHT(tb->tb_sb, |
633 | SB_TREE_HEIGHT(tb->tb_sb) - 1); | 678 | SB_TREE_HEIGHT(tb->tb_sb) - 1); |
634 | 679 | ||
@@ -636,8 +681,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
636 | REISERFS_SB(tb->tb_sb)->s_sbh, | 681 | REISERFS_SB(tb->tb_sb)->s_sbh, |
637 | 1); | 682 | 1); |
638 | /*&&&&&&&&&&&&&&&&&&&&&& */ | 683 | /*&&&&&&&&&&&&&&&&&&&&&& */ |
684 | /* use check_internal if new root is an internal node */ | ||
639 | if (h > 1) | 685 | if (h > 1) |
640 | /* use check_internal if new root is an internal node */ | ||
641 | check_internal(new_root); | 686 | check_internal(new_root); |
642 | /*&&&&&&&&&&&&&&&&&&&&&& */ | 687 | /*&&&&&&&&&&&&&&&&&&&&&& */ |
643 | 688 | ||
@@ -648,7 +693,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
648 | return; | 693 | return; |
649 | } | 694 | } |
650 | 695 | ||
651 | if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */ | 696 | /* join S[h] with L[h] */ |
697 | if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { | ||
652 | 698 | ||
653 | RFALSE(tb->rnum[h] != 0, | 699 | RFALSE(tb->rnum[h] != 0, |
654 | "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", | 700 | "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", |
@@ -660,7 +706,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
660 | return; | 706 | return; |
661 | } | 707 | } |
662 | 708 | ||
663 | if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */ | 709 | /* join S[h] with R[h] */ |
710 | if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { | ||
664 | RFALSE(tb->lnum[h] != 0, | 711 | RFALSE(tb->lnum[h] != 0, |
665 | "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", | 712 | "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", |
666 | h, tb->lnum[h]); | 713 | h, tb->lnum[h]); |
@@ -671,17 +718,18 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
671 | return; | 718 | return; |
672 | } | 719 | } |
673 | 720 | ||
674 | if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */ | 721 | /* borrow from left neighbor L[h] */ |
722 | if (tb->lnum[h] < 0) { | ||
675 | RFALSE(tb->rnum[h] != 0, | 723 | RFALSE(tb->rnum[h] != 0, |
676 | "wrong tb->rnum[%d]==%d when borrow from L[h]", h, | 724 | "wrong tb->rnum[%d]==%d when borrow from L[h]", h, |
677 | tb->rnum[h]); | 725 | tb->rnum[h]); |
678 | /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */ | ||
679 | internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, | 726 | internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, |
680 | -tb->lnum[h]); | 727 | -tb->lnum[h]); |
681 | return; | 728 | return; |
682 | } | 729 | } |
683 | 730 | ||
684 | if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */ | 731 | /* borrow from right neighbor R[h] */ |
732 | if (tb->rnum[h] < 0) { | ||
685 | RFALSE(tb->lnum[h] != 0, | 733 | RFALSE(tb->lnum[h] != 0, |
686 | "invalid tb->lnum[%d]==%d when borrow from R[h]", | 734 | "invalid tb->lnum[%d]==%d when borrow from R[h]", |
687 | h, tb->lnum[h]); | 735 | h, tb->lnum[h]); |
@@ -689,7 +737,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
689 | return; | 737 | return; |
690 | } | 738 | } |
691 | 739 | ||
692 | if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */ | 740 | /* split S[h] into two parts and put them into neighbors */ |
741 | if (tb->lnum[h] > 0) { | ||
693 | RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, | 742 | RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, |
694 | "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", | 743 | "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", |
695 | h, tb->lnum[h], h, tb->rnum[h], n); | 744 | h, tb->lnum[h], h, tb->rnum[h], n); |
@@ -717,7 +766,7 @@ static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key) | |||
717 | if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) | 766 | if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) |
718 | return; | 767 | return; |
719 | 768 | ||
720 | memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE); | 769 | memcpy(internal_key(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE); |
721 | 770 | ||
722 | do_balance_mark_internal_dirty(tb, tb->CFL[h], 0); | 771 | do_balance_mark_internal_dirty(tb, tb->CFL[h], 0); |
723 | } | 772 | } |
@@ -732,34 +781,41 @@ static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key) | |||
732 | "R[h] can not be empty if it exists (item number=%d)", | 781 | "R[h] can not be empty if it exists (item number=%d)", |
733 | B_NR_ITEMS(tb->R[h])); | 782 | B_NR_ITEMS(tb->R[h])); |
734 | 783 | ||
735 | memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE); | 784 | memcpy(internal_key(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE); |
736 | 785 | ||
737 | do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); | 786 | do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); |
738 | } | 787 | } |
739 | 788 | ||
740 | int balance_internal(struct tree_balance *tb, /* tree_balance structure */ | 789 | |
741 | int h, /* level of the tree */ | 790 | /* |
742 | int child_pos, struct item_head *insert_key, /* key for insertion on higher level */ | 791 | * if inserting/pasting { |
743 | struct buffer_head **insert_ptr /* node for insertion on higher level */ | 792 | * child_pos is the position of the node-pointer in S[h] that |
744 | ) | 793 | * pointed to S[h-1] before balancing of the h-1 level; |
745 | /* if inserting/pasting | 794 | * this means that new pointers and items must be inserted AFTER |
746 | { | 795 | * child_pos |
747 | child_pos is the position of the node-pointer in S[h] that * | 796 | * } else { |
748 | pointed to S[h-1] before balancing of the h-1 level; * | 797 | * it is the position of the leftmost pointer that must be deleted |
749 | this means that new pointers and items must be inserted AFTER * | 798 | * (together with its corresponding key to the left of the pointer) |
750 | child_pos | 799 | * as a result of the previous level's balancing. |
751 | } | 800 | * } |
752 | else | 801 | */ |
753 | { | 802 | |
754 | it is the position of the leftmost pointer that must be deleted (together with | 803 | int balance_internal(struct tree_balance *tb, |
755 | its corresponding key to the left of the pointer) | 804 | int h, /* level of the tree */ |
756 | as a result of the previous level's balancing. | 805 | int child_pos, |
757 | } | 806 | /* key for insertion on higher level */ |
758 | */ | 807 | struct item_head *insert_key, |
808 | /* node for insertion on higher level */ | ||
809 | struct buffer_head **insert_ptr) | ||
759 | { | 810 | { |
760 | struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); | 811 | struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); |
761 | struct buffer_info bi; | 812 | struct buffer_info bi; |
762 | int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ | 813 | |
814 | /* | ||
815 | * we return this: it is 0 if there is no S[h], | ||
816 | * else it is tb->S[h]->b_item_order | ||
817 | */ | ||
818 | int order; | ||
763 | int insert_num, n, k; | 819 | int insert_num, n, k; |
764 | struct buffer_head *S_new; | 820 | struct buffer_head *S_new; |
765 | struct item_head new_insert_key; | 821 | struct item_head new_insert_key; |
@@ -774,8 +830,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
774 | (tbSh) ? PATH_H_POSITION(tb->tb_path, | 830 | (tbSh) ? PATH_H_POSITION(tb->tb_path, |
775 | h + 1) /*tb->S[h]->b_item_order */ : 0; | 831 | h + 1) /*tb->S[h]->b_item_order */ : 0; |
776 | 832 | ||
777 | /* Using insert_size[h] calculate the number insert_num of items | 833 | /* |
778 | that must be inserted to or deleted from S[h]. */ | 834 | * Using insert_size[h] calculate the number insert_num of items |
835 | * that must be inserted to or deleted from S[h]. | ||
836 | */ | ||
779 | insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); | 837 | insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); |
780 | 838 | ||
781 | /* Check whether insert_num is proper * */ | 839 | /* Check whether insert_num is proper * */ |
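Each inserted key/pointer pair costs exactly KEY_SIZE + DC_SIZE bytes of insert_size[h], so the division above yields the item count directly (and comes out negative for deletions). A worked check, again assuming KEY_SIZE == 16 and DC_SIZE == 8 from reiserfs.h:

#include <stdio.h>

#define KEY_SIZE 16	/* assumed */
#define DC_SIZE   8	/* assumed */

int main(void)
{
	int insert_size = 2 * (KEY_SIZE + DC_SIZE);	/* two key/pointer pairs */

	printf("insert_num = %d\n", insert_size / (KEY_SIZE + DC_SIZE));	/* 2 */
	return 0;
}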
@@ -794,23 +852,21 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
794 | 852 | ||
795 | k = 0; | 853 | k = 0; |
796 | if (tb->lnum[h] > 0) { | 854 | if (tb->lnum[h] > 0) { |
797 | /* shift lnum[h] items from S[h] to the left neighbor L[h]. | 855 | /* |
798 | check how many of new items fall into L[h] or CFL[h] after | 856 | * shift lnum[h] items from S[h] to the left neighbor L[h]. |
799 | shifting */ | 857 | * check how many of the new items fall into L[h] or CFL[h] after |
858 | * shifting | ||
859 | */ | ||
800 | n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ | 860 | n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ |
801 | if (tb->lnum[h] <= child_pos) { | 861 | if (tb->lnum[h] <= child_pos) { |
802 | /* new items don't fall into L[h] or CFL[h] */ | 862 | /* new items don't fall into L[h] or CFL[h] */ |
803 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, | 863 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, |
804 | tb->lnum[h]); | 864 | tb->lnum[h]); |
805 | /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */ | ||
806 | child_pos -= tb->lnum[h]; | 865 | child_pos -= tb->lnum[h]; |
807 | } else if (tb->lnum[h] > child_pos + insert_num) { | 866 | } else if (tb->lnum[h] > child_pos + insert_num) { |
808 | /* all new items fall into L[h] */ | 867 | /* all new items fall into L[h] */ |
809 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, | 868 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, |
810 | tb->lnum[h] - insert_num); | 869 | tb->lnum[h] - insert_num); |
811 | /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, | ||
812 | tb->lnum[h]-insert_num); | ||
813 | */ | ||
814 | /* insert insert_num keys and node-pointers into L[h] */ | 870 | /* insert insert_num keys and node-pointers into L[h] */ |
815 | bi.tb = tb; | 871 | bi.tb = tb; |
816 | bi.bi_bh = tb->L[h]; | 872 | bi.bi_bh = tb->L[h]; |
@@ -826,7 +882,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
826 | } else { | 882 | } else { |
827 | struct disk_child *dc; | 883 | struct disk_child *dc; |
828 | 884 | ||
829 | /* some items fall into L[h] or CFL[h], but some don't fall */ | 885 | /* |
886 | * some items fall into L[h] or CFL[h], | ||
887 | * but some don't fall | ||
888 | */ | ||
830 | internal_shift1_left(tb, h, child_pos + 1); | 889 | internal_shift1_left(tb, h, child_pos + 1); |
831 | /* calculate number of new items that fall into L[h] */ | 890 | /* calculate number of new items that fall into L[h] */ |
832 | k = tb->lnum[h] - child_pos - 1; | 891 | k = tb->lnum[h] - child_pos - 1; |
@@ -841,7 +900,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
841 | 900 | ||
842 | replace_lkey(tb, h, insert_key + k); | 901 | replace_lkey(tb, h, insert_key + k); |
843 | 902 | ||
844 | /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ | 903 | /* |
904 | * replace the first node-ptr in S[h] by | ||
905 | * node-ptr to insert_ptr[k] | ||
906 | */ | ||
845 | dc = B_N_CHILD(tbSh, 0); | 907 | dc = B_N_CHILD(tbSh, 0); |
846 | put_dc_size(dc, | 908 | put_dc_size(dc, |
847 | MAX_CHILD_SIZE(insert_ptr[k]) - | 909 | MAX_CHILD_SIZE(insert_ptr[k]) - |
@@ -860,17 +922,17 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
860 | /* tb->lnum[h] > 0 */ | 922 | /* tb->lnum[h] > 0 */ |
861 | if (tb->rnum[h] > 0) { | 923 | if (tb->rnum[h] > 0) { |
862 | /*shift rnum[h] items from S[h] to the right neighbor R[h] */ | 924 | /*shift rnum[h] items from S[h] to the right neighbor R[h] */ |
863 | /* check how many of new items fall into R or CFR after shifting */ | 925 | /* |
926 | * check how many of the new items fall into R or CFR | ||
927 | * after shifting | ||
928 | */ | ||
864 | n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ | 929 | n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ |
865 | if (n - tb->rnum[h] >= child_pos) | 930 | if (n - tb->rnum[h] >= child_pos) |
866 | /* new items fall into S[h] */ | 931 | /* new items fall into S[h] */ |
867 | /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */ | ||
868 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, | 932 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, |
869 | tb->rnum[h]); | 933 | tb->rnum[h]); |
870 | else if (n + insert_num - tb->rnum[h] < child_pos) { | 934 | else if (n + insert_num - tb->rnum[h] < child_pos) { |
871 | /* all new items fall into R[h] */ | 935 | /* all new items fall into R[h] */ |
872 | /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], | ||
873 | tb->rnum[h] - insert_num); */ | ||
874 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, | 936 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, |
875 | tb->rnum[h] - insert_num); | 937 | tb->rnum[h] - insert_num); |
876 | 938 | ||
@@ -904,7 +966,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
904 | 966 | ||
905 | replace_rkey(tb, h, insert_key + insert_num - k - 1); | 967 | replace_rkey(tb, h, insert_key + insert_num - k - 1); |
906 | 968 | ||
907 | /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */ | 969 | /* |
970 | * replace the first node-ptr in R[h] by | ||
971 | * node-ptr insert_ptr[insert_num-k-1] | ||
972 | */ | ||
908 | dc = B_N_CHILD(tb->R[h], 0); | 973 | dc = B_N_CHILD(tb->R[h], 0); |
909 | put_dc_size(dc, | 974 | put_dc_size(dc, |
910 | MAX_CHILD_SIZE(insert_ptr | 975 | MAX_CHILD_SIZE(insert_ptr |
@@ -921,7 +986,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
921 | } | 986 | } |
922 | } | 987 | } |
923 | 988 | ||
924 | /** Fill new node that appears instead of S[h] **/ | 989 | /* Fill new node that appears instead of S[h] */ |
925 | RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); | 990 | RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); |
926 | RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); | 991 | RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); |
927 | 992 | ||
@@ -997,26 +1062,30 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
997 | /* new items don't fall into S_new */ | 1062 | /* new items don't fall into S_new */ |
998 | /* store the delimiting key for the next level */ | 1063 | /* store the delimiting key for the next level */ |
999 | /* new_insert_key = (n - snum)'th key in S[h] */ | 1064 | /* new_insert_key = (n - snum)'th key in S[h] */ |
1000 | memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum), | 1065 | memcpy(&new_insert_key, internal_key(tbSh, n - snum), |
1001 | KEY_SIZE); | 1066 | KEY_SIZE); |
1002 | /* last parameter is del_par */ | 1067 | /* last parameter is del_par */ |
1003 | internal_move_pointers_items(&dest_bi, &src_bi, | 1068 | internal_move_pointers_items(&dest_bi, &src_bi, |
1004 | LAST_TO_FIRST, snum, 0); | 1069 | LAST_TO_FIRST, snum, 0); |
1005 | /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */ | ||
1006 | } else if (n + insert_num - snum < child_pos) { | 1070 | } else if (n + insert_num - snum < child_pos) { |
1007 | /* all new items fall into S_new */ | 1071 | /* all new items fall into S_new */ |
1008 | /* store the delimiting key for the next level */ | 1072 | /* store the delimiting key for the next level */ |
1009 | /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ | 1073 | /* |
1074 | * new_insert_key = (n + insert_item - snum)'th | ||
1075 | * key in S[h] | ||
1076 | */ | ||
1010 | memcpy(&new_insert_key, | 1077 | memcpy(&new_insert_key, |
1011 | B_N_PDELIM_KEY(tbSh, n + insert_num - snum), | 1078 | internal_key(tbSh, n + insert_num - snum), |
1012 | KEY_SIZE); | 1079 | KEY_SIZE); |
1013 | /* last parameter is del_par */ | 1080 | /* last parameter is del_par */ |
1014 | internal_move_pointers_items(&dest_bi, &src_bi, | 1081 | internal_move_pointers_items(&dest_bi, &src_bi, |
1015 | LAST_TO_FIRST, | 1082 | LAST_TO_FIRST, |
1016 | snum - insert_num, 0); | 1083 | snum - insert_num, 0); |
1017 | /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */ | ||
1018 | 1084 | ||
1019 | /* insert insert_num keys and node-pointers into S_new */ | 1085 | /* |
1086 | * insert insert_num keys and node-pointers | ||
1087 | * into S_new | ||
1088 | */ | ||
1020 | internal_insert_childs(&dest_bi, | 1089 | internal_insert_childs(&dest_bi, |
1021 | /*S_new,tb->S[h-1]->b_next, */ | 1090 | /*S_new,tb->S[h-1]->b_next, */ |
1022 | child_pos - n - insert_num + | 1091 | child_pos - n - insert_num + |
@@ -1033,7 +1102,6 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1033 | internal_move_pointers_items(&dest_bi, &src_bi, | 1102 | internal_move_pointers_items(&dest_bi, &src_bi, |
1034 | LAST_TO_FIRST, | 1103 | LAST_TO_FIRST, |
1035 | n - child_pos + 1, 1); | 1104 | n - child_pos + 1, 1); |
1036 | /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */ | ||
1037 | /* calculate number of new items that fall into S_new */ | 1105 | /* calculate number of new items that fall into S_new */ |
1038 | k = snum - n + child_pos - 1; | 1106 | k = snum - n + child_pos - 1; |
1039 | 1107 | ||
@@ -1043,7 +1111,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1043 | /* new_insert_key = insert_key[insert_num - k - 1] */ | 1111 | /* new_insert_key = insert_key[insert_num - k - 1] */ |
1044 | memcpy(&new_insert_key, insert_key + insert_num - k - 1, | 1112 | memcpy(&new_insert_key, insert_key + insert_num - k - 1, |
1045 | KEY_SIZE); | 1113 | KEY_SIZE); |
1046 | /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ | 1114 | /* |
1115 | * replace first node-ptr in S_new by node-ptr | ||
1116 | * to insert_ptr[insert_num-k-1] | ||
1117 | */ | ||
1047 | 1118 | ||
1048 | dc = B_N_CHILD(S_new, 0); | 1119 | dc = B_N_CHILD(S_new, 0); |
1049 | put_dc_size(dc, | 1120 | put_dc_size(dc, |
@@ -1066,7 +1137,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1066 | || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", | 1137 | || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", |
1067 | S_new); | 1138 | S_new); |
1068 | 1139 | ||
1069 | // S_new is released in unfix_nodes | 1140 | /* S_new is released in unfix_nodes */ |
1070 | } | 1141 | } |
1071 | 1142 | ||
1072 | n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ | 1143 | n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ |
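Most of the churn in ibalance.c is the same CodingStyle comment conversion applied across this series; schematically, the shapes being normalized are:

// before: C++-style single-line comment
/* before: multi-line text continued on the next line *
 * with a star banner closing each line               */

/* after: a single-line comment stays on one line */
/*
 * after: a multi-line block opens with a bare slash-star and closes
 * with the star-slash on its own line
 */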
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index bc8b8009897d..e3ca04894919 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -25,7 +25,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
25 | 25 | ||
26 | void reiserfs_evict_inode(struct inode *inode) | 26 | void reiserfs_evict_inode(struct inode *inode) |
27 | { | 27 | { |
28 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ | 28 | /* |
29 | * We need blocks for transaction + (user+group) quota | ||
30 | * update (possibly delete) | ||
31 | */ | ||
29 | int jbegin_count = | 32 | int jbegin_count = |
30 | JOURNAL_PER_BALANCE_CNT * 2 + | 33 | JOURNAL_PER_BALANCE_CNT * 2 + |
31 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); | 34 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); |
@@ -39,8 +42,12 @@ void reiserfs_evict_inode(struct inode *inode) | |||
39 | if (inode->i_nlink) | 42 | if (inode->i_nlink) |
40 | goto no_delete; | 43 | goto no_delete; |
41 | 44 | ||
42 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. | 45 | /* |
43 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ | 46 | * The object id == 0 case happens when we abort creating a new |
47 | * inode for some reason, such as lack of space. | ||
48 | * This also handles the bad_inode case. | ||
49 | */ | ||
50 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { | ||
44 | 51 | ||
45 | reiserfs_delete_xattrs(inode); | 52 | reiserfs_delete_xattrs(inode); |
46 | 53 | ||
@@ -54,34 +61,43 @@ void reiserfs_evict_inode(struct inode *inode) | |||
54 | 61 | ||
55 | err = reiserfs_delete_object(&th, inode); | 62 | err = reiserfs_delete_object(&th, inode); |
56 | 63 | ||
57 | /* Do quota update inside a transaction for journaled quotas. We must do that | 64 | /* |
58 | * after delete_object so that quota updates go into the same transaction as | 65 | * Do quota update inside a transaction for journaled quotas. |
59 | * stat data deletion */ | 66 | * We must do that after delete_object so that quota updates |
67 | * go into the same transaction as stat data deletion | ||
68 | */ | ||
60 | if (!err) { | 69 | if (!err) { |
61 | int depth = reiserfs_write_unlock_nested(inode->i_sb); | 70 | int depth = reiserfs_write_unlock_nested(inode->i_sb); |
62 | dquot_free_inode(inode); | 71 | dquot_free_inode(inode); |
63 | reiserfs_write_lock_nested(inode->i_sb, depth); | 72 | reiserfs_write_lock_nested(inode->i_sb, depth); |
64 | } | 73 | } |
65 | 74 | ||
66 | if (journal_end(&th, inode->i_sb, jbegin_count)) | 75 | if (journal_end(&th)) |
67 | goto out; | 76 | goto out; |
68 | 77 | ||
69 | /* check return value from reiserfs_delete_object after | 78 | /* |
79 | * check return value from reiserfs_delete_object after | ||
70 | * ending the transaction | 80 | * ending the transaction |
71 | */ | 81 | */ |
72 | if (err) | 82 | if (err) |
73 | goto out; | 83 | goto out; |
74 | 84 | ||
75 | /* all items of file are deleted, so we can remove "save" link */ | 85 | /* |
76 | remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything | 86 | * all items of file are deleted, so we can remove |
77 | * about an error here */ | 87 | * "save" link |
88 | * we can't do anything about an error here | ||
89 | */ | ||
90 | remove_save_link(inode, 0 /* not truncate */); | ||
78 | out: | 91 | out: |
79 | reiserfs_write_unlock(inode->i_sb); | 92 | reiserfs_write_unlock(inode->i_sb); |
80 | } else { | 93 | } else { |
81 | /* no object items are in the tree */ | 94 | /* no object items are in the tree */ |
82 | ; | 95 | ; |
83 | } | 96 | } |
84 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ | 97 | |
98 | /* note this must go after the journal_end to prevent deadlock */ | ||
99 | clear_inode(inode); | ||
100 | |||
85 | dquot_drop(inode); | 101 | dquot_drop(inode); |
86 | inode->i_blocks = 0; | 102 | inode->i_blocks = 0; |
87 | return; | 103 | return; |
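Besides reflowing comments, the hunk above switches journal_end() to its new handle-only calling convention and keeps two ordering rules intact: the quota update must land in the same transaction as the stat-data deletion, and clear_inode() may only run after the transaction ends. A userspace toy of that ordering (stub names, not kernel code):

#include <stdio.h>

static void journal_begin(void) { puts("begin transaction"); }
static void delete_object(void) { puts("delete stat data and items"); }
static void quota_free(void)    { puts("quota update (same transaction)"); }
static void journal_end(void)   { puts("end transaction"); }
static void clear_inode_(void)  { puts("clear inode"); }

int main(void)
{
	journal_begin();
	delete_object();
	quota_free();	/* must share the deletion's transaction */
	journal_end();	/* new convention: takes only the handle */
	clear_inode_();	/* only after journal_end, to prevent deadlock */
	return 0;
}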
@@ -103,8 +119,10 @@ static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, | |||
103 | key->key_length = length; | 119 | key->key_length = length; |
104 | } | 120 | } |
105 | 121 | ||
106 | /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set | 122 | /* |
107 | offset and type of key */ | 123 | * take base of inode_key (it comes from inode always) (dirid, objectid) |
124 | * and version from an inode, set offset and type of key | ||
125 | */ | ||
108 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, | 126 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, |
109 | int type, int length) | 127 | int type, int length) |
110 | { | 128 | { |
@@ -114,9 +132,7 @@ void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, | |||
114 | length); | 132 | length); |
115 | } | 133 | } |
116 | 134 | ||
117 | // | 135 | /* when key is 0, do not set version and short key */ |
118 | // when key is 0, do not set version and short key | ||
119 | // | ||
120 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | 136 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, |
121 | int version, | 137 | int version, |
122 | loff_t offset, int type, int length, | 138 | loff_t offset, int type, int length, |
@@ -132,43 +148,47 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | |||
132 | set_le_ih_k_type(ih, type); | 148 | set_le_ih_k_type(ih, type); |
133 | put_ih_item_len(ih, length); | 149 | put_ih_item_len(ih, length); |
134 | /* set_ih_free_space (ih, 0); */ | 150 | /* set_ih_free_space (ih, 0); */ |
135 | // for directory items it is entry count, for directs and stat | 151 | /* |
136 | // datas - 0xffff, for indirects - 0 | 152 | * for directory items it is entry count, for directs and stat |
153 | * datas - 0xffff, for indirects - 0 | ||
154 | */ | ||
137 | put_ih_entry_count(ih, entry_count); | 155 | put_ih_entry_count(ih, entry_count); |
138 | } | 156 | } |
139 | 157 | ||
140 | // | 158 | /* |
141 | // FIXME: we might cache recently accessed indirect item | 159 | * FIXME: we might cache recently accessed indirect item |
142 | 160 | * Ugh. Not too eager for that.... | |
143 | // Ugh. Not too eager for that.... | 161 | * I cut the code until such time as I see a convincing argument (benchmark). |
144 | // I cut the code until such time as I see a convincing argument (benchmark). | 162 | * I don't want a bloated inode struct..., and I don't like code complexity.... |
145 | // I don't want a bloated inode struct..., and I don't like code complexity.... | 163 | */ |
146 | |||
147 | /* cutting the code is fine, since it really isn't in use yet and is easy | ||
148 | ** to add back in. But, Vladimir has a really good idea here. Think | ||
149 | ** about what happens for reading a file. For each page, | ||
150 | ** The VFS layer calls reiserfs_readpage, who searches the tree to find | ||
151 | ** an indirect item. This indirect item has X number of pointers, where | ||
152 | ** X is a big number if we've done the block allocation right. But, | ||
153 | ** we only use one or two of these pointers during each call to readpage, | ||
154 | ** needlessly researching again later on. | ||
155 | ** | ||
156 | ** The size of the cache could be dynamic based on the size of the file. | ||
157 | ** | ||
158 | ** I'd also like to see us cache the location the stat data item, since | ||
159 | ** we are needlessly researching for that frequently. | ||
160 | ** | ||
161 | ** --chris | ||
162 | */ | ||
163 | 164 | ||
164 | /* If this page has a file tail in it, and | 165 | /* |
165 | ** it was read in by get_block_create_0, the page data is valid, | 166 | * cutting the code is fine, since it really isn't in use yet and is easy |
166 | ** but tail is still sitting in a direct item, and we can't write to | 167 | * to add back in. But, Vladimir has a really good idea here. Think |
167 | ** it. So, look through this page, and check all the mapped buffers | 168 | * about what happens for reading a file. For each page, |
168 | ** to make sure they have valid block numbers. Any that don't need | 169 | * The VFS layer calls reiserfs_readpage, who searches the tree to find |
169 | ** to be unmapped, so that __block_write_begin will correctly call | 170 | * an indirect item. This indirect item has X number of pointers, where |
170 | ** reiserfs_get_block to convert the tail into an unformatted node | 171 | * X is a big number if we've done the block allocation right. But, |
171 | */ | 172 | * we only use one or two of these pointers during each call to readpage, |
173 | * needlessly researching again later on. | ||
174 | * | ||
175 | * The size of the cache could be dynamic based on the size of the file. | ||
176 | * | ||
177 | * I'd also like to see us cache the location the stat data item, since | ||
178 | * we are needlessly researching for that frequently. | ||
179 | * | ||
180 | * --chris | ||
181 | */ | ||
182 | |||
183 | /* | ||
184 | * If this page has a file tail in it, and | ||
185 | * it was read in by get_block_create_0, the page data is valid, | ||
186 | * but tail is still sitting in a direct item, and we can't write to | ||
187 | * it. So, look through this page, and check all the mapped buffers | ||
188 | * to make sure they have valid block numbers. Any that don't need | ||
189 | * to be unmapped, so that __block_write_begin will correctly call | ||
190 | * reiserfs_get_block to convert the tail into an unformatted node | ||
191 | */ | ||
172 | static inline void fix_tail_page_for_writing(struct page *page) | 192 | static inline void fix_tail_page_for_writing(struct page *page) |
173 | { | 193 | { |
174 | struct buffer_head *head, *next, *bh; | 194 | struct buffer_head *head, *next, *bh; |
@@ -186,8 +206,10 @@ static inline void fix_tail_page_for_writing(struct page *page) | |||
186 | } | 206 | } |
187 | } | 207 | } |
188 | 208 | ||
189 | /* reiserfs_get_block does not need to allocate a block only if it has been | 209 | /* |
190 | done already or non-hole position has been found in the indirect item */ | 210 | * reiserfs_get_block does not need to allocate a block only if it has been |
211 | * done already or non-hole position has been found in the indirect item | ||
212 | */ | ||
191 | static inline int allocation_needed(int retval, b_blocknr_t allocated, | 213 | static inline int allocation_needed(int retval, b_blocknr_t allocated, |
192 | struct item_head *ih, | 214 | struct item_head *ih, |
193 | __le32 * item, int pos_in_item) | 215 | __le32 * item, int pos_in_item) |
@@ -211,14 +233,16 @@ static inline void set_block_dev_mapped(struct buffer_head *bh, | |||
211 | map_bh(bh, inode->i_sb, block); | 233 | map_bh(bh, inode->i_sb, block); |
212 | } | 234 | } |
213 | 235 | ||
214 | // | 236 | /* |
215 | // files which were created in the earlier version can not be longer, | 237 | * files which were created in the earlier version cannot be |
216 | // than 2 gb | 238 | * longer than 2 GB |
217 | // | 239 | */ |
218 | static int file_capable(struct inode *inode, sector_t block) | 240 | static int file_capable(struct inode *inode, sector_t block) |
219 | { | 241 | { |
220 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. | 242 | /* it is a new file */ |
221 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb | 243 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || |
244 | /* old file, but 'block' is inside of 2 GB */ | ||
245 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) | ||
222 | return 1; | 246 | return 1; |
223 | 247 | ||
224 | return 0; | 248 | return 0; |
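The cap works out as advertised: with 4 KiB blocks s_blocksize_bits is 12, so the check admits block numbers below 1 << 19 == 524288, which is exactly 2 GiB of file body. A quick check (userspace, block size assumed):

#include <stdio.h>

int main(void)
{
	int blocksize_bits = 12;	/* assumed 4096-byte blocks */
	long long blocks = 1LL << (31 - blocksize_bits);

	printf("%lld blocks * %d bytes = %lld bytes\n",
	       blocks, 1 << blocksize_bits, blocks << blocksize_bits);
	/* 524288 blocks * 4096 bytes = 2147483648 bytes */
	return 0;
}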
@@ -228,7 +252,6 @@ static int restart_transaction(struct reiserfs_transaction_handle *th, | |||
228 | struct inode *inode, struct treepath *path) | 252 | struct inode *inode, struct treepath *path) |
229 | { | 253 | { |
230 | struct super_block *s = th->t_super; | 254 | struct super_block *s = th->t_super; |
231 | int len = th->t_blocks_allocated; | ||
232 | int err; | 255 | int err; |
233 | 256 | ||
234 | BUG_ON(!th->t_trans_id); | 257 | BUG_ON(!th->t_trans_id); |
@@ -241,7 +264,7 @@ static int restart_transaction(struct reiserfs_transaction_handle *th, | |||
241 | return 0; | 264 | return 0; |
242 | } | 265 | } |
243 | reiserfs_update_sd(th, inode); | 266 | reiserfs_update_sd(th, inode); |
244 | err = journal_end(th, s, len); | 267 | err = journal_end(th); |
245 | if (!err) { | 268 | if (!err) { |
246 | err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); | 269 | err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); |
247 | if (!err) | 270 | if (!err) |
@@ -250,14 +273,14 @@ static int restart_transaction(struct reiserfs_transaction_handle *th, | |||
250 | return err; | 273 | return err; |
251 | } | 274 | } |
252 | 275 | ||
253 | // it is called by get_block when create == 0. Returns block number | 276 | /* |
254 | // for 'block'-th logical block of file. When it hits direct item it | 277 | * called by get_block when create == 0. Returns the block number |
255 | // returns 0 (being called from bmap) or read direct item into piece | 278 | * of the 'block'-th logical block of the file. When it hits a direct |
256 | // of page (bh_result) | 279 | * item it returns 0 (when called from bmap) or reads the direct item |
257 | 280 | * into a piece of the page (bh_result) | |
258 | // Please improve the english/clarity in the comment above, as it is | 281 | * Please improve the english/clarity in the comment above, as it is |
259 | // hard to understand. | 282 | * hard to understand. |
260 | 283 | */ | |
261 | static int _get_block_create_0(struct inode *inode, sector_t block, | 284 | static int _get_block_create_0(struct inode *inode, sector_t block, |
262 | struct buffer_head *bh_result, int args) | 285 | struct buffer_head *bh_result, int args) |
263 | { | 286 | { |
@@ -273,7 +296,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
273 | int done = 0; | 296 | int done = 0; |
274 | unsigned long offset; | 297 | unsigned long offset; |
275 | 298 | ||
276 | // prepare the key to look for the 'block'-th block of file | 299 | /* prepare the key to look for the 'block'-th block of file */ |
277 | make_cpu_key(&key, inode, | 300 | make_cpu_key(&key, inode, |
278 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, | 301 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, |
279 | 3); | 302 | 3); |
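The + 1 in the offset above is because reiserfs item keys count file-body bytes starting from 1 (offset 0 belongs to the stat data), so logical block N starts at N * blocksize + 1. A worked value (assuming 4096-byte blocks):

#include <stdio.h>

int main(void)
{
	long long block = 3, blocksize = 4096;	/* assumed block size */

	printf("key offset = %lld\n", block * blocksize + 1);	/* 12289 */
	return 0;
}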
@@ -285,23 +308,28 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
285 | kunmap(bh_result->b_page); | 308 | kunmap(bh_result->b_page); |
286 | if (result == IO_ERROR) | 309 | if (result == IO_ERROR) |
287 | return -EIO; | 310 | return -EIO; |
288 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means | 311 | /* |
289 | // That there is some MMAPED data associated with it that is yet to be written to disk. | 312 | * We do not return -ENOENT if there is a hole but page is |
313 | * uptodate, because it means that there is some MMAPED data | ||
314 | * associated with it that is yet to be written to disk. | ||
315 | */ | ||
290 | if ((args & GET_BLOCK_NO_HOLE) | 316 | if ((args & GET_BLOCK_NO_HOLE) |
291 | && !PageUptodate(bh_result->b_page)) { | 317 | && !PageUptodate(bh_result->b_page)) { |
292 | return -ENOENT; | 318 | return -ENOENT; |
293 | } | 319 | } |
294 | return 0; | 320 | return 0; |
295 | } | 321 | } |
296 | // | 322 | |
297 | bh = get_last_bh(&path); | 323 | bh = get_last_bh(&path); |
298 | ih = get_ih(&path); | 324 | ih = tp_item_head(&path); |
299 | if (is_indirect_le_ih(ih)) { | 325 | if (is_indirect_le_ih(ih)) { |
300 | __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); | 326 | __le32 *ind_item = (__le32 *) ih_item_body(bh, ih); |
301 | 327 | ||
302 | /* FIXME: here we could cache indirect item or part of it in | 328 | /* |
303 | the inode to avoid search_by_key in case of subsequent | 329 | * FIXME: here we could cache indirect item or part of it in |
304 | access to file */ | 330 | * the inode to avoid search_by_key in case of subsequent |
331 | * access to file | ||
332 | */ | ||
305 | blocknr = get_block_num(ind_item, path.pos_in_item); | 333 | blocknr = get_block_num(ind_item, path.pos_in_item); |
306 | ret = 0; | 334 | ret = 0; |
307 | if (blocknr) { | 335 | if (blocknr) { |
@@ -311,8 +339,12 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
311 | set_buffer_boundary(bh_result); | 339 | set_buffer_boundary(bh_result); |
312 | } | 340 | } |
313 | } else | 341 | } else |
314 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means | 342 | /* |
315 | // That there is some MMAPED data associated with it that is yet to be written to disk. | 343 | * We do not return -ENOENT if there is a hole but |
344 | * page is uptodate, because it means that there is | ||
345 | * some MMAPED data associated with it that is | ||
346 | * yet to be written to disk. | ||
347 | */ | ||
316 | if ((args & GET_BLOCK_NO_HOLE) | 348 | if ((args & GET_BLOCK_NO_HOLE) |
317 | && !PageUptodate(bh_result->b_page)) { | 349 | && !PageUptodate(bh_result->b_page)) { |
318 | ret = -ENOENT; | 350 | ret = -ENOENT; |
@@ -323,41 +355,45 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
323 | kunmap(bh_result->b_page); | 355 | kunmap(bh_result->b_page); |
324 | return ret; | 356 | return ret; |
325 | } | 357 | } |
326 | // requested data are in direct item(s) | 358 | /* requested data are in direct item(s) */ |
327 | if (!(args & GET_BLOCK_READ_DIRECT)) { | 359 | if (!(args & GET_BLOCK_READ_DIRECT)) { |
328 | // we are called by bmap. FIXME: we can not map block of file | 360 | /* |
329 | // when it is stored in direct item(s) | 361 | * we are called by bmap. FIXME: we can not map block of file |
362 | * when it is stored in direct item(s) | ||
363 | */ | ||
330 | pathrelse(&path); | 364 | pathrelse(&path); |
331 | if (p) | 365 | if (p) |
332 | kunmap(bh_result->b_page); | 366 | kunmap(bh_result->b_page); |
333 | return -ENOENT; | 367 | return -ENOENT; |
334 | } | 368 | } |
335 | 369 | ||
336 | /* if we've got a direct item, and the buffer or page was uptodate, | 370 | /* |
337 | ** we don't want to pull data off disk again. skip to the | 371 | * if we've got a direct item, and the buffer or page was uptodate, |
338 | ** end, where we map the buffer and return | 372 | * we don't want to pull data off disk again. skip to the |
373 | * end, where we map the buffer and return | ||
339 | */ | 374 | */ |
340 | if (buffer_uptodate(bh_result)) { | 375 | if (buffer_uptodate(bh_result)) { |
341 | goto finished; | 376 | goto finished; |
342 | } else | 377 | } else |
343 | /* | 378 | /* |
344 | ** grab_tail_page can trigger calls to reiserfs_get_block on up to date | 379 | * grab_tail_page can trigger calls to reiserfs_get_block on |
345 | ** pages without any buffers. If the page is up to date, we don't want | 380 | * up to date pages without any buffers. If the page is up |
346 | ** read old data off disk. Set the up to date bit on the buffer instead | 381 | * to date, we don't want read old data off disk. Set the up |
347 | ** and jump to the end | 382 | * to date bit on the buffer instead and jump to the end |
348 | */ | 383 | */ |
349 | if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { | 384 | if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { |
350 | set_buffer_uptodate(bh_result); | 385 | set_buffer_uptodate(bh_result); |
351 | goto finished; | 386 | goto finished; |
352 | } | 387 | } |
353 | // read file tail into part of page | 388 | /* read file tail into part of page */ |
354 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); | 389 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); |
355 | copy_item_head(&tmp_ih, ih); | 390 | copy_item_head(&tmp_ih, ih); |
356 | 391 | ||
357 | /* we only want to kmap if we are reading the tail into the page. | 392 | /* |
358 | ** this is not the common case, so we don't kmap until we are | 393 | * we only want to kmap if we are reading the tail into the page. |
359 | ** sure we need to. But, this means the item might move if | 394 | * this is not the common case, so we don't kmap until we are |
360 | ** kmap schedules | 395 | * sure we need to. But, this means the item might move if |
396 | * kmap schedules | ||
361 | */ | 397 | */ |
362 | if (!p) | 398 | if (!p) |
363 | p = (char *)kmap(bh_result->b_page); | 399 | p = (char *)kmap(bh_result->b_page); |
@@ -368,10 +404,11 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
368 | if (!is_direct_le_ih(ih)) { | 404 | if (!is_direct_le_ih(ih)) { |
369 | BUG(); | 405 | BUG(); |
370 | } | 406 | } |
371 | /* make sure we don't read more bytes than actually exist in | 407 | /* |
372 | ** the file. This can happen in odd cases where i_size isn't | 408 | * make sure we don't read more bytes than actually exist in |
373 | ** correct, and when direct item padding results in a few | 409 | * the file. This can happen in odd cases where i_size isn't |
374 | ** extra bytes at the end of the direct item | 410 | * correct, and when direct item padding results in a few |
411 | * extra bytes at the end of the direct item | ||
375 | */ | 412 | */ |
376 | if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) | 413 | if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) |
377 | break; | 414 | break; |
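
The break above enforces the first half of the bound described in the comment; the `chars` computation right after it enforces the second. A self-contained sketch of the clamping arithmetic, assuming reiserfs's 1-based key offsets (function and variable names are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Bytes of a direct item safe to copy from pos_in_item: never past
     * the end of the item, never past i_size (padding may add slack).
     */
    static uint64_t tail_copy_len(uint64_t ih_offset, uint64_t pos_in_item,
                                  uint64_t item_len, uint64_t i_size)
    {
        uint64_t file_pos = ih_offset - 1 + pos_in_item;  /* 0-based file position */

        if (file_pos >= i_size)
            return 0;                          /* nothing left before EOF */
        uint64_t to_eof  = i_size - file_pos;
        uint64_t to_item = item_len - pos_in_item;
        return to_eof < to_item ? to_eof : to_item;
    }

    int main(void)
    {
        /* item covers file bytes 4097.. (len 200), but the file ends at 4200 */
        printf("%llu\n",
               (unsigned long long)tail_copy_len(4097, 0, 200, 4200)); /* 104 */
        return 0;
    }
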
@@ -383,40 +420,43 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
383 | } else { | 420 | } else { |
384 | chars = ih_item_len(ih) - path.pos_in_item; | 421 | chars = ih_item_len(ih) - path.pos_in_item; |
385 | } | 422 | } |
386 | memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); | 423 | memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars); |
387 | 424 | ||
388 | if (done) | 425 | if (done) |
389 | break; | 426 | break; |
390 | 427 | ||
391 | p += chars; | 428 | p += chars; |
392 | 429 | ||
430 | /* | ||
431 | * we are done if the direct item we just read is not the | ||
432 | * last item of the node. FIXME: we could try to check the | ||
433 | * right delimiting key to see whether the direct item | ||
434 | * continues in the right neighbor, or rely on i_size | ||
435 | */ | ||
393 | if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) | 436 | if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) |
394 | // we done, if read direct item is not the last item of | ||
395 | // node FIXME: we could try to check right delimiting key | ||
396 | // to see whether direct item continues in the right | ||
397 | // neighbor or rely on i_size | ||
398 | break; | 437 | break; |
399 | 438 | ||
400 | // update key to look for the next piece | 439 | /* update key to look for the next piece */ |
401 | set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); | 440 | set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); |
402 | result = search_for_position_by_key(inode->i_sb, &key, &path); | 441 | result = search_for_position_by_key(inode->i_sb, &key, &path); |
403 | if (result != POSITION_FOUND) | 442 | if (result != POSITION_FOUND) |
404 | // i/o error most likely | 443 | /* i/o error most likely */ |
405 | break; | 444 | break; |
406 | bh = get_last_bh(&path); | 445 | bh = get_last_bh(&path); |
407 | ih = get_ih(&path); | 446 | ih = tp_item_head(&path); |
408 | } while (1); | 447 | } while (1); |
409 | 448 | ||
410 | flush_dcache_page(bh_result->b_page); | 449 | flush_dcache_page(bh_result->b_page); |
411 | kunmap(bh_result->b_page); | 450 | kunmap(bh_result->b_page); |
412 | 451 | ||
413 | finished: | 452 | finished: |
414 | pathrelse(&path); | 453 | pathrelse(&path); |
415 | 454 | ||
416 | if (result == IO_ERROR) | 455 | if (result == IO_ERROR) |
417 | return -EIO; | 456 | return -EIO; |
418 | 457 | ||
419 | /* this buffer has valid data, but isn't valid for io. mapping it to | 458 | /* |
459 | * this buffer has valid data, but isn't valid for io. mapping it to | ||
420 | * block #0 tells the rest of reiserfs it just has a tail in it | 460 | * block #0 tells the rest of reiserfs it just has a tail in it |
421 | */ | 461 | */ |
422 | map_bh(bh_result, inode->i_sb, 0); | 462 | map_bh(bh_result, inode->i_sb, 0); |
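
The map_bh() call to block 0 is a sentinel, and the direct-IO path later in this file tests for exactly this pair (mapped, b_blocknr == 0) to refuse O_DIRECT on tails. A sketch of the convention with a simplified stand-in for struct buffer_head (the struct and helpers are illustrative, not kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    /* simplified stand-in for struct buffer_head */
    struct bh_sketch {
        bool mapped;
        unsigned long blocknr;
    };

    /* "valid data, but not valid for i/o": the packed-tail marker */
    static void map_as_tail(struct bh_sketch *bh)
    {
        bh->mapped = true;
        bh->blocknr = 0;   /* block 0 is never real data, so it can flag tails */
    }

    static bool is_tail_buffer(const struct bh_sketch *bh)
    {
        return bh->mapped && bh->blocknr == 0;
    }

    int main(void)
    {
        struct bh_sketch bh = { false, 0 };
        map_as_tail(&bh);
        printf("%d\n", is_tail_buffer(&bh));    /* 1 */
        return 0;
    }
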
@@ -424,8 +464,10 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
424 | return 0; | 464 | return 0; |
425 | } | 465 | } |
426 | 466 | ||
427 | // this is called to create file map. So, _get_block_create_0 will not | 467 | /* |
428 | // read direct item | 468 | * this is called to create file map. So, _get_block_create_0 will not |
469 | * read direct item | ||
470 | */ | ||
429 | static int reiserfs_bmap(struct inode *inode, sector_t block, | 471 | static int reiserfs_bmap(struct inode *inode, sector_t block, |
430 | struct buffer_head *bh_result, int create) | 472 | struct buffer_head *bh_result, int create) |
431 | { | 473 | { |
@@ -439,22 +481,23 @@ static int reiserfs_bmap(struct inode *inode, sector_t block, | |||
439 | return 0; | 481 | return 0; |
440 | } | 482 | } |
441 | 483 | ||
442 | /* special version of get_block that is only used by grab_tail_page right | 484 | /* |
443 | ** now. It is sent to __block_write_begin, and when you try to get a | 485 | * special version of get_block that is only used by grab_tail_page right |
444 | ** block past the end of the file (or a block from a hole) it returns | 486 | * now. It is sent to __block_write_begin, and when you try to get a |
445 | ** -ENOENT instead of a valid buffer. __block_write_begin expects to | 487 | * block past the end of the file (or a block from a hole) it returns |
446 | ** be able to do i/o on the buffers returned, unless an error value | 488 | * -ENOENT instead of a valid buffer. __block_write_begin expects to |
447 | ** is also returned. | 489 | * be able to do i/o on the buffers returned, unless an error value |
448 | ** | 490 | * is also returned. |
449 | ** So, this allows __block_write_begin to be used for reading a single block | 491 | * |
450 | ** in a page. Where it does not produce a valid page for holes, or past the | 492 | * So, this allows __block_write_begin to be used for reading a single block |
451 | ** end of the file. This turns out to be exactly what we need for reading | 493 | * in a page. Where it does not produce a valid page for holes, or past the |
452 | ** tails for conversion. | 494 | * end of the file. This turns out to be exactly what we need for reading |
453 | ** | 495 | * tails for conversion. |
454 | ** The point of the wrapper is forcing a certain value for create, even | 496 | * |
455 | ** though the VFS layer is calling this function with create==1. If you | 497 | * The point of the wrapper is forcing a certain value for create, even |
456 | ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, | 498 | * though the VFS layer is calling this function with create==1. If you |
457 | ** don't use this function. | 499 | * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, |
500 | * don't use this function. | ||
458 | */ | 501 | */ |
459 | static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, | 502 | static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, |
460 | struct buffer_head *bh_result, | 503 | struct buffer_head *bh_result, |
@@ -463,8 +506,10 @@ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, | |||
463 | return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); | 506 | return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); |
464 | } | 507 | } |
465 | 508 | ||
466 | /* This is special helper for reiserfs_get_block in case we are executing | 509 | /* |
467 | direct_IO request. */ | 510 | * This is a special helper for reiserfs_get_block in case we are executing a |
511 | * direct_IO request. | ||
512 | */ | ||
468 | static int reiserfs_get_blocks_direct_io(struct inode *inode, | 513 | static int reiserfs_get_blocks_direct_io(struct inode *inode, |
469 | sector_t iblock, | 514 | sector_t iblock, |
470 | struct buffer_head *bh_result, | 515 | struct buffer_head *bh_result, |
@@ -474,9 +519,11 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
474 | 519 | ||
475 | bh_result->b_page = NULL; | 520 | bh_result->b_page = NULL; |
476 | 521 | ||
477 | /* We set the b_size before reiserfs_get_block call since it is | 522 | /* |
478 | referenced in convert_tail_for_hole() that may be called from | 523 | * We set the b_size before reiserfs_get_block call since it is |
479 | reiserfs_get_block() */ | 524 | * referenced in convert_tail_for_hole() that may be called from |
525 | * reiserfs_get_block() | ||
526 | */ | ||
480 | bh_result->b_size = (1 << inode->i_blkbits); | 527 | bh_result->b_size = (1 << inode->i_blkbits); |
481 | 528 | ||
482 | ret = reiserfs_get_block(inode, iblock, bh_result, | 529 | ret = reiserfs_get_block(inode, iblock, bh_result, |
@@ -486,14 +533,18 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
486 | 533 | ||
487 | /* don't allow direct io onto tail pages */ | 534 | /* don't allow direct io onto tail pages */ |
488 | if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | 535 | if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { |
489 | /* make sure future calls to the direct io funcs for this offset | 536 | /* |
490 | ** in the file fail by unmapping the buffer | 537 | * make sure future calls to the direct io funcs for this |
538 | * offset in the file fail by unmapping the buffer | ||
491 | */ | 539 | */ |
492 | clear_buffer_mapped(bh_result); | 540 | clear_buffer_mapped(bh_result); |
493 | ret = -EINVAL; | 541 | ret = -EINVAL; |
494 | } | 542 | } |
495 | /* Possible unpacked tail. Flush the data before pages have | 543 | |
496 | disappeared */ | 544 | /* |
545 | * Possible unpacked tail. Flush the data before pages have | ||
546 | * disappeared | ||
547 | */ | ||
497 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { | 548 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { |
498 | int err; | 549 | int err; |
499 | 550 | ||
@@ -507,20 +558,20 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
507 | if (err < 0) | 558 | if (err < 0) |
508 | ret = err; | 559 | ret = err; |
509 | } | 560 | } |
510 | out: | 561 | out: |
511 | return ret; | 562 | return ret; |
512 | } | 563 | } |
513 | 564 | ||
514 | /* | 565 | /* |
515 | ** helper function for when reiserfs_get_block is called for a hole | 566 | * helper function for when reiserfs_get_block is called for a hole |
516 | ** but the file tail is still in a direct item | 567 | * but the file tail is still in a direct item |
517 | ** bh_result is the buffer head for the hole | 568 | * bh_result is the buffer head for the hole |
518 | ** tail_offset is the offset of the start of the tail in the file | 569 | * tail_offset is the offset of the start of the tail in the file |
519 | ** | 570 | * |
520 | ** This calls prepare_write, which will start a new transaction | 571 | * This calls prepare_write, which will start a new transaction. You |
521 | ** you should not be in a transaction, or have any paths held when you | 572 | * should not be in a transaction, or have any paths held when you |
522 | ** call this. | 573 | * call this. |
523 | */ | 574 | */ |
524 | static int convert_tail_for_hole(struct inode *inode, | 575 | static int convert_tail_for_hole(struct inode *inode, |
525 | struct buffer_head *bh_result, | 576 | struct buffer_head *bh_result, |
526 | loff_t tail_offset) | 577 | loff_t tail_offset) |
@@ -540,9 +591,10 @@ static int convert_tail_for_hole(struct inode *inode, | |||
540 | tail_end = (tail_start | (bh_result->b_size - 1)) + 1; | 591 | tail_end = (tail_start | (bh_result->b_size - 1)) + 1; |
541 | 592 | ||
542 | index = tail_offset >> PAGE_CACHE_SHIFT; | 593 | index = tail_offset >> PAGE_CACHE_SHIFT; |
543 | /* hole_page can be zero in case of direct_io, we are sure | 594 | /* |
544 | that we cannot get here if we write with O_DIRECT into | 595 | * hole_page can be zero in case of direct_io; we are sure |
545 | tail page */ | 596 | * that we cannot get here if we write with O_DIRECT into tail page |
597 | */ | ||
546 | if (!hole_page || index != hole_page->index) { | 598 | if (!hole_page || index != hole_page->index) { |
547 | tail_page = grab_cache_page(inode->i_mapping, index); | 599 | tail_page = grab_cache_page(inode->i_mapping, index); |
548 | retval = -ENOMEM; | 600 | retval = -ENOMEM; |
@@ -553,14 +605,15 @@ static int convert_tail_for_hole(struct inode *inode, | |||
553 | tail_page = hole_page; | 605 | tail_page = hole_page; |
554 | } | 606 | } |
555 | 607 | ||
556 | /* we don't have to make sure the conversion did not happen while | 608 | /* |
557 | ** we were locking the page because anyone that could convert | 609 | * we don't have to make sure the conversion did not happen while |
558 | ** must first take i_mutex. | 610 | * we were locking the page because anyone that could convert |
559 | ** | 611 | * must first take i_mutex. |
560 | ** We must fix the tail page for writing because it might have buffers | 612 | * |
561 | ** that are mapped, but have a block number of 0. This indicates tail | 613 | * We must fix the tail page for writing because it might have buffers |
562 | ** data that has been read directly into the page, and | 614 | * that are mapped, but have a block number of 0. This indicates tail |
563 | ** __block_write_begin won't trigger a get_block in this case. | 615 | * data that has been read directly into the page, and |
616 | * __block_write_begin won't trigger a get_block in this case. | ||
564 | */ | 617 | */ |
565 | fix_tail_page_for_writing(tail_page); | 618 | fix_tail_page_for_writing(tail_page); |
566 | retval = __reiserfs_write_begin(tail_page, tail_start, | 619 | retval = __reiserfs_write_begin(tail_page, tail_start, |
@@ -573,12 +626,12 @@ static int convert_tail_for_hole(struct inode *inode, | |||
573 | 626 | ||
574 | retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); | 627 | retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); |
575 | 628 | ||
576 | unlock: | 629 | unlock: |
577 | if (tail_page != hole_page) { | 630 | if (tail_page != hole_page) { |
578 | unlock_page(tail_page); | 631 | unlock_page(tail_page); |
579 | page_cache_release(tail_page); | 632 | page_cache_release(tail_page); |
580 | } | 633 | } |
581 | out: | 634 | out: |
582 | return retval; | 635 | return retval; |
583 | } | 636 | } |
584 | 637 | ||
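
The index/start/end values at the top of convert_tail_for_hole() are all power-of-two masking on the tail's byte offset. A compilable sketch of the arithmetic, assuming 4096-byte pages and blocks (the shift and size names are local stand-ins for PAGE_CACHE_SHIFT/PAGE_CACHE_SIZE):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT_SK 12                     /* assumed: 4096-byte pages */
    #define PAGE_SIZE_SK  (1UL << PAGE_SHIFT_SK)

    int main(void)
    {
        uint64_t tail_offset = 123456;           /* file offset of the tail */
        uint64_t blocksize   = 4096;             /* assumed bh_result->b_size */

        uint64_t index      = tail_offset >> PAGE_SHIFT_SK;       /* page index */
        uint64_t tail_start = tail_offset & (PAGE_SIZE_SK - 1);   /* offset in page */
        uint64_t tail_end   = (tail_start | (blocksize - 1)) + 1; /* block end */

        printf("index=%llu start=%llu end=%llu\n",
               (unsigned long long)index,
               (unsigned long long)tail_start,
               (unsigned long long)tail_end);    /* index=30 start=576 end=4096 */
        return 0;
    }
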
@@ -604,7 +657,8 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
604 | struct buffer_head *bh_result, int create) | 657 | struct buffer_head *bh_result, int create) |
605 | { | 658 | { |
606 | int repeat, retval = 0; | 659 | int repeat, retval = 0; |
607 | b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int | 660 | /* b_blocknr_t is (unsigned) 32 bit int */ |
661 | b_blocknr_t allocated_block_nr = 0; | ||
608 | INITIALIZE_PATH(path); | 662 | INITIALIZE_PATH(path); |
609 | int pos_in_item; | 663 | int pos_in_item; |
610 | struct cpu_key key; | 664 | struct cpu_key key; |
@@ -614,12 +668,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
614 | int done; | 668 | int done; |
615 | int fs_gen; | 669 | int fs_gen; |
616 | struct reiserfs_transaction_handle *th = NULL; | 670 | struct reiserfs_transaction_handle *th = NULL; |
617 | /* space reserved in transaction batch: | 671 | /* |
618 | . 3 balancings in direct->indirect conversion | 672 | * space reserved in transaction batch: |
619 | . 1 block involved into reiserfs_update_sd() | 673 | * . 3 balancings in direct->indirect conversion |
620 | XXX in practically impossible worst case direct2indirect() | 674 | * . 1 block involved into reiserfs_update_sd() |
621 | can incur (much) more than 3 balancings. | 675 | * XXX in practically impossible worst case direct2indirect() |
622 | quota update for user, group */ | 676 | * can incur (much) more than 3 balancings. |
677 | * quota update for user, group | ||
678 | */ | ||
623 | int jbegin_count = | 679 | int jbegin_count = |
624 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + | 680 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + |
625 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); | 681 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); |
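
The reservation comment translates directly into the jbegin_count expression: three balancings, one stat-data block, and user plus group quota updates. A sketch of the arithmetic with placeholder constants (the real JOURNAL_PER_BALANCE_CNT and per-quota block counts come from reiserfs.h and are larger in practice):

    #include <stdio.h>

    /* placeholder values, illustration only */
    #define JOURNAL_PER_BALANCE_CNT 3
    #define QUOTA_TRANS_BLOCKS      2  /* stand-in for REISERFS_QUOTA_TRANS_BLOCKS(sb) */

    int main(void)
    {
        /* 3 balancings + 1 stat-data block + user and group quota updates */
        int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1
                         + 2 * QUOTA_TRANS_BLOCKS;
        printf("%d\n", jbegin_count);   /* 14 with these placeholder constants */
        return 0;
    }
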
@@ -636,8 +692,9 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
636 | return -EFBIG; | 692 | return -EFBIG; |
637 | } | 693 | } |
638 | 694 | ||
639 | /* if !create, we aren't changing the FS, so we don't need to | 695 | /* |
640 | ** log anything, so we don't need to start a transaction | 696 | * if !create, we aren't changing the FS, so we don't need to |
697 | * log anything, so we don't need to start a transaction | ||
641 | */ | 698 | */ |
642 | if (!(create & GET_BLOCK_CREATE)) { | 699 | if (!(create & GET_BLOCK_CREATE)) { |
643 | int ret; | 700 | int ret; |
@@ -647,6 +704,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
647 | reiserfs_write_unlock(inode->i_sb); | 704 | reiserfs_write_unlock(inode->i_sb); |
648 | return ret; | 705 | return ret; |
649 | } | 706 | } |
707 | |||
650 | /* | 708 | /* |
651 | * if we're already in a transaction, make sure to close | 709 | * if we're already in a transaction, make sure to close |
652 | * any new transactions we start in this func | 710 | * any new transactions we start in this func |
@@ -655,8 +713,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
655 | reiserfs_transaction_running(inode->i_sb)) | 713 | reiserfs_transaction_running(inode->i_sb)) |
656 | dangle = 0; | 714 | dangle = 0; |
657 | 715 | ||
658 | /* If file is of such a size, that it might have a tail and tails are enabled | 716 | /* |
659 | ** we should mark it as possibly needing tail packing on close | 717 | * If the file is of such a size that it might have a tail and |
718 | * tails are enabled, we should mark it as possibly needing | ||
719 | * tail packing on close | ||
660 | */ | 720 | */ |
661 | if ((have_large_tails(inode->i_sb) | 721 | if ((have_large_tails(inode->i_sb) |
662 | && inode->i_size < i_block_size(inode) * 4) | 722 | && inode->i_size < i_block_size(inode) * 4) |
@@ -667,7 +727,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
667 | /* set the key of the first byte in the 'block'-th block of file */ | 727 | /* set the key of the first byte in the 'block'-th block of file */ |
668 | make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); | 728 | make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); |
669 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { | 729 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { |
670 | start_trans: | 730 | start_trans: |
671 | th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); | 731 | th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); |
672 | if (!th) { | 732 | if (!th) { |
673 | retval = -ENOMEM; | 733 | retval = -ENOMEM; |
@@ -675,7 +735,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
675 | } | 735 | } |
676 | reiserfs_update_inode_transaction(inode); | 736 | reiserfs_update_inode_transaction(inode); |
677 | } | 737 | } |
678 | research: | 738 | research: |
679 | 739 | ||
680 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 740 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
681 | if (retval == IO_ERROR) { | 741 | if (retval == IO_ERROR) { |
@@ -684,8 +744,8 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
684 | } | 744 | } |
685 | 745 | ||
686 | bh = get_last_bh(&path); | 746 | bh = get_last_bh(&path); |
687 | ih = get_ih(&path); | 747 | ih = tp_item_head(&path); |
688 | item = get_item(&path); | 748 | item = tp_item_body(&path); |
689 | pos_in_item = path.pos_in_item; | 749 | pos_in_item = path.pos_in_item; |
690 | 750 | ||
691 | fs_gen = get_generation(inode->i_sb); | 751 | fs_gen = get_generation(inode->i_sb); |
@@ -703,11 +763,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
703 | _allocate_block(th, block, inode, &allocated_block_nr, | 763 | _allocate_block(th, block, inode, &allocated_block_nr, |
704 | &path, create); | 764 | &path, create); |
705 | 765 | ||
766 | /* | ||
767 | * restart the transaction to give the journal a chance to free | ||
768 | * some blocks. releases the path, so we have to go back to | ||
769 | * research if we succeed on the second try | ||
770 | */ | ||
706 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { | 771 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { |
707 | /* restart the transaction to give the journal a chance to free | ||
708 | ** some blocks. releases the path, so we have to go back to | ||
709 | ** research if we succeed on the second try | ||
710 | */ | ||
711 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; | 772 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; |
712 | retval = restart_transaction(th, inode, &path); | 773 | retval = restart_transaction(th, inode, &path); |
713 | if (retval) | 774 | if (retval) |
@@ -734,9 +795,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
734 | 795 | ||
735 | if (indirect_item_found(retval, ih)) { | 796 | if (indirect_item_found(retval, ih)) { |
736 | b_blocknr_t unfm_ptr; | 797 | b_blocknr_t unfm_ptr; |
737 | /* 'block'-th block is in the file already (there is | 798 | /* |
738 | corresponding cell in some indirect item). But it may be | 799 | * 'block'-th block is in the file already (there is |
739 | zero unformatted node pointer (hole) */ | 800 | * corresponding cell in some indirect item). But it may be |
801 | * zero unformatted node pointer (hole) | ||
802 | */ | ||
740 | unfm_ptr = get_block_num(item, pos_in_item); | 803 | unfm_ptr = get_block_num(item, pos_in_item); |
741 | if (unfm_ptr == 0) { | 804 | if (unfm_ptr == 0) { |
742 | /* use allocated block to plug the hole */ | 805 | /* use allocated block to plug the hole */ |
@@ -753,7 +816,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
753 | reiserfs_add_ordered_list(inode, bh_result); | 816 | reiserfs_add_ordered_list(inode, bh_result); |
754 | put_block_num(item, pos_in_item, allocated_block_nr); | 817 | put_block_num(item, pos_in_item, allocated_block_nr); |
755 | unfm_ptr = allocated_block_nr; | 818 | unfm_ptr = allocated_block_nr; |
756 | journal_mark_dirty(th, inode->i_sb, bh); | 819 | journal_mark_dirty(th, bh); |
757 | reiserfs_update_sd(th, inode); | 820 | reiserfs_update_sd(th, inode); |
758 | } | 821 | } |
759 | set_block_dev_mapped(bh_result, unfm_ptr, inode); | 822 | set_block_dev_mapped(bh_result, unfm_ptr, inode); |
@@ -764,9 +827,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
764 | 827 | ||
765 | reiserfs_write_unlock(inode->i_sb); | 828 | reiserfs_write_unlock(inode->i_sb); |
766 | 829 | ||
767 | /* the item was found, so new blocks were not added to the file | 830 | /* |
768 | ** there is no need to make sure the inode is updated with this | 831 | * the item was found, so new blocks were not added to the file |
769 | ** transaction | 832 | * there is no need to make sure the inode is updated with this |
833 | * transaction | ||
770 | */ | 834 | */ |
771 | return retval; | 835 | return retval; |
772 | } | 836 | } |
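
"Plugging the hole" above is a one-slot update: an indirect item is an array of little-endian 32-bit block pointers, a zero entry is a hole, and the new allocation is written into that slot before the buffer is logged. A userspace sketch with a plain array standing in for the on-disk item (endianness conversion and journaling are omitted):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Plug a hole at pos in an indirect item; returns the block number
     * stored there afterwards (unchanged if the slot was already mapped).
     */
    static uint32_t plug_hole(uint32_t *item, unsigned pos, uint32_t new_block)
    {
        if (item[pos] == 0)             /* a zero pointer is a hole */
            item[pos] = new_block;
        return item[pos];
    }

    int main(void)
    {
        uint32_t item[4] = { 100, 0, 102, 103 };   /* slot 1 is a hole */
        printf("%u\n", plug_hole(item, 1, 500));   /* 500: hole plugged */
        printf("%u\n", plug_hole(item, 0, 500));   /* 100: already mapped */
        return 0;
    }
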
@@ -776,9 +840,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
776 | goto start_trans; | 840 | goto start_trans; |
777 | } | 841 | } |
778 | 842 | ||
779 | /* desired position is not found or is in the direct item. We have | 843 | /* |
780 | to append file with holes up to 'block'-th block converting | 844 | * desired position is not found or is in the direct item. We have |
781 | direct items to indirect one if necessary */ | 845 | * to append file with holes up to 'block'-th block converting |
846 | * direct items to indirect one if necessary | ||
847 | */ | ||
782 | done = 0; | 848 | done = 0; |
783 | do { | 849 | do { |
784 | if (is_statdata_le_ih(ih)) { | 850 | if (is_statdata_le_ih(ih)) { |
@@ -790,16 +856,18 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
790 | TYPE_INDIRECT, UNFM_P_SIZE, | 856 | TYPE_INDIRECT, UNFM_P_SIZE, |
791 | 0 /* free_space */ ); | 857 | 0 /* free_space */ ); |
792 | 858 | ||
859 | /* | ||
860 | * we are going to add 'block'-th block to the file. | ||
861 | * Use allocated block for that | ||
862 | */ | ||
793 | if (cpu_key_k_offset(&key) == 1) { | 863 | if (cpu_key_k_offset(&key) == 1) { |
794 | /* we are going to add 'block'-th block to the file. Use | ||
795 | allocated block for that */ | ||
796 | unp = cpu_to_le32(allocated_block_nr); | 864 | unp = cpu_to_le32(allocated_block_nr); |
797 | set_block_dev_mapped(bh_result, | 865 | set_block_dev_mapped(bh_result, |
798 | allocated_block_nr, inode); | 866 | allocated_block_nr, inode); |
799 | set_buffer_new(bh_result); | 867 | set_buffer_new(bh_result); |
800 | done = 1; | 868 | done = 1; |
801 | } | 869 | } |
802 | tmp_key = key; // ;) | 870 | tmp_key = key; /* ;) */ |
803 | set_cpu_key_k_offset(&tmp_key, 1); | 871 | set_cpu_key_k_offset(&tmp_key, 1); |
804 | PATH_LAST_POSITION(&path)++; | 872 | PATH_LAST_POSITION(&path)++; |
805 | 873 | ||
@@ -809,9 +877,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
809 | if (retval) { | 877 | if (retval) { |
810 | reiserfs_free_block(th, inode, | 878 | reiserfs_free_block(th, inode, |
811 | allocated_block_nr, 1); | 879 | allocated_block_nr, 1); |
812 | goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST | 880 | /* |
881 | * retval == -ENOSPC, -EDQUOT or -EIO | ||
882 | * or -EEXIST | ||
883 | */ | ||
884 | goto failure; | ||
813 | } | 885 | } |
814 | //mark_tail_converted (inode); | ||
815 | } else if (is_direct_le_ih(ih)) { | 886 | } else if (is_direct_le_ih(ih)) { |
816 | /* direct item has to be converted */ | 887 | /* direct item has to be converted */ |
817 | loff_t tail_offset; | 888 | loff_t tail_offset; |
@@ -819,18 +890,24 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
819 | tail_offset = | 890 | tail_offset = |
820 | ((le_ih_k_offset(ih) - | 891 | ((le_ih_k_offset(ih) - |
821 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; | 892 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; |
893 | |||
894 | /* | ||
895 | * direct item we just found fits into block we have | ||
896 | * to map. Convert it into unformatted node: use | ||
897 | * bh_result for the conversion | ||
898 | */ | ||
822 | if (tail_offset == cpu_key_k_offset(&key)) { | 899 | if (tail_offset == cpu_key_k_offset(&key)) { |
823 | /* direct item we just found fits into block we have | ||
824 | to map. Convert it into unformatted node: use | ||
825 | bh_result for the conversion */ | ||
826 | set_block_dev_mapped(bh_result, | 900 | set_block_dev_mapped(bh_result, |
827 | allocated_block_nr, inode); | 901 | allocated_block_nr, inode); |
828 | unbh = bh_result; | 902 | unbh = bh_result; |
829 | done = 1; | 903 | done = 1; |
830 | } else { | 904 | } else { |
831 | /* we have to padd file tail stored in direct item(s) | 905 | /* |
832 | up to block size and convert it to unformatted | 906 | * we have to pad file tail stored in direct |
833 | node. FIXME: this should also get into page cache */ | 907 | * item(s) up to block size and convert it |
908 | * to unformatted node. FIXME: this should | ||
909 | * also get into page cache | ||
910 | */ | ||
834 | 911 | ||
835 | pathrelse(&path); | 912 | pathrelse(&path); |
836 | /* | 913 | /* |
@@ -859,7 +936,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
859 | inode->i_ino, | 936 | inode->i_ino, |
860 | retval); | 937 | retval); |
861 | if (allocated_block_nr) { | 938 | if (allocated_block_nr) { |
862 | /* the bitmap, the super, and the stat data == 3 */ | 939 | /* |
940 | * the bitmap, the super, | ||
941 | * and the stat data == 3 | ||
942 | */ | ||
863 | if (!th) | 943 | if (!th) |
864 | th = reiserfs_persistent_transaction(inode->i_sb, 3); | 944 | th = reiserfs_persistent_transaction(inode->i_sb, 3); |
865 | if (th) | 945 | if (th) |
@@ -881,43 +961,57 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
881 | allocated_block_nr, 1); | 961 | allocated_block_nr, 1); |
882 | goto failure; | 962 | goto failure; |
883 | } | 963 | } |
884 | /* it is important the set_buffer_uptodate is done after | 964 | /* |
885 | ** the direct2indirect. The buffer might contain valid | 965 | * it is important the set_buffer_uptodate is done |
886 | ** data newer than the data on disk (read by readpage, changed, | 966 | * after the direct2indirect. The buffer might |
887 | ** and then sent here by writepage). direct2indirect needs | 967 | * contain valid data newer than the data on disk |
888 | ** to know if unbh was already up to date, so it can decide | 968 | * (read by readpage, changed, and then sent here by |
889 | ** if the data in unbh needs to be replaced with data from | 969 | * writepage). direct2indirect needs to know if unbh |
890 | ** the disk | 970 | * was already up to date, so it can decide if the |
971 | * data in unbh needs to be replaced with data from | ||
972 | * the disk | ||
891 | */ | 973 | */ |
892 | set_buffer_uptodate(unbh); | 974 | set_buffer_uptodate(unbh); |
893 | 975 | ||
894 | /* unbh->b_page == NULL in case of DIRECT_IO request, this means | 976 | /* |
895 | buffer will disappear shortly, so it should not be added to | 977 | * unbh->b_page == NULL in case of DIRECT_IO request, |
978 | * this means buffer will disappear shortly, so it | ||
979 | * should not be added to the tail list | ||
896 | */ | 980 | */ |
897 | if (unbh->b_page) { | 981 | if (unbh->b_page) { |
898 | /* we've converted the tail, so we must | 982 | /* |
899 | ** flush unbh before the transaction commits | 983 | * we've converted the tail, so we must |
984 | * flush unbh before the transaction commits | ||
900 | */ | 985 | */ |
901 | reiserfs_add_tail_list(inode, unbh); | 986 | reiserfs_add_tail_list(inode, unbh); |
902 | 987 | ||
903 | /* mark it dirty now to prevent commit_write from adding | 988 | /* |
904 | ** this buffer to the inode's dirty buffer list | 989 | * mark it dirty now to prevent commit_write |
990 | * from adding this buffer to the inode's | ||
991 | * dirty buffer list | ||
905 | */ | 992 | */ |
906 | /* | 993 | /* |
907 | * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). | 994 | * AKPM: changed __mark_buffer_dirty to |
908 | * It's still atomic, but it sets the page dirty too, | 995 | * mark_buffer_dirty(). It's still atomic, |
909 | * which makes it eligible for writeback at any time by the | 996 | * but it sets the page dirty too, which makes |
910 | * VM (which was also the case with __mark_buffer_dirty()) | 997 | * it eligible for writeback at any time by the |
998 | * VM (which was also the case with | ||
999 | * __mark_buffer_dirty()) | ||
911 | */ | 1000 | */ |
912 | mark_buffer_dirty(unbh); | 1001 | mark_buffer_dirty(unbh); |
913 | } | 1002 | } |
914 | } else { | 1003 | } else { |
915 | /* append indirect item with holes if needed, when appending | 1004 | /* |
916 | pointer to 'block'-th block use block, which is already | 1005 | * append indirect item with holes if needed, when |
917 | allocated */ | 1006 | * appending pointer to 'block'-th block use block, |
1007 | * which is already allocated | ||
1008 | */ | ||
918 | struct cpu_key tmp_key; | 1009 | struct cpu_key tmp_key; |
919 | unp_t unf_single = 0; // We use this in case we need to allocate only | 1010 | /* |
920 | // one block which is a fastpath | 1011 | * We use this in case we need to allocate |
1012 | * only one block, which is a fastpath | ||
1013 | */ | ||
1014 | unp_t unf_single = 0; | ||
921 | unp_t *un; | 1015 | unp_t *un; |
922 | __u64 max_to_insert = | 1016 | __u64 max_to_insert = |
923 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / | 1017 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / |
@@ -926,14 +1020,17 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
926 | 1020 | ||
927 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, | 1021 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, |
928 | "vs-804: invalid position for append"); | 1022 | "vs-804: invalid position for append"); |
929 | /* indirect item has to be appended, set up key of that position */ | 1023 | /* |
1024 | * indirect item has to be appended, | ||
1025 | * set up key of that position | ||
1026 | * (key type is unimportant) | ||
1027 | */ | ||
930 | make_cpu_key(&tmp_key, inode, | 1028 | make_cpu_key(&tmp_key, inode, |
931 | le_key_k_offset(version, | 1029 | le_key_k_offset(version, |
932 | &(ih->ih_key)) + | 1030 | &ih->ih_key) + |
933 | op_bytes_number(ih, | 1031 | op_bytes_number(ih, |
934 | inode->i_sb->s_blocksize), | 1032 | inode->i_sb->s_blocksize), |
935 | //pos_in_item * inode->i_sb->s_blocksize, | 1033 | TYPE_INDIRECT, 3); |
936 | TYPE_INDIRECT, 3); // key type is unimportant | ||
937 | 1034 | ||
938 | RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), | 1035 | RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), |
939 | "green-805: invalid offset"); | 1036 | "green-805: invalid offset"); |
@@ -954,8 +1051,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
954 | } | 1051 | } |
955 | } | 1052 | } |
956 | if (blocks_needed <= max_to_insert) { | 1053 | if (blocks_needed <= max_to_insert) { |
957 | /* we are going to add target block to the file. Use allocated | 1054 | /* |
958 | block for that */ | 1055 | * we are going to add target block to |
1056 | * the file. Use allocated block for that | ||
1057 | */ | ||
959 | un[blocks_needed - 1] = | 1058 | un[blocks_needed - 1] = |
960 | cpu_to_le32(allocated_block_nr); | 1059 | cpu_to_le32(allocated_block_nr); |
961 | set_block_dev_mapped(bh_result, | 1060 | set_block_dev_mapped(bh_result, |
@@ -964,8 +1063,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
964 | done = 1; | 1063 | done = 1; |
965 | } else { | 1064 | } else { |
966 | /* paste hole to the indirect item */ | 1065 | /* paste hole to the indirect item */ |
967 | /* If kmalloc failed, max_to_insert becomes zero and it means we | 1066 | /* |
968 | only have space for one block */ | 1067 | * If kmalloc failed, max_to_insert becomes |
1068 | * zero and it means we only have space for | ||
1069 | * one block | ||
1070 | */ | ||
969 | blocks_needed = | 1071 | blocks_needed = |
970 | max_to_insert ? max_to_insert : 1; | 1072 | max_to_insert ? max_to_insert : 1; |
971 | } | 1073 | } |
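
max_to_insert caps how many pointers one paste can add: the largest item that fits in a node divided by the 4-byte pointer size, degrading to a single block when the kmalloc for the zero-filled array fails. A sketch of the sizing, with an assumed node-header overhead standing in for the real MAX_ITEM_LEN() macro:

    #include <stdio.h>
    #include <stdint.h>

    #define UNFM_P_SIZE 4   /* bytes per unformatted-node pointer */

    /* crude stand-in for MAX_ITEM_LEN(): block size minus assumed overhead */
    static unsigned max_item_len(unsigned blocksize)
    {
        return blocksize - 120;
    }

    int main(void)
    {
        unsigned blocksize = 4096;
        uint64_t max_to_insert = max_item_len(blocksize) / UNFM_P_SIZE;

        /* kmalloc-failure path: max_to_insert of 0 means "one block only" */
        uint64_t degraded_max = 0;
        uint64_t blocks_needed = degraded_max ? degraded_max : 1;

        printf("max_to_insert=%llu blocks_needed=%llu\n",
               (unsigned long long)max_to_insert,
               (unsigned long long)blocks_needed);   /* 994 and 1 */
        return 0;
    }
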
@@ -984,9 +1086,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
984 | goto failure; | 1086 | goto failure; |
985 | } | 1087 | } |
986 | if (!done) { | 1088 | if (!done) { |
987 | /* We need to mark new file size in case this function will be | 1089 | /* |
988 | interrupted/aborted later on. And we may do this only for | 1090 | * We need to mark new file size in case |
989 | holes. */ | 1091 | * this function will be interrupted/aborted |
1092 | * later on. And we may do this only for | ||
1093 | * holes. | ||
1094 | */ | ||
990 | inode->i_size += | 1095 | inode->i_size += |
991 | inode->i_sb->s_blocksize * blocks_needed; | 1096 | inode->i_sb->s_blocksize * blocks_needed; |
992 | } | 1097 | } |
@@ -995,13 +1100,13 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
995 | if (done == 1) | 1100 | if (done == 1) |
996 | break; | 1101 | break; |
997 | 1102 | ||
998 | /* this loop could log more blocks than we had originally asked | 1103 | /* |
999 | ** for. So, we have to allow the transaction to end if it is | 1104 | * this loop could log more blocks than we had originally |
1000 | ** too big or too full. Update the inode so things are | 1105 | * asked for. So, we have to allow the transaction to end |
1001 | ** consistent if we crash before the function returns | 1106 | * if it is too big or too full. Update the inode so things |
1002 | ** | 1107 | * are consistent if we crash before the function returns. |
1003 | ** release the path so that anybody waiting on the path before | 1108 | * Release the path so that anybody waiting on the path before |
1004 | ** ending their transaction will be able to continue. | 1109 | * ending their transaction will be able to continue. |
1005 | */ | 1110 | */ |
1006 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { | 1111 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { |
1007 | retval = restart_transaction(th, inode, &path); | 1112 | retval = restart_transaction(th, inode, &path); |
@@ -1031,14 +1136,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
1031 | goto failure; | 1136 | goto failure; |
1032 | } | 1137 | } |
1033 | bh = get_last_bh(&path); | 1138 | bh = get_last_bh(&path); |
1034 | ih = get_ih(&path); | 1139 | ih = tp_item_head(&path); |
1035 | item = get_item(&path); | 1140 | item = tp_item_body(&path); |
1036 | pos_in_item = path.pos_in_item; | 1141 | pos_in_item = path.pos_in_item; |
1037 | } while (1); | 1142 | } while (1); |
1038 | 1143 | ||
1039 | retval = 0; | 1144 | retval = 0; |
1040 | 1145 | ||
1041 | failure: | 1146 | failure: |
1042 | if (th && (!dangle || (retval && !th->t_trans_id))) { | 1147 | if (th && (!dangle || (retval && !th->t_trans_id))) { |
1043 | int err; | 1148 | int err; |
1044 | if (th->t_trans_id) | 1149 | if (th->t_trans_id) |
@@ -1060,8 +1165,10 @@ reiserfs_readpages(struct file *file, struct address_space *mapping, | |||
1060 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); | 1165 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); |
1061 | } | 1166 | } |
1062 | 1167 | ||
1063 | /* Compute real number of used bytes by file | 1168 | /* |
1064 | * Following three functions can go away when we'll have enough space in stat item | 1169 | * Compute real number of used bytes by file |
1170 | * The following three functions can go away when we have enough space in | ||
1171 | * stat item | ||
1065 | */ | 1172 | */ |
1066 | static int real_space_diff(struct inode *inode, int sd_size) | 1173 | static int real_space_diff(struct inode *inode, int sd_size) |
1067 | { | 1174 | { |
@@ -1071,13 +1178,14 @@ static int real_space_diff(struct inode *inode, int sd_size) | |||
1071 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) | 1178 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) |
1072 | return sd_size; | 1179 | return sd_size; |
1073 | 1180 | ||
1074 | /* End of file is also in full block with indirect reference, so round | 1181 | /* |
1075 | ** up to the next block. | 1182 | * End of file is also in full block with indirect reference, so round |
1076 | ** | 1183 | * up to the next block. |
1077 | ** there is just no way to know if the tail is actually packed | 1184 | * |
1078 | ** on the file, so we have to assume it isn't. When we pack the | 1185 | * there is just no way to know if the tail is actually packed |
1079 | ** tail, we add 4 bytes to pretend there really is an unformatted | 1186 | * on the file, so we have to assume it isn't. When we pack the |
1080 | ** node pointer | 1187 | * tail, we add 4 bytes to pretend there really is an unformatted |
1188 | * node pointer | ||
1081 | */ | 1189 | */ |
1082 | bytes = | 1190 | bytes = |
1083 | ((inode->i_size + | 1191 | ((inode->i_size + |
@@ -1108,36 +1216,36 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) | |||
1108 | bytes += (loff_t) 511; | 1216 | bytes += (loff_t) 511; |
1109 | } | 1217 | } |
1110 | 1218 | ||
1111 | /* files from before the quota patch might i_blocks such that | 1219 | /* |
1112 | ** bytes < real_space. Deal with that here to prevent it from | 1220 | * files from before the quota patch might have i_blocks such that |
1113 | ** going negative. | 1221 | * bytes < real_space. Deal with that here to prevent it from |
1222 | * going negative. | ||
1114 | */ | 1223 | */ |
1115 | if (bytes < real_space) | 1224 | if (bytes < real_space) |
1116 | return 0; | 1225 | return 0; |
1117 | return (bytes - real_space) >> 9; | 1226 | return (bytes - real_space) >> 9; |
1118 | } | 1227 | } |
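
Both block-counting helpers reduce to the same 512-byte-sector arithmetic: round the byte count up, subtract the metadata slack (real_space), shift right by 9, and clamp at zero so pre-quota-patch files cannot go negative. A compilable sketch of that computation (a simplification of the two functions above):

    #include <stdio.h>
    #include <stdint.h>

    /* sectors used once 'real_space' bytes of bookkeeping slack are removed */
    static uint64_t fake_used_blocks(uint64_t bytes, uint64_t real_space)
    {
        bytes += 511;                /* round up to a whole 512-byte sector */
        if (bytes < real_space)
            return 0;                /* old pre-quota files: don't go negative */
        return (bytes - real_space) >> 9;
    }

    int main(void)
    {
        printf("%llu\n", (unsigned long long)fake_used_blocks(4096, 0));   /* 8 */
        printf("%llu\n", (unsigned long long)fake_used_blocks(100, 4096)); /* 0 */
        return 0;
    }
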
1119 | 1228 | ||
1120 | // | 1229 | /* |
1121 | // BAD: new directories have stat data of new type and all other items | 1230 | * BAD: new directories have stat data of new type and all other items |
1122 | // of old type. Version stored in the inode says about body items, so | 1231 | * of old type. Version stored in the inode says about body items, so |
1123 | // in update_stat_data we can not rely on inode, but have to check | 1232 | * in update_stat_data we can not rely on inode, but have to check |
1124 | // item version directly | 1233 | * item version directly |
1125 | // | 1234 | */ |
1126 | 1235 | ||
1127 | // called by read_locked_inode | 1236 | /* called by read_locked_inode */ |
1128 | static void init_inode(struct inode *inode, struct treepath *path) | 1237 | static void init_inode(struct inode *inode, struct treepath *path) |
1129 | { | 1238 | { |
1130 | struct buffer_head *bh; | 1239 | struct buffer_head *bh; |
1131 | struct item_head *ih; | 1240 | struct item_head *ih; |
1132 | __u32 rdev; | 1241 | __u32 rdev; |
1133 | //int version = ITEM_VERSION_1; | ||
1134 | 1242 | ||
1135 | bh = PATH_PLAST_BUFFER(path); | 1243 | bh = PATH_PLAST_BUFFER(path); |
1136 | ih = PATH_PITEM_HEAD(path); | 1244 | ih = tp_item_head(path); |
1137 | 1245 | ||
1138 | copy_key(INODE_PKEY(inode), &(ih->ih_key)); | 1246 | copy_key(INODE_PKEY(inode), &ih->ih_key); |
1139 | 1247 | ||
1140 | INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); | 1248 | INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list); |
1141 | REISERFS_I(inode)->i_flags = 0; | 1249 | REISERFS_I(inode)->i_flags = 0; |
1142 | REISERFS_I(inode)->i_prealloc_block = 0; | 1250 | REISERFS_I(inode)->i_prealloc_block = 0; |
1143 | REISERFS_I(inode)->i_prealloc_count = 0; | 1251 | REISERFS_I(inode)->i_prealloc_count = 0; |
@@ -1147,7 +1255,7 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1147 | 1255 | ||
1148 | if (stat_data_v1(ih)) { | 1256 | if (stat_data_v1(ih)) { |
1149 | struct stat_data_v1 *sd = | 1257 | struct stat_data_v1 *sd = |
1150 | (struct stat_data_v1 *)B_I_PITEM(bh, ih); | 1258 | (struct stat_data_v1 *)ih_item_body(bh, ih); |
1151 | unsigned long blocks; | 1259 | unsigned long blocks; |
1152 | 1260 | ||
1153 | set_inode_item_key_version(inode, KEY_FORMAT_3_5); | 1261 | set_inode_item_key_version(inode, KEY_FORMAT_3_5); |
@@ -1168,20 +1276,26 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1168 | inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1276 | inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1169 | blocks = (inode->i_size + 511) >> 9; | 1277 | blocks = (inode->i_size + 511) >> 9; |
1170 | blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); | 1278 | blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); |
1279 | |||
1280 | /* | ||
1281 | * there was a bug in <=3.5.23 when i_blocks could take | ||
1282 | * negative values. Starting from 3.5.17 this value could | ||
1283 | * even be stored in stat data. For such files we set | ||
1284 | * i_blocks based on file size. Just 2 notes: this can be | ||
1285 | * wrong for sparse files. The on-disk value will only be | ||
1286 | * updated if the file's inode ever changes | ||
1287 | */ | ||
1171 | if (inode->i_blocks > blocks) { | 1288 | if (inode->i_blocks > blocks) { |
1172 | // there was a bug in <=3.5.23 when i_blocks could take negative | ||
1173 | // values. Starting from 3.5.17 this value could even be stored in | ||
1174 | // stat data. For such files we set i_blocks based on file | ||
1175 | // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be | ||
1176 | // only updated if file's inode will ever change | ||
1177 | inode->i_blocks = blocks; | 1289 | inode->i_blocks = blocks; |
1178 | } | 1290 | } |
1179 | 1291 | ||
1180 | rdev = sd_v1_rdev(sd); | 1292 | rdev = sd_v1_rdev(sd); |
1181 | REISERFS_I(inode)->i_first_direct_byte = | 1293 | REISERFS_I(inode)->i_first_direct_byte = |
1182 | sd_v1_first_direct_byte(sd); | 1294 | sd_v1_first_direct_byte(sd); |
1183 | /* an early bug in the quota code can give us an odd number for the | 1295 | |
1184 | ** block count. This is incorrect, fix it here. | 1296 | /* |
1297 | * an early bug in the quota code can give us an odd | ||
1298 | * number for the block count. This is incorrect; fix it here. | ||
1185 | */ | 1299 | */ |
1186 | if (inode->i_blocks & 1) { | 1300 | if (inode->i_blocks & 1) { |
1187 | inode->i_blocks++; | 1301 | inode->i_blocks++; |
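
Two fixups meet in this hunk: i_blocks recomputed from i_size when the stored count is impossibly large (the <=3.5.23 bug), and the evening-up of odd counts left by the early quota bug. The rounding is plain integer arithmetic; a sketch assuming a 4096-byte block size:

    #include <stdio.h>
    #include <stdint.h>

    /* round n up to a multiple of unit (unit is a power of two) */
    static uint64_t round_up_sk(uint64_t n, uint64_t unit)
    {
        return (n + unit - 1) & ~(unit - 1);
    }

    int main(void)
    {
        uint64_t i_size    = 10000;        /* file size in bytes */
        uint64_t blocksize = 4096;         /* assumed */

        /* 512-byte sectors, rounded up to whole fs blocks, as in init_inode() */
        uint64_t blocks = (i_size + 511) >> 9;
        blocks = round_up_sk(blocks, blocksize >> 9);

        uint64_t i_blocks = 99999;         /* impossibly large stored count */
        if (i_blocks > blocks)
            i_blocks = blocks;             /* <=3.5.23 fixup: trust i_size */
        if (i_blocks & 1)
            i_blocks++;                    /* early quota bug left odd counts */

        printf("%llu\n", (unsigned long long)i_blocks);   /* 24 */
        return 0;
    }
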
@@ -1189,13 +1303,17 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1189 | inode_set_bytes(inode, | 1303 | inode_set_bytes(inode, |
1190 | to_real_used_space(inode, inode->i_blocks, | 1304 | to_real_used_space(inode, inode->i_blocks, |
1191 | SD_V1_SIZE)); | 1305 | SD_V1_SIZE)); |
1192 | /* nopack is initially zero for v1 objects. For v2 objects, | 1306 | /* |
1193 | nopack is initialised from sd_attrs */ | 1307 | * nopack is initially zero for v1 objects. For v2 objects, |
1308 | * nopack is initialised from sd_attrs | ||
1309 | */ | ||
1194 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; | 1310 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; |
1195 | } else { | 1311 | } else { |
1196 | // new stat data found, but object may have old items | 1312 | /* |
1197 | // (directories and symlinks) | 1313 | * new stat data found, but object may have old items |
1198 | struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); | 1314 | * (directories and symlinks) |
1315 | */ | ||
1316 | struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih); | ||
1199 | 1317 | ||
1200 | inode->i_mode = sd_v2_mode(sd); | 1318 | inode->i_mode = sd_v2_mode(sd); |
1201 | set_nlink(inode, sd_v2_nlink(sd)); | 1319 | set_nlink(inode, sd_v2_nlink(sd)); |
@@ -1225,8 +1343,10 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1225 | inode_set_bytes(inode, | 1343 | inode_set_bytes(inode, |
1226 | to_real_used_space(inode, inode->i_blocks, | 1344 | to_real_used_space(inode, inode->i_blocks, |
1227 | SD_V2_SIZE)); | 1345 | SD_V2_SIZE)); |
1228 | /* read persistent inode attributes from sd and initialise | 1346 | /* |
1229 | generic inode flags from them */ | 1347 | * read persistent inode attributes from sd and initialise |
1348 | * generic inode flags from them | ||
1349 | */ | ||
1230 | REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); | 1350 | REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); |
1231 | sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); | 1351 | sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); |
1232 | } | 1352 | } |
@@ -1249,7 +1369,7 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1249 | } | 1369 | } |
1250 | } | 1370 | } |
1251 | 1371 | ||
1252 | // update new stat data with inode fields | 1372 | /* update new stat data with inode fields */ |
1253 | static void inode2sd(void *sd, struct inode *inode, loff_t size) | 1373 | static void inode2sd(void *sd, struct inode *inode, loff_t size) |
1254 | { | 1374 | { |
1255 | struct stat_data *sd_v2 = (struct stat_data *)sd; | 1375 | struct stat_data *sd_v2 = (struct stat_data *)sd; |
@@ -1273,7 +1393,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) | |||
1273 | set_sd_v2_attrs(sd_v2, flags); | 1393 | set_sd_v2_attrs(sd_v2, flags); |
1274 | } | 1394 | } |
1275 | 1395 | ||
1276 | // used to copy inode's fields to old stat data | 1396 | /* used to copy inode's fields to old stat data */ |
1277 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | 1397 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) |
1278 | { | 1398 | { |
1279 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; | 1399 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; |
@@ -1292,14 +1412,15 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | |||
1292 | else | 1412 | else |
1293 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); | 1413 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); |
1294 | 1414 | ||
1295 | // Sigh. i_first_direct_byte is back | 1415 | /* Sigh. i_first_direct_byte is back */ |
1296 | set_sd_v1_first_direct_byte(sd_v1, | 1416 | set_sd_v1_first_direct_byte(sd_v1, |
1297 | REISERFS_I(inode)->i_first_direct_byte); | 1417 | REISERFS_I(inode)->i_first_direct_byte); |
1298 | } | 1418 | } |
1299 | 1419 | ||
1300 | /* NOTE, you must prepare the buffer head before sending it here, | 1420 | /* |
1301 | ** and then log it after the call | 1421 | * NOTE, you must prepare the buffer head before sending it here, |
1302 | */ | 1422 | * and then log it after the call |
1423 | */ | ||
1303 | static void update_stat_data(struct treepath *path, struct inode *inode, | 1424 | static void update_stat_data(struct treepath *path, struct inode *inode, |
1304 | loff_t size) | 1425 | loff_t size) |
1305 | { | 1426 | { |
@@ -1307,17 +1428,17 @@ static void update_stat_data(struct treepath *path, struct inode *inode, | |||
1307 | struct item_head *ih; | 1428 | struct item_head *ih; |
1308 | 1429 | ||
1309 | bh = PATH_PLAST_BUFFER(path); | 1430 | bh = PATH_PLAST_BUFFER(path); |
1310 | ih = PATH_PITEM_HEAD(path); | 1431 | ih = tp_item_head(path); |
1311 | 1432 | ||
1312 | if (!is_statdata_le_ih(ih)) | 1433 | if (!is_statdata_le_ih(ih)) |
1313 | reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", | 1434 | reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", |
1314 | INODE_PKEY(inode), ih); | 1435 | INODE_PKEY(inode), ih); |
1315 | 1436 | ||
1437 | /* path points to old stat data */ | ||
1316 | if (stat_data_v1(ih)) { | 1438 | if (stat_data_v1(ih)) { |
1317 | // path points to old stat data | 1439 | inode2sd_v1(ih_item_body(bh, ih), inode, size); |
1318 | inode2sd_v1(B_I_PITEM(bh, ih), inode, size); | ||
1319 | } else { | 1440 | } else { |
1320 | inode2sd(B_I_PITEM(bh, ih), inode, size); | 1441 | inode2sd(ih_item_body(bh, ih), inode, size); |
1321 | } | 1442 | } |
1322 | 1443 | ||
1323 | return; | 1444 | return; |
@@ -1335,7 +1456,8 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, | |||
1335 | 1456 | ||
1336 | BUG_ON(!th->t_trans_id); | 1457 | BUG_ON(!th->t_trans_id); |
1337 | 1458 | ||
1338 | make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant | 1459 | /* key type is unimportant */ |
1460 | make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); | ||
1339 | 1461 | ||
1340 | for (;;) { | 1462 | for (;;) { |
1341 | int pos; | 1463 | int pos; |
@@ -1363,45 +1485,48 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, | |||
1363 | return; | 1485 | return; |
1364 | } | 1486 | } |
1365 | 1487 | ||
1366 | /* sigh, prepare_for_journal might schedule. When it schedules the | 1488 | /* |
1367 | ** FS might change. We have to detect that, and loop back to the | 1489 | * sigh, prepare_for_journal might schedule. When it |
1368 | ** search if the stat data item has moved | 1490 | * schedules the FS might change. We have to detect that, |
1491 | * and loop back to the search if the stat data item has moved | ||
1369 | */ | 1492 | */ |
1370 | bh = get_last_bh(&path); | 1493 | bh = get_last_bh(&path); |
1371 | ih = get_ih(&path); | 1494 | ih = tp_item_head(&path); |
1372 | copy_item_head(&tmp_ih, ih); | 1495 | copy_item_head(&tmp_ih, ih); |
1373 | fs_gen = get_generation(inode->i_sb); | 1496 | fs_gen = get_generation(inode->i_sb); |
1374 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); | 1497 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); |
1498 | |||
1499 | /* Stat_data item has been moved after scheduling. */ | ||
1375 | if (fs_changed(fs_gen, inode->i_sb) | 1500 | if (fs_changed(fs_gen, inode->i_sb) |
1376 | && item_moved(&tmp_ih, &path)) { | 1501 | && item_moved(&tmp_ih, &path)) { |
1377 | reiserfs_restore_prepared_buffer(inode->i_sb, bh); | 1502 | reiserfs_restore_prepared_buffer(inode->i_sb, bh); |
1378 | continue; /* Stat_data item has been moved after scheduling. */ | 1503 | continue; |
1379 | } | 1504 | } |
1380 | break; | 1505 | break; |
1381 | } | 1506 | } |
1382 | update_stat_data(&path, inode, size); | 1507 | update_stat_data(&path, inode, size); |
1383 | journal_mark_dirty(th, th->t_super, bh); | 1508 | journal_mark_dirty(th, bh); |
1384 | pathrelse(&path); | 1509 | pathrelse(&path); |
1385 | return; | 1510 | return; |
1386 | } | 1511 | } |
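
The retry loop in reiserfs_update_sd_size() is an optimistic pattern worth seeing in isolation: snapshot the generation, run the call that may schedule, and research from scratch if the item moved underneath. A condensed userspace sketch of that shape (the generation counter and helpers are hypothetical stand-ins, not kernel API):

    #include <stdio.h>
    #include <stdbool.h>

    /* hypothetical stand-ins for filesystem state, illustration only */
    static int fs_generation;
    static int lookups;

    static int  current_generation(void) { return fs_generation; }
    static void prepare_buffer(void) { if (lookups == 1) fs_generation++; }
    static bool item_moved_sk(void)  { return true; }  /* pretend it moved once */

    int main(void)
    {
        for (;;) {
            lookups++;
            int gen = current_generation();  /* snapshot before blocking call */
            prepare_buffer();                /* may "schedule": gen can change */
            if (gen != current_generation() && item_moved_sk())
                continue;                    /* item moved: restore and research */
            break;                           /* safe to update and log the item */
        }
        printf("lookups=%d\n", lookups);     /* 2: one retry was needed */
        return 0;
    }
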
1387 | 1512 | ||
1388 | /* reiserfs_read_locked_inode is called to read the inode off disk, and it | 1513 | /* |
1389 | ** does a make_bad_inode when things go wrong. But, we need to make sure | 1514 | * reiserfs_read_locked_inode is called to read the inode off disk, and it |
1390 | ** and clear the key in the private portion of the inode, otherwise a | 1515 | * does a make_bad_inode when things go wrong. But, we need to make sure |
1391 | ** corresponding iput might try to delete whatever object the inode last | 1516 | * and clear the key in the private portion of the inode, otherwise a |
1392 | ** represented. | 1517 | * corresponding iput might try to delete whatever object the inode last |
1393 | */ | 1518 | * represented. |
1519 | */ | ||
1394 | static void reiserfs_make_bad_inode(struct inode *inode) | 1520 | static void reiserfs_make_bad_inode(struct inode *inode) |
1395 | { | 1521 | { |
1396 | memset(INODE_PKEY(inode), 0, KEY_SIZE); | 1522 | memset(INODE_PKEY(inode), 0, KEY_SIZE); |
1397 | make_bad_inode(inode); | 1523 | make_bad_inode(inode); |
1398 | } | 1524 | } |
1399 | 1525 | ||
1400 | // | 1526 | /* |
1401 | // initially this function was derived from minix or ext2's analog and | 1527 | * initially this function was derived from minix or ext2's analog and |
1402 | // evolved as the prototype did | 1528 | * evolved as the prototype did |
1403 | // | 1529 | */ |
1404 | |||
1405 | int reiserfs_init_locked_inode(struct inode *inode, void *p) | 1530 | int reiserfs_init_locked_inode(struct inode *inode, void *p) |
1406 | { | 1531 | { |
1407 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; | 1532 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; |
@@ -1410,8 +1535,10 @@ int reiserfs_init_locked_inode(struct inode *inode, void *p) | |||
1410 | return 0; | 1535 | return 0; |
1411 | } | 1536 | } |
1412 | 1537 | ||
1413 | /* looks for stat data in the tree, and fills up the fields of in-core | 1538 | /* |
1414 | inode stat data fields */ | 1539 | * looks for stat data in the tree, and fills up the fields of in-core |
1540 | * inode stat data fields | ||
1541 | */ | ||
1415 | void reiserfs_read_locked_inode(struct inode *inode, | 1542 | void reiserfs_read_locked_inode(struct inode *inode, |
1416 | struct reiserfs_iget_args *args) | 1543 | struct reiserfs_iget_args *args) |
1417 | { | 1544 | { |
@@ -1422,8 +1549,10 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1422 | 1549 | ||
1423 | dirino = args->dirid; | 1550 | dirino = args->dirid; |
1424 | 1551 | ||
1425 | /* set version 1, version 2 could be used too, because stat data | 1552 | /* |
1426 | key is the same in both versions */ | 1553 | * set version 1, version 2 could be used too, because stat data |
1554 | * key is the same in both versions | ||
1555 | */ | ||
1427 | key.version = KEY_FORMAT_3_5; | 1556 | key.version = KEY_FORMAT_3_5; |
1428 | key.on_disk_key.k_dir_id = dirino; | 1557 | key.on_disk_key.k_dir_id = dirino; |
1429 | key.on_disk_key.k_objectid = inode->i_ino; | 1558 | key.on_disk_key.k_objectid = inode->i_ino; |
@@ -1439,8 +1568,9 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1439 | reiserfs_make_bad_inode(inode); | 1568 | reiserfs_make_bad_inode(inode); |
1440 | return; | 1569 | return; |
1441 | } | 1570 | } |
1571 | |||
1572 | /* a stale NFS handle can trigger this without it being an error */ | ||
1442 | if (retval != ITEM_FOUND) { | 1573 | if (retval != ITEM_FOUND) { |
1443 | /* a stale NFS handle can trigger this without it being an error */ | ||
1444 | pathrelse(&path_to_sd); | 1574 | pathrelse(&path_to_sd); |
1445 | reiserfs_make_bad_inode(inode); | 1575 | reiserfs_make_bad_inode(inode); |
1446 | clear_nlink(inode); | 1576 | clear_nlink(inode); |
@@ -1449,20 +1579,25 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1449 | 1579 | ||
1450 | init_inode(inode, &path_to_sd); | 1580 | init_inode(inode, &path_to_sd); |
1451 | 1581 | ||
1452 | /* It is possible that knfsd is trying to access inode of a file | 1582 | /* |
1453 | that is being removed from the disk by some other thread. As we | 1583 | * It is possible that knfsd is trying to access inode of a file |
1454 | update sd on unlink all that is required is to check for nlink | 1584 | * that is being removed from the disk by some other thread. As we |
1455 | here. This bug was first found by Sizif when debugging | 1585 | * update sd on unlink all that is required is to check for nlink |
1456 | SquidNG/Butterfly, forgotten, and found again after Philippe | 1586 | * here. This bug was first found by Sizif when debugging |
1457 | Gramoulle <philippe.gramoulle@mmania.com> reproduced it. | 1587 | * SquidNG/Butterfly, forgotten, and found again after Philippe |
1458 | 1588 | * Gramoulle <philippe.gramoulle@mmania.com> reproduced it. | |
1459 | More logical fix would require changes in fs/inode.c:iput() to | 1589 | |
1460 | remove inode from hash-table _after_ fs cleaned disk stuff up and | 1590 | * More logical fix would require changes in fs/inode.c:iput() to |
1461 | in iget() to return NULL if I_FREEING inode is found in | 1591 | * remove inode from hash-table _after_ fs cleaned disk stuff up and |
1462 | hash-table. */ | 1592 | * in iget() to return NULL if I_FREEING inode is found in |
1463 | /* Currently there is one place where it's ok to meet inode with | 1593 | * hash-table. |
1464 | nlink==0: processing of open-unlinked and half-truncated files | 1594 | */ |
1465 | during mount (fs/reiserfs/super.c:finish_unfinished()). */ | 1595 | |
1596 | /* | ||
1597 | * Currently there is one place where it's ok to meet inode with | ||
1598 | * nlink==0: processing of open-unlinked and half-truncated files | ||
1599 | * during mount (fs/reiserfs/super.c:finish_unfinished()). | ||
1600 | */ | ||
1466 | if ((inode->i_nlink == 0) && | 1601 | if ((inode->i_nlink == 0) && |
1467 | !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { | 1602 | !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { |
1468 | reiserfs_warning(inode->i_sb, "vs-13075", | 1603 | reiserfs_warning(inode->i_sb, "vs-13075", |
@@ -1472,7 +1607,8 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1472 | reiserfs_make_bad_inode(inode); | 1607 | reiserfs_make_bad_inode(inode); |
1473 | } | 1608 | } |
1474 | 1609 | ||
1475 | reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ | 1610 | /* init inode should be relsing */ |
1611 | reiserfs_check_path(&path_to_sd); | ||
1476 | 1612 | ||
1477 | /* | 1613 | /* |
1478 | * Stat data v1 doesn't support ACLs. | 1614 | * Stat data v1 doesn't support ACLs. |
@@ -1481,7 +1617,7 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1481 | cache_no_acl(inode); | 1617 | cache_no_acl(inode); |
1482 | } | 1618 | } |
1483 | 1619 | ||
1484 | /** | 1620 | /* |
1485 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). | 1621 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). |
1486 | * | 1622 | * |
1487 | * @inode: inode from hash table to check | 1623 | * @inode: inode from hash table to check |
@@ -1556,7 +1692,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, | |||
1556 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | 1692 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1557 | int fh_len, int fh_type) | 1693 | int fh_len, int fh_type) |
1558 | { | 1694 | { |
1559 | /* fhtype happens to reflect the number of u32s encoded. | 1695 | /* |
1696 | * fhtype happens to reflect the number of u32s encoded. | ||
1560 | * due to a bug in earlier code, fhtype might indicate there | 1697 | * due to a bug in earlier code, fhtype might indicate there |
1561 | * are more u32s than actually fitted. | 1698 | * are more u32s than actually fitted. |
1562 | * so if fhtype seems to be more than len, reduce fhtype. | 1699 | * so if fhtype seems to be more than len, reduce fhtype. |
@@ -1625,13 +1762,16 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, | |||
1625 | return *lenp; | 1762 | return *lenp; |
1626 | } | 1763 | } |
1627 | 1764 | ||
1628 | /* looks for stat data, then copies fields to it, marks the buffer | 1765 | /* |
1629 | containing stat data as dirty */ | 1766 | * looks for stat data, then copies fields to it, marks the buffer |
1630 | /* reiserfs inodes are never really dirty, since the dirty inode call | 1767 | * containing stat data as dirty |
1631 | ** always logs them. This call allows the VFS inode marking routines | 1768 | */ |
1632 | ** to properly mark inodes for datasync and such, but only actually | 1769 | /* |
1633 | ** does something when called for a synchronous update. | 1770 | * reiserfs inodes are never really dirty, since the dirty inode call |
1634 | */ | 1771 | * always logs them. This call allows the VFS inode marking routines |
1772 | * to properly mark inodes for datasync and such, but only actually | ||
1773 | * does something when called for a synchronous update. | ||
1774 | */ | ||
1635 | int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1775 | int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1636 | { | 1776 | { |
1637 | struct reiserfs_transaction_handle th; | 1777 | struct reiserfs_transaction_handle th; |
@@ -1639,24 +1779,28 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1639 | 1779 | ||
1640 | if (inode->i_sb->s_flags & MS_RDONLY) | 1780 | if (inode->i_sb->s_flags & MS_RDONLY) |
1641 | return -EROFS; | 1781 | return -EROFS; |
1642 | /* memory pressure can sometimes initiate write_inode calls with sync == 1; | 1782 | /* |
1643 | ** these cases are just when the system needs RAM, not when the | 1783 | * memory pressure can sometimes initiate write_inode calls with |
1644 | ** inode needs to reach disk for safety, and they can safely be | 1784 | * sync == 1; |
1645 | ** ignored because the altered inode has already been logged. | 1785 | * these cases are just when the system needs RAM, not when the |
1786 | * inode needs to reach disk for safety, and they can safely be | ||
1787 | * ignored because the altered inode has already been logged. | ||
1646 | */ | 1788 | */ |
1647 | if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { | 1789 | if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { |
1648 | reiserfs_write_lock(inode->i_sb); | 1790 | reiserfs_write_lock(inode->i_sb); |
1649 | if (!journal_begin(&th, inode->i_sb, jbegin_count)) { | 1791 | if (!journal_begin(&th, inode->i_sb, jbegin_count)) { |
1650 | reiserfs_update_sd(&th, inode); | 1792 | reiserfs_update_sd(&th, inode); |
1651 | journal_end_sync(&th, inode->i_sb, jbegin_count); | 1793 | journal_end_sync(&th); |
1652 | } | 1794 | } |
1653 | reiserfs_write_unlock(inode->i_sb); | 1795 | reiserfs_write_unlock(inode->i_sb); |
1654 | } | 1796 | } |
1655 | return 0; | 1797 | return 0; |
1656 | } | 1798 | } |
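This function also shows the patch's recurring journal API change: journal_end(), journal_end_sync() and journal_mark_dirty() drop their superblock and block-count arguments, since the handle records both at journal_begin() time. A before/after sketch of the pairing, using the names that appear throughout this diff:

        /* before: the caller re-supplies what the handle already knows */
        journal_begin(&th, sb, jbegin_count);
        journal_mark_dirty(&th, sb, bh);
        journal_end(&th, sb, jbegin_count);

        /* after: sb and the reserved block count come from the handle */
        journal_begin(&th, sb, jbegin_count);
        journal_mark_dirty(&th, bh);
        journal_end(&th);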
1657 | 1799 | ||
1658 | /* stat data of new object is inserted already, this inserts the item | 1800 | /* |
1659 | containing "." and ".." entries */ | 1801 | * stat data of new object is inserted already, this inserts the item |
1802 | * containing "." and ".." entries | ||
1803 | */ | ||
1660 | static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | 1804 | static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, |
1661 | struct inode *inode, | 1805 | struct inode *inode, |
1662 | struct item_head *ih, struct treepath *path, | 1806 | struct item_head *ih, struct treepath *path, |
@@ -1674,9 +1818,11 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | |||
1674 | le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, | 1818 | le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, |
1675 | TYPE_DIRENTRY, 3 /*key length */ ); | 1819 | TYPE_DIRENTRY, 3 /*key length */ ); |
1676 | 1820 | ||
1677 | /* compose item head for new item. Directories consist of items of | 1821 | /* |
1678 | old type (ITEM_VERSION_1). Do not set key (second arg is 0), it | 1822 | * compose item head for new item. Directories consist of items of |
1679 | is done by reiserfs_new_inode */ | 1823 | * old type (ITEM_VERSION_1). Do not set key (second arg is 0), it |
1824 | * is done by reiserfs_new_inode | ||
1825 | */ | ||
1680 | if (old_format_only(sb)) { | 1826 | if (old_format_only(sb)) { |
1681 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, | 1827 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, |
1682 | TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); | 1828 | TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); |
@@ -1714,9 +1860,12 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | |||
1714 | return reiserfs_insert_item(th, path, &key, ih, inode, body); | 1860 | return reiserfs_insert_item(th, path, &key, ih, inode, body); |
1715 | } | 1861 | } |
1716 | 1862 | ||
1717 | /* stat data of object has been inserted, this inserts the item | 1863 | /* |
1718 | containing the body of symlink */ | 1864 | * stat data of object has been inserted, this inserts the item |
1719 | static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ | 1865 | * containing the body of symlink |
1866 | */ | ||
1867 | static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, | ||
1868 | struct inode *inode, | ||
1720 | struct item_head *ih, | 1869 | struct item_head *ih, |
1721 | struct treepath *path, const char *symname, | 1870 | struct treepath *path, const char *symname, |
1722 | int item_len) | 1871 | int item_len) |
@@ -1754,15 +1903,26 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i | |||
1754 | return reiserfs_insert_item(th, path, &key, ih, inode, symname); | 1903 | return reiserfs_insert_item(th, path, &key, ih, inode, symname); |
1755 | } | 1904 | } |
1756 | 1905 | ||
1757 | /* inserts the stat data into the tree, and then calls | 1906 | /* |
1758 | reiserfs_new_directory (to insert ".", ".." item if new object is | 1907 | * inserts the stat data into the tree, and then calls |
1759 | directory) or reiserfs_new_symlink (to insert symlink body if new | 1908 | * reiserfs_new_directory (to insert ".", ".." item if new object is |
1760 | object is symlink) or nothing (if new object is regular file) | 1909 | * directory) or reiserfs_new_symlink (to insert symlink body if new |
1761 | 1910 | * object is symlink) or nothing (if new object is regular file) | |
1762 | NOTE! uid and gid must already be set in the inode. If we return | 1911 | |
1763 | non-zero due to an error, we have to drop the quota previously allocated | 1912 | * NOTE! uid and gid must already be set in the inode. If we return |
1764 | for the fresh inode. This can only be done outside a transaction, so | 1913 | * non-zero due to an error, we have to drop the quota previously allocated |
1765 | if we return non-zero, we also end the transaction. */ | 1914 | * for the fresh inode. This can only be done outside a transaction, so |
1915 | * if we return non-zero, we also end the transaction. | ||
1916 | * | ||
1917 | * @th: active transaction handle | ||
1918 | * @dir: parent directory for new inode | ||
1919 | * @mode: mode of new inode | ||
1920 | * @symname: symlink contents if inode is symlink | ||
1921 | * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for | ||
1922 | * symlinks | ||
1923 | * @inode: inode to be filled | ||
1924 | * @security: optional security context to associate with this inode | ||
1925 | */ | ||
1766 | int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | 1926 | int reiserfs_new_inode(struct reiserfs_transaction_handle *th, |
1767 | struct inode *dir, umode_t mode, const char *symname, | 1927 | struct inode *dir, umode_t mode, const char *symname, |
1768 | /* 0 for regular, EMPTY_DIR_SIZE for dirs, | 1928 | struct inode *dir, umode_t mode, const char *symname, |
@@ -1807,7 +1967,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1807 | else | 1967 | else |
1808 | make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, | 1968 | make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, |
1809 | TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); | 1969 | TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); |
1810 | memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); | 1970 | memcpy(INODE_PKEY(inode), &ih.ih_key, KEY_SIZE); |
1811 | args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); | 1971 | args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); |
1812 | 1972 | ||
1813 | depth = reiserfs_write_unlock_nested(inode->i_sb); | 1973 | depth = reiserfs_write_unlock_nested(inode->i_sb); |
@@ -1820,10 +1980,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1820 | } | 1980 | } |
1821 | 1981 | ||
1822 | if (old_format_only(sb)) | 1982 | if (old_format_only(sb)) |
1823 | /* not a perfect generation count, as object ids can be reused, but | 1983 | /* |
1824 | ** this is as good as reiserfs can do right now. | 1984 | * not a perfect generation count, as object ids can be reused, |
1825 | ** note that the private part of inode isn't filled in yet, we have | 1985 | * but this is as good as reiserfs can do right now. |
1826 | ** to use the directory. | 1986 | * note that the private part of inode isn't filled in yet, |
1987 | * we have to use the directory. | ||
1827 | */ | 1988 | */ |
1828 | inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); | 1989 | inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); |
1829 | else | 1990 | else |
@@ -1850,7 +2011,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1850 | REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : | 2011 | REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : |
1851 | U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; | 2012 | U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; |
1852 | 2013 | ||
1853 | INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); | 2014 | INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list); |
1854 | REISERFS_I(inode)->i_flags = 0; | 2015 | REISERFS_I(inode)->i_flags = 0; |
1855 | REISERFS_I(inode)->i_prealloc_block = 0; | 2016 | REISERFS_I(inode)->i_prealloc_block = 0; |
1856 | REISERFS_I(inode)->i_prealloc_count = 0; | 2017 | REISERFS_I(inode)->i_prealloc_count = 0; |
@@ -1878,9 +2039,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1878 | goto out_bad_inode; | 2039 | goto out_bad_inode; |
1879 | } | 2040 | } |
1880 | if (old_format_only(sb)) { | 2041 | if (old_format_only(sb)) { |
2042 | /* i_uid or i_gid is too big to be stored in stat data v3.5 */ | ||
1881 | if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { | 2043 | if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { |
1882 | pathrelse(&path_to_key); | 2044 | pathrelse(&path_to_key); |
1883 | /* i_uid or i_gid is too big to be stored in stat data v3.5 */ | ||
1884 | err = -EINVAL; | 2045 | err = -EINVAL; |
1885 | goto out_bad_inode; | 2046 | goto out_bad_inode; |
1886 | } | 2047 | } |
@@ -1888,9 +2049,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1888 | } else { | 2049 | } else { |
1889 | inode2sd(&sd, inode, inode->i_size); | 2050 | inode2sd(&sd, inode, inode->i_size); |
1890 | } | 2051 | } |
1891 | // store in in-core inode the key of stat data and version all | 2052 | /* |
1892 | // object items will have (directory items will have old offset | 2053 | * store in in-core inode the key of stat data and version all |
1893 | // format, other new objects will consist of new items) | 2054 | * object items will have (directory items will have old offset |
2055 | * format, other new objects will consist of new items) | ||
2056 | */ | ||
1894 | if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) | 2057 | if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) |
1895 | set_inode_item_key_version(inode, KEY_FORMAT_3_5); | 2058 | set_inode_item_key_version(inode, KEY_FORMAT_3_5); |
1896 | else | 2059 | else |
@@ -1934,7 +2097,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1934 | if (retval) { | 2097 | if (retval) { |
1935 | err = retval; | 2098 | err = retval; |
1936 | reiserfs_check_path(&path_to_key); | 2099 | reiserfs_check_path(&path_to_key); |
1937 | journal_end(th, th->t_super, th->t_blocks_allocated); | 2100 | journal_end(th); |
1938 | goto out_inserted_sd; | 2101 | goto out_inserted_sd; |
1939 | } | 2102 | } |
1940 | 2103 | ||
@@ -1945,7 +2108,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1945 | if (retval) { | 2108 | if (retval) { |
1946 | err = retval; | 2109 | err = retval; |
1947 | reiserfs_check_path(&path_to_key); | 2110 | reiserfs_check_path(&path_to_key); |
1948 | journal_end(th, th->t_super, th->t_blocks_allocated); | 2111 | journal_end(th); |
1949 | goto out_inserted_sd; | 2112 | goto out_inserted_sd; |
1950 | } | 2113 | } |
1951 | } else if (inode->i_sb->s_flags & MS_POSIXACL) { | 2114 | } else if (inode->i_sb->s_flags & MS_POSIXACL) { |
@@ -1962,8 +2125,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1962 | if (retval) { | 2125 | if (retval) { |
1963 | err = retval; | 2126 | err = retval; |
1964 | reiserfs_check_path(&path_to_key); | 2127 | reiserfs_check_path(&path_to_key); |
1965 | retval = journal_end(th, th->t_super, | 2128 | retval = journal_end(th); |
1966 | th->t_blocks_allocated); | ||
1967 | if (retval) | 2129 | if (retval) |
1968 | err = retval; | 2130 | err = retval; |
1969 | goto out_inserted_sd; | 2131 | goto out_inserted_sd; |
@@ -1975,11 +2137,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1975 | 2137 | ||
1976 | return 0; | 2138 | return 0; |
1977 | 2139 | ||
1978 | /* it looks like you can easily compress these two goto targets into | 2140 | out_bad_inode: |
1979 | * one. Keeping it like this doesn't actually hurt anything, and they | ||
1980 | * are place holders for what the quota code actually needs. | ||
1981 | */ | ||
1982 | out_bad_inode: | ||
1983 | /* Invalidate the object, nothing was inserted yet */ | 2141 | /* Invalidate the object, nothing was inserted yet */ |
1984 | INODE_PKEY(inode)->k_objectid = 0; | 2142 | INODE_PKEY(inode)->k_objectid = 0; |
1985 | 2143 | ||
@@ -1988,16 +2146,19 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1988 | dquot_free_inode(inode); | 2146 | dquot_free_inode(inode); |
1989 | reiserfs_write_lock_nested(inode->i_sb, depth); | 2147 | reiserfs_write_lock_nested(inode->i_sb, depth); |
1990 | 2148 | ||
1991 | out_end_trans: | 2149 | out_end_trans: |
1992 | journal_end(th, th->t_super, th->t_blocks_allocated); | 2150 | journal_end(th); |
1993 | /* Drop can be outside and it needs more credits so it's better to have it outside */ | 2151 | /* |
2152 | * Drop can be outside and it needs more credits so it's better | ||
2153 | * to have it outside | ||
2154 | */ | ||
1994 | depth = reiserfs_write_unlock_nested(inode->i_sb); | 2155 | depth = reiserfs_write_unlock_nested(inode->i_sb); |
1995 | dquot_drop(inode); | 2156 | dquot_drop(inode); |
1996 | reiserfs_write_lock_nested(inode->i_sb, depth); | 2157 | reiserfs_write_lock_nested(inode->i_sb, depth); |
1997 | inode->i_flags |= S_NOQUOTA; | 2158 | inode->i_flags |= S_NOQUOTA; |
1998 | make_bad_inode(inode); | 2159 | make_bad_inode(inode); |
1999 | 2160 | ||
2000 | out_inserted_sd: | 2161 | out_inserted_sd: |
2001 | clear_nlink(inode); | 2162 | clear_nlink(inode); |
2002 | th->t_trans_id = 0; /* so the caller can't use this handle later */ | 2163 | th->t_trans_id = 0; /* so the caller can't use this handle later */ |
2003 | unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ | 2164 | unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ |
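Because every error path above both ends the transaction and zeroes t_trans_id, a caller can verify it was left with a dead handle. A hypothetical caller-side check (the argument list is abbreviated and illustrative, not taken from the patch):

        err = reiserfs_new_inode(&th, dir, mode, NULL, 0 /* regular file */,
                                 dentry, inode, &security);
        if (err) {
                /* the handle was already ended inside reiserfs_new_inode;
                 * t_trans_id == 0 makes any accidental reuse detectable */
                BUG_ON(th.t_trans_id != 0);
                return err;
        }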
@@ -2006,25 +2167,26 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
2006 | } | 2167 | } |
2007 | 2168 | ||
2008 | /* | 2169 | /* |
2009 | ** finds the tail page in the page cache, | 2170 | * finds the tail page in the page cache, |
2010 | ** reads the last block in. | 2171 | * reads the last block in. |
2011 | ** | 2172 | * |
2012 | ** On success, page_result is set to a locked, pinned page, and bh_result | 2173 | * On success, page_result is set to a locked, pinned page, and bh_result |
2013 | ** is set to an up to date buffer for the last block in the file. returns 0. | 2174 | * is set to an up to date buffer for the last block in the file. returns 0. |
2014 | ** | 2175 | * |
2015 | ** tail conversion is not done, so bh_result might not be valid for writing; | 2176 | * tail conversion is not done, so bh_result might not be valid for writing; |
2016 | ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before | 2177 | * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before |
2017 | ** trying to write the block. | 2178 | * trying to write the block. |
2018 | ** | 2179 | * |
2019 | ** on failure, nonzero is returned, page_result and bh_result are untouched. | 2180 | * on failure, nonzero is returned, page_result and bh_result are untouched. |
2020 | */ | 2181 | */ |
2021 | static int grab_tail_page(struct inode *inode, | 2182 | static int grab_tail_page(struct inode *inode, |
2022 | struct page **page_result, | 2183 | struct page **page_result, |
2023 | struct buffer_head **bh_result) | 2184 | struct buffer_head **bh_result) |
2024 | { | 2185 | { |
2025 | 2186 | ||
2026 | /* we want the page with the last byte in the file, | 2187 | /* |
2027 | ** not the page that will hold the next byte for appending | 2188 | * we want the page with the last byte in the file, |
2189 | * not the page that will hold the next byte for appending | ||
2028 | */ | 2190 | */ |
2029 | unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | 2191 | unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; |
2030 | unsigned long pos = 0; | 2192 | unsigned long pos = 0; |
@@ -2036,10 +2198,11 @@ static int grab_tail_page(struct inode *inode, | |||
2036 | struct page *page; | 2198 | struct page *page; |
2037 | int error; | 2199 | int error; |
2038 | 2200 | ||
2039 | /* we know that we are only called with inode->i_size > 0. | 2201 | /* |
2040 | ** we also know that a file tail can never be as big as a block | 2202 | * we know that we are only called with inode->i_size > 0. |
2041 | ** If i_size % blocksize == 0, our file is currently block aligned | 2203 | * we also know that a file tail can never be as big as a block |
2042 | ** and it won't need converting or zeroing after a truncate. | 2204 | * If i_size % blocksize == 0, our file is currently block aligned |
2205 | * and it won't need converting or zeroing after a truncate. | ||
2043 | */ | 2206 | */ |
2044 | if ((offset & (blocksize - 1)) == 0) { | 2207 | if ((offset & (blocksize - 1)) == 0) { |
2045 | return -ENOENT; | 2208 | return -ENOENT; |
@@ -2068,10 +2231,11 @@ static int grab_tail_page(struct inode *inode, | |||
2068 | } while (bh != head); | 2231 | } while (bh != head); |
2069 | 2232 | ||
2070 | if (!buffer_uptodate(bh)) { | 2233 | if (!buffer_uptodate(bh)) { |
2071 | /* note, this should never happen, prepare_write should | 2234 | /* |
2072 | ** be taking care of this for us. If the buffer isn't up to date, | 2235 | * note, this should never happen, prepare_write should be |
2073 | ** I've screwed up the code to find the buffer, or the code to | 2236 | * taking care of this for us. If the buffer isn't up to |
2074 | ** call prepare_write | 2237 | * date, I've screwed up the code to find the buffer, or the |
2238 | * code to call prepare_write | ||
2075 | */ | 2239 | */ |
2076 | reiserfs_error(inode->i_sb, "clm-6000", | 2240 | reiserfs_error(inode->i_sb, "clm-6000", |
2077 | "error reading block %lu", bh->b_blocknr); | 2241 | "error reading block %lu", bh->b_blocknr); |
@@ -2081,21 +2245,21 @@ static int grab_tail_page(struct inode *inode, | |||
2081 | *bh_result = bh; | 2245 | *bh_result = bh; |
2082 | *page_result = page; | 2246 | *page_result = page; |
2083 | 2247 | ||
2084 | out: | 2248 | out: |
2085 | return error; | 2249 | return error; |
2086 | 2250 | ||
2087 | unlock: | 2251 | unlock: |
2088 | unlock_page(page); | 2252 | unlock_page(page); |
2089 | page_cache_release(page); | 2253 | page_cache_release(page); |
2090 | return error; | 2254 | return error; |
2091 | } | 2255 | } |
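A short usage sketch of the contract spelled out above (hypothetical caller, not part of the patch): the returned buffer is only writable when it maps a real block.

        struct page *page = NULL;
        struct buffer_head *bh = NULL;

        if (grab_tail_page(inode, &page, &bh) == 0) {
                if (buffer_mapped(bh) && bh->b_blocknr != 0) {
                        /* unformatted node: safe to zero or write the block */
                } else {
                        /* direct item: tail conversion has not been done */
                }
                unlock_page(page);
                page_cache_release(page);
        }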
2092 | 2256 | ||
2093 | /* | 2257 | /* |
2094 | ** vfs version of truncate file. Must NOT be called with | 2258 | * vfs version of truncate file. Must NOT be called with |
2095 | ** a transaction already started. | 2259 | * a transaction already started. |
2096 | ** | 2260 | * |
2097 | ** some code taken from block_truncate_page | 2261 | * some code taken from block_truncate_page |
2098 | */ | 2262 | */ |
2099 | int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | 2263 | int reiserfs_truncate_file(struct inode *inode, int update_timestamps) |
2100 | { | 2264 | { |
2101 | struct reiserfs_transaction_handle th; | 2265 | struct reiserfs_transaction_handle th; |
@@ -2113,9 +2277,11 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2113 | if (inode->i_size > 0) { | 2277 | if (inode->i_size > 0) { |
2114 | error = grab_tail_page(inode, &page, &bh); | 2278 | error = grab_tail_page(inode, &page, &bh); |
2115 | if (error) { | 2279 | if (error) { |
2116 | // -ENOENT means we truncated past the end of the file, | 2280 | /* |
2117 | // and get_block_create_0 could not find a block to read in, | 2281 | * -ENOENT means we truncated past the end of the |
2118 | // which is ok. | 2282 | * file, and get_block_create_0 could not find a |
2283 | * block to read in, which is ok. | ||
2284 | */ | ||
2119 | if (error != -ENOENT) | 2285 | if (error != -ENOENT) |
2120 | reiserfs_error(inode->i_sb, "clm-6001", | 2286 | reiserfs_error(inode->i_sb, "clm-6001", |
2121 | "grab_tail_page failed %d", | 2287 | "grab_tail_page failed %d", |
@@ -2125,29 +2291,33 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2125 | } | 2291 | } |
2126 | } | 2292 | } |
2127 | 2293 | ||
2128 | /* so, if page != NULL, we have a buffer head for the offset at | 2294 | /* |
2129 | ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, | 2295 | * so, if page != NULL, we have a buffer head for the offset at |
2130 | ** then we have an unformatted node. Otherwise, we have a direct item, | 2296 | * the end of the file. if the bh is mapped, and bh->b_blocknr != 0, |
2131 | ** and no zeroing is required on disk. We zero after the truncate, | 2297 | * then we have an unformatted node. Otherwise, we have a direct item, |
2132 | ** because the truncate might pack the item anyway | 2298 | * and no zeroing is required on disk. We zero after the truncate, |
2133 | ** (it will unmap bh if it packs). | 2299 | * because the truncate might pack the item anyway |
2300 | * (it will unmap bh if it packs). | ||
2301 | * | ||
2302 | * it is enough to reserve space in transaction for 2 balancings: | ||
2303 | * one for "save" link adding and another for the first | ||
2304 | * cut_from_item. 1 is for update_sd | ||
2134 | */ | 2305 | */ |
2135 | /* it is enough to reserve space in transaction for 2 balancings: | ||
2136 | one for "save" link adding and another for the first | ||
2137 | cut_from_item. 1 is for update_sd */ | ||
2138 | error = journal_begin(&th, inode->i_sb, | 2306 | error = journal_begin(&th, inode->i_sb, |
2139 | JOURNAL_PER_BALANCE_CNT * 2 + 1); | 2307 | JOURNAL_PER_BALANCE_CNT * 2 + 1); |
2140 | if (error) | 2308 | if (error) |
2141 | goto out; | 2309 | goto out; |
2142 | reiserfs_update_inode_transaction(inode); | 2310 | reiserfs_update_inode_transaction(inode); |
2143 | if (update_timestamps) | 2311 | if (update_timestamps) |
2144 | /* we are doing real truncate: if the system crashes before the last | 2312 | /* |
2145 | transaction of truncating gets committed - on reboot the file | 2313 | * we are doing real truncate: if the system crashes |
2146 | either appears truncated properly or not truncated at all */ | 2314 | * before the last transaction of truncating gets committed |
2315 | * - on reboot the file either appears truncated properly | ||
2316 | * or not truncated at all | ||
2317 | */ | ||
2147 | add_save_link(&th, inode, 1); | 2318 | add_save_link(&th, inode, 1); |
2148 | err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); | 2319 | err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); |
2149 | error = | 2320 | error = journal_end(&th); |
2150 | journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); | ||
2151 | if (error) | 2321 | if (error) |
2152 | goto out; | 2322 | goto out; |
2153 | 2323 | ||
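The err2/error split above is deliberate: the handle must be closed even when the truncate itself failed, and only afterwards is the truncate error surfaced. The tail of the sequence falls outside this hunk, so the continuation below is an assumption:

        err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
        error = journal_end(&th);       /* always close the handle first */
        if (error)
                goto out;
        if (err2) {                     /* then report the truncate failure */
                error = err2;
                goto out;
        }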
@@ -2180,7 +2350,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2180 | reiserfs_write_unlock(inode->i_sb); | 2350 | reiserfs_write_unlock(inode->i_sb); |
2181 | 2351 | ||
2182 | return 0; | 2352 | return 0; |
2183 | out: | 2353 | out: |
2184 | if (page) { | 2354 | if (page) { |
2185 | unlock_page(page); | 2355 | unlock_page(page); |
2186 | page_cache_release(page); | 2356 | page_cache_release(page); |
@@ -2212,7 +2382,10 @@ static int map_block_for_writepage(struct inode *inode, | |||
2212 | int copy_size; | 2382 | int copy_size; |
2213 | int trans_running = 0; | 2383 | int trans_running = 0; |
2214 | 2384 | ||
2215 | /* catch places below that try to log something without starting a trans */ | 2385 | /* |
2386 | * catch places below that try to log something without | ||
2387 | * starting a trans | ||
2388 | */ | ||
2216 | th.t_trans_id = 0; | 2389 | th.t_trans_id = 0; |
2217 | 2390 | ||
2218 | if (!buffer_uptodate(bh_result)) { | 2391 | if (!buffer_uptodate(bh_result)) { |
@@ -2220,11 +2393,11 @@ static int map_block_for_writepage(struct inode *inode, | |||
2220 | } | 2393 | } |
2221 | 2394 | ||
2222 | kmap(bh_result->b_page); | 2395 | kmap(bh_result->b_page); |
2223 | start_over: | 2396 | start_over: |
2224 | reiserfs_write_lock(inode->i_sb); | 2397 | reiserfs_write_lock(inode->i_sb); |
2225 | make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); | 2398 | make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); |
2226 | 2399 | ||
2227 | research: | 2400 | research: |
2228 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 2401 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
2229 | if (retval != POSITION_FOUND) { | 2402 | if (retval != POSITION_FOUND) { |
2230 | use_get_block = 1; | 2403 | use_get_block = 1; |
@@ -2232,8 +2405,8 @@ static int map_block_for_writepage(struct inode *inode, | |||
2232 | } | 2405 | } |
2233 | 2406 | ||
2234 | bh = get_last_bh(&path); | 2407 | bh = get_last_bh(&path); |
2235 | ih = get_ih(&path); | 2408 | ih = tp_item_head(&path); |
2236 | item = get_item(&path); | 2409 | item = tp_item_body(&path); |
2237 | pos_in_item = path.pos_in_item; | 2410 | pos_in_item = path.pos_in_item; |
2238 | 2411 | ||
2239 | /* we've found an unformatted node */ | 2412 | /* we've found an unformatted node */ |
@@ -2281,10 +2454,10 @@ static int map_block_for_writepage(struct inode *inode, | |||
2281 | goto research; | 2454 | goto research; |
2282 | } | 2455 | } |
2283 | 2456 | ||
2284 | memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, | 2457 | memcpy(ih_item_body(bh, ih) + pos_in_item, p + bytes_copied, |
2285 | copy_size); | 2458 | copy_size); |
2286 | 2459 | ||
2287 | journal_mark_dirty(&th, inode->i_sb, bh); | 2460 | journal_mark_dirty(&th, bh); |
2288 | bytes_copied += copy_size; | 2461 | bytes_copied += copy_size; |
2289 | set_block_dev_mapped(bh_result, 0, inode); | 2462 | set_block_dev_mapped(bh_result, 0, inode); |
2290 | 2463 | ||
@@ -2304,10 +2477,10 @@ static int map_block_for_writepage(struct inode *inode, | |||
2304 | } | 2477 | } |
2305 | retval = 0; | 2478 | retval = 0; |
2306 | 2479 | ||
2307 | out: | 2480 | out: |
2308 | pathrelse(&path); | 2481 | pathrelse(&path); |
2309 | if (trans_running) { | 2482 | if (trans_running) { |
2310 | int err = journal_end(&th, inode->i_sb, jbegin_count); | 2483 | int err = journal_end(&th); |
2311 | if (err) | 2484 | if (err) |
2312 | retval = err; | 2485 | retval = err; |
2313 | trans_running = 0; | 2486 | trans_running = 0; |
@@ -2331,7 +2504,8 @@ static int map_block_for_writepage(struct inode *inode, | |||
2331 | kunmap(bh_result->b_page); | 2504 | kunmap(bh_result->b_page); |
2332 | 2505 | ||
2333 | if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | 2506 | if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { |
2334 | /* we've copied data from the page into the direct item, so the | 2507 | /* |
2508 | * we've copied data from the page into the direct item, so the | ||
2335 | * buffer in the page is now clean, mark it to reflect that. | 2509 | * buffer in the page is now clean, mark it to reflect that. |
2336 | */ | 2510 | */ |
2337 | lock_buffer(bh_result); | 2511 | lock_buffer(bh_result); |
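The research: label earlier in this function anchors reiserfs's standard retry idiom: after preparing a buffer for logging, the tree may have been rebalanced underneath us, in which case the search must be redone. A hedged sketch of that pattern (the fs_gen/tmp_ih setup is assumed from context):

        fs_gen = get_generation(inode->i_sb);
        copy_item_head(&tmp_ih, ih);

        reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
        if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
                /* a balance moved the item: undo the prepare, search again */
                reiserfs_restore_prepared_buffer(inode->i_sb, bh);
                goto research;
        }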
@@ -2370,7 +2544,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2370 | return 0; | 2544 | return 0; |
2371 | } | 2545 | } |
2372 | 2546 | ||
2373 | /* The page dirty bit is cleared before writepage is called, which | 2547 | /* |
2548 | * The page dirty bit is cleared before writepage is called, which | ||
2374 | * means we have to tell create_empty_buffers to make dirty buffers. | 2549 | * means we have to tell create_empty_buffers to make dirty buffers. |
2375 | * The page really should be up to date at this point, so tossing | 2550 | * The page really should be up to date at this point, so tossing |
2376 | * in the BH_Uptodate is just a sanity check. | 2551 | * in the BH_Uptodate is just a sanity check. |
@@ -2381,8 +2556,9 @@ static int reiserfs_write_full_page(struct page *page, | |||
2381 | } | 2556 | } |
2382 | head = page_buffers(page); | 2557 | head = page_buffers(page); |
2383 | 2558 | ||
2384 | /* last page in the file, zero out any contents past the | 2559 | /* |
2385 | ** last byte in the file | 2560 | * last page in the file, zero out any contents past the |
2561 | * last byte in the file | ||
2386 | */ | 2562 | */ |
2387 | if (page->index >= end_index) { | 2563 | if (page->index >= end_index) { |
2388 | unsigned last_offset; | 2564 | unsigned last_offset; |
@@ -2412,7 +2588,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2412 | (!buffer_mapped(bh) || (buffer_mapped(bh) | 2588 | (!buffer_mapped(bh) || (buffer_mapped(bh) |
2413 | && bh->b_blocknr == | 2589 | && bh->b_blocknr == |
2414 | 0))) { | 2590 | 0))) { |
2415 | /* not mapped yet, or it points to a direct item, search | 2591 | /* |
2592 | * not mapped yet, or it points to a direct item, search | ||
2416 | * the btree for the mapping info, and log any direct | 2593 | * the btree for the mapping info, and log any direct |
2417 | * items found | 2594 | * items found |
2418 | */ | 2595 | */ |
@@ -2450,10 +2627,11 @@ static int reiserfs_write_full_page(struct page *page, | |||
2450 | 2627 | ||
2451 | if (checked) { | 2628 | if (checked) { |
2452 | reiserfs_prepare_for_journal(s, bh, 1); | 2629 | reiserfs_prepare_for_journal(s, bh, 1); |
2453 | journal_mark_dirty(&th, s, bh); | 2630 | journal_mark_dirty(&th, bh); |
2454 | continue; | 2631 | continue; |
2455 | } | 2632 | } |
2456 | /* from this point on, we know the buffer is mapped to a | 2633 | /* |
2634 | * from this point on, we know the buffer is mapped to a | ||
2457 | * real block and not a direct item | 2635 | * real block and not a direct item |
2458 | */ | 2636 | */ |
2459 | if (wbc->sync_mode != WB_SYNC_NONE) { | 2637 | if (wbc->sync_mode != WB_SYNC_NONE) { |
@@ -2472,7 +2650,7 @@ static int reiserfs_write_full_page(struct page *page, | |||
2472 | } while ((bh = bh->b_this_page) != head); | 2650 | } while ((bh = bh->b_this_page) != head); |
2473 | 2651 | ||
2474 | if (checked) { | 2652 | if (checked) { |
2475 | error = journal_end(&th, s, bh_per_page + 1); | 2653 | error = journal_end(&th); |
2476 | reiserfs_write_unlock(s); | 2654 | reiserfs_write_unlock(s); |
2477 | if (error) | 2655 | if (error) |
2478 | goto fail; | 2656 | goto fail; |
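For data=journal pages (PageChecked), the loop above logs every buffer instead of queueing block I/O, and one handle is opened and closed per page. Condensed, the per-page pattern reduces to:

        do {
                if (checked) {
                        reiserfs_prepare_for_journal(s, bh, 1);
                        journal_mark_dirty(&th, bh); /* log it, don't write it */
                        continue;
                }
                /* otherwise the buffer takes the async writeback path */
        } while ((bh = bh->b_this_page) != head);

        if (checked)
                error = journal_end(&th);       /* one handle per page */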
@@ -2497,7 +2675,7 @@ static int reiserfs_write_full_page(struct page *page, | |||
2497 | } while (bh != head); | 2675 | } while (bh != head); |
2498 | 2676 | ||
2499 | error = 0; | 2677 | error = 0; |
2500 | done: | 2678 | done: |
2501 | if (nr == 0) { | 2679 | if (nr == 0) { |
2502 | /* | 2680 | /* |
2503 | * if this page only had a direct item, it is very possible for | 2681 | * if this page only had a direct item, it is very possible for |
@@ -2519,8 +2697,9 @@ static int reiserfs_write_full_page(struct page *page, | |||
2519 | } | 2697 | } |
2520 | return error; | 2698 | return error; |
2521 | 2699 | ||
2522 | fail: | 2700 | fail: |
2523 | /* catches various errors, we need to make sure any valid dirty blocks | 2701 | /* |
2702 | * catches various errors, we need to make sure any valid dirty blocks | ||
2524 | * get to the media. The page is currently locked and not marked for | 2703 | * get to the media. The page is currently locked and not marked for |
2525 | * writeback | 2704 | * writeback |
2526 | */ | 2705 | */ |
@@ -2533,8 +2712,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2533 | mark_buffer_async_write(bh); | 2712 | mark_buffer_async_write(bh); |
2534 | } else { | 2713 | } else { |
2535 | /* | 2714 | /* |
2536 | * clear any dirty bits that might have come from getting | 2715 | * clear any dirty bits that might have come from |
2537 | * attached to a dirty page | 2716 | * getting attached to a dirty page |
2538 | */ | 2717 | */ |
2539 | clear_buffer_dirty(bh); | 2718 | clear_buffer_dirty(bh); |
2540 | } | 2719 | } |
@@ -2614,15 +2793,18 @@ static int reiserfs_write_begin(struct file *file, | |||
2614 | ret = __block_write_begin(page, pos, len, reiserfs_get_block); | 2793 | ret = __block_write_begin(page, pos, len, reiserfs_get_block); |
2615 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2794 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2616 | struct reiserfs_transaction_handle *th = current->journal_info; | 2795 | struct reiserfs_transaction_handle *th = current->journal_info; |
2617 | /* this gets a little ugly. If reiserfs_get_block returned an | 2796 | /* |
2618 | * error and left a transaction running, we've got to close it, | 2797 | * this gets a little ugly. If reiserfs_get_block returned an |
2619 | * and we've got to free the handle if it was a persistent transaction. | 2798 | * error and left a transaction running, we've got to close |
2799 | * it, and we've got to free the handle if it was a persistent | ||
2800 | * transaction. | ||
2620 | * | 2801 | * |
2621 | * But, if we had nested into an existing transaction, we need | 2802 | * But, if we had nested into an existing transaction, we need |
2622 | * to just drop the ref count on the handle. | 2803 | * to just drop the ref count on the handle. |
2623 | * | 2804 | * |
2624 | * If old_ref == 0, the transaction is from reiserfs_get_block, | 2805 | * If old_ref == 0, the transaction is from reiserfs_get_block, |
2625 | * and it was a persistent trans. Otherwise, it was nested above. | 2806 | * and it was a persistent trans. Otherwise, it was nested |
2807 | * above. | ||
2626 | */ | 2808 | */ |
2627 | if (th->t_refcount > old_ref) { | 2809 | if (th->t_refcount > old_ref) { |
2628 | if (old_ref) | 2810 | if (old_ref) |
@@ -2671,15 +2853,18 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) | |||
2671 | ret = __block_write_begin(page, from, len, reiserfs_get_block); | 2853 | ret = __block_write_begin(page, from, len, reiserfs_get_block); |
2672 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2854 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2673 | struct reiserfs_transaction_handle *th = current->journal_info; | 2855 | struct reiserfs_transaction_handle *th = current->journal_info; |
2674 | /* this gets a little ugly. If reiserfs_get_block returned an | 2856 | /* |
2675 | * error and left a transaction running, we've got to close it, | 2857 | * this gets a little ugly. If reiserfs_get_block returned an |
2676 | * and we've got to free the handle if it was a persistent transaction. | 2858 | * error and left a transaction running, we've got to close |
2859 | * it, and we've got to free the handle if it was a persistent | ||
2860 | * transaction. | ||
2677 | * | 2861 | * |
2678 | * But, if we had nested into an existing transaction, we need | 2862 | * But, if we had nested into an existing transaction, we need |
2679 | * to just drop the ref count on the handle. | 2863 | * to just drop the ref count on the handle. |
2680 | * | 2864 | * |
2681 | * If old_ref == 0, the transaction is from reiserfs_get_block, | 2865 | * If old_ref == 0, the transaction is from reiserfs_get_block, |
2682 | * and it was a persistent trans. Otherwise, it was nested above. | 2866 | * and it was a persistent trans. Otherwise, it was nested |
2867 | * above. | ||
2683 | */ | 2868 | */ |
2684 | if (th->t_refcount > old_ref) { | 2869 | if (th->t_refcount > old_ref) { |
2685 | if (old_ref) | 2870 | if (old_ref) |
@@ -2734,17 +2919,20 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2734 | 2919 | ||
2735 | reiserfs_commit_page(inode, page, start, start + copied); | 2920 | reiserfs_commit_page(inode, page, start, start + copied); |
2736 | 2921 | ||
2737 | /* generic_commit_write does this for us, but does not update the | 2922 | /* |
2738 | ** transaction tracking stuff when the size changes. So, we have | 2923 | * generic_commit_write does this for us, but does not update the |
2739 | ** to do the i_size updates here. | 2924 | * transaction tracking stuff when the size changes. So, we have |
2925 | * to do the i_size updates here. | ||
2740 | */ | 2926 | */ |
2741 | if (pos + copied > inode->i_size) { | 2927 | if (pos + copied > inode->i_size) { |
2742 | struct reiserfs_transaction_handle myth; | 2928 | struct reiserfs_transaction_handle myth; |
2743 | reiserfs_write_lock(inode->i_sb); | 2929 | reiserfs_write_lock(inode->i_sb); |
2744 | locked = true; | 2930 | locked = true; |
2745 | /* If the file has grown beyond the border where it | 2931 | /* |
2746 | can have a tail, unmark it as needing a tail | 2932 | * If the file has grown beyond the border where it |
2747 | packing */ | 2933 | * can have a tail, unmark it as needing a tail |
2934 | * packing | ||
2935 | */ | ||
2748 | if ((have_large_tails(inode->i_sb) | 2936 | if ((have_large_tails(inode->i_sb) |
2749 | && inode->i_size > i_block_size(inode) * 4) | 2937 | && inode->i_size > i_block_size(inode) * 4) |
2750 | || (have_small_tails(inode->i_sb) | 2938 | || (have_small_tails(inode->i_sb) |
@@ -2759,13 +2947,13 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2759 | inode->i_size = pos + copied; | 2947 | inode->i_size = pos + copied; |
2760 | /* | 2948 | /* |
2761 | * this will just nest into our transaction. It's important | 2949 | * this will just nest into our transaction. It's important |
2762 | * to use mark_inode_dirty so the inode gets pushed around on the | 2950 | * to use mark_inode_dirty so the inode gets pushed around on |
2763 | * dirty lists, and so that O_SYNC works as expected | 2951 | * the dirty lists, and so that O_SYNC works as expected |
2764 | */ | 2952 | */ |
2765 | mark_inode_dirty(inode); | 2953 | mark_inode_dirty(inode); |
2766 | reiserfs_update_sd(&myth, inode); | 2954 | reiserfs_update_sd(&myth, inode); |
2767 | update_sd = 1; | 2955 | update_sd = 1; |
2768 | ret = journal_end(&myth, inode->i_sb, 1); | 2956 | ret = journal_end(&myth); |
2769 | if (ret) | 2957 | if (ret) |
2770 | goto journal_error; | 2958 | goto journal_error; |
2771 | } | 2959 | } |
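The threshold test is cut off by the hunk; judging from the mount options it names, the full condition presumably reads as below, clearing the pack-on-close flag once the file is too large ever to be tail-packed again (assumed continuation):

        if ((have_large_tails(inode->i_sb)
             && inode->i_size > i_block_size(inode) * 4)
            || (have_small_tails(inode->i_sb)
                && inode->i_size > i_block_size(inode)))
                REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;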
@@ -2781,7 +2969,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2781 | goto out; | 2969 | goto out; |
2782 | } | 2970 | } |
2783 | 2971 | ||
2784 | out: | 2972 | out: |
2785 | if (locked) | 2973 | if (locked) |
2786 | reiserfs_write_unlock(inode->i_sb); | 2974 | reiserfs_write_unlock(inode->i_sb); |
2787 | unlock_page(page); | 2975 | unlock_page(page); |
@@ -2792,7 +2980,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2792 | 2980 | ||
2793 | return ret == 0 ? copied : ret; | 2981 | return ret == 0 ? copied : ret; |
2794 | 2982 | ||
2795 | journal_error: | 2983 | journal_error: |
2796 | reiserfs_write_unlock(inode->i_sb); | 2984 | reiserfs_write_unlock(inode->i_sb); |
2797 | locked = false; | 2985 | locked = false; |
2798 | if (th) { | 2986 | if (th) { |
@@ -2822,15 +3010,18 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2822 | } | 3010 | } |
2823 | reiserfs_commit_page(inode, page, from, to); | 3011 | reiserfs_commit_page(inode, page, from, to); |
2824 | 3012 | ||
2825 | /* generic_commit_write does this for us, but does not update the | 3013 | /* |
2826 | ** transaction tracking stuff when the size changes. So, we have | 3014 | * generic_commit_write does this for us, but does not update the |
2827 | ** to do the i_size updates here. | 3015 | * transaction tracking stuff when the size changes. So, we have |
3016 | * to do the i_size updates here. | ||
2828 | */ | 3017 | */ |
2829 | if (pos > inode->i_size) { | 3018 | if (pos > inode->i_size) { |
2830 | struct reiserfs_transaction_handle myth; | 3019 | struct reiserfs_transaction_handle myth; |
2831 | /* If the file has grown beyond the border where it | 3020 | /* |
2832 | can have a tail, unmark it as needing a tail | 3021 | * If the file has grown beyond the border where it |
2833 | packing */ | 3022 | * can have a tail, unmark it as needing a tail |
3023 | * packing | ||
3024 | */ | ||
2834 | if ((have_large_tails(inode->i_sb) | 3025 | if ((have_large_tails(inode->i_sb) |
2835 | && inode->i_size > i_block_size(inode) * 4) | 3026 | && inode->i_size > i_block_size(inode) * 4) |
2836 | || (have_small_tails(inode->i_sb) | 3027 | || (have_small_tails(inode->i_sb) |
@@ -2845,13 +3036,13 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2845 | inode->i_size = pos; | 3036 | inode->i_size = pos; |
2846 | /* | 3037 | /* |
2847 | * this will just nest into our transaction. It's important | 3038 | * this will just nest into our transaction. It's important |
2848 | * to use mark_inode_dirty so the inode gets pushed around on the | 3039 | * to use mark_inode_dirty so the inode gets pushed around |
2849 | * dirty lists, and so that O_SYNC works as expected | 3040 | * on the dirty lists, and so that O_SYNC works as expected |
2850 | */ | 3041 | */ |
2851 | mark_inode_dirty(inode); | 3042 | mark_inode_dirty(inode); |
2852 | reiserfs_update_sd(&myth, inode); | 3043 | reiserfs_update_sd(&myth, inode); |
2853 | update_sd = 1; | 3044 | update_sd = 1; |
2854 | ret = journal_end(&myth, inode->i_sb, 1); | 3045 | ret = journal_end(&myth); |
2855 | if (ret) | 3046 | if (ret) |
2856 | goto journal_error; | 3047 | goto journal_error; |
2857 | } | 3048 | } |
@@ -2863,10 +3054,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2863 | goto out; | 3054 | goto out; |
2864 | } | 3055 | } |
2865 | 3056 | ||
2866 | out: | 3057 | out: |
2867 | return ret; | 3058 | return ret; |
2868 | 3059 | ||
2869 | journal_error: | 3060 | journal_error: |
2870 | if (th) { | 3061 | if (th) { |
2871 | if (!update_sd) | 3062 | if (!update_sd) |
2872 | reiserfs_update_sd(th, inode); | 3063 | reiserfs_update_sd(th, inode); |
@@ -2924,9 +3115,10 @@ void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) | |||
2924 | } | 3115 | } |
2925 | } | 3116 | } |
2926 | 3117 | ||
2927 | /* decide if this buffer needs to stay around for data logging or ordered | 3118 | /* |
2928 | ** write purposes | 3119 | * decide if this buffer needs to stay around for data logging or ordered |
2929 | */ | 3120 | * write purposes |
3121 | */ | ||
2930 | static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | 3122 | static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) |
2931 | { | 3123 | { |
2932 | int ret = 1; | 3124 | int ret = 1; |
@@ -2937,7 +3129,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
2937 | if (!buffer_mapped(bh)) { | 3129 | if (!buffer_mapped(bh)) { |
2938 | goto free_jh; | 3130 | goto free_jh; |
2939 | } | 3131 | } |
2940 | /* the page is locked, and the only places that log a data buffer | 3132 | /* |
3133 | * the page is locked, and the only places that log a data buffer | ||
2941 | * also lock the page. | 3134 | * also lock the page. |
2942 | */ | 3135 | */ |
2943 | if (reiserfs_file_data_log(inode)) { | 3136 | if (reiserfs_file_data_log(inode)) { |
@@ -2952,7 +3145,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
2952 | struct reiserfs_journal_list *jl; | 3145 | struct reiserfs_journal_list *jl; |
2953 | struct reiserfs_jh *jh = bh->b_private; | 3146 | struct reiserfs_jh *jh = bh->b_private; |
2954 | 3147 | ||
2955 | /* why is this safe? | 3148 | /* |
3149 | * why is this safe? | ||
2956 | * reiserfs_setattr updates i_size in the on disk | 3150 | * reiserfs_setattr updates i_size in the on disk |
2957 | * stat data before allowing vmtruncate to be called. | 3151 | * stat data before allowing vmtruncate to be called. |
2958 | * | 3152 | * |
@@ -2969,7 +3163,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
2969 | && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) | 3163 | && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) |
2970 | ret = 0; | 3164 | ret = 0; |
2971 | } | 3165 | } |
2972 | free_jh: | 3166 | free_jh: |
2973 | if (ret && bh->b_private) { | 3167 | if (ret && bh->b_private) { |
2974 | reiserfs_free_jh(bh); | 3168 | reiserfs_free_jh(bh); |
2975 | } | 3169 | } |
@@ -3028,7 +3222,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned int offset, | |||
3028 | ret = try_to_release_page(page, 0); | 3222 | ret = try_to_release_page(page, 0); |
3029 | /* maybe should BUG_ON(!ret); - neilb */ | 3223 | /* maybe should BUG_ON(!ret); - neilb */ |
3030 | } | 3224 | } |
3031 | out: | 3225 | out: |
3032 | return; | 3226 | return; |
3033 | } | 3227 | } |
3034 | 3228 | ||
@@ -3080,8 +3274,10 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) | |||
3080 | return ret; | 3274 | return ret; |
3081 | } | 3275 | } |
3082 | 3276 | ||
3083 | /* We thank Mingming Cao for helping us understand in great detail what | 3277 | /* |
3084 | to do in this section of the code. */ | 3278 | * We thank Mingming Cao for helping us understand in great detail what |
3279 | * to do in this section of the code. | ||
3280 | */ | ||
3085 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | 3281 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, |
3086 | const struct iovec *iov, loff_t offset, | 3282 | const struct iovec *iov, loff_t offset, |
3087 | unsigned long nr_segs) | 3283 | unsigned long nr_segs) |
@@ -3127,8 +3323,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3127 | dquot_initialize(inode); | 3323 | dquot_initialize(inode); |
3128 | reiserfs_write_lock(inode->i_sb); | 3324 | reiserfs_write_lock(inode->i_sb); |
3129 | if (attr->ia_valid & ATTR_SIZE) { | 3325 | if (attr->ia_valid & ATTR_SIZE) { |
3130 | /* version 2 items will be caught by the s_maxbytes check | 3326 | /* |
3131 | ** done for us in vmtruncate | 3327 | * version 2 items will be caught by the s_maxbytes check |
3328 | * done for us in vmtruncate | ||
3132 | */ | 3329 | */ |
3133 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && | 3330 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && |
3134 | attr->ia_size > MAX_NON_LFS) { | 3331 | attr->ia_size > MAX_NON_LFS) { |
@@ -3149,7 +3346,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3149 | err = journal_begin(&th, inode->i_sb, 4); | 3346 | err = journal_begin(&th, inode->i_sb, 4); |
3150 | if (!err) { | 3347 | if (!err) { |
3151 | reiserfs_discard_prealloc(&th, inode); | 3348 | reiserfs_discard_prealloc(&th, inode); |
3152 | err = journal_end(&th, inode->i_sb, 4); | 3349 | err = journal_end(&th); |
3153 | } | 3350 | } |
3154 | if (err) | 3351 | if (err) |
3155 | error = err; | 3352 | error = err; |
@@ -3189,7 +3386,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3189 | if (error) | 3386 | if (error) |
3190 | return error; | 3387 | return error; |
3191 | 3388 | ||
3192 | /* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */ | 3389 | /* |
3390 | * (user+group)*(old+new) structure - we count quota | ||
3391 | * info and inode write (sb, inode) | ||
3392 | */ | ||
3193 | reiserfs_write_lock(inode->i_sb); | 3393 | reiserfs_write_lock(inode->i_sb); |
3194 | error = journal_begin(&th, inode->i_sb, jbegin_count); | 3394 | error = journal_begin(&th, inode->i_sb, jbegin_count); |
3195 | reiserfs_write_unlock(inode->i_sb); | 3395 | reiserfs_write_unlock(inode->i_sb); |
@@ -3198,19 +3398,21 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3198 | error = dquot_transfer(inode, attr); | 3398 | error = dquot_transfer(inode, attr); |
3199 | reiserfs_write_lock(inode->i_sb); | 3399 | reiserfs_write_lock(inode->i_sb); |
3200 | if (error) { | 3400 | if (error) { |
3201 | journal_end(&th, inode->i_sb, jbegin_count); | 3401 | journal_end(&th); |
3202 | reiserfs_write_unlock(inode->i_sb); | 3402 | reiserfs_write_unlock(inode->i_sb); |
3203 | goto out; | 3403 | goto out; |
3204 | } | 3404 | } |
3205 | 3405 | ||
3206 | /* Update corresponding info in inode so that everything is in | 3406 | /* |
3207 | * one transaction */ | 3407 | * Update corresponding info in inode so that everything |
3408 | * is in one transaction | ||
3409 | */ | ||
3208 | if (attr->ia_valid & ATTR_UID) | 3410 | if (attr->ia_valid & ATTR_UID) |
3209 | inode->i_uid = attr->ia_uid; | 3411 | inode->i_uid = attr->ia_uid; |
3210 | if (attr->ia_valid & ATTR_GID) | 3412 | if (attr->ia_valid & ATTR_GID) |
3211 | inode->i_gid = attr->ia_gid; | 3413 | inode->i_gid = attr->ia_gid; |
3212 | mark_inode_dirty(inode); | 3414 | mark_inode_dirty(inode); |
3213 | error = journal_end(&th, inode->i_sb, jbegin_count); | 3415 | error = journal_end(&th); |
3214 | reiserfs_write_unlock(inode->i_sb); | 3416 | reiserfs_write_unlock(inode->i_sb); |
3215 | if (error) | 3417 | if (error) |
3216 | goto out; | 3418 | goto out; |
@@ -3220,8 +3422,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3220 | attr->ia_size != i_size_read(inode)) { | 3422 | attr->ia_size != i_size_read(inode)) { |
3221 | error = inode_newsize_ok(inode, attr->ia_size); | 3423 | error = inode_newsize_ok(inode, attr->ia_size); |
3222 | if (!error) { | 3424 | if (!error) { |
3425 | /* | ||
3426 | * Could race against reiserfs_file_release | ||
3427 | * if called from NFS, so take tailpack mutex. | ||
3428 | */ | ||
3429 | mutex_lock(&REISERFS_I(inode)->tailpack); | ||
3223 | truncate_setsize(inode, attr->ia_size); | 3430 | truncate_setsize(inode, attr->ia_size); |
3224 | reiserfs_vfs_truncate_file(inode); | 3431 | reiserfs_truncate_file(inode, 1); |
3432 | mutex_unlock(&REISERFS_I(inode)->tailpack); | ||
3225 | } | 3433 | } |
3226 | } | 3434 | } |
3227 | 3435 | ||
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 946ccbf5b5a1..501ed6811a2b 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -15,7 +15,8 @@ | |||
15 | * reiserfs_ioctl - handler for ioctl for inode | 15 | * reiserfs_ioctl - handler for ioctl for inode |
16 | * supported commands: | 16 | * supported commands: |
17 | * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect | 17 | * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect |
18 | * and prevent packing file (argument arg has to be non-zero) | 18 | * and prevent packing file (argument arg has to |
19 | * be non-zero) | ||
19 | * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION | 20 | * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION |
20 | * 3) That's all for a while ... | 21 | * 3) That's all for a while ... |
21 | */ | 22 | */ |
@@ -132,7 +133,10 @@ setversion_out: | |||
132 | long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | 133 | long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, |
133 | unsigned long arg) | 134 | unsigned long arg) |
134 | { | 135 | { |
135 | /* These are just misnamed, they actually get/put from/to user an int */ | 136 | /* |
137 | * These are just misnamed; they actually | ||
138 | * get/put an int from/to user | ||
139 | */ | ||
136 | switch (cmd) { | 140 | switch (cmd) { |
137 | case REISERFS_IOC32_UNPACK: | 141 | case REISERFS_IOC32_UNPACK: |
138 | cmd = REISERFS_IOC_UNPACK; | 142 | cmd = REISERFS_IOC_UNPACK; |
@@ -160,10 +164,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | |||
160 | int reiserfs_commit_write(struct file *f, struct page *page, | 164 | int reiserfs_commit_write(struct file *f, struct page *page, |
161 | unsigned from, unsigned to); | 165 | unsigned from, unsigned to); |
162 | /* | 166 | /* |
163 | ** reiserfs_unpack | 167 | * reiserfs_unpack |
164 | ** Function try to convert tail from direct item into indirect. | 168 | * Function tries to convert the tail from a direct item into an indirect one. |
165 | ** It set up nopack attribute in the REISERFS_I(inode)->nopack | 169 | * It sets up the nopack attribute in REISERFS_I(inode)->nopack. |
166 | */ | 170 | */ |
167 | int reiserfs_unpack(struct inode *inode, struct file *filp) | 171 | int reiserfs_unpack(struct inode *inode, struct file *filp) |
168 | { | 172 | { |
169 | int retval = 0; | 173 | int retval = 0; |
@@ -194,9 +198,10 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
194 | goto out; | 198 | goto out; |
195 | } | 199 | } |
196 | 200 | ||
197 | /* we unpack by finding the page with the tail, and calling | 201 | /* |
198 | ** __reiserfs_write_begin on that page. This will force a | 202 | * we unpack by finding the page with the tail, and calling |
199 | ** reiserfs_get_block to unpack the tail for us. | 203 | * __reiserfs_write_begin on that page. This will force a |
204 | * reiserfs_get_block to unpack the tail for us. | ||
200 | */ | 205 | */ |
201 | index = inode->i_size >> PAGE_CACHE_SHIFT; | 206 | index = inode->i_size >> PAGE_CACHE_SHIFT; |
202 | mapping = inode->i_mapping; | 207 | mapping = inode->i_mapping; |
@@ -214,11 +219,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
214 | retval = reiserfs_commit_write(NULL, page, write_from, write_from); | 219 | retval = reiserfs_commit_write(NULL, page, write_from, write_from); |
215 | REISERFS_I(inode)->i_flags |= i_nopack_mask; | 220 | REISERFS_I(inode)->i_flags |= i_nopack_mask; |
216 | 221 | ||
217 | out_unlock: | 222 | out_unlock: |
218 | unlock_page(page); | 223 | unlock_page(page); |
219 | page_cache_release(page); | 224 | page_cache_release(page); |
220 | 225 | ||
221 | out: | 226 | out: |
222 | mutex_unlock(&inode->i_mutex); | 227 | mutex_unlock(&inode->i_mutex); |
223 | reiserfs_write_unlock(inode->i_sb); | 228 | reiserfs_write_unlock(inode->i_sb); |
224 | return retval; | 229 | return retval; |
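The index arithmetic the unpack path relies on, as a self-contained sketch: the page holding the tail is found by shifting i_size, and the in-page offset of EOF by masking it, exactly as in the hunk above. The shift value is an assumption here (PAGE_CACHE_SHIFT matched PAGE_SHIFT, 12 on common configurations).

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12			/* 4k pages assumed */
#define PAGE_CACHE_SIZE (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
	unsigned long long i_size = 5000;	/* example file size */

	/* page holding the tail: 5000 >> 12 == 1 */
	unsigned long index = i_size >> PAGE_CACHE_SHIFT;
	/* offset of EOF within that page: 5000 & 4095 == 904 */
	unsigned long write_from = i_size & (PAGE_CACHE_SIZE - 1);

	printf("tail page index=%lu, write_from=%lu\n", index, write_from);
	return 0;
}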
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index ee382ef3d300..cfaee912ee09 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c | |||
@@ -5,15 +5,17 @@ | |||
5 | #include <linux/time.h> | 5 | #include <linux/time.h> |
6 | #include "reiserfs.h" | 6 | #include "reiserfs.h" |
7 | 7 | ||
8 | // this contains item handlers for old item types: sd, direct, | 8 | /* |
9 | // indirect, directory | 9 | * this contains item handlers for old item types: sd, direct, |
10 | * indirect, directory | ||
11 | */ | ||
10 | 12 | ||
11 | /* and where are the comments? how about saying where we can find an | 13 | /* |
12 | explanation of each item handler method? -Hans */ | 14 | * and where are the comments? how about saying where we can find an |
15 | * explanation of each item handler method? -Hans | ||
16 | */ | ||
13 | 17 | ||
14 | ////////////////////////////////////////////////////////////////////////////// | 18 | /* stat data functions */ |
15 | // stat data functions | ||
16 | // | ||
17 | static int sd_bytes_number(struct item_head *ih, int block_size) | 19 | static int sd_bytes_number(struct item_head *ih, int block_size) |
18 | { | 20 | { |
19 | return 0; | 21 | return 0; |
@@ -60,7 +62,7 @@ static void sd_print_item(struct item_head *ih, char *item) | |||
60 | 62 | ||
61 | static void sd_check_item(struct item_head *ih, char *item) | 63 | static void sd_check_item(struct item_head *ih, char *item) |
62 | { | 64 | { |
63 | // FIXME: type something here! | 65 | /* unused */ |
64 | } | 66 | } |
65 | 67 | ||
66 | static int sd_create_vi(struct virtual_node *vn, | 68 | static int sd_create_vi(struct virtual_node *vn, |
@@ -68,7 +70,6 @@ static int sd_create_vi(struct virtual_node *vn, | |||
68 | int is_affected, int insert_size) | 70 | int is_affected, int insert_size) |
69 | { | 71 | { |
70 | vi->vi_index = TYPE_STAT_DATA; | 72 | vi->vi_index = TYPE_STAT_DATA; |
71 | //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? | ||
72 | return 0; | 73 | return 0; |
73 | } | 74 | } |
74 | 75 | ||
@@ -117,15 +118,13 @@ static struct item_operations stat_data_ops = { | |||
117 | .print_vi = sd_print_vi | 118 | .print_vi = sd_print_vi |
118 | }; | 119 | }; |
119 | 120 | ||
120 | ////////////////////////////////////////////////////////////////////////////// | 121 | /* direct item functions */ |
121 | // direct item functions | ||
122 | // | ||
123 | static int direct_bytes_number(struct item_head *ih, int block_size) | 122 | static int direct_bytes_number(struct item_head *ih, int block_size) |
124 | { | 123 | { |
125 | return ih_item_len(ih); | 124 | return ih_item_len(ih); |
126 | } | 125 | } |
127 | 126 | ||
128 | // FIXME: this should probably switch to indirect as well | 127 | /* FIXME: this should probably switch to indirect as well */ |
129 | static void direct_decrement_key(struct cpu_key *key) | 128 | static void direct_decrement_key(struct cpu_key *key) |
130 | { | 129 | { |
131 | cpu_key_k_offset_dec(key); | 130 | cpu_key_k_offset_dec(key); |
@@ -144,7 +143,7 @@ static void direct_print_item(struct item_head *ih, char *item) | |||
144 | { | 143 | { |
145 | int j = 0; | 144 | int j = 0; |
146 | 145 | ||
147 | // return; | 146 | /* return; */ |
148 | printk("\""); | 147 | printk("\""); |
149 | while (j < ih_item_len(ih)) | 148 | while (j < ih_item_len(ih)) |
150 | printk("%c", item[j++]); | 149 | printk("%c", item[j++]); |
@@ -153,7 +152,7 @@ static void direct_print_item(struct item_head *ih, char *item) | |||
153 | 152 | ||
154 | static void direct_check_item(struct item_head *ih, char *item) | 153 | static void direct_check_item(struct item_head *ih, char *item) |
155 | { | 154 | { |
156 | // FIXME: type something here! | 155 | /* unused */ |
157 | } | 156 | } |
158 | 157 | ||
159 | static int direct_create_vi(struct virtual_node *vn, | 158 | static int direct_create_vi(struct virtual_node *vn, |
@@ -161,7 +160,6 @@ static int direct_create_vi(struct virtual_node *vn, | |||
161 | int is_affected, int insert_size) | 160 | int is_affected, int insert_size) |
162 | { | 161 | { |
163 | vi->vi_index = TYPE_DIRECT; | 162 | vi->vi_index = TYPE_DIRECT; |
164 | //vi->vi_type |= VI_TYPE_DIRECT; | ||
165 | return 0; | 163 | return 0; |
166 | } | 164 | } |
167 | 165 | ||
@@ -211,16 +209,13 @@ static struct item_operations direct_ops = { | |||
211 | .print_vi = direct_print_vi | 209 | .print_vi = direct_print_vi |
212 | }; | 210 | }; |
213 | 211 | ||
214 | ////////////////////////////////////////////////////////////////////////////// | 212 | /* indirect item functions */ |
215 | // indirect item functions | ||
216 | // | ||
217 | |||
218 | static int indirect_bytes_number(struct item_head *ih, int block_size) | 213 | static int indirect_bytes_number(struct item_head *ih, int block_size) |
219 | { | 214 | { |
220 | return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); | 215 | return ih_item_len(ih) / UNFM_P_SIZE * block_size; |
221 | } | 216 | } |
222 | 217 | ||
223 | // decrease offset, if it becomes 0, change type to stat data | 218 | /* decrease offset, if it becomes 0, change type to stat data */ |
224 | static void indirect_decrement_key(struct cpu_key *key) | 219 | static void indirect_decrement_key(struct cpu_key *key) |
225 | { | 220 | { |
226 | cpu_key_k_offset_dec(key); | 221 | cpu_key_k_offset_dec(key); |
@@ -228,7 +223,7 @@ static void indirect_decrement_key(struct cpu_key *key) | |||
228 | set_cpu_key_k_type(key, TYPE_STAT_DATA); | 223 | set_cpu_key_k_type(key, TYPE_STAT_DATA); |
229 | } | 224 | } |
230 | 225 | ||
231 | // if it is not first item of the body, then it is mergeable | 226 | /* if it is not first item of the body, then it is mergeable */ |
232 | static int indirect_is_left_mergeable(struct reiserfs_key *key, | 227 | static int indirect_is_left_mergeable(struct reiserfs_key *key, |
233 | unsigned long bsize) | 228 | unsigned long bsize) |
234 | { | 229 | { |
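A self-contained sketch of the two indirect-item helpers touched above, with example values. indirect_bytes_number(): each UNFM_P_SIZE-byte pointer in the item maps one block, so bytes = (item length / pointer size) * block size. indirect_decrement_key(): offsets count from 1, so an offset that reaches 0 now designates the stat-data item. The struct is a simplified stand-in; the type constants match the disk-format values asserted at the end of this file.

#include <stdio.h>

#define UNFM_P_SIZE    4	/* sizeof(__le32) block pointer */
#define TYPE_STAT_DATA 0
#define TYPE_INDIRECT  1

struct toy_cpu_key { unsigned long long offset; int type; };

static int indirect_bytes_number(int ih_item_len, int block_size)
{
	return ih_item_len / UNFM_P_SIZE * block_size;
}

static void indirect_decrement_key(struct toy_cpu_key *key)
{
	key->offset--;
	if (key->offset == 0)
		key->type = TYPE_STAT_DATA;
}

int main(void)
{
	struct toy_cpu_key key = { .offset = 1, .type = TYPE_INDIRECT };

	/* a 48-byte item holds 12 pointers -> 12 * 4096 = 49152 bytes */
	printf("%d\n", indirect_bytes_number(48, 4096));

	indirect_decrement_key(&key);
	printf("type=%d\n", key.type);	/* 0: now a stat-data key */
	return 0;
}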
@@ -236,7 +231,7 @@ static int indirect_is_left_mergeable(struct reiserfs_key *key, | |||
236 | return (le_key_k_offset(version, key) != 1); | 231 | return (le_key_k_offset(version, key) != 1); |
237 | } | 232 | } |
238 | 233 | ||
239 | // printing of indirect item | 234 | /* printing of indirect item */ |
240 | static void start_new_sequence(__u32 * start, int *len, __u32 new) | 235 | static void start_new_sequence(__u32 * start, int *len, __u32 new) |
241 | { | 236 | { |
242 | *start = new; | 237 | *start = new; |
@@ -295,7 +290,7 @@ static void indirect_print_item(struct item_head *ih, char *item) | |||
295 | 290 | ||
296 | static void indirect_check_item(struct item_head *ih, char *item) | 291 | static void indirect_check_item(struct item_head *ih, char *item) |
297 | { | 292 | { |
298 | // FIXME: type something here! | 293 | /* unused */ |
299 | } | 294 | } |
300 | 295 | ||
301 | static int indirect_create_vi(struct virtual_node *vn, | 296 | static int indirect_create_vi(struct virtual_node *vn, |
@@ -303,7 +298,6 @@ static int indirect_create_vi(struct virtual_node *vn, | |||
303 | int is_affected, int insert_size) | 298 | int is_affected, int insert_size) |
304 | { | 299 | { |
305 | vi->vi_index = TYPE_INDIRECT; | 300 | vi->vi_index = TYPE_INDIRECT; |
306 | //vi->vi_type |= VI_TYPE_INDIRECT; | ||
307 | return 0; | 301 | return 0; |
308 | } | 302 | } |
309 | 303 | ||
@@ -321,16 +315,19 @@ static int indirect_check_right(struct virtual_item *vi, int free) | |||
321 | return indirect_check_left(vi, free, 0, 0); | 315 | return indirect_check_left(vi, free, 0, 0); |
322 | } | 316 | } |
323 | 317 | ||
324 | // return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) | 318 | /* |
319 | * return size in bytes of 'units' units. If first == 0 - calculate | ||
320 | * from the head (left), otherwise - from tail (right) | ||
321 | */ | ||
325 | static int indirect_part_size(struct virtual_item *vi, int first, int units) | 322 | static int indirect_part_size(struct virtual_item *vi, int first, int units) |
326 | { | 323 | { |
327 | // unit of indirect item is byte (yet) | 324 | /* unit of indirect item is byte (yet) */ |
328 | return units; | 325 | return units; |
329 | } | 326 | } |
330 | 327 | ||
331 | static int indirect_unit_num(struct virtual_item *vi) | 328 | static int indirect_unit_num(struct virtual_item *vi) |
332 | { | 329 | { |
333 | // unit of indirect item is byte (yet) | 330 | /* unit of indirect item is byte (yet) */ |
334 | return vi->vi_item_len - IH_SIZE; | 331 | return vi->vi_item_len - IH_SIZE; |
335 | } | 332 | } |
336 | 333 | ||
@@ -356,10 +353,7 @@ static struct item_operations indirect_ops = { | |||
356 | .print_vi = indirect_print_vi | 353 | .print_vi = indirect_print_vi |
357 | }; | 354 | }; |
358 | 355 | ||
359 | ////////////////////////////////////////////////////////////////////////////// | 356 | /* direntry functions */ |
360 | // direntry functions | ||
361 | // | ||
362 | |||
363 | static int direntry_bytes_number(struct item_head *ih, int block_size) | 357 | static int direntry_bytes_number(struct item_head *ih, int block_size) |
364 | { | 358 | { |
365 | reiserfs_warning(NULL, "vs-16090", | 359 | reiserfs_warning(NULL, "vs-16090", |
@@ -396,7 +390,7 @@ static void direntry_print_item(struct item_head *ih, char *item) | |||
396 | 390 | ||
397 | deh = (struct reiserfs_de_head *)item; | 391 | deh = (struct reiserfs_de_head *)item; |
398 | 392 | ||
399 | for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { | 393 | for (i = 0; i < ih_entry_count(ih); i++, deh++) { |
400 | namelen = | 394 | namelen = |
401 | (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - | 395 | (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - |
402 | deh_location(deh); | 396 | deh_location(deh); |
@@ -428,9 +422,9 @@ static void direntry_check_item(struct item_head *ih, char *item) | |||
428 | int i; | 422 | int i; |
429 | struct reiserfs_de_head *deh; | 423 | struct reiserfs_de_head *deh; |
430 | 424 | ||
431 | // FIXME: type something here! | 425 | /* unused */ |
432 | deh = (struct reiserfs_de_head *)item; | 426 | deh = (struct reiserfs_de_head *)item; |
433 | for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { | 427 | for (i = 0; i < ih_entry_count(ih); i++, deh++) { |
434 | ; | 428 | ; |
435 | } | 429 | } |
436 | } | 430 | } |
@@ -439,7 +433,8 @@ static void direntry_check_item(struct item_head *ih, char *item) | |||
439 | 433 | ||
440 | /* | 434 | /* |
441 | * function returns old entry number in directory item in real node | 435 | * function returns old entry number in directory item in real node |
442 | * using new entry number in virtual item in virtual node */ | 436 | * using new entry number in virtual item in virtual node |
437 | */ | ||
443 | static inline int old_entry_num(int is_affected, int virtual_entry_num, | 438 | static inline int old_entry_num(int is_affected, int virtual_entry_num, |
444 | int pos_in_item, int mode) | 439 | int pos_in_item, int mode) |
445 | { | 440 | { |
@@ -463,9 +458,11 @@ static inline int old_entry_num(int is_affected, int virtual_entry_num, | |||
463 | return virtual_entry_num - 1; | 458 | return virtual_entry_num - 1; |
464 | } | 459 | } |
465 | 460 | ||
466 | /* Create an array of sizes of directory entries for virtual | 461 | /* |
467 | item. Return space used by an item. FIXME: no control over | 462 | * Create an array of sizes of directory entries for virtual |
468 | consuming of space used by this item handler */ | 463 | * item. Return space used by an item. FIXME: no control over |
464 | * consuming of space used by this item handler | ||
465 | */ | ||
469 | static int direntry_create_vi(struct virtual_node *vn, | 466 | static int direntry_create_vi(struct virtual_node *vn, |
470 | struct virtual_item *vi, | 467 | struct virtual_item *vi, |
471 | int is_affected, int insert_size) | 468 | int is_affected, int insert_size) |
@@ -494,8 +491,8 @@ static int direntry_create_vi(struct virtual_node *vn, | |||
494 | j = old_entry_num(is_affected, i, vn->vn_pos_in_item, | 491 | j = old_entry_num(is_affected, i, vn->vn_pos_in_item, |
495 | vn->vn_mode); | 492 | vn->vn_mode); |
496 | dir_u->entry_sizes[i] = | 493 | dir_u->entry_sizes[i] = |
497 | (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) - | 494 | (j ? deh_location(&deh[j - 1]) : ih_item_len(vi->vi_ih)) - |
498 | deh_location(&(deh[j])) + DEH_SIZE; | 495 | deh_location(&deh[j]) + DEH_SIZE; |
499 | } | 496 | } |
500 | 497 | ||
501 | size += (dir_u->entry_count * sizeof(short)); | 498 | size += (dir_u->entry_count * sizeof(short)); |
@@ -529,10 +526,10 @@ static int direntry_create_vi(struct virtual_node *vn, | |||
529 | 526 | ||
530 | } | 527 | } |
531 | 528 | ||
532 | // | 529 | /* |
533 | // return number of entries which may fit into specified amount of | 530 | * return number of entries which may fit into specified amount of |
534 | // free space, or -1 if free space is not enough even for 1 entry | 531 | * free space, or -1 if free space is not enough even for 1 entry |
535 | // | 532 | */ |
536 | static int direntry_check_left(struct virtual_item *vi, int free, | 533 | static int direntry_check_left(struct virtual_item *vi, int free, |
537 | int start_skip, int end_skip) | 534 | int start_skip, int end_skip) |
538 | { | 535 | { |
@@ -541,8 +538,8 @@ static int direntry_check_left(struct virtual_item *vi, int free, | |||
541 | struct direntry_uarea *dir_u = vi->vi_uarea; | 538 | struct direntry_uarea *dir_u = vi->vi_uarea; |
542 | 539 | ||
543 | for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { | 540 | for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { |
541 | /* i-th entry doesn't fit into the remaining free space */ | ||
544 | if (dir_u->entry_sizes[i] > free) | 542 | if (dir_u->entry_sizes[i] > free) |
545 | /* i-th entry doesn't fit into the remaining free space */ | ||
546 | break; | 543 | break; |
547 | 544 | ||
548 | free -= dir_u->entry_sizes[i]; | 545 | free -= dir_u->entry_sizes[i]; |
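The loop above is the heart of both direntry_check_left() and direntry_check_right(): walk entry sizes until one no longer fits in the remaining free space. A self-contained sketch of that fitting loop, returning -1 when not even the first entry fits, as the earlier doc comment states; the kernel versions layer additional rules on top (for example keeping "." and ".." with the first directory item) that are elided here.

#include <stdio.h>

/* how many leading entries fit, or -1 if not even the first one does */
static int entries_that_fit(const int *sizes, int count, int free)
{
	int i;

	for (i = 0; i < count; i++) {
		/* i-th entry doesn't fit into the remaining free space */
		if (sizes[i] > free)
			break;
		free -= sizes[i];
	}
	return i ? i : -1;
}

int main(void)
{
	int sizes[] = { 40, 32, 64, 48 };	/* per-entry byte costs */

	printf("%d\n", entries_that_fit(sizes, 4, 100));	/* prints 2 */
	return 0;
}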
@@ -570,8 +567,8 @@ static int direntry_check_right(struct virtual_item *vi, int free) | |||
570 | struct direntry_uarea *dir_u = vi->vi_uarea; | 567 | struct direntry_uarea *dir_u = vi->vi_uarea; |
571 | 568 | ||
572 | for (i = dir_u->entry_count - 1; i >= 0; i--) { | 569 | for (i = dir_u->entry_count - 1; i >= 0; i--) { |
570 | /* i-th entry doesn't fit into the remaining free space */ | ||
573 | if (dir_u->entry_sizes[i] > free) | 571 | if (dir_u->entry_sizes[i] > free) |
574 | /* i-th entry doesn't fit into the remaining free space */ | ||
575 | break; | 572 | break; |
576 | 573 | ||
577 | free -= dir_u->entry_sizes[i]; | 574 | free -= dir_u->entry_sizes[i]; |
@@ -643,9 +640,7 @@ static struct item_operations direntry_ops = { | |||
643 | .print_vi = direntry_print_vi | 640 | .print_vi = direntry_print_vi |
644 | }; | 641 | }; |
645 | 642 | ||
646 | ////////////////////////////////////////////////////////////////////////////// | 643 | /* Error catching functions to catch errors caused by incorrect item types. */ |
647 | // Error catching functions to catch errors caused by incorrect item types. | ||
648 | // | ||
649 | static int errcatch_bytes_number(struct item_head *ih, int block_size) | 644 | static int errcatch_bytes_number(struct item_head *ih, int block_size) |
650 | { | 645 | { |
651 | reiserfs_warning(NULL, "green-16001", | 646 | reiserfs_warning(NULL, "green-16001", |
@@ -685,8 +680,12 @@ static int errcatch_create_vi(struct virtual_node *vn, | |||
685 | { | 680 | { |
686 | reiserfs_warning(NULL, "green-16006", | 681 | reiserfs_warning(NULL, "green-16006", |
687 | "Invalid item type observed, run fsck ASAP"); | 682 | "Invalid item type observed, run fsck ASAP"); |
688 | return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where | 683 | /* |
689 | // this operation is called from is of return type void. | 684 | * We might return -1 here as well, but it won't help as |
685 | * create_virtual_node() from where this operation is called | ||
686 | * from is of return type void. | ||
687 | */ | ||
688 | return 0; | ||
690 | } | 689 | } |
691 | 690 | ||
692 | static int errcatch_check_left(struct virtual_item *vi, int free, | 691 | static int errcatch_check_left(struct virtual_item *vi, int free, |
@@ -739,9 +738,6 @@ static struct item_operations errcatch_ops = { | |||
739 | errcatch_print_vi | 738 | errcatch_print_vi |
740 | }; | 739 | }; |
741 | 740 | ||
742 | ////////////////////////////////////////////////////////////////////////////// | ||
743 | // | ||
744 | // | ||
745 | #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) | 741 | #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) |
746 | #error Item types must use disk-format assigned values. | 742 | #error Item types must use disk-format assigned values. |
747 | #endif | 743 | #endif |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index fd777032c2ba..e8870de4627e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -1,38 +1,38 @@ | |||
1 | /* | 1 | /* |
2 | ** Write ahead logging implementation copyright Chris Mason 2000 | 2 | * Write ahead logging implementation copyright Chris Mason 2000 |
3 | ** | 3 | * |
4 | ** The background commits make this code very interrelated, and | 4 | * The background commits make this code very interrelated, and |
5 | ** overly complex. I need to rethink things a bit....The major players: | 5 | * overly complex. I need to rethink things a bit....The major players: |
6 | ** | 6 | * |
7 | ** journal_begin -- call with the number of blocks you expect to log. | 7 | * journal_begin -- call with the number of blocks you expect to log. |
8 | ** If the current transaction is too | 8 | * If the current transaction is too |
9 | ** old, it will block until the current transaction is | 9 | * old, it will block until the current transaction is |
10 | ** finished, and then start a new one. | 10 | * finished, and then start a new one. |
11 | ** Usually, your transaction will get joined in with | 11 | * Usually, your transaction will get joined in with |
12 | ** previous ones for speed. | 12 | * previous ones for speed. |
13 | ** | 13 | * |
14 | ** journal_join -- same as journal_begin, but won't block on the current | 14 | * journal_join -- same as journal_begin, but won't block on the current |
15 | ** transaction regardless of age. Don't ever call | 15 | * transaction regardless of age. Don't ever call |
16 | ** this. Ever. There are only two places it should be | 16 | * this. Ever. There are only two places it should be |
17 | ** called from, and they are both inside this file. | 17 | * called from, and they are both inside this file. |
18 | ** | 18 | * |
19 | ** journal_mark_dirty -- adds blocks into this transaction. clears any flags | 19 | * journal_mark_dirty -- adds blocks into this transaction. clears any flags |
20 | ** that might make them get sent to disk | 20 | * that might make them get sent to disk |
21 | ** and then marks them BH_JDirty. Puts the buffer head | 21 | * and then marks them BH_JDirty. Puts the buffer head |
22 | ** into the current transaction hash. | 22 | * into the current transaction hash. |
23 | ** | 23 | * |
24 | ** journal_end -- if the current transaction is batchable, it does nothing | 24 | * journal_end -- if the current transaction is batchable, it does nothing |
25 | ** otherwise, it could do an async/synchronous commit, or | 25 | * otherwise, it could do an async/synchronous commit, or |
26 | ** a full flush of all log and real blocks in the | 26 | * a full flush of all log and real blocks in the |
27 | ** transaction. | 27 | * transaction. |
28 | ** | 28 | * |
29 | ** flush_old_commits -- if the current transaction is too old, it is ended and | 29 | * flush_old_commits -- if the current transaction is too old, it is ended and |
30 | ** commit blocks are sent to disk. Forces commit blocks | 30 | * commit blocks are sent to disk. Forces commit blocks |
31 | ** to disk for all backgrounded commits that have been | 31 | * to disk for all backgrounded commits that have been |
32 | ** around too long. | 32 | * around too long. |
33 | ** -- Note, if you call this as an immediate flush from | 33 | * -- Note, if you call this as an immediate flush |
34 | ** from within kupdate, it will ignore the immediate flag | 34 | * from within kupdate, it will ignore the immediate flag |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #include <linux/time.h> | 37 | #include <linux/time.h> |
38 | #include <linux/semaphore.h> | 38 | #include <linux/semaphore.h> |
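The entry points this header describes fit a simple lifecycle; here is a schematic, self-contained toy model of it, using the post-patch convention that journal_end() takes only the handle. Everything below is an illustrative stand-in, not the kernel's real structures or signatures.

#include <stdio.h>

struct toy_handle { int blocks_reserved; int blocks_logged; };

/* call with the number of blocks you expect to log */
static int journal_begin(struct toy_handle *th, int nblocks)
{
	th->blocks_reserved = nblocks;
	th->blocks_logged = 0;
	return 0;
}

/* adds a block into the current transaction */
static void journal_mark_dirty(struct toy_handle *th)
{
	th->blocks_logged++;
}

/* batchable transactions just return; old ones would commit */
static int journal_end(struct toy_handle *th)
{
	printf("logged %d of %d reserved blocks\n",
	       th->blocks_logged, th->blocks_reserved);
	return 0;
}

int main(void)
{
	struct toy_handle th;

	journal_begin(&th, 2);
	journal_mark_dirty(&th);	/* e.g. the inode's stat item */
	journal_mark_dirty(&th);	/* e.g. a bitmap block */
	return journal_end(&th);
}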
@@ -58,23 +58,19 @@ | |||
58 | #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ | 58 | #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ |
59 | j_working_list)) | 59 | j_working_list)) |
60 | 60 | ||
61 | /* the number of mounted filesystems. This is used to decide when to | 61 | /* must be correct to keep the desc and commit structs at 4k */ |
62 | ** start and kill the commit workqueue | 62 | #define JOURNAL_TRANS_HALF 1018 |
63 | */ | ||
64 | static int reiserfs_mounted_fs_count; | ||
65 | |||
66 | static struct workqueue_struct *commit_wq; | ||
67 | |||
68 | #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit | ||
69 | structs at 4k */ | ||
70 | #define BUFNR 64 /*read ahead */ | 63 | #define BUFNR 64 /*read ahead */ |
71 | 64 | ||
72 | /* cnode stat bits. Move these into reiserfs_fs.h */ | 65 | /* cnode stat bits. Move these into reiserfs_fs.h */ |
73 | 66 | ||
74 | #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ | 67 | /* this block was freed, and can't be written. */ |
75 | #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ | 68 | #define BLOCK_FREED 2 |
69 | /* this block was freed during this transaction, and can't be written */ | ||
70 | #define BLOCK_FREED_HOLDER 3 | ||
76 | 71 | ||
77 | #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ | 72 | /* used in flush_journal_list */ |
73 | #define BLOCK_NEEDS_FLUSH 4 | ||
78 | #define BLOCK_DIRTIED 5 | 74 | #define BLOCK_DIRTIED 5 |
79 | 75 | ||
80 | /* journal list state bits */ | 76 | /* journal list state bits */ |
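Where the 1018 in JOURNAL_TRANS_HALF comes from, under two stated assumptions: standard 4k journal blocks, and my reading of the on-disk layouts (the desc block spends 12 bytes on trans_id/len/mount_id plus a 12-byte magic; the commit block spends 8 bytes on trans_id/len plus an unused 16-byte digest; 24 bytes of overhead either way). That leaves (4096 - 24) / 4 = 1018 __le32 real-block slots per half.

#include <stdio.h>

#define LOG_BLOCK_SIZE 4096
/* 24 bytes of non-array fields in both the desc and commit blocks */
#define TRANS_HALF ((LOG_BLOCK_SIZE - 24) / 4)

int main(void)
{
	printf("%d\n", TRANS_HALF);	/* prints 1018 */
	return 0;
}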
@@ -87,16 +83,14 @@ static struct workqueue_struct *commit_wq; | |||
87 | #define COMMIT_NOW 2 /* end and commit this transaction */ | 83 | #define COMMIT_NOW 2 /* end and commit this transaction */ |
88 | #define WAIT 4 /* wait for the log blocks to hit the disk */ | 84 | #define WAIT 4 /* wait for the log blocks to hit the disk */ |
89 | 85 | ||
90 | static int do_journal_end(struct reiserfs_transaction_handle *, | 86 | static int do_journal_end(struct reiserfs_transaction_handle *, int flags); |
91 | struct super_block *, unsigned long nblocks, | ||
92 | int flags); | ||
93 | static int flush_journal_list(struct super_block *s, | 87 | static int flush_journal_list(struct super_block *s, |
94 | struct reiserfs_journal_list *jl, int flushall); | 88 | struct reiserfs_journal_list *jl, int flushall); |
95 | static int flush_commit_list(struct super_block *s, | 89 | static int flush_commit_list(struct super_block *s, |
96 | struct reiserfs_journal_list *jl, int flushall); | 90 | struct reiserfs_journal_list *jl, int flushall); |
97 | static int can_dirty(struct reiserfs_journal_cnode *cn); | 91 | static int can_dirty(struct reiserfs_journal_cnode *cn); |
98 | static int journal_join(struct reiserfs_transaction_handle *th, | 92 | static int journal_join(struct reiserfs_transaction_handle *th, |
99 | struct super_block *sb, unsigned long nblocks); | 93 | struct super_block *sb); |
100 | static void release_journal_dev(struct super_block *super, | 94 | static void release_journal_dev(struct super_block *super, |
101 | struct reiserfs_journal *journal); | 95 | struct reiserfs_journal *journal); |
102 | static int dirty_one_transaction(struct super_block *s, | 96 | static int dirty_one_transaction(struct super_block *s, |
@@ -107,8 +101,10 @@ static void queue_log_writer(struct super_block *s); | |||
107 | /* values for join in do_journal_begin_r */ | 101 | /* values for join in do_journal_begin_r */ |
108 | enum { | 102 | enum { |
109 | JBEGIN_REG = 0, /* regular journal begin */ | 103 | JBEGIN_REG = 0, /* regular journal begin */ |
110 | JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ | 104 | /* join the running transaction if at all possible */ |
111 | JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ | 105 | JBEGIN_JOIN = 1, |
106 | /* called from cleanup code, ignores aborted flag */ | ||
107 | JBEGIN_ABORT = 2, | ||
112 | }; | 108 | }; |
113 | 109 | ||
114 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | 110 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, |
@@ -123,10 +119,11 @@ static void init_journal_hash(struct super_block *sb) | |||
123 | } | 119 | } |
124 | 120 | ||
125 | /* | 121 | /* |
126 | ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to | 122 | * clears BH_Dirty and sticks the buffer on the clean list. Called because |
127 | ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for | 123 | * I can't allow refile_buffer to make schedule happen after I've freed a |
128 | ** more details. | 124 | * block. Look at remove_from_transaction and journal_mark_freed for |
129 | */ | 125 | * more details. |
126 | */ | ||
130 | static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) | 127 | static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) |
131 | { | 128 | { |
132 | if (bh) { | 129 | if (bh) { |
@@ -163,7 +160,7 @@ static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) | |||
163 | struct list_head *entry = journal->j_bitmap_nodes.next; | 160 | struct list_head *entry = journal->j_bitmap_nodes.next; |
164 | 161 | ||
165 | journal->j_used_bitmap_nodes++; | 162 | journal->j_used_bitmap_nodes++; |
166 | repeat: | 163 | repeat: |
167 | 164 | ||
168 | if (entry != &journal->j_bitmap_nodes) { | 165 | if (entry != &journal->j_bitmap_nodes) { |
169 | bn = list_entry(entry, struct reiserfs_bitmap_node, list); | 166 | bn = list_entry(entry, struct reiserfs_bitmap_node, list); |
@@ -204,7 +201,8 @@ static void allocate_bitmap_nodes(struct super_block *sb) | |||
204 | list_add(&bn->list, &journal->j_bitmap_nodes); | 201 | list_add(&bn->list, &journal->j_bitmap_nodes); |
205 | journal->j_free_bitmap_nodes++; | 202 | journal->j_free_bitmap_nodes++; |
206 | } else { | 203 | } else { |
207 | break; /* this is ok, we'll try again when more are needed */ | 204 | /* this is ok, we'll try again when more are needed */ |
205 | break; | ||
208 | } | 206 | } |
209 | } | 207 | } |
210 | } | 208 | } |
@@ -239,8 +237,8 @@ static void cleanup_bitmap_list(struct super_block *sb, | |||
239 | } | 237 | } |
240 | 238 | ||
241 | /* | 239 | /* |
242 | ** only call this on FS unmount. | 240 | * only call this on FS unmount. |
243 | */ | 241 | */ |
244 | static int free_list_bitmaps(struct super_block *sb, | 242 | static int free_list_bitmaps(struct super_block *sb, |
245 | struct reiserfs_list_bitmap *jb_array) | 243 | struct reiserfs_list_bitmap *jb_array) |
246 | { | 244 | { |
@@ -275,9 +273,9 @@ static int free_bitmap_nodes(struct super_block *sb) | |||
275 | } | 273 | } |
276 | 274 | ||
277 | /* | 275 | /* |
278 | ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. | 276 | * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. |
279 | ** jb_array is the array to be filled in. | 277 | * jb_array is the array to be filled in. |
280 | */ | 278 | */ |
281 | int reiserfs_allocate_list_bitmaps(struct super_block *sb, | 279 | int reiserfs_allocate_list_bitmaps(struct super_block *sb, |
282 | struct reiserfs_list_bitmap *jb_array, | 280 | struct reiserfs_list_bitmap *jb_array, |
283 | unsigned int bmap_nr) | 281 | unsigned int bmap_nr) |
@@ -306,9 +304,9 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb, | |||
306 | } | 304 | } |
307 | 305 | ||
308 | /* | 306 | /* |
309 | ** find an available list bitmap. If you can't find one, flush a commit list | 307 | * find an available list bitmap. If you can't find one, flush a commit list |
310 | ** and try again | 308 | * and try again |
311 | */ | 309 | */ |
312 | static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, | 310 | static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, |
313 | struct reiserfs_journal_list | 311 | struct reiserfs_journal_list |
314 | *jl) | 312 | *jl) |
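A toy model of the round-robin search the comment above describes: cycle through JOURNAL_NUM_BITMAPS slots, and when a slot is still owned by a journal list, flush its commit (reduced here to simply clearing the owner) and re-check it. The types and the flush step are illustrative stand-ins; the "double check" mirrors the code that follows.

#include <stddef.h>

#define JOURNAL_NUM_BITMAPS 3

struct toy_list_bitmap {
	void *journal_list;		/* owner, NULL when free */
};

static struct toy_list_bitmap bitmaps[JOURNAL_NUM_BITMAPS];
static int bitmap_index;

/* stands in for flush_commit_list() releasing the slot */
static void toy_flush_commit(struct toy_list_bitmap *jb)
{
	jb->journal_list = NULL;
}

static struct toy_list_bitmap *get_list_bitmap(void *jl)
{
	struct toy_list_bitmap *jb = NULL;
	int i;

	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
		bitmap_index = (bitmap_index + 1) % JOURNAL_NUM_BITMAPS;
		jb = &bitmaps[bitmap_index];
		if (jb->journal_list == NULL)
			break;		/* free slot found */
		toy_flush_commit(jb);	/* busy: flush, then re-check */
		if (jb->journal_list == NULL)
			break;
	}
	/* double check to make sure it flushed correctly */
	if (jb == NULL || jb->journal_list != NULL)
		return NULL;
	jb->journal_list = jl;
	return jb;
}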
@@ -332,18 +330,18 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, | |||
332 | break; | 330 | break; |
333 | } | 331 | } |
334 | } | 332 | } |
335 | if (jb->journal_list) { /* double check to make sure if flushed correctly */ | 333 | /* double check to make sure if flushed correctly */ |
334 | if (jb->journal_list) | ||
336 | return NULL; | 335 | return NULL; |
337 | } | ||
338 | jb->journal_list = jl; | 336 | jb->journal_list = jl; |
339 | return jb; | 337 | return jb; |
340 | } | 338 | } |
341 | 339 | ||
342 | /* | 340 | /* |
343 | ** allocates a new chunk of X nodes, and links them all together as a list. | 341 | * allocates a new chunk of X nodes, and links them all together as a list. |
344 | ** Uses the cnode->next and cnode->prev pointers | 342 | * Uses the cnode->next and cnode->prev pointers |
345 | ** returns NULL on failure | 343 | * returns NULL on failure |
346 | */ | 344 | */ |
347 | static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) | 345 | static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) |
348 | { | 346 | { |
349 | struct reiserfs_journal_cnode *head; | 347 | struct reiserfs_journal_cnode *head; |
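A hedged sketch of what allocate_cnodes() does per its comment: allocate one chunk of N nodes and thread next/prev pointers through it so the chunk doubles as a free list, returning NULL on failure. Simplified node type, minimal error handling.

#include <stdlib.h>

struct toy_cnode {
	struct toy_cnode *next;
	struct toy_cnode *prev;
};

static struct toy_cnode *allocate_cnodes(int num)
{
	struct toy_cnode *head;
	int i;

	if (num <= 0)
		return NULL;
	head = calloc(num, sizeof(*head));
	if (!head)
		return NULL;		/* NULL on failure, as documented */

	head[0].prev = NULL;
	head[0].next = num > 1 ? &head[1] : NULL;
	for (i = 1; i < num - 1; i++) {
		head[i].prev = &head[i - 1];
		head[i].next = &head[i + 1];
	}
	if (num > 1) {
		head[num - 1].prev = &head[num - 2];
		head[num - 1].next = NULL;
	}
	return head;
}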
@@ -365,9 +363,7 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) | |||
365 | return head; | 363 | return head; |
366 | } | 364 | } |
367 | 365 | ||
368 | /* | 366 | /* pulls a cnode off the free list, or returns NULL on failure */ |
369 | ** pulls a cnode off the free list, or returns NULL on failure | ||
370 | */ | ||
371 | static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) | 367 | static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) |
372 | { | 368 | { |
373 | struct reiserfs_journal_cnode *cn; | 369 | struct reiserfs_journal_cnode *cn; |
@@ -393,8 +389,8 @@ static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) | |||
393 | } | 389 | } |
394 | 390 | ||
395 | /* | 391 | /* |
396 | ** returns a cnode to the free list | 392 | * returns a cnode to the free list |
397 | */ | 393 | */ |
398 | static void free_cnode(struct super_block *sb, | 394 | static void free_cnode(struct super_block *sb, |
399 | struct reiserfs_journal_cnode *cn) | 395 | struct reiserfs_journal_cnode *cn) |
400 | { | 396 | { |
@@ -419,7 +415,10 @@ static void clear_prepared_bits(struct buffer_head *bh) | |||
419 | clear_buffer_journal_restore_dirty(bh); | 415 | clear_buffer_journal_restore_dirty(bh); |
420 | } | 416 | } |
421 | 417 | ||
422 | /* return a cnode with same dev, block number and size in table, or null if not found */ | 418 | /* |
419 | * return a cnode with same dev, block number and size in table, | ||
420 | * or null if not found | ||
421 | */ | ||
423 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | 422 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct |
424 | super_block | 423 | super_block |
425 | *sb, | 424 | *sb, |
@@ -439,23 +438,24 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | |||
439 | } | 438 | } |
440 | 439 | ||
441 | /* | 440 | /* |
442 | ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated | 441 | * this actually means 'can this block be reallocated yet?'. If you set |
443 | ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever | 442 | * search_all, a block can only be allocated if it is not in the current |
444 | ** being overwritten by a replay after crashing. | 443 | * transaction, was not freed by the current transaction, and has no chance |
445 | ** | 444 | * of ever being overwritten by a replay after crashing. |
446 | ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting | 445 | * |
447 | ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make | 446 | * If you don't set search_all, a block can only be allocated if it is not |
448 | ** sure you never write the block without logging it. | 447 | * in the current transaction. Since deleting a block removes it from the |
449 | ** | 448 | * current transaction, this case should never happen. If you don't set |
450 | ** next_zero_bit is a suggestion about the next block to try for find_forward. | 449 | * search_all, make sure you never write the block without logging it. |
451 | ** when bl is rejected because it is set in a journal list bitmap, we search | 450 | * |
452 | ** for the next zero bit in the bitmap that rejected bl. Then, we return that | 451 | * next_zero_bit is a suggestion about the next block to try for find_forward. |
453 | ** through next_zero_bit for find_forward to try. | 452 | * when bl is rejected because it is set in a journal list bitmap, we search |
454 | ** | 453 | * for the next zero bit in the bitmap that rejected bl. Then, we return |
455 | ** Just because we return something in next_zero_bit does not mean we won't | 454 | * that through next_zero_bit for find_forward to try. |
456 | ** reject it on the next call to reiserfs_in_journal | 455 | * |
457 | ** | 456 | * Just because we return something in next_zero_bit does not mean we won't |
458 | */ | 457 | * reject it on the next call to reiserfs_in_journal |
458 | */ | ||
459 | int reiserfs_in_journal(struct super_block *sb, | 459 | int reiserfs_in_journal(struct super_block *sb, |
460 | unsigned int bmap_nr, int bit_nr, int search_all, | 460 | unsigned int bmap_nr, int bit_nr, int search_all, |
461 | b_blocknr_t * next_zero_bit) | 461 | b_blocknr_t * next_zero_bit) |
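A toy model of the reallocation test described above: with search_all set, a block is still busy if any committed-but-unflushed list bitmap has its bit set, and next_zero_bit suggests where find_forward should resume. One 64-bit word stands in for each per-list bitmap; the kernel walks real per-bmap pages of bits.

#include <stdio.h>

#define NUM_BITMAPS 3	/* stands in for JOURNAL_NUM_BITMAPS */
#define BITS 64

static unsigned long long bitmaps[NUM_BITMAPS];

static int toy_in_journal(int bit, int search_all, int *next_zero_bit)
{
	int i, b;

	*next_zero_bit = 0;	/* always start this at zero */
	if (!search_all)
		return 0;	/* only the transaction hash would matter */

	for (i = 0; i < NUM_BITMAPS; i++) {
		if (!(bitmaps[i] & (1ULL << bit)))
			continue;
		/* rejected: suggest the next clear bit for find_forward */
		for (b = bit + 1; b < BITS; b++) {
			if (!(bitmaps[i] & (1ULL << b))) {
				*next_zero_bit = b;
				break;
			}
		}
		return 1;	/* block is still pinned by a journal list */
	}
	return 0;		/* free for reallocation */
}

int main(void)
{
	int nz;

	bitmaps[1] = 0xC;	/* bits 2 and 3 busy in list 1 */
	printf("%d next=%d\n", toy_in_journal(2, 1, &nz), nz);
	return 0;		/* prints "1 next=4" */
}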
@@ -469,9 +469,11 @@ int reiserfs_in_journal(struct super_block *sb, | |||
469 | *next_zero_bit = 0; /* always start this at zero. */ | 469 | *next_zero_bit = 0; /* always start this at zero. */ |
470 | 470 | ||
471 | PROC_INFO_INC(sb, journal.in_journal); | 471 | PROC_INFO_INC(sb, journal.in_journal); |
472 | /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. | 472 | /* |
473 | ** if we crash before the transaction that freed it commits, this transaction won't | 473 | * If we aren't doing a search_all, this is a metablock, and it |
474 | ** have committed either, and the block will never be written | 474 | * will be logged before use. If we crash before the transaction |
475 | * that freed it commits, this transaction won't have committed | ||
476 | * either, and the block will never be written | ||
475 | */ | 477 | */ |
476 | if (search_all) { | 478 | if (search_all) { |
477 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { | 479 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { |
@@ -511,8 +513,7 @@ int reiserfs_in_journal(struct super_block *sb, | |||
511 | return 0; | 513 | return 0; |
512 | } | 514 | } |
513 | 515 | ||
514 | /* insert cn into table | 516 | /* insert cn into table */ |
515 | */ | ||
516 | static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, | 517 | static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, |
517 | struct reiserfs_journal_cnode *cn) | 518 | struct reiserfs_journal_cnode *cn) |
518 | { | 519 | { |
@@ -558,10 +559,10 @@ static inline void put_journal_list(struct super_block *s, | |||
558 | } | 559 | } |
559 | 560 | ||
560 | /* | 561 | /* |
561 | ** this used to be much more involved, and I'm keeping it just in case things get ugly again. | 562 | * this used to be much more involved, and I'm keeping it just in case |
562 | ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a | 563 | * things get ugly again. it gets called by flush_commit_list, and |
563 | ** transaction. | 564 | * cleans up any data stored about blocks freed during a transaction. |
564 | */ | 565 | */ |
565 | static void cleanup_freed_for_journal_list(struct super_block *sb, | 566 | static void cleanup_freed_for_journal_list(struct super_block *sb, |
566 | struct reiserfs_journal_list *jl) | 567 | struct reiserfs_journal_list *jl) |
567 | { | 568 | { |
@@ -756,11 +757,12 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, | |||
756 | jh = bh->b_private; | 757 | jh = bh->b_private; |
757 | list_del_init(&jh->list); | 758 | list_del_init(&jh->list); |
758 | } else { | 759 | } else { |
759 | no_jh: | 760 | no_jh: |
760 | get_bh(bh); | 761 | get_bh(bh); |
761 | jh = alloc_jh(); | 762 | jh = alloc_jh(); |
762 | spin_lock(&j->j_dirty_buffers_lock); | 763 | spin_lock(&j->j_dirty_buffers_lock); |
763 | /* buffer must be locked for __add_jh, should be able to have | 764 | /* |
765 | * buffer must be locked for __add_jh, should be able to have | ||
764 | * two adds at the same time | 766 | * two adds at the same time |
765 | */ | 767 | */ |
766 | BUG_ON(bh->b_private); | 768 | BUG_ON(bh->b_private); |
@@ -818,7 +820,8 @@ static int write_ordered_buffers(spinlock_t * lock, | |||
818 | spin_lock(lock); | 820 | spin_lock(lock); |
819 | goto loop_next; | 821 | goto loop_next; |
820 | } | 822 | } |
821 | /* in theory, dirty non-uptodate buffers should never get here, | 823 | /* |
824 | * in theory, dirty non-uptodate buffers should never get here, | ||
822 | * but the upper layer io error paths still have a few quirks. | 825 | * but the upper layer io error paths still have a few quirks. |
823 | * Handle them here as gracefully as we can | 826 | * Handle them here as gracefully as we can |
824 | */ | 827 | */ |
@@ -833,7 +836,7 @@ static int write_ordered_buffers(spinlock_t * lock, | |||
833 | reiserfs_free_jh(bh); | 836 | reiserfs_free_jh(bh); |
834 | unlock_buffer(bh); | 837 | unlock_buffer(bh); |
835 | } | 838 | } |
836 | loop_next: | 839 | loop_next: |
837 | put_bh(bh); | 840 | put_bh(bh); |
838 | cond_resched_lock(lock); | 841 | cond_resched_lock(lock); |
839 | } | 842 | } |
@@ -856,13 +859,14 @@ static int write_ordered_buffers(spinlock_t * lock, | |||
856 | if (!buffer_uptodate(bh)) { | 859 | if (!buffer_uptodate(bh)) { |
857 | ret = -EIO; | 860 | ret = -EIO; |
858 | } | 861 | } |
859 | /* ugly interaction with invalidatepage here. | 862 | /* |
860 | * reiserfs_invalidate_page will pin any buffer that has a valid | 863 | * ugly interaction with invalidatepage here. |
861 | * journal head from an older transaction. If someone else sets | 864 | * reiserfs_invalidate_page will pin any buffer that has a |
862 | * our buffer dirty after we write it in the first loop, and | 865 | * valid journal head from an older transaction. If someone |
863 | * then someone truncates the page away, nobody will ever write | 866 | * else sets our buffer dirty after we write it in the first |
864 | * the buffer. We're safe if we write the page one last time | 867 | * loop, and then someone truncates the page away, nobody |
865 | * after freeing the journal header. | 868 | * will ever write the buffer. We're safe if we write the |
869 | * page one last time after freeing the journal header. | ||
866 | */ | 870 | */ |
867 | if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { | 871 | if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { |
868 | spin_unlock(lock); | 872 | spin_unlock(lock); |
@@ -887,7 +891,7 @@ static int flush_older_commits(struct super_block *s, | |||
887 | unsigned int other_trans_id; | 891 | unsigned int other_trans_id; |
888 | unsigned int first_trans_id; | 892 | unsigned int first_trans_id; |
889 | 893 | ||
890 | find_first: | 894 | find_first: |
891 | /* | 895 | /* |
892 | * first we walk backwards to find the oldest uncommitted transaction | 896 | * first we walk backwards to find the oldest uncommitted transaction |
893 | */ | 897 | */ |
@@ -923,9 +927,11 @@ static int flush_older_commits(struct super_block *s, | |||
923 | if (!journal_list_still_alive(s, trans_id)) | 927 | if (!journal_list_still_alive(s, trans_id)) |
924 | return 1; | 928 | return 1; |
925 | 929 | ||
926 | /* the one we just flushed is gone, this means all | 930 | /* |
927 | * older lists are also gone, so first_jl is no longer | 931 | * the one we just flushed is gone, this means |
928 | * valid either. Go back to the beginning. | 932 | * all older lists are also gone, so first_jl |
933 | * is no longer valid either. Go back to the | ||
934 | * beginning. | ||
929 | */ | 935 | */ |
930 | if (!journal_list_still_alive | 936 | if (!journal_list_still_alive |
931 | (s, other_trans_id)) { | 937 | (s, other_trans_id)) { |
@@ -958,12 +964,12 @@ static int reiserfs_async_progress_wait(struct super_block *s) | |||
958 | } | 964 | } |
959 | 965 | ||
960 | /* | 966 | /* |
961 | ** if this journal list still has commit blocks unflushed, send them to disk. | 967 | * if this journal list still has commit blocks unflushed, send them to disk. |
962 | ** | 968 | * |
963 | ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) | 969 | * log areas must be flushed in order (transaction 2 can't commit before |
964 | ** Before the commit block can by written, every other log block must be safely on disk | 970 | * transaction 1). Before the commit block can be written, every other log |
965 | ** | 971 | * block must be safely on disk |
966 | */ | 972 | */ |
967 | static int flush_commit_list(struct super_block *s, | 973 | static int flush_commit_list(struct super_block *s, |
968 | struct reiserfs_journal_list *jl, int flushall) | 974 | struct reiserfs_journal_list *jl, int flushall) |
969 | { | 975 | { |
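A minimal model of the ordering rule stated above: before committing a transaction, every older, still-uncommitted transaction is committed first. The array stands in for journal->j_journal_list and is assumed ordered by trans_id, as the journal's list is; nothing here is the kernel's actual flow.

#include <stdio.h>

struct toy_jl { unsigned trans_id; int committed; };

static void commit_one(struct toy_jl *jl)
{
	jl->committed = 1;
	printf("committed %u\n", jl->trans_id);
}

static void toy_flush_commit_list(struct toy_jl *lists, int n, int target)
{
	int i;

	/* commit every older, still-uncommitted transaction first */
	for (i = 0; i < n; i++)
		if (lists[i].trans_id < lists[target].trans_id &&
		    !lists[i].committed)
			commit_one(&lists[i]);
	commit_one(&lists[target]);	/* now safe: order preserved */
}

int main(void)
{
	struct toy_jl lists[] = { {10, 0}, {11, 0}, {12, 0} };

	toy_flush_commit_list(lists, 3, 2);	/* commits 10, 11, then 12 */
	return 0;
}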
@@ -982,8 +988,9 @@ static int flush_commit_list(struct super_block *s, | |||
982 | return 0; | 988 | return 0; |
983 | } | 989 | } |
984 | 990 | ||
985 | /* before we can put our commit blocks on disk, we have to make sure everyone older than | 991 | /* |
986 | ** us is on disk too | 992 | * before we can put our commit blocks on disk, we have to make |
993 | * sure everyone older than us is on disk too | ||
987 | */ | 994 | */ |
988 | BUG_ON(jl->j_len <= 0); | 995 | BUG_ON(jl->j_len <= 0); |
989 | BUG_ON(trans_id == journal->j_trans_id); | 996 | BUG_ON(trans_id == journal->j_trans_id); |
@@ -991,7 +998,10 @@ static int flush_commit_list(struct super_block *s, | |||
991 | get_journal_list(jl); | 998 | get_journal_list(jl); |
992 | if (flushall) { | 999 | if (flushall) { |
993 | if (flush_older_commits(s, jl) == 1) { | 1000 | if (flush_older_commits(s, jl) == 1) { |
994 | /* list disappeared during flush_older_commits. return */ | 1001 | /* |
1002 | * list disappeared during flush_older_commits. | ||
1003 | * return | ||
1004 | */ | ||
995 | goto put_jl; | 1005 | goto put_jl; |
996 | } | 1006 | } |
997 | } | 1007 | } |
@@ -1006,9 +1016,9 @@ static int flush_commit_list(struct super_block *s, | |||
1006 | BUG_ON(jl->j_trans_id == 0); | 1016 | BUG_ON(jl->j_trans_id == 0); |
1007 | 1017 | ||
1008 | /* this commit is done, exit */ | 1018 | /* this commit is done, exit */ |
1009 | if (atomic_read(&(jl->j_commit_left)) <= 0) { | 1019 | if (atomic_read(&jl->j_commit_left) <= 0) { |
1010 | if (flushall) { | 1020 | if (flushall) { |
1011 | atomic_set(&(jl->j_older_commits_done), 1); | 1021 | atomic_set(&jl->j_older_commits_done, 1); |
1012 | } | 1022 | } |
1013 | mutex_unlock(&jl->j_commit_mutex); | 1023 | mutex_unlock(&jl->j_commit_mutex); |
1014 | goto put_jl; | 1024 | goto put_jl; |
@@ -1063,9 +1073,10 @@ static int flush_commit_list(struct super_block *s, | |||
1063 | depth = reiserfs_write_unlock_nested(s); | 1073 | depth = reiserfs_write_unlock_nested(s); |
1064 | __wait_on_buffer(tbh); | 1074 | __wait_on_buffer(tbh); |
1065 | reiserfs_write_lock_nested(s, depth); | 1075 | reiserfs_write_lock_nested(s, depth); |
1066 | // since we're using ll_rw_blk above, it might have skipped over | 1076 | /* |
1067 | // a locked buffer. Double check here | 1077 | * since we're using ll_rw_blk above, it might have skipped |
1068 | // | 1078 | * over a locked buffer. Double check here |
1079 | */ | ||
1069 | /* redundant, sync_dirty_buffer() checks */ | 1080 | /* redundant, sync_dirty_buffer() checks */ |
1070 | if (buffer_dirty(tbh)) { | 1081 | if (buffer_dirty(tbh)) { |
1071 | depth = reiserfs_write_unlock_nested(s); | 1082 | depth = reiserfs_write_unlock_nested(s); |
@@ -1079,17 +1090,21 @@ static int flush_commit_list(struct super_block *s, | |||
1079 | #endif | 1090 | #endif |
1080 | retval = -EIO; | 1091 | retval = -EIO; |
1081 | } | 1092 | } |
1082 | put_bh(tbh); /* once for journal_find_get_block */ | 1093 | /* once for journal_find_get_block */ |
1083 | put_bh(tbh); /* once due to original getblk in do_journal_end */ | 1094 | put_bh(tbh); |
1084 | atomic_dec(&(jl->j_commit_left)); | 1095 | /* once due to original getblk in do_journal_end */ |
1096 | put_bh(tbh); | ||
1097 | atomic_dec(&jl->j_commit_left); | ||
1085 | } | 1098 | } |
1086 | 1099 | ||
1087 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); | 1100 | BUG_ON(atomic_read(&jl->j_commit_left) != 1); |
1088 | 1101 | ||
1089 | /* If there was a write error in the journal - we can't commit | 1102 | /* |
1103 | * If there was a write error in the journal - we can't commit | ||
1090 | * this transaction - it will be invalid and, if successful, | 1104 | * this transaction - it will be invalid and, if successful, |
1091 | * will just end up propagating the write error out to | 1105 | * will just end up propagating the write error out to |
1092 | * the file system. */ | 1106 | * the file system. |
1107 | */ | ||
1093 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { | 1108 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { |
1094 | if (buffer_dirty(jl->j_commit_bh)) | 1109 | if (buffer_dirty(jl->j_commit_bh)) |
1095 | BUG(); | 1110 | BUG(); |
@@ -1102,9 +1117,11 @@ static int flush_commit_list(struct super_block *s, | |||
1102 | reiserfs_write_lock_nested(s, depth); | 1117 | reiserfs_write_lock_nested(s, depth); |
1103 | } | 1118 | } |
1104 | 1119 | ||
1105 | /* If there was a write error in the journal - we can't commit this | 1120 | /* |
1121 | * If there was a write error in the journal - we can't commit this | ||
1106 | * transaction - it will be invalid and, if successful, will just end | 1122 | * transaction - it will be invalid and, if successful, will just end |
1107 | * up propagating the write error out to the filesystem. */ | 1123 | * up propagating the write error out to the filesystem. |
1124 | */ | ||
1108 | if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { | 1125 | if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { |
1109 | #ifdef CONFIG_REISERFS_CHECK | 1126 | #ifdef CONFIG_REISERFS_CHECK |
1110 | reiserfs_warning(s, "journal-615", "buffer write failed"); | 1127 | reiserfs_warning(s, "journal-615", "buffer write failed"); |
@@ -1119,7 +1136,10 @@ static int flush_commit_list(struct super_block *s, | |||
1119 | } | 1136 | } |
1120 | journal->j_last_commit_id = jl->j_trans_id; | 1137 | journal->j_last_commit_id = jl->j_trans_id; |
1121 | 1138 | ||
1122 | /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ | 1139 | /* |
1140 | * now, every commit block is on the disk. It is safe to allow | ||
1141 | * blocks freed during this transaction to be reallocated | ||
1142 | */ | ||
1123 | cleanup_freed_for_journal_list(s, jl); | 1143 | cleanup_freed_for_journal_list(s, jl); |
1124 | 1144 | ||
1125 | retval = retval ? retval : journal->j_errno; | 1145 | retval = retval ? retval : journal->j_errno; |
@@ -1127,13 +1147,13 @@ static int flush_commit_list(struct super_block *s, | |||
1127 | /* mark the metadata dirty */ | 1147 | /* mark the metadata dirty */ |
1128 | if (!retval) | 1148 | if (!retval) |
1129 | dirty_one_transaction(s, jl); | 1149 | dirty_one_transaction(s, jl); |
1130 | atomic_dec(&(jl->j_commit_left)); | 1150 | atomic_dec(&jl->j_commit_left); |
1131 | 1151 | ||
1132 | if (flushall) { | 1152 | if (flushall) { |
1133 | atomic_set(&(jl->j_older_commits_done), 1); | 1153 | atomic_set(&jl->j_older_commits_done, 1); |
1134 | } | 1154 | } |
1135 | mutex_unlock(&jl->j_commit_mutex); | 1155 | mutex_unlock(&jl->j_commit_mutex); |
1136 | put_jl: | 1156 | put_jl: |
1137 | put_journal_list(s, jl); | 1157 | put_journal_list(s, jl); |
1138 | 1158 | ||
1139 | if (retval) | 1159 | if (retval) |
@@ -1143,9 +1163,9 @@ static int flush_commit_list(struct super_block *s, | |||
1143 | } | 1163 | } |
1144 | 1164 | ||
1145 | /* | 1165 | /* |
1146 | ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or | 1166 | * flush_journal_list frequently needs to find a newer transaction for a |
1147 | ** returns NULL if it can't find anything | 1167 | * given block. This does that, or returns NULL if it can't find anything |
1148 | */ | 1168 | */ |
1149 | static struct reiserfs_journal_list *find_newer_jl_for_cn(struct | 1169 | static struct reiserfs_journal_list *find_newer_jl_for_cn(struct |
1150 | reiserfs_journal_cnode | 1170 | reiserfs_journal_cnode |
1151 | *cn) | 1171 | *cn) |
@@ -1169,10 +1189,11 @@ static void remove_journal_hash(struct super_block *, | |||
1169 | int); | 1189 | int); |
1170 | 1190 | ||
1171 | /* | 1191 | /* |
1172 | ** once all the real blocks have been flushed, it is safe to remove them from the | 1192 | * once all the real blocks have been flushed, it is safe to remove them |
1173 | ** journal list for this transaction. Aside from freeing the cnode, this also allows the | 1193 | * from the journal list for this transaction. Aside from freeing the |
1174 | ** block to be reallocated for data blocks if it had been deleted. | 1194 | * cnode, this also allows the block to be reallocated for data blocks |
1175 | */ | 1195 | * if it had been deleted. |
1196 | */ | ||
1176 | static void remove_all_from_journal_list(struct super_block *sb, | 1197 | static void remove_all_from_journal_list(struct super_block *sb, |
1177 | struct reiserfs_journal_list *jl, | 1198 | struct reiserfs_journal_list *jl, |
1178 | int debug) | 1199 | int debug) |
@@ -1181,8 +1202,9 @@ static void remove_all_from_journal_list(struct super_block *sb, | |||
1181 | struct reiserfs_journal_cnode *cn, *last; | 1202 | struct reiserfs_journal_cnode *cn, *last; |
1182 | cn = jl->j_realblock; | 1203 | cn = jl->j_realblock; |
1183 | 1204 | ||
1184 | /* which is better, to lock once around the whole loop, or | 1205 | /* |
1185 | ** to lock for each call to remove_journal_hash? | 1206 | * which is better, to lock once around the whole loop, or |
1207 | * to lock for each call to remove_journal_hash? | ||
1186 | */ | 1208 | */ |
1187 | while (cn) { | 1209 | while (cn) { |
1188 | if (cn->blocknr != 0) { | 1210 | if (cn->blocknr != 0) { |
@@ -1204,12 +1226,13 @@ static void remove_all_from_journal_list(struct super_block *sb, | |||
1204 | } | 1226 | } |
1205 | 1227 | ||
1206 | /* | 1228 | /* |
1207 | ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. | 1229 | * if this timestamp is greater than the timestamp we wrote last to the |
1208 | ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start | 1230 | * header block, write it to the header block. once this is done, I can |
1209 | ** releasing blocks in this transaction for reuse as data blocks. | 1231 | * safely say the log area for this transaction won't ever be replayed, |
1210 | ** called by flush_journal_list, before it calls remove_all_from_journal_list | 1232 | * and I can start releasing blocks in this transaction for reuse as data |
1211 | ** | 1233 | * blocks. called by flush_journal_list, before it calls |
1212 | */ | 1234 | * remove_all_from_journal_list |
1235 | */ | ||
1213 | static int _update_journal_header_block(struct super_block *sb, | 1236 | static int _update_journal_header_block(struct super_block *sb, |
1214 | unsigned long offset, | 1237 | unsigned long offset, |
1215 | unsigned int trans_id) | 1238 | unsigned int trans_id) |
@@ -1279,10 +1302,11 @@ static int flush_older_journal_lists(struct super_block *sb, | |||
1279 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 1302 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
1280 | unsigned int trans_id = jl->j_trans_id; | 1303 | unsigned int trans_id = jl->j_trans_id; |
1281 | 1304 | ||
1282 | /* we know we are the only ones flushing things, no extra race | 1305 | /* |
1306 | * we know we are the only ones flushing things, no extra race | ||
1283 | * protection is required. | 1307 | * protection is required. |
1284 | */ | 1308 | */ |
1285 | restart: | 1309 | restart: |
1286 | entry = journal->j_journal_list.next; | 1310 | entry = journal->j_journal_list.next; |
1287 | /* Did we wrap? */ | 1311 | /* Did we wrap? */ |
1288 | if (entry == &journal->j_journal_list) | 1312 | if (entry == &journal->j_journal_list) |
@@ -1309,15 +1333,16 @@ static void del_from_work_list(struct super_block *s, | |||
1309 | } | 1333 | } |
1310 | } | 1334 | } |
1311 | 1335 | ||
1312 | /* flush a journal list, both commit and real blocks | 1336 | /* |
1313 | ** | 1337 | * flush a journal list, both commit and real blocks |
1314 | ** always set flushall to 1, unless you are calling from inside | 1338 | * |
1315 | ** flush_journal_list | 1339 | * always set flushall to 1, unless you are calling from inside |
1316 | ** | 1340 | * flush_journal_list |
1317 | ** IMPORTANT. This can only be called while there are no journal writers, | 1341 | * |
1318 | ** and the journal is locked. That means it can only be called from | 1342 | * IMPORTANT. This can only be called while there are no journal writers, |
1319 | ** do_journal_end, or by journal_release | 1343 | * and the journal is locked. That means it can only be called from |
1320 | */ | 1344 | * do_journal_end, or by journal_release |
1345 | */ | ||
1321 | static int flush_journal_list(struct super_block *s, | 1346 | static int flush_journal_list(struct super_block *s, |
1322 | struct reiserfs_journal_list *jl, int flushall) | 1347 | struct reiserfs_journal_list *jl, int flushall) |
1323 | { | 1348 | { |
@@ -1354,13 +1379,14 @@ static int flush_journal_list(struct super_block *s, | |||
1354 | } | 1379 | } |
1355 | 1380 | ||
1356 | /* if all the work is already done, get out of here */ | 1381 | /* if all the work is already done, get out of here */ |
1357 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && | 1382 | if (atomic_read(&jl->j_nonzerolen) <= 0 && |
1358 | atomic_read(&(jl->j_commit_left)) <= 0) { | 1383 | atomic_read(&jl->j_commit_left) <= 0) { |
1359 | goto flush_older_and_return; | 1384 | goto flush_older_and_return; |
1360 | } | 1385 | } |
1361 | 1386 | ||
1362 | /* start by putting the commit list on disk. This will also flush | 1387 | /* |
1363 | ** the commit lists of any olders transactions | 1388 | * start by putting the commit list on disk. This will also flush |
1389 | * the commit lists of any older transactions | ||
1364 | */ | 1390 | */ |
1365 | flush_commit_list(s, jl, 1); | 1391 | flush_commit_list(s, jl, 1); |
1366 | 1392 | ||
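A large share of this patch is the mechanical cleanup visible in this hunk: atomic_read(&(jl->j_nonzerolen)) becomes atomic_read(&jl->j_nonzerolen). The inner parentheses were always redundant, since -> binds tighter than unary &. A tiny C11 analogue, with stdatomic standing in for the kernel's atomic_t:

#include <stdatomic.h>

struct jlist { atomic_int nonzerolen; };

/* Both loads below are the same expression: jl->nonzerolen is fully
 * formed before & applies, so the extra parentheses add nothing. */
static int reads_are_identical(struct jlist *jl)
{
        return atomic_load(&(jl->nonzerolen)) == atomic_load(&jl->nonzerolen);
}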
@@ -1369,15 +1395,16 @@ static int flush_journal_list(struct super_block *s, | |||
1369 | BUG(); | 1395 | BUG(); |
1370 | 1396 | ||
1371 | /* are we done now? */ | 1397 | /* are we done now? */ |
1372 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && | 1398 | if (atomic_read(&jl->j_nonzerolen) <= 0 && |
1373 | atomic_read(&(jl->j_commit_left)) <= 0) { | 1399 | atomic_read(&jl->j_commit_left) <= 0) { |
1374 | goto flush_older_and_return; | 1400 | goto flush_older_and_return; |
1375 | } | 1401 | } |
1376 | 1402 | ||
1377 | /* loop through each cnode, see if we need to write it, | 1403 | /* |
1378 | ** or wait on a more recent transaction, or just ignore it | 1404 | * loop through each cnode, see if we need to write it, |
1405 | * or wait on a more recent transaction, or just ignore it | ||
1379 | */ | 1406 | */ |
1380 | if (atomic_read(&(journal->j_wcount)) != 0) { | 1407 | if (atomic_read(&journal->j_wcount) != 0) { |
1381 | reiserfs_panic(s, "journal-844", "journal list is flushing, " | 1408 | reiserfs_panic(s, "journal-844", "journal list is flushing, " |
1382 | "wcount is not 0"); | 1409 | "wcount is not 0"); |
1383 | } | 1410 | } |
@@ -1391,20 +1418,25 @@ static int flush_journal_list(struct super_block *s, | |||
1391 | goto free_cnode; | 1418 | goto free_cnode; |
1392 | } | 1419 | } |
1393 | 1420 | ||
1394 | /* This transaction failed commit. Don't write out to the disk */ | 1421 | /* |
1422 | * This transaction failed commit. | ||
1423 | * Don't write out to the disk | ||
1424 | */ | ||
1395 | if (!(jl->j_state & LIST_DIRTY)) | 1425 | if (!(jl->j_state & LIST_DIRTY)) |
1396 | goto free_cnode; | 1426 | goto free_cnode; |
1397 | 1427 | ||
1398 | pjl = find_newer_jl_for_cn(cn); | 1428 | pjl = find_newer_jl_for_cn(cn); |
1399 | /* the order is important here. We check pjl to make sure we | 1429 | /* |
1400 | ** don't clear BH_JDirty_wait if we aren't the one writing this | 1430 | * the order is important here. We check pjl to make sure we |
1401 | ** block to disk | 1431 | * don't clear BH_JDirty_wait if we aren't the one writing this |
1432 | * block to disk | ||
1402 | */ | 1433 | */ |
1403 | if (!pjl && cn->bh) { | 1434 | if (!pjl && cn->bh) { |
1404 | saved_bh = cn->bh; | 1435 | saved_bh = cn->bh; |
1405 | 1436 | ||
1406 | /* we do this to make sure nobody releases the buffer while | 1437 | /* |
1407 | ** we are working with it | 1438 | * we do this to make sure nobody releases the |
1439 | * buffer while we are working with it | ||
1408 | */ | 1440 | */ |
1409 | get_bh(saved_bh); | 1441 | get_bh(saved_bh); |
1410 | 1442 | ||
@@ -1413,13 +1445,17 @@ static int flush_journal_list(struct super_block *s, | |||
1413 | was_jwait = 1; | 1445 | was_jwait = 1; |
1414 | was_dirty = 1; | 1446 | was_dirty = 1; |
1415 | } else if (can_dirty(cn)) { | 1447 | } else if (can_dirty(cn)) { |
1416 | /* everything with !pjl && jwait should be writable */ | 1448 | /* |
1449 | * everything with !pjl && jwait | ||
1450 | * should be writable | ||
1451 | */ | ||
1417 | BUG(); | 1452 | BUG(); |
1418 | } | 1453 | } |
1419 | } | 1454 | } |
1420 | 1455 | ||
1421 | /* if someone has this block in a newer transaction, just make | 1456 | /* |
1422 | ** sure they are committed, and don't try writing it to disk | 1457 | * if someone has this block in a newer transaction, just make |
1458 | * sure they are committed, and don't try writing it to disk | ||
1423 | */ | 1459 | */ |
1424 | if (pjl) { | 1460 | if (pjl) { |
1425 | if (atomic_read(&pjl->j_commit_left)) | 1461 | if (atomic_read(&pjl->j_commit_left)) |
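These hunks keep circling one rule, so it may help to state it once: for any block, only the newest journal list that logged it is allowed to write it back; an older list being flushed either waits for that newer list's commit or skips the block entirely. A hedged sketch of the decision, with stand-in types replacing the reiserfs journal list and cnode structures:

struct jlist { unsigned int trans_id; int commit_left; };
struct cnode { struct jlist *newest_owner; };   /* find_newer_jl_for_cn() analog */

enum flush_action { WRITE_BLOCK, COMMIT_NEWER_THEN_SKIP, SKIP_BLOCK };

/* jl is the list being flushed, cn describes one of its blocks. */
static enum flush_action decide(struct jlist *jl, struct cnode *cn)
{
        struct jlist *pjl = cn->newest_owner;

        if (!pjl || pjl == jl)
                return WRITE_BLOCK;             /* we are the newest owner */
        if (pjl->commit_left > 0)
                return COMMIT_NEWER_THEN_SKIP;  /* flush_commit_list() first */
        return SKIP_BLOCK;                      /* newer copy already safe */
}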
@@ -1427,16 +1463,18 @@ static int flush_journal_list(struct super_block *s, | |||
1427 | goto free_cnode; | 1463 | goto free_cnode; |
1428 | } | 1464 | } |
1429 | 1465 | ||
1430 | /* bh == NULL when the block got to disk on its own, OR, | 1466 | /* |
1431 | ** the block got freed in a future transaction | 1467 | * bh == NULL when the block got to disk on its own, OR, |
1468 | * the block got freed in a future transaction | ||
1432 | */ | 1469 | */ |
1433 | if (saved_bh == NULL) { | 1470 | if (saved_bh == NULL) { |
1434 | goto free_cnode; | 1471 | goto free_cnode; |
1435 | } | 1472 | } |
1436 | 1473 | ||
1437 | /* this should never happen. kupdate_one_transaction has this list | 1474 | /* |
1438 | ** locked while it works, so we should never see a buffer here that | 1475 | * this should never happen. kupdate_one_transaction has |
1439 | ** is not marked JDirty_wait | 1476 | * this list locked while it works, so we should never see a |
1477 | * buffer here that is not marked JDirty_wait | ||
1440 | */ | 1478 | */ |
1441 | if ((!was_jwait) && !buffer_locked(saved_bh)) { | 1479 | if ((!was_jwait) && !buffer_locked(saved_bh)) { |
1442 | reiserfs_warning(s, "journal-813", | 1480 | reiserfs_warning(s, "journal-813", |
@@ -1447,7 +1485,10 @@ static int flush_journal_list(struct super_block *s, | |||
1447 | was_jwait ? ' ' : '!'); | 1485 | was_jwait ? ' ' : '!'); |
1448 | } | 1486 | } |
1449 | if (was_dirty) { | 1487 | if (was_dirty) { |
1450 | /* we inc again because saved_bh gets decremented at free_cnode */ | 1488 | /* |
1489 | * we inc again because saved_bh gets decremented | ||
1490 | * at free_cnode | ||
1491 | */ | ||
1451 | get_bh(saved_bh); | 1492 | get_bh(saved_bh); |
1452 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state); | 1493 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state); |
1453 | lock_buffer(saved_bh); | 1494 | lock_buffer(saved_bh); |
@@ -1463,13 +1504,16 @@ static int flush_journal_list(struct super_block *s, | |||
1463 | (unsigned long long)saved_bh-> | 1504 | (unsigned long long)saved_bh-> |
1464 | b_blocknr, __func__); | 1505 | b_blocknr, __func__); |
1465 | } | 1506 | } |
1466 | free_cnode: | 1507 | free_cnode: |
1467 | last = cn; | 1508 | last = cn; |
1468 | cn = cn->next; | 1509 | cn = cn->next; |
1469 | if (saved_bh) { | 1510 | if (saved_bh) { |
1470 | /* we incremented this to keep others from taking the buffer head away */ | 1511 | /* |
1512 | * we incremented this to keep others from | ||
1513 | * taking the buffer head away | ||
1514 | */ | ||
1471 | put_bh(saved_bh); | 1515 | put_bh(saved_bh); |
1472 | if (atomic_read(&(saved_bh->b_count)) < 0) { | 1516 | if (atomic_read(&saved_bh->b_count) < 0) { |
1473 | reiserfs_warning(s, "journal-945", | 1517 | reiserfs_warning(s, "journal-945", |
1474 | "saved_bh->b_count < 0"); | 1518 | "saved_bh->b_count < 0"); |
1475 | } | 1519 | } |
@@ -1499,8 +1543,10 @@ static int flush_journal_list(struct super_block *s, | |||
1499 | #endif | 1543 | #endif |
1500 | err = -EIO; | 1544 | err = -EIO; |
1501 | } | 1545 | } |
1502 | /* note, we must clear the JDirty_wait bit after the up to date | 1546 | /* |
1503 | ** check, otherwise we race against our flushpage routine | 1547 | * note, we must clear the JDirty_wait bit |
1548 | * after the up to date check, otherwise we | ||
1549 | * race against our flushpage routine | ||
1504 | */ | 1550 | */ |
1505 | BUG_ON(!test_clear_buffer_journal_dirty | 1551 | BUG_ON(!test_clear_buffer_journal_dirty |
1506 | (cn->bh)); | 1552 | (cn->bh)); |
@@ -1518,25 +1564,27 @@ static int flush_journal_list(struct super_block *s, | |||
1518 | reiserfs_abort(s, -EIO, | 1564 | reiserfs_abort(s, -EIO, |
1519 | "Write error while pushing transaction to disk in %s", | 1565 | "Write error while pushing transaction to disk in %s", |
1520 | __func__); | 1566 | __func__); |
1521 | flush_older_and_return: | 1567 | flush_older_and_return: |
1522 | 1568 | ||
1523 | /* before we can update the journal header block, we _must_ flush all | 1569 | /* |
1524 | ** real blocks from all older transactions to disk. This is because | 1570 | * before we can update the journal header block, we _must_ flush all |
1525 | ** once the header block is updated, this transaction will not be | 1571 | * real blocks from all older transactions to disk. This is because |
1526 | ** replayed after a crash | 1572 | * once the header block is updated, this transaction will not be |
1573 | * replayed after a crash | ||
1527 | */ | 1574 | */ |
1528 | if (flushall) { | 1575 | if (flushall) { |
1529 | flush_older_journal_lists(s, jl); | 1576 | flush_older_journal_lists(s, jl); |
1530 | } | 1577 | } |
1531 | 1578 | ||
1532 | err = journal->j_errno; | 1579 | err = journal->j_errno; |
1533 | /* before we can remove everything from the hash tables for this | 1580 | /* |
1534 | ** transaction, we must make sure it can never be replayed | 1581 | * before we can remove everything from the hash tables for this |
1535 | ** | 1582 | * transaction, we must make sure it can never be replayed |
1536 | ** since we are only called from do_journal_end, we know for sure there | 1583 | * |
1537 | ** are no allocations going on while we are flushing journal lists. So, | 1584 | * since we are only called from do_journal_end, we know for sure there |
1538 | ** we only need to update the journal header block for the last list | 1585 | * are no allocations going on while we are flushing journal lists. So, |
1539 | ** being flushed | 1586 | * we only need to update the journal header block for the last list |
1587 | * being flushed | ||
1540 | */ | 1588 | */ |
1541 | if (!err && flushall) { | 1589 | if (!err && flushall) { |
1542 | err = | 1590 | err = |
@@ -1561,11 +1609,12 @@ static int flush_journal_list(struct super_block *s, | |||
1561 | } | 1609 | } |
1562 | journal->j_last_flush_id = jl->j_trans_id; | 1610 | journal->j_last_flush_id = jl->j_trans_id; |
1563 | 1611 | ||
1564 | /* not strictly required since we are freeing the list, but it should | 1612 | /* |
1613 | * not strictly required since we are freeing the list, but it should | ||
1565 | * help find code using dead lists later on | 1614 | * help find code using dead lists later on |
1566 | */ | 1615 | */ |
1567 | jl->j_len = 0; | 1616 | jl->j_len = 0; |
1568 | atomic_set(&(jl->j_nonzerolen), 0); | 1617 | atomic_set(&jl->j_nonzerolen, 0); |
1569 | jl->j_start = 0; | 1618 | jl->j_start = 0; |
1570 | jl->j_realblock = NULL; | 1619 | jl->j_realblock = NULL; |
1571 | jl->j_commit_bh = NULL; | 1620 | jl->j_commit_bh = NULL; |
@@ -1592,15 +1641,17 @@ static int write_one_transaction(struct super_block *s, | |||
1592 | 1641 | ||
1593 | cn = jl->j_realblock; | 1642 | cn = jl->j_realblock; |
1594 | while (cn) { | 1643 | while (cn) { |
1595 | /* if the blocknr == 0, this has been cleared from the hash, | 1644 | /* |
1596 | ** skip it | 1645 | * if the blocknr == 0, this has been cleared from the hash, |
1646 | * skip it | ||
1597 | */ | 1647 | */ |
1598 | if (cn->blocknr == 0) { | 1648 | if (cn->blocknr == 0) { |
1599 | goto next; | 1649 | goto next; |
1600 | } | 1650 | } |
1601 | if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { | 1651 | if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { |
1602 | struct buffer_head *tmp_bh; | 1652 | struct buffer_head *tmp_bh; |
1603 | /* we can race against journal_mark_freed when we try | 1653 | /* |
1654 | * we can race against journal_mark_freed when we try | ||
1604 | * to lock_buffer(cn->bh), so we have to inc the buffer | 1655 | * to lock_buffer(cn->bh), so we have to inc the buffer |
1605 | * count, and recheck things after locking | 1656 | * count, and recheck things after locking |
1606 | */ | 1657 | */ |
@@ -1619,7 +1670,7 @@ static int write_one_transaction(struct super_block *s, | |||
1619 | } | 1670 | } |
1620 | put_bh(tmp_bh); | 1671 | put_bh(tmp_bh); |
1621 | } | 1672 | } |
1622 | next: | 1673 | next: |
1623 | cn = cn->next; | 1674 | cn = cn->next; |
1624 | cond_resched(); | 1675 | cond_resched(); |
1625 | } | 1676 | } |
@@ -1637,15 +1688,17 @@ static int dirty_one_transaction(struct super_block *s, | |||
1637 | jl->j_state |= LIST_DIRTY; | 1688 | jl->j_state |= LIST_DIRTY; |
1638 | cn = jl->j_realblock; | 1689 | cn = jl->j_realblock; |
1639 | while (cn) { | 1690 | while (cn) { |
1640 | /* look for a more recent transaction that logged this | 1691 | /* |
1641 | ** buffer. Only the most recent transaction with a buffer in | 1692 | * look for a more recent transaction that logged this |
1642 | ** it is allowed to send that buffer to disk | 1693 | * buffer. Only the most recent transaction with a buffer in |
1694 | * it is allowed to send that buffer to disk | ||
1643 | */ | 1695 | */ |
1644 | pjl = find_newer_jl_for_cn(cn); | 1696 | pjl = find_newer_jl_for_cn(cn); |
1645 | if (!pjl && cn->blocknr && cn->bh | 1697 | if (!pjl && cn->blocknr && cn->bh |
1646 | && buffer_journal_dirty(cn->bh)) { | 1698 | && buffer_journal_dirty(cn->bh)) { |
1647 | BUG_ON(!can_dirty(cn)); | 1699 | BUG_ON(!can_dirty(cn)); |
1648 | /* if the buffer is prepared, it will either be logged | 1700 | /* |
1701 | * if the buffer is prepared, it will either be logged | ||
1649 | * or restored. If restored, we need to make sure | 1702 | * or restored. If restored, we need to make sure |
1650 | * it actually gets marked dirty | 1703 | * it actually gets marked dirty |
1651 | */ | 1704 | */ |
@@ -1682,7 +1735,8 @@ static int kupdate_transactions(struct super_block *s, | |||
1682 | goto done; | 1735 | goto done; |
1683 | } | 1736 | } |
1684 | 1737 | ||
1685 | /* we've got j_flush_mutex held, nobody is going to delete any | 1738 | /* |
1739 | * we've got j_flush_mutex held, nobody is going to delete any | ||
1686 | * of these lists out from underneath us | 1740 | * of these lists out from underneath us |
1687 | */ | 1741 | */ |
1688 | while ((num_trans && transactions_flushed < num_trans) || | 1742 | while ((num_trans && transactions_flushed < num_trans) || |
@@ -1716,20 +1770,21 @@ static int kupdate_transactions(struct super_block *s, | |||
1716 | write_chunk(&chunk); | 1770 | write_chunk(&chunk); |
1717 | } | 1771 | } |
1718 | 1772 | ||
1719 | done: | 1773 | done: |
1720 | mutex_unlock(&journal->j_flush_mutex); | 1774 | mutex_unlock(&journal->j_flush_mutex); |
1721 | return ret; | 1775 | return ret; |
1722 | } | 1776 | } |
1723 | 1777 | ||
1724 | /* for o_sync and fsync heavy applications, they tend to use | 1778 | /* |
1725 | ** all the journa list slots with tiny transactions. These | 1779 | * for o_sync and fsync heavy applications, they tend to use |
1726 | ** trigger lots and lots of calls to update the header block, which | 1780 | * all the journa list slots with tiny transactions. These |
1727 | ** adds seeks and slows things down. | 1781 | * trigger lots and lots of calls to update the header block, which |
1728 | ** | 1782 | * adds seeks and slows things down. |
1729 | ** This function tries to clear out a large chunk of the journal lists | 1783 | * |
1730 | ** at once, which makes everything faster since only the newest journal | 1784 | * This function tries to clear out a large chunk of the journal lists |
1731 | ** list updates the header block | 1785 | * at once, which makes everything faster since only the newest journal |
1732 | */ | 1786 | * list updates the header block |
1787 | */ | ||
1733 | static int flush_used_journal_lists(struct super_block *s, | 1788 | static int flush_used_journal_lists(struct super_block *s, |
1734 | struct reiserfs_journal_list *jl) | 1789 | struct reiserfs_journal_list *jl) |
1735 | { | 1790 | { |
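flush_used_journal_lists, whose header comment appears above, amounts to: walk forward over a run of consecutive finished transactions and let only the newest one pay for the header update. A user-space sketch of just that walk; the cap of 128 is an invented stand-in for the kernel's batching limit:

#include <stddef.h>

struct tlist { int finished; struct tlist *next; };

/*
 * Starting at jl, find the newest list in a run of finished
 * transactions; flushing that one list retires the whole run with a
 * single journal header write.
 */
static struct tlist *pick_flush_target(struct tlist *jl)
{
        struct tlist *newest = jl;
        int span = 0;

        while (newest->next && newest->next->finished && span < 128) {
                newest = newest->next;
                span++;
        }
        return newest;
}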
@@ -1766,9 +1821,11 @@ static int flush_used_journal_lists(struct super_block *s, | |||
1766 | } | 1821 | } |
1767 | get_journal_list(jl); | 1822 | get_journal_list(jl); |
1768 | get_journal_list(flush_jl); | 1823 | get_journal_list(flush_jl); |
1769 | /* try to find a group of blocks we can flush across all the | 1824 | |
1770 | ** transactions, but only bother if we've actually spanned | 1825 | /* |
1771 | ** across multiple lists | 1826 | * try to find a group of blocks we can flush across all the |
1827 | * transactions, but only bother if we've actually spanned | ||
1828 | * across multiple lists | ||
1772 | */ | 1829 | */ |
1773 | if (flush_jl != jl) { | 1830 | if (flush_jl != jl) { |
1774 | ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); | 1831 | ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); |
@@ -1780,9 +1837,9 @@ static int flush_used_journal_lists(struct super_block *s, | |||
1780 | } | 1837 | } |
1781 | 1838 | ||
1782 | /* | 1839 | /* |
1783 | ** removes any nodes in table with name block and dev as bh. | 1840 | * removes any nodes in table whose block and dev match bh. |
1784 | ** only touchs the hnext and hprev pointers. | 1841 | * only touches the hnext and hprev pointers. |
1785 | */ | 1842 | */ |
1786 | void remove_journal_hash(struct super_block *sb, | 1843 | void remove_journal_hash(struct super_block *sb, |
1787 | struct reiserfs_journal_cnode **table, | 1844 | struct reiserfs_journal_cnode **table, |
1788 | struct reiserfs_journal_list *jl, | 1845 | struct reiserfs_journal_list *jl, |
@@ -1811,8 +1868,12 @@ void remove_journal_hash(struct super_block *sb, | |||
1811 | cur->blocknr = 0; | 1868 | cur->blocknr = 0; |
1812 | cur->sb = NULL; | 1869 | cur->sb = NULL; |
1813 | cur->state = 0; | 1870 | cur->state = 0; |
1814 | if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ | 1871 | /* |
1815 | atomic_dec(&(cur->jlist->j_nonzerolen)); | 1872 | * anybody who clears the cur->bh will also |
1873 | * dec the nonzerolen | ||
1874 | */ | ||
1875 | if (cur->bh && cur->jlist) | ||
1876 | atomic_dec(&cur->jlist->j_nonzerolen); | ||
1816 | cur->bh = NULL; | 1877 | cur->bh = NULL; |
1817 | cur->jlist = NULL; | 1878 | cur->jlist = NULL; |
1818 | } | 1879 | } |
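The removal itself is an ordinary unlink from a doubly linked hash chain, which is all the "only touches the hnext and hprev pointers" comment promises; reiserfs additionally clears the cnode's bookkeeping fields, as the hunk shows. A generic, self-contained version:

#include <stddef.h>

struct cn {
        unsigned long blocknr;
        struct cn *hnext, *hprev;
};

/* Unlink every node matching blocknr from the chain rooted at *head. */
static void remove_from_chain(struct cn **head, unsigned long blocknr)
{
        struct cn *cur = *head;

        while (cur) {
                struct cn *next = cur->hnext;

                if (cur->blocknr == blocknr) {
                        if (cur->hprev)
                                cur->hprev->hnext = cur->hnext;
                        else
                                *head = cur->hnext;
                        if (cur->hnext)
                                cur->hnext->hprev = cur->hprev;
                        cur->hnext = cur->hprev = NULL;
                }
                cur = next;
        }
}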
@@ -1832,17 +1893,18 @@ static void free_journal_ram(struct super_block *sb) | |||
1832 | if (journal->j_header_bh) { | 1893 | if (journal->j_header_bh) { |
1833 | brelse(journal->j_header_bh); | 1894 | brelse(journal->j_header_bh); |
1834 | } | 1895 | } |
1835 | /* j_header_bh is on the journal dev, make sure not to release the journal | 1896 | /* |
1836 | * dev until we brelse j_header_bh | 1897 | * j_header_bh is on the journal dev, make sure |
1898 | * not to release the journal dev until we brelse j_header_bh | ||
1837 | */ | 1899 | */ |
1838 | release_journal_dev(sb, journal); | 1900 | release_journal_dev(sb, journal); |
1839 | vfree(journal); | 1901 | vfree(journal); |
1840 | } | 1902 | } |
1841 | 1903 | ||
1842 | /* | 1904 | /* |
1843 | ** call on unmount. Only set error to 1 if you haven't made your way out | 1905 | * call on unmount. Only set error to 1 if you haven't made your way out |
1844 | ** of read_super() yet. Any other caller must keep error at 0. | 1906 | * of read_super() yet. Any other caller must keep error at 0. |
1845 | */ | 1907 | */ |
1846 | static int do_journal_release(struct reiserfs_transaction_handle *th, | 1908 | static int do_journal_release(struct reiserfs_transaction_handle *th, |
1847 | struct super_block *sb, int error) | 1909 | struct super_block *sb, int error) |
1848 | { | 1910 | { |
@@ -1850,21 +1912,25 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1850 | int flushed = 0; | 1912 | int flushed = 0; |
1851 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 1913 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
1852 | 1914 | ||
1853 | /* we only want to flush out transactions if we were called with error == 0 | 1915 | /* |
1916 | * we only want to flush out transactions if we were | ||
1917 | * called with error == 0 | ||
1854 | */ | 1918 | */ |
1855 | if (!error && !(sb->s_flags & MS_RDONLY)) { | 1919 | if (!error && !(sb->s_flags & MS_RDONLY)) { |
1856 | /* end the current trans */ | 1920 | /* end the current trans */ |
1857 | BUG_ON(!th->t_trans_id); | 1921 | BUG_ON(!th->t_trans_id); |
1858 | do_journal_end(th, sb, 10, FLUSH_ALL); | 1922 | do_journal_end(th, FLUSH_ALL); |
1859 | 1923 | ||
1860 | /* make sure something gets logged to force our way into the flush code */ | 1924 | /* |
1861 | if (!journal_join(&myth, sb, 1)) { | 1925 | * make sure something gets logged to force |
1926 | * our way into the flush code | ||
1927 | */ | ||
1928 | if (!journal_join(&myth, sb)) { | ||
1862 | reiserfs_prepare_for_journal(sb, | 1929 | reiserfs_prepare_for_journal(sb, |
1863 | SB_BUFFER_WITH_SB(sb), | 1930 | SB_BUFFER_WITH_SB(sb), |
1864 | 1); | 1931 | 1); |
1865 | journal_mark_dirty(&myth, sb, | 1932 | journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb)); |
1866 | SB_BUFFER_WITH_SB(sb)); | 1933 | do_journal_end(&myth, FLUSH_ALL); |
1867 | do_journal_end(&myth, sb, 1, FLUSH_ALL); | ||
1868 | flushed = 1; | 1934 | flushed = 1; |
1869 | } | 1935 | } |
1870 | } | 1936 | } |
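The "make sure something gets logged" trick above deserves a gloss: an empty transaction would never reach the flush path, so unmount dirties one well-known buffer (the superblock's) to guarantee that do_journal_end(..., FLUSH_ALL) really flushes. A sketch with stand-in types and helpers, not the reiserfs API:

struct buf  { int dirty; };
struct sbk  { struct buf sb_buf; };
struct hand { int open; };

enum { FLUSH_ALL = 1 };

static void txn_join(struct hand *th, struct sbk *s)   { (void)s; th->open = 1; }
static void mark_dirty(struct hand *th, struct buf *b) { (void)th; b->dirty = 1; }
static void txn_end(struct hand *th, int flags)        { (void)flags; th->open = 0; }

/* Log one known buffer so the closing transaction is non-empty. */
static void force_final_flush(struct sbk *s)
{
        struct hand th;

        txn_join(&th, s);               /* journal_join() analog */
        mark_dirty(&th, &s->sb_buf);    /* journal_mark_dirty() analog */
        txn_end(&th, FLUSH_ALL);        /* do_journal_end() analog */
}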
@@ -1872,17 +1938,15 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1872 | /* this also catches errors during the do_journal_end above */ | 1938 | /* this also catches errors during the do_journal_end above */ |
1873 | if (!error && reiserfs_is_journal_aborted(journal)) { | 1939 | if (!error && reiserfs_is_journal_aborted(journal)) { |
1874 | memset(&myth, 0, sizeof(myth)); | 1940 | memset(&myth, 0, sizeof(myth)); |
1875 | if (!journal_join_abort(&myth, sb, 1)) { | 1941 | if (!journal_join_abort(&myth, sb)) { |
1876 | reiserfs_prepare_for_journal(sb, | 1942 | reiserfs_prepare_for_journal(sb, |
1877 | SB_BUFFER_WITH_SB(sb), | 1943 | SB_BUFFER_WITH_SB(sb), |
1878 | 1); | 1944 | 1); |
1879 | journal_mark_dirty(&myth, sb, | 1945 | journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb)); |
1880 | SB_BUFFER_WITH_SB(sb)); | 1946 | do_journal_end(&myth, FLUSH_ALL); |
1881 | do_journal_end(&myth, sb, 1, FLUSH_ALL); | ||
1882 | } | 1947 | } |
1883 | } | 1948 | } |
1884 | 1949 | ||
1885 | reiserfs_mounted_fs_count--; | ||
1886 | /* wait for all commits to finish */ | 1950 | /* wait for all commits to finish */ |
1887 | cancel_delayed_work(&SB_JOURNAL(sb)->j_work); | 1951 | cancel_delayed_work(&SB_JOURNAL(sb)->j_work); |
1888 | 1952 | ||
@@ -1893,12 +1957,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1893 | reiserfs_write_unlock(sb); | 1957 | reiserfs_write_unlock(sb); |
1894 | 1958 | ||
1895 | cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); | 1959 | cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); |
1896 | flush_workqueue(commit_wq); | 1960 | flush_workqueue(REISERFS_SB(sb)->commit_wq); |
1897 | |||
1898 | if (!reiserfs_mounted_fs_count) { | ||
1899 | destroy_workqueue(commit_wq); | ||
1900 | commit_wq = NULL; | ||
1901 | } | ||
1902 | 1961 | ||
1903 | free_journal_ram(sb); | 1962 | free_journal_ram(sb); |
1904 | 1963 | ||
@@ -1907,25 +1966,24 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1907 | return 0; | 1966 | return 0; |
1908 | } | 1967 | } |
1909 | 1968 | ||
1910 | /* | 1969 | /* call on unmount. flush all journal trans, release all alloc'd ram */ |
1911 | ** call on unmount. flush all journal trans, release all alloc'd ram | ||
1912 | */ | ||
1913 | int journal_release(struct reiserfs_transaction_handle *th, | 1970 | int journal_release(struct reiserfs_transaction_handle *th, |
1914 | struct super_block *sb) | 1971 | struct super_block *sb) |
1915 | { | 1972 | { |
1916 | return do_journal_release(th, sb, 0); | 1973 | return do_journal_release(th, sb, 0); |
1917 | } | 1974 | } |
1918 | 1975 | ||
1919 | /* | 1976 | /* only call from an error condition inside reiserfs_read_super! */ |
1920 | ** only call from an error condition inside reiserfs_read_super! | ||
1921 | */ | ||
1922 | int journal_release_error(struct reiserfs_transaction_handle *th, | 1977 | int journal_release_error(struct reiserfs_transaction_handle *th, |
1923 | struct super_block *sb) | 1978 | struct super_block *sb) |
1924 | { | 1979 | { |
1925 | return do_journal_release(th, sb, 1); | 1980 | return do_journal_release(th, sb, 1); |
1926 | } | 1981 | } |
1927 | 1982 | ||
1928 | /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ | 1983 | /* |
1984 | * compares description block with commit block. | ||
1985 | * returns 1 if they differ, 0 if they are the same | ||
1986 | */ | ||
1929 | static int journal_compare_desc_commit(struct super_block *sb, | 1987 | static int journal_compare_desc_commit(struct super_block *sb, |
1930 | struct reiserfs_journal_desc *desc, | 1988 | struct reiserfs_journal_desc *desc, |
1931 | struct reiserfs_journal_commit *commit) | 1989 | struct reiserfs_journal_commit *commit) |
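journal_compare_desc_commit checks that the two framing blocks of a logged transaction agree before replay trusts it. A sketch of the comparisons implied by the comment: same id, same length, and a length that is positive and within the journal's maximum. The real code reads these fields through little-endian accessor macros rather than plain struct members:

struct desc_blk   { unsigned int trans_id, trans_len; };
struct commit_blk { unsigned int trans_id, trans_len; };

/* Returns 1 if the blocks differ (do not replay), 0 if they match. */
static int desc_commit_differ(const struct desc_blk *d,
                              const struct commit_blk *c,
                              unsigned int trans_max)
{
        return d->trans_id != c->trans_id ||
               d->trans_len != c->trans_len ||
               c->trans_len > trans_max ||
               c->trans_len == 0;
}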
@@ -1939,11 +1997,12 @@ static int journal_compare_desc_commit(struct super_block *sb, | |||
1939 | return 0; | 1997 | return 0; |
1940 | } | 1998 | } |
1941 | 1999 | ||
1942 | /* returns 0 if it did not find a description block | 2000 | /* |
1943 | ** returns -1 if it found a corrupt commit block | 2001 | * returns 0 if it did not find a description block |
1944 | ** returns 1 if both desc and commit were valid | 2002 | * returns -1 if it found a corrupt commit block |
1945 | ** NOTE: only called during fs mount | 2003 | * returns 1 if both desc and commit were valid |
1946 | */ | 2004 | * NOTE: only called during fs mount |
2005 | */ | ||
1947 | static int journal_transaction_is_valid(struct super_block *sb, | 2006 | static int journal_transaction_is_valid(struct super_block *sb, |
1948 | struct buffer_head *d_bh, | 2007 | struct buffer_head *d_bh, |
1949 | unsigned int *oldest_invalid_trans_id, | 2008 | unsigned int *oldest_invalid_trans_id, |
@@ -1989,7 +2048,10 @@ static int journal_transaction_is_valid(struct super_block *sb, | |||
1989 | } | 2048 | } |
1990 | offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 2049 | offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); |
1991 | 2050 | ||
1992 | /* ok, we have a journal description block, lets see if the transaction was valid */ | 2051 | /* |
2052 | * ok, we have a journal description block, | ||
2053 | * let's see if the transaction was valid | ||
2054 | */ | ||
1993 | c_bh = | 2055 | c_bh = |
1994 | journal_bread(sb, | 2056 | journal_bread(sb, |
1995 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 2057 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) + |
@@ -2041,11 +2103,11 @@ static void brelse_array(struct buffer_head **heads, int num) | |||
2041 | } | 2103 | } |
2042 | 2104 | ||
2043 | /* | 2105 | /* |
2044 | ** given the start, and values for the oldest acceptable transactions, | 2106 | * given the start, and values for the oldest acceptable transactions, |
2045 | ** this either reads in a replays a transaction, or returns because the | 2107 | * this either reads in and replays a transaction, or returns because the |
2046 | ** transaction is invalid, or too old. | 2108 | * transaction is invalid, or too old. |
2047 | ** NOTE: only called during fs mount | 2109 | * NOTE: only called during fs mount |
2048 | */ | 2110 | */ |
2049 | static int journal_read_transaction(struct super_block *sb, | 2111 | static int journal_read_transaction(struct super_block *sb, |
2050 | unsigned long cur_dblock, | 2112 | unsigned long cur_dblock, |
2051 | unsigned long oldest_start, | 2113 | unsigned long oldest_start, |
@@ -2119,7 +2181,10 @@ static int journal_read_transaction(struct super_block *sb, | |||
2119 | } | 2181 | } |
2120 | 2182 | ||
2121 | trans_id = get_desc_trans_id(desc); | 2183 | trans_id = get_desc_trans_id(desc); |
2122 | /* now we know we've got a good transaction, and it was inside the valid time ranges */ | 2184 | /* |
2185 | * now we know we've got a good transaction, and it was | ||
2186 | * inside the valid time ranges | ||
2187 | */ | ||
2123 | log_blocks = kmalloc(get_desc_trans_len(desc) * | 2188 | log_blocks = kmalloc(get_desc_trans_len(desc) * |
2124 | sizeof(struct buffer_head *), GFP_NOFS); | 2189 | sizeof(struct buffer_head *), GFP_NOFS); |
2125 | real_blocks = kmalloc(get_desc_trans_len(desc) * | 2190 | real_blocks = kmalloc(get_desc_trans_len(desc) * |
@@ -2164,7 +2229,7 @@ static int journal_read_transaction(struct super_block *sb, | |||
2164 | reiserfs_warning(sb, "journal-1204", | 2229 | reiserfs_warning(sb, "journal-1204", |
2165 | "REPLAY FAILURE fsck required! " | 2230 | "REPLAY FAILURE fsck required! " |
2166 | "Trying to replay onto a log block"); | 2231 | "Trying to replay onto a log block"); |
2167 | abort_replay: | 2232 | abort_replay: |
2168 | brelse_array(log_blocks, i); | 2233 | brelse_array(log_blocks, i); |
2169 | brelse_array(real_blocks, i); | 2234 | brelse_array(real_blocks, i); |
2170 | brelse(c_bh); | 2235 | brelse(c_bh); |
@@ -2226,7 +2291,10 @@ static int journal_read_transaction(struct super_block *sb, | |||
2226 | "journal-1095: setting journal " "start to offset %ld", | 2291 | "journal-1095: setting journal " "start to offset %ld", |
2227 | cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); | 2292 | cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); |
2228 | 2293 | ||
2229 | /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ | 2294 | /* |
2295 | * init starting values for the first transaction, in case | ||
2296 | * this is the last transaction to be replayed. | ||
2297 | */ | ||
2230 | journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 2298 | journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); |
2231 | journal->j_last_flush_trans_id = trans_id; | 2299 | journal->j_last_flush_trans_id = trans_id; |
2232 | journal->j_trans_id = trans_id + 1; | 2300 | journal->j_trans_id = trans_id + 1; |
@@ -2240,12 +2308,14 @@ static int journal_read_transaction(struct super_block *sb, | |||
2240 | return 0; | 2308 | return 0; |
2241 | } | 2309 | } |
2242 | 2310 | ||
2243 | /* This function reads blocks starting from block and to max_block of bufsize | 2311 | /* |
2244 | size (but no more than BUFNR blocks at a time). This proved to improve | 2312 | * This function reads blocks starting from block up to max_block of bufsize |
2245 | mounting speed on self-rebuilding raid5 arrays at least. | 2313 | * size (but no more than BUFNR blocks at a time). This proved to improve |
2246 | Right now it is only used from journal code. But later we might use it | 2314 | * mounting speed on self-rebuilding raid5 arrays at least. |
2247 | from other places. | 2315 | * Right now it is only used from journal code. But later we might use it |
2248 | Note: Do not use journal_getblk/sb_getblk functions here! */ | 2316 | * from other places. |
2317 | * Note: Do not use journal_getblk/sb_getblk functions here! | ||
2318 | */ | ||
2249 | static struct buffer_head *reiserfs_breada(struct block_device *dev, | 2319 | static struct buffer_head *reiserfs_breada(struct block_device *dev, |
2250 | b_blocknr_t block, int bufsize, | 2320 | b_blocknr_t block, int bufsize, |
2251 | b_blocknr_t max_block) | 2321 | b_blocknr_t max_block) |
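The readahead idea behind reiserfs_breada is plain batching: when one journal block is needed, submit up to BUFNR consecutive blocks (clamped at the device end) and let the block layer merge them into one request. The batching arithmetic in isolation; the value of BUFNR here is an assumption for illustration, not the kernel's constant:

enum { BUFNR = 16 };    /* assumed batch size for illustration */

/*
 * Number of blocks to submit when block is wanted and the device
 * holds blocks [0, max_block).
 */
static int breada_batch(unsigned long block, unsigned long max_block)
{
        unsigned long left = max_block > block ? max_block - block : 0;

        return left > BUFNR ? BUFNR : (int)left;
}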
@@ -2284,15 +2354,17 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, | |||
2284 | } | 2354 | } |
2285 | 2355 | ||
2286 | /* | 2356 | /* |
2287 | ** read and replay the log | 2357 | * read and replay the log |
2288 | ** on a clean unmount, the journal header's next unflushed pointer will | 2358 | * on a clean unmount, the journal header's next unflushed pointer will be |
2289 | ** be to an invalid transaction. This tests that before finding all the | 2359 | * to an invalid transaction. This tests that before finding all the |
2290 | ** transactions in the log, which makes normal mount times fast. | 2360 | * transactions in the log, which makes normal mount times fast. |
2291 | ** After a crash, this starts with the next unflushed transaction, and | 2361 | * |
2292 | ** replays until it finds one too old, or invalid. | 2362 | * After a crash, this starts with the next unflushed transaction, and |
2293 | ** On exit, it sets things up so the first transaction will work correctly. | 2363 | * replays until it finds one too old, or invalid. |
2294 | ** NOTE: only called during fs mount | 2364 | * |
2295 | */ | 2365 | * On exit, it sets things up so the first transaction will work correctly. |
2366 | * NOTE: only called during fs mount | ||
2367 | */ | ||
2296 | static int journal_read(struct super_block *sb) | 2368 | static int journal_read(struct super_block *sb) |
2297 | { | 2369 | { |
2298 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 2370 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
@@ -2316,9 +2388,10 @@ static int journal_read(struct super_block *sb) | |||
2316 | bdevname(journal->j_dev_bd, b)); | 2388 | bdevname(journal->j_dev_bd, b)); |
2317 | start = get_seconds(); | 2389 | start = get_seconds(); |
2318 | 2390 | ||
2319 | /* step 1, read in the journal header block. Check the transaction it says | 2391 | /* |
2320 | ** is the first unflushed, and if that transaction is not valid, | 2392 | * step 1, read in the journal header block. Check the transaction |
2321 | ** replay is done | 2393 | * it says is the first unflushed, and if that transaction is not |
2394 | * valid, replay is done | ||
2322 | */ | 2395 | */ |
2323 | journal->j_header_bh = journal_bread(sb, | 2396 | journal->j_header_bh = journal_bread(sb, |
2324 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) | 2397 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) |
@@ -2342,9 +2415,10 @@ static int journal_read(struct super_block *sb) | |||
2342 | le32_to_cpu(jh->j_last_flush_trans_id)); | 2415 | le32_to_cpu(jh->j_last_flush_trans_id)); |
2343 | valid_journal_header = 1; | 2416 | valid_journal_header = 1; |
2344 | 2417 | ||
2345 | /* now, we try to read the first unflushed offset. If it is not valid, | 2418 | /* |
2346 | ** there is nothing more we can do, and it makes no sense to read | 2419 | * now, we try to read the first unflushed offset. If it |
2347 | ** through the whole log. | 2420 | * is not valid, there is nothing more we can do, and it |
2421 | * makes no sense to read through the whole log. | ||
2348 | */ | 2422 | */ |
2349 | d_bh = | 2423 | d_bh = |
2350 | journal_bread(sb, | 2424 | journal_bread(sb, |
@@ -2358,15 +2432,19 @@ static int journal_read(struct super_block *sb) | |||
2358 | goto start_log_replay; | 2432 | goto start_log_replay; |
2359 | } | 2433 | } |
2360 | 2434 | ||
2361 | /* ok, there are transactions that need to be replayed. start with the first log block, find | 2435 | /* |
2362 | ** all the valid transactions, and pick out the oldest. | 2436 | * ok, there are transactions that need to be replayed. start |
2437 | * with the first log block, find all the valid transactions, and | ||
2438 | * pick out the oldest. | ||
2363 | */ | 2439 | */ |
2364 | while (continue_replay | 2440 | while (continue_replay |
2365 | && cur_dblock < | 2441 | && cur_dblock < |
2366 | (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 2442 | (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + |
2367 | SB_ONDISK_JOURNAL_SIZE(sb))) { | 2443 | SB_ONDISK_JOURNAL_SIZE(sb))) { |
2368 | /* Note that it is required for blocksize of primary fs device and journal | 2444 | /* |
2369 | device to be the same | 2445 | * Note that the blocksize of the primary fs device and the |
2446 | * journal device must be the same | ||
2447 | */ | ||
2370 | d_bh = | 2448 | d_bh = |
2371 | reiserfs_breada(journal->j_dev_bd, cur_dblock, | 2449 | reiserfs_breada(journal->j_dev_bd, cur_dblock, |
2372 | sb->s_blocksize, | 2450 | sb->s_blocksize, |
@@ -2413,7 +2491,7 @@ static int journal_read(struct super_block *sb) | |||
2413 | brelse(d_bh); | 2491 | brelse(d_bh); |
2414 | } | 2492 | } |
2415 | 2493 | ||
2416 | start_log_replay: | 2494 | start_log_replay: |
2417 | cur_dblock = oldest_start; | 2495 | cur_dblock = oldest_start; |
2418 | if (oldest_trans_id) { | 2496 | if (oldest_trans_id) { |
2419 | reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 2497 | reiserfs_debug(sb, REISERFS_DEBUG_CODE, |
@@ -2444,9 +2522,11 @@ static int journal_read(struct super_block *sb) | |||
2444 | reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 2522 | reiserfs_debug(sb, REISERFS_DEBUG_CODE, |
2445 | "journal-1225: No valid " "transactions found"); | 2523 | "journal-1225: No valid " "transactions found"); |
2446 | } | 2524 | } |
2447 | /* j_start does not get set correctly if we don't replay any transactions. | 2525 | /* |
2448 | ** if we had a valid journal_header, set j_start to the first unflushed transaction value, | 2526 | * j_start does not get set correctly if we don't replay any |
2449 | ** copy the trans_id from the header | 2527 | * transactions. if we had a valid journal_header, set j_start |
2528 | * to the first unflushed transaction value, copy the trans_id | ||
2529 | * from the header | ||
2450 | */ | 2530 | */ |
2451 | if (valid_journal_header && replay_count == 0) { | 2531 | if (valid_journal_header && replay_count == 0) { |
2452 | journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); | 2532 | journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); |
@@ -2475,8 +2555,9 @@ static int journal_read(struct super_block *sb) | |||
2475 | _update_journal_header_block(sb, journal->j_start, | 2555 | _update_journal_header_block(sb, journal->j_start, |
2476 | journal->j_last_flush_trans_id)) { | 2556 | journal->j_last_flush_trans_id)) { |
2477 | reiserfs_write_unlock(sb); | 2557 | reiserfs_write_unlock(sb); |
2478 | /* replay failed, caller must call free_journal_ram and abort | 2558 | /* |
2479 | ** the mount | 2559 | * replay failed, caller must call free_journal_ram and abort |
2560 | * the mount | ||
2480 | */ | 2561 | */ |
2481 | return -1; | 2562 | return -1; |
2482 | } | 2563 | } |
@@ -2569,7 +2650,7 @@ static int journal_init_dev(struct super_block *super, | |||
2569 | return 0; | 2650 | return 0; |
2570 | } | 2651 | } |
2571 | 2652 | ||
2572 | /** | 2653 | /* |
2573 | * When creating/tuning a file system user can assign some | 2654 | * When creating/tuning a file system user can assign some |
2574 | * journal params within boundaries which depend on the ratio | 2655 | * journal params within boundaries which depend on the ratio |
2575 | * blocksize/standard_blocksize. | 2656 | * blocksize/standard_blocksize. |
@@ -2587,8 +2668,7 @@ static int check_advise_trans_params(struct super_block *sb, | |||
2587 | struct reiserfs_journal *journal) | 2668 | struct reiserfs_journal *journal) |
2588 | { | 2669 | { |
2589 | if (journal->j_trans_max) { | 2670 | if (journal->j_trans_max) { |
2590 | /* Non-default journal params. | 2671 | /* Non-default journal params. Do sanity check for them. */ |
2591 | Do sanity check for them. */ | ||
2592 | int ratio = 1; | 2672 | int ratio = 1; |
2593 | if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) | 2673 | if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) |
2594 | ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; | 2674 | ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; |
@@ -2610,10 +2690,12 @@ static int check_advise_trans_params(struct super_block *sb, | |||
2610 | return 1; | 2690 | return 1; |
2611 | } | 2691 | } |
2612 | } else { | 2692 | } else { |
2613 | /* Default journal params. | 2693 | /* |
2614 | The file system was created by old version | 2694 | * Default journal params. |
2615 | of mkreiserfs, so some fields contain zeros, | 2695 | * The file system was created by an old version |
2616 | and we need to advise proper values for them */ | 2696 | * of mkreiserfs, so some fields contain zeros, |
2697 | * and we need to advise proper values for them | ||
2698 | */ | ||
2617 | if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { | 2699 | if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { |
2618 | reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", | 2700 | reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", |
2619 | sb->s_blocksize); | 2701 | sb->s_blocksize); |
@@ -2626,9 +2708,7 @@ static int check_advise_trans_params(struct super_block *sb, | |||
2626 | return 0; | 2708 | return 0; |
2627 | } | 2709 | } |
2628 | 2710 | ||
2629 | /* | 2711 | /* must be called once on fs mount. calls journal_read for you */ |
2630 | ** must be called once on fs mount. calls journal_read for you | ||
2631 | */ | ||
2632 | int journal_init(struct super_block *sb, const char *j_dev_name, | 2712 | int journal_init(struct super_block *sb, const char *j_dev_name, |
2633 | int old_format, unsigned int commit_max_age) | 2713 | int old_format, unsigned int commit_max_age) |
2634 | { | 2714 | { |
@@ -2667,8 +2747,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2667 | REISERFS_DISK_OFFSET_IN_BYTES / | 2747 | REISERFS_DISK_OFFSET_IN_BYTES / |
2668 | sb->s_blocksize + 2); | 2748 | sb->s_blocksize + 2); |
2669 | 2749 | ||
2670 | /* Sanity check to see is the standard journal fitting within first bitmap | 2750 | /* |
2671 | (actual for small blocksizes) | 2751 | * Sanity check to see if the standard journal fits |
2752 | * within the first bitmap (relevant for small blocksizes) | ||
2753 | */ | ||
2672 | if (!SB_ONDISK_JOURNAL_DEVICE(sb) && | 2754 | if (!SB_ONDISK_JOURNAL_DEVICE(sb) && |
2673 | (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + | 2755 | (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + |
2674 | SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { | 2756 | SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { |
@@ -2754,20 +2836,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2754 | journal->j_start = 0; | 2836 | journal->j_start = 0; |
2755 | journal->j_len = 0; | 2837 | journal->j_len = 0; |
2756 | journal->j_len_alloc = 0; | 2838 | journal->j_len_alloc = 0; |
2757 | atomic_set(&(journal->j_wcount), 0); | 2839 | atomic_set(&journal->j_wcount, 0); |
2758 | atomic_set(&(journal->j_async_throttle), 0); | 2840 | atomic_set(&journal->j_async_throttle, 0); |
2759 | journal->j_bcount = 0; | 2841 | journal->j_bcount = 0; |
2760 | journal->j_trans_start_time = 0; | 2842 | journal->j_trans_start_time = 0; |
2761 | journal->j_last = NULL; | 2843 | journal->j_last = NULL; |
2762 | journal->j_first = NULL; | 2844 | journal->j_first = NULL; |
2763 | init_waitqueue_head(&(journal->j_join_wait)); | 2845 | init_waitqueue_head(&journal->j_join_wait); |
2764 | mutex_init(&journal->j_mutex); | 2846 | mutex_init(&journal->j_mutex); |
2765 | mutex_init(&journal->j_flush_mutex); | 2847 | mutex_init(&journal->j_flush_mutex); |
2766 | 2848 | ||
2767 | journal->j_trans_id = 10; | 2849 | journal->j_trans_id = 10; |
2768 | journal->j_mount_id = 10; | 2850 | journal->j_mount_id = 10; |
2769 | journal->j_state = 0; | 2851 | journal->j_state = 0; |
2770 | atomic_set(&(journal->j_jlock), 0); | 2852 | atomic_set(&journal->j_jlock, 0); |
2771 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); | 2853 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); |
2772 | journal->j_cnode_free_orig = journal->j_cnode_free_list; | 2854 | journal->j_cnode_free_orig = journal->j_cnode_free_list; |
2773 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; | 2855 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; |
@@ -2807,23 +2889,19 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2807 | goto free_and_return; | 2889 | goto free_and_return; |
2808 | } | 2890 | } |
2809 | 2891 | ||
2810 | reiserfs_mounted_fs_count++; | ||
2811 | if (reiserfs_mounted_fs_count <= 1) | ||
2812 | commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); | ||
2813 | |||
2814 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); | 2892 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); |
2815 | journal->j_work_sb = sb; | 2893 | journal->j_work_sb = sb; |
2816 | return 0; | 2894 | return 0; |
2817 | free_and_return: | 2895 | free_and_return: |
2818 | free_journal_ram(sb); | 2896 | free_journal_ram(sb); |
2819 | return 1; | 2897 | return 1; |
2820 | } | 2898 | } |
2821 | 2899 | ||
2822 | /* | 2900 | /* |
2823 | ** test for a polite end of the current transaction. Used by file_write, and should | 2901 | * test for a polite end of the current transaction. Used by file_write, |
2824 | ** be used by delete to make sure they don't write more than can fit inside a single | 2902 | * and should be used by delete to make sure they don't write more than |
2825 | ** transaction | 2903 | * can fit inside a single transaction |
2826 | */ | 2904 | */ |
2827 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | 2905 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, |
2828 | int new_alloc) | 2906 | int new_alloc) |
2829 | { | 2907 | { |
@@ -2835,7 +2913,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | |||
2835 | return 0; | 2913 | return 0; |
2836 | if (journal->j_must_wait > 0 || | 2914 | if (journal->j_must_wait > 0 || |
2837 | (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || | 2915 | (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || |
2838 | atomic_read(&(journal->j_jlock)) || | 2916 | atomic_read(&journal->j_jlock) || |
2839 | (now - journal->j_trans_start_time) > journal->j_max_trans_age || | 2917 | (now - journal->j_trans_start_time) > journal->j_max_trans_age || |
2840 | journal->j_cnode_free < (journal->j_trans_max * 3)) { | 2918 | journal->j_cnode_free < (journal->j_trans_max * 3)) { |
2841 | return 1; | 2919 | return 1; |
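The predicate in this hunk is compact enough to restate over stand-in fields: a transaction should end politely when the batch is full, someone is waiting on the journal, the transaction has aged out, or free cnodes run low. A sketch mirroring the condition visible above:

#include <time.h>

struct jnl {
        int must_wait;
        unsigned long len_alloc, max_batch;
        int jlock;
        time_t start;
        long max_age;
        unsigned long cnode_free, trans_max;
};

static int should_end(const struct jnl *j, unsigned long new_alloc)
{
        return j->must_wait > 0 ||
               j->len_alloc + new_alloc >= j->max_batch ||
               j->jlock ||
               time(NULL) - j->start > j->max_age ||
               j->cnode_free < j->trans_max * 3;
}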
@@ -2846,8 +2924,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | |||
2846 | return 0; | 2924 | return 0; |
2847 | } | 2925 | } |
2848 | 2926 | ||
2849 | /* this must be called inside a transaction | 2927 | /* this must be called inside a transaction */ |
2850 | */ | ||
2851 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | 2928 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) |
2852 | { | 2929 | { |
2853 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); | 2930 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); |
@@ -2857,8 +2934,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | |||
2857 | return; | 2934 | return; |
2858 | } | 2935 | } |
2859 | 2936 | ||
2860 | /* this must be called without a transaction started | 2937 | /* this must be called without a transaction started */ |
2861 | */ | ||
2862 | void reiserfs_allow_writes(struct super_block *s) | 2938 | void reiserfs_allow_writes(struct super_block *s) |
2863 | { | 2939 | { |
2864 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2940 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
@@ -2866,8 +2942,7 @@ void reiserfs_allow_writes(struct super_block *s) | |||
2866 | wake_up(&journal->j_join_wait); | 2942 | wake_up(&journal->j_join_wait); |
2867 | } | 2943 | } |
2868 | 2944 | ||
2869 | /* this must be called without a transaction started | 2945 | /* this must be called without a transaction started */ |
2870 | */ | ||
2871 | void reiserfs_wait_on_write_block(struct super_block *s) | 2946 | void reiserfs_wait_on_write_block(struct super_block *s) |
2872 | { | 2947 | { |
2873 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2948 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
@@ -2929,11 +3004,12 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) | |||
2929 | } | 3004 | } |
2930 | } | 3005 | } |
2931 | 3006 | ||
2932 | /* join == true if you must join an existing transaction. | 3007 | /* |
2933 | ** join == false if you can deal with waiting for others to finish | 3008 | * join == true if you must join an existing transaction. |
2934 | ** | 3009 | * join == false if you can deal with waiting for others to finish |
2935 | ** this will block until the transaction is joinable. send the number of blocks you | 3010 | * |
2936 | ** expect to use in nblocks. | 3011 | * this will block until the transaction is joinable. send the number of |
3012 | * blocks you expect to use in nblocks. | ||
2937 | */ | 3013 | */ |
2938 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | 3014 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, |
2939 | struct super_block *sb, unsigned long nblocks, | 3015 | struct super_block *sb, unsigned long nblocks, |
@@ -2955,7 +3031,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2955 | th->t_refcount = 1; | 3031 | th->t_refcount = 1; |
2956 | th->t_super = sb; | 3032 | th->t_super = sb; |
2957 | 3033 | ||
2958 | relock: | 3034 | relock: |
2959 | lock_journal(sb); | 3035 | lock_journal(sb); |
2960 | if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { | 3036 | if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { |
2961 | unlock_journal(sb); | 3037 | unlock_journal(sb); |
@@ -2974,9 +3050,11 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2974 | } | 3050 | } |
2975 | now = get_seconds(); | 3051 | now = get_seconds(); |
2976 | 3052 | ||
2977 | /* if there is no room in the journal OR | 3053 | /* |
2978 | ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning | 3054 | * if there is no room in the journal OR |
2979 | ** we don't sleep if there aren't other writers | 3055 | * if this transaction is too old, and we weren't called joinable, |
3056 | * wait for it to finish before beginning. we don't sleep if there | ||
3057 | * aren't other writers | ||
2980 | */ | 3058 | */ |
2981 | 3059 | ||
2982 | if ((!join && journal->j_must_wait > 0) || | 3060 | if ((!join && journal->j_must_wait > 0) || |
@@ -2990,7 +3068,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2990 | || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { | 3068 | || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { |
2991 | 3069 | ||
2992 | old_trans_id = journal->j_trans_id; | 3070 | old_trans_id = journal->j_trans_id; |
2993 | unlock_journal(sb); /* allow others to finish this transaction */ | 3071 | /* allow others to finish this transaction */ |
3072 | unlock_journal(sb); | ||
2994 | 3073 | ||
2995 | if (!join && (journal->j_len_alloc + nblocks + 2) >= | 3074 | if (!join && (journal->j_len_alloc + nblocks + 2) >= |
2996 | journal->j_max_batch && | 3075 | journal->j_max_batch && |
@@ -3002,8 +3081,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3002 | goto relock; | 3081 | goto relock; |
3003 | } | 3082 | } |
3004 | } | 3083 | } |
3005 | /* don't mess with joining the transaction if all we have to do is | 3084 | /* |
3006 | * wait for someone else to do a commit | 3085 | * don't mess with joining the transaction if all we |
3086 | * have to do is wait for someone else to do a commit | ||
3007 | */ | 3087 | */ |
3008 | if (atomic_read(&journal->j_jlock)) { | 3088 | if (atomic_read(&journal->j_jlock)) { |
3009 | while (journal->j_trans_id == old_trans_id && | 3089 | while (journal->j_trans_id == old_trans_id && |
@@ -3012,15 +3092,15 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3012 | } | 3092 | } |
3013 | goto relock; | 3093 | goto relock; |
3014 | } | 3094 | } |
3015 | retval = journal_join(&myth, sb, 1); | 3095 | retval = journal_join(&myth, sb); |
3016 | if (retval) | 3096 | if (retval) |
3017 | goto out_fail; | 3097 | goto out_fail; |
3018 | 3098 | ||
3019 | /* someone might have ended the transaction while we joined */ | 3099 | /* someone might have ended the transaction while we joined */ |
3020 | if (old_trans_id != journal->j_trans_id) { | 3100 | if (old_trans_id != journal->j_trans_id) { |
3021 | retval = do_journal_end(&myth, sb, 1, 0); | 3101 | retval = do_journal_end(&myth, 0); |
3022 | } else { | 3102 | } else { |
3023 | retval = do_journal_end(&myth, sb, 1, COMMIT_NOW); | 3103 | retval = do_journal_end(&myth, COMMIT_NOW); |
3024 | } | 3104 | } |
3025 | 3105 | ||
3026 | if (retval) | 3106 | if (retval) |
@@ -3033,7 +3113,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3033 | if (journal->j_trans_start_time == 0) { | 3113 | if (journal->j_trans_start_time == 0) { |
3034 | journal->j_trans_start_time = get_seconds(); | 3114 | journal->j_trans_start_time = get_seconds(); |
3035 | } | 3115 | } |
3036 | atomic_inc(&(journal->j_wcount)); | 3116 | atomic_inc(&journal->j_wcount); |
3037 | journal->j_len_alloc += nblocks; | 3117 | journal->j_len_alloc += nblocks; |
3038 | th->t_blocks_logged = 0; | 3118 | th->t_blocks_logged = 0; |
3039 | th->t_blocks_allocated = nblocks; | 3119 | th->t_blocks_allocated = nblocks; |
@@ -3042,11 +3122,13 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3042 | INIT_LIST_HEAD(&th->t_list); | 3122 | INIT_LIST_HEAD(&th->t_list); |
3043 | return 0; | 3123 | return 0; |
3044 | 3124 | ||
3045 | out_fail: | 3125 | out_fail: |
3046 | memset(th, 0, sizeof(*th)); | 3126 | memset(th, 0, sizeof(*th)); |
3047 | /* Re-set th->t_super, so we can properly keep track of how many | 3127 | /* |
3128 | * Re-set th->t_super, so we can properly keep track of how many | ||
3048 | * persistent transactions there are. We need to do this so if this | 3129 | * persistent transactions there are. We need to do this so if this |
3049 | * call is part of a failed restart_transaction, we can free it later */ | 3130 | * call is part of a failed restart_transaction, we can free it later |
3131 | */ | ||
3050 | th->t_super = sb; | 3132 | th->t_super = sb; |
3051 | return retval; | 3133 | return retval; |
3052 | } | 3134 | } |
@@ -3059,14 +3141,15 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct | |||
3059 | int ret; | 3141 | int ret; |
3060 | struct reiserfs_transaction_handle *th; | 3142 | struct reiserfs_transaction_handle *th; |
3061 | 3143 | ||
3062 | /* if we're nesting into an existing transaction. It will be | 3144 | /* |
3063 | ** persistent on its own | 3145 | * if we're nesting into an existing transaction, it will be |
3146 | * persistent on its own | ||
3064 | */ | 3147 | */ |
3065 | if (reiserfs_transaction_running(s)) { | 3148 | if (reiserfs_transaction_running(s)) { |
3066 | th = current->journal_info; | 3149 | th = current->journal_info; |
3067 | th->t_refcount++; | 3150 | th->t_refcount++; |
3068 | BUG_ON(th->t_refcount < 2); | 3151 | BUG_ON(th->t_refcount < 2); |
3069 | 3152 | ||
3070 | return th; | 3153 | return th; |
3071 | } | 3154 | } |
3072 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); | 3155 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); |
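The nesting rule above (reuse the running handle, bump its refcount) is the classic refcounted-handle pattern. A self-contained sketch; current_handle models current->journal_info, and the begin/end hooks are placeholders rather than the reiserfs functions:

#include <stdlib.h>

struct handle { int refcount; };

static struct handle *current_handle;  /* current->journal_info analog */

static struct handle *txn_get(void)
{
        if (current_handle) {           /* nest: reuse and take a reference */
                current_handle->refcount++;
                return current_handle;
        }
        current_handle = calloc(1, sizeof(*current_handle));
        if (!current_handle)
                return NULL;
        current_handle->refcount = 1;   /* ... journal_begin() here ... */
        return current_handle;
}

static void txn_put(struct handle *th)
{
        if (--th->refcount == 0) {      /* last put really ends the txn */
                /* ... journal_end() here ... */
                free(th);
                current_handle = NULL;
        }
}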
@@ -3087,7 +3170,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) | |||
3087 | struct super_block *s = th->t_super; | 3170 | struct super_block *s = th->t_super; |
3088 | int ret = 0; | 3171 | int ret = 0; |
3089 | if (th->t_trans_id) | 3172 | if (th->t_trans_id) |
3090 | ret = journal_end(th, th->t_super, th->t_blocks_allocated); | 3173 | ret = journal_end(th); |
3091 | else | 3174 | else |
3092 | ret = -EIO; | 3175 | ret = -EIO; |
3093 | if (th->t_refcount == 0) { | 3176 | if (th->t_refcount == 0) { |
@@ -3098,29 +3181,31 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) | |||
3098 | } | 3181 | } |
3099 | 3182 | ||
3100 | static int journal_join(struct reiserfs_transaction_handle *th, | 3183 | static int journal_join(struct reiserfs_transaction_handle *th, |
3101 | struct super_block *sb, unsigned long nblocks) | 3184 | struct super_block *sb) |
3102 | { | 3185 | { |
3103 | struct reiserfs_transaction_handle *cur_th = current->journal_info; | 3186 | struct reiserfs_transaction_handle *cur_th = current->journal_info; |
3104 | 3187 | ||
3105 | /* this keeps do_journal_end from NULLing out the current->journal_info | 3188 | /* |
3106 | ** pointer | 3189 | * this keeps do_journal_end from NULLing out the |
3190 | * current->journal_info pointer | ||
3107 | */ | 3191 | */ |
3108 | th->t_handle_save = cur_th; | 3192 | th->t_handle_save = cur_th; |
3109 | BUG_ON(cur_th && cur_th->t_refcount > 1); | 3193 | BUG_ON(cur_th && cur_th->t_refcount > 1); |
3110 | return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN); | 3194 | return do_journal_begin_r(th, sb, 1, JBEGIN_JOIN); |
3111 | } | 3195 | } |
3112 | 3196 | ||
3113 | int journal_join_abort(struct reiserfs_transaction_handle *th, | 3197 | int journal_join_abort(struct reiserfs_transaction_handle *th, |
3114 | struct super_block *sb, unsigned long nblocks) | 3198 | struct super_block *sb) |
3115 | { | 3199 | { |
3116 | struct reiserfs_transaction_handle *cur_th = current->journal_info; | 3200 | struct reiserfs_transaction_handle *cur_th = current->journal_info; |
3117 | 3201 | ||
3118 | /* this keeps do_journal_end from NULLing out the current->journal_info | 3202 | /* |
3119 | ** pointer | 3203 | * this keeps do_journal_end from NULLing out the |
3204 | * current->journal_info pointer | ||
3120 | */ | 3205 | */ |
3121 | th->t_handle_save = cur_th; | 3206 | th->t_handle_save = cur_th; |
3122 | BUG_ON(cur_th && cur_th->t_refcount > 1); | 3207 | BUG_ON(cur_th && cur_th->t_refcount > 1); |
3123 | return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT); | 3208 | return do_journal_begin_r(th, sb, 1, JBEGIN_ABORT); |
3124 | } | 3209 | } |
3125 | 3210 | ||
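Both join variants save current->journal_info into t_handle_save before calling do_journal_begin_r(), because do_journal_end() NULLs that pointer when the handle is torn down. The save/restore pairing, reduced to a toy model (user-space, illustrative names):

struct toy_handle {
	struct toy_handle *t_handle_save;
};

static struct toy_handle *journal_info;	/* current->journal_info stand-in */

static void toy_join(struct toy_handle *th)
{
	th->t_handle_save = journal_info;	/* remember the outer handle */
	journal_info = th;
}

static void toy_end(struct toy_handle *th)
{
	journal_info = th->t_handle_save;	/* restore on journal_end */
}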
3126 | int journal_begin(struct reiserfs_transaction_handle *th, | 3211 | int journal_begin(struct reiserfs_transaction_handle *th, |
@@ -3142,9 +3227,10 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3142 | "journal_info != 0"); | 3227 | "journal_info != 0"); |
3143 | return 0; | 3228 | return 0; |
3144 | } else { | 3229 | } else { |
3145 | /* we've ended up with a handle from a different filesystem. | 3230 | /* |
3146 | ** save it and restore on journal_end. This should never | 3231 | * we've ended up with a handle from a different |
3147 | ** really happen... | 3232 | * filesystem. save it and restore on journal_end. |
3233 | * This should never really happen... | ||
3148 | */ | 3234 | */ |
3149 | reiserfs_warning(sb, "clm-2100", | 3235 | reiserfs_warning(sb, "clm-2100", |
3150 | "nesting info a different FS"); | 3236 | "nesting info a different FS"); |
@@ -3157,9 +3243,10 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3157 | ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); | 3243 | ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); |
3158 | BUG_ON(current->journal_info != th); | 3244 | BUG_ON(current->journal_info != th); |
3159 | 3245 | ||
3160 | /* I guess this boils down to being the reciprocal of clm-2100 above. | 3246 | /* |
3161 | * If do_journal_begin_r fails, we need to put it back, since journal_end | 3247 | * I guess this boils down to being the reciprocal of clm-2100 above. |
3162 | * won't be called to do it. */ | 3248 | * If do_journal_begin_r fails, we need to put it back, since |
3249 | * journal_end won't be called to do it. */ | ||
3163 | if (ret) | 3250 | if (ret) |
3164 | current->journal_info = th->t_handle_save; | 3251 | current->journal_info = th->t_handle_save; |
3165 | else | 3252 | else |
@@ -3169,17 +3256,19 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3169 | } | 3256 | } |
3170 | 3257 | ||
3171 | /* | 3258 | /* |
3172 | ** puts bh into the current transaction. If it was already there, reorders removes the | 3259 | * puts bh into the current transaction. If it was already there, reordering |
3173 | ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). | 3260 | * removes the old pointers from the hash, and puts new ones in (to make |
3174 | ** | 3261 | * sure replay happens in the right order). |
3175 | ** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the | 3262 | * |
3176 | ** transaction is committed. | 3263 | * if it was dirty, cleans and files onto the clean list. I can't let it |
3177 | ** | 3264 | * be dirty again until the transaction is committed. |
3178 | ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. | 3265 | * |
3179 | */ | 3266 | * if j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. |
3267 | */ | ||
3180 | int journal_mark_dirty(struct reiserfs_transaction_handle *th, | 3268 | int journal_mark_dirty(struct reiserfs_transaction_handle *th, |
3181 | struct super_block *sb, struct buffer_head *bh) | 3269 | struct buffer_head *bh) |
3182 | { | 3270 | { |
3271 | struct super_block *sb = th->t_super; | ||
3183 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3272 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3184 | struct reiserfs_journal_cnode *cn = NULL; | 3273 | struct reiserfs_journal_cnode *cn = NULL; |
3185 | int count_already_incd = 0; | 3274 | int count_already_incd = 0; |
@@ -3201,9 +3290,10 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3201 | return 0; | 3290 | return 0; |
3202 | } | 3291 | } |
3203 | 3292 | ||
3204 | /* this must be turned into a panic instead of a warning. We can't allow | 3293 | /* |
3205 | ** a dirty or journal_dirty or locked buffer to be logged, as some changes | 3294 | * this must be turned into a panic instead of a warning. We can't |
3206 | ** could get to disk too early. NOT GOOD. | 3295 | * allow a dirty or journal_dirty or locked buffer to be logged, as |
3296 | * some changes could get to disk too early. NOT GOOD. | ||
3207 | */ | 3297 | */ |
3208 | if (!prepared || buffer_dirty(bh)) { | 3298 | if (!prepared || buffer_dirty(bh)) { |
3209 | reiserfs_warning(sb, "journal-1777", | 3299 | reiserfs_warning(sb, "journal-1777", |
@@ -3216,14 +3306,16 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3216 | buffer_journal_dirty(bh) ? ' ' : '!'); | 3306 | buffer_journal_dirty(bh) ? ' ' : '!'); |
3217 | } | 3307 | } |
3218 | 3308 | ||
3219 | if (atomic_read(&(journal->j_wcount)) <= 0) { | 3309 | if (atomic_read(&journal->j_wcount) <= 0) { |
3220 | reiserfs_warning(sb, "journal-1409", | 3310 | reiserfs_warning(sb, "journal-1409", |
3221 | "returning because j_wcount was %d", | 3311 | "returning because j_wcount was %d", |
3222 | atomic_read(&(journal->j_wcount))); | 3312 | atomic_read(&journal->j_wcount)); |
3223 | return 1; | 3313 | return 1; |
3224 | } | 3314 | } |
3225 | /* this error means I've screwed up, and we've overflowed the transaction. | 3315 | /* |
3226 | ** Nothing can be done here, except make the FS readonly or panic. | 3316 | * this error means I've screwed up, and we've overflowed |
3317 | * the transaction. Nothing can be done here, except make the | ||
3318 | * FS readonly or panic. | ||
3227 | */ | 3319 | */ |
3228 | if (journal->j_len >= journal->j_trans_max) { | 3320 | if (journal->j_len >= journal->j_trans_max) { |
3229 | reiserfs_panic(th->t_super, "journal-1413", | 3321 | reiserfs_panic(th->t_super, "journal-1413", |
@@ -3280,9 +3372,9 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3280 | return 0; | 3372 | return 0; |
3281 | } | 3373 | } |
3282 | 3374 | ||
3283 | int journal_end(struct reiserfs_transaction_handle *th, | 3375 | int journal_end(struct reiserfs_transaction_handle *th) |
3284 | struct super_block *sb, unsigned long nblocks) | ||
3285 | { | 3376 | { |
3377 | struct super_block *sb = th->t_super; | ||
3286 | if (!current->journal_info && th->t_refcount > 1) | 3378 | if (!current->journal_info && th->t_refcount > 1) |
3287 | reiserfs_warning(sb, "REISER-NESTING", | 3379 | reiserfs_warning(sb, "REISER-NESTING", |
3288 | "th NULL, refcount %d", th->t_refcount); | 3380 | "th NULL, refcount %d", th->t_refcount); |
@@ -3297,8 +3389,9 @@ int journal_end(struct reiserfs_transaction_handle *th, | |||
3297 | struct reiserfs_transaction_handle *cur_th = | 3389 | struct reiserfs_transaction_handle *cur_th = |
3298 | current->journal_info; | 3390 | current->journal_info; |
3299 | 3391 | ||
3300 | /* we aren't allowed to close a nested transaction on a different | 3392 | /* |
3301 | ** filesystem from the one in the task struct | 3393 | * we aren't allowed to close a nested transaction on a |
3394 | * different filesystem from the one in the task struct | ||
3302 | */ | 3395 | */ |
3303 | BUG_ON(cur_th->t_super != th->t_super); | 3396 | BUG_ON(cur_th->t_super != th->t_super); |
3304 | 3397 | ||
@@ -3308,17 +3401,18 @@ int journal_end(struct reiserfs_transaction_handle *th, | |||
3308 | } | 3401 | } |
3309 | return 0; | 3402 | return 0; |
3310 | } else { | 3403 | } else { |
3311 | return do_journal_end(th, sb, nblocks, 0); | 3404 | return do_journal_end(th, 0); |
3312 | } | 3405 | } |
3313 | } | 3406 | } |
3314 | 3407 | ||
3315 | /* removes from the current transaction, relsing and descrementing any counters. | 3408 | /* |
3316 | ** also files the removed buffer directly onto the clean list | 3409 | * removes from the current transaction, relsing and decrementing any counters. |
3317 | ** | 3410 | * also files the removed buffer directly onto the clean list |
3318 | ** called by journal_mark_freed when a block has been deleted | 3411 | * |
3319 | ** | 3412 | * called by journal_mark_freed when a block has been deleted |
3320 | ** returns 1 if it cleaned and relsed the buffer. 0 otherwise | 3413 | * |
3321 | */ | 3414 | * returns 1 if it cleaned and relsed the buffer. 0 otherwise |
3415 | */ | ||
3322 | static int remove_from_transaction(struct super_block *sb, | 3416 | static int remove_from_transaction(struct super_block *sb, |
3323 | b_blocknr_t blocknr, int already_cleaned) | 3417 | b_blocknr_t blocknr, int already_cleaned) |
3324 | { | 3418 | { |
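With journal_mark_dirty() and journal_end() above now pulling sb from th->t_super, the caller-visible shape of a transaction shrinks accordingly. A self-contained toy of that shape (the toy_* stubs stand in for the real reiserfs functions; only the signatures are the point):

struct toy_sb { int unused; };
struct toy_bh { int unused; };
struct toy_th { struct toy_sb *t_super; };

/* journal_begin() still needs sb and a block count... */
static int toy_journal_begin(struct toy_th *th, struct toy_sb *sb,
			     unsigned long nblocks)
{
	(void)nblocks;
	th->t_super = sb;
	return 0;
}

/* ...but the rest of the chain recovers sb from the handle */
static int toy_journal_mark_dirty(struct toy_th *th, struct toy_bh *bh)
{
	(void)th; (void)bh;
	return 0;
}

static int toy_journal_end(struct toy_th *th)
{
	(void)th;
	return 0;
}

int main(void)
{
	struct toy_sb sb = { 0 };
	struct toy_bh bh = { 0 };
	struct toy_th th;

	if (toy_journal_begin(&th, &sb, 1))
		return 1;
	toy_journal_mark_dirty(&th, &bh);
	return toy_journal_end(&th);
}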
@@ -3354,7 +3448,7 @@ static int remove_from_transaction(struct super_block *sb, | |||
3354 | clear_buffer_dirty(bh); | 3448 | clear_buffer_dirty(bh); |
3355 | clear_buffer_journal_test(bh); | 3449 | clear_buffer_journal_test(bh); |
3356 | put_bh(bh); | 3450 | put_bh(bh); |
3357 | if (atomic_read(&(bh->b_count)) < 0) { | 3451 | if (atomic_read(&bh->b_count) < 0) { |
3358 | reiserfs_warning(sb, "journal-1752", | 3452 | reiserfs_warning(sb, "journal-1752", |
3359 | "b_count < 0"); | 3453 | "b_count < 0"); |
3360 | } | 3454 | } |
@@ -3367,15 +3461,16 @@ static int remove_from_transaction(struct super_block *sb, | |||
3367 | } | 3461 | } |
3368 | 3462 | ||
3369 | /* | 3463 | /* |
3370 | ** for any cnode in a journal list, it can only be dirtied of all the | 3464 | * for any cnode in a journal list, it can only be dirtied if all the |
3371 | ** transactions that include it are committed to disk. | 3465 | * transactions that include it are committed to disk. |
3372 | ** this checks through each transaction, and returns 1 if you are allowed to dirty, | 3466 | * this checks through each transaction, and returns 1 if you are allowed |
3373 | ** and 0 if you aren't | 3467 | * to dirty, and 0 if you aren't |
3374 | ** | 3468 | * |
3375 | ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log | 3469 | * it is called by dirty_journal_list, which is called after |
3376 | ** blocks for a given transaction on disk | 3470 | * flush_commit_list has gotten all the log blocks for a given |
3377 | ** | 3471 | * transaction on disk |
3378 | */ | 3472 | * |
3473 | */ | ||
3379 | static int can_dirty(struct reiserfs_journal_cnode *cn) | 3474 | static int can_dirty(struct reiserfs_journal_cnode *cn) |
3380 | { | 3475 | { |
3381 | struct super_block *sb = cn->sb; | 3476 | struct super_block *sb = cn->sb; |
@@ -3383,9 +3478,10 @@ static int can_dirty(struct reiserfs_journal_cnode *cn) | |||
3383 | struct reiserfs_journal_cnode *cur = cn->hprev; | 3478 | struct reiserfs_journal_cnode *cur = cn->hprev; |
3384 | int can_dirty = 1; | 3479 | int can_dirty = 1; |
3385 | 3480 | ||
3386 | /* first test hprev. These are all newer than cn, so any node here | 3481 | /* |
3387 | ** with the same block number and dev means this node can't be sent | 3482 | * first test hprev. These are all newer than cn, so any node here |
3388 | ** to disk right now. | 3483 | * with the same block number and dev means this node can't be sent |
3484 | * to disk right now. | ||
3389 | */ | 3485 | */ |
3390 | while (cur && can_dirty) { | 3486 | while (cur && can_dirty) { |
3391 | if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && | 3487 | if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && |
@@ -3394,13 +3490,14 @@ static int can_dirty(struct reiserfs_journal_cnode *cn) | |||
3394 | } | 3490 | } |
3395 | cur = cur->hprev; | 3491 | cur = cur->hprev; |
3396 | } | 3492 | } |
3397 | /* then test hnext. These are all older than cn. As long as they | 3493 | /* |
3398 | ** are committed to the log, it is safe to write cn to disk | 3494 | * then test hnext. These are all older than cn. As long as they |
3495 | * are committed to the log, it is safe to write cn to disk | ||
3399 | */ | 3496 | */ |
3400 | cur = cn->hnext; | 3497 | cur = cn->hnext; |
3401 | while (cur && can_dirty) { | 3498 | while (cur && can_dirty) { |
3402 | if (cur->jlist && cur->jlist->j_len > 0 && | 3499 | if (cur->jlist && cur->jlist->j_len > 0 && |
3403 | atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && | 3500 | atomic_read(&cur->jlist->j_commit_left) > 0 && cur->bh && |
3404 | cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { | 3501 | cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { |
3405 | can_dirty = 0; | 3502 | can_dirty = 0; |
3406 | } | 3503 | } |
@@ -3409,12 +3506,13 @@ static int can_dirty(struct reiserfs_journal_cnode *cn) | |||
3409 | return can_dirty; | 3506 | return can_dirty; |
3410 | } | 3507 | } |
3411 | 3508 | ||
3412 | /* syncs the commit blocks, but does not force the real buffers to disk | 3509 | /* |
3413 | ** will wait until the current transaction is done/committed before returning | 3510 | * syncs the commit blocks, but does not force the real buffers to disk; |
3414 | */ | 3511 | * will wait until the current transaction is done/committed before returning |
3415 | int journal_end_sync(struct reiserfs_transaction_handle *th, | 3512 | */ |
3416 | struct super_block *sb, unsigned long nblocks) | 3513 | int journal_end_sync(struct reiserfs_transaction_handle *th) |
3417 | { | 3514 | { |
3515 | struct super_block *sb = th->t_super; | ||
3418 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3516 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3419 | 3517 | ||
3420 | BUG_ON(!th->t_trans_id); | 3518 | BUG_ON(!th->t_trans_id); |
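The can_dirty() rule rewritten above is worth restating as standalone code: hprev entries are strictly newer, so any of them with the same block number vetoes the write; hnext entries are older and only veto while their transaction still has commit work outstanding. A simplified model (ordering logic only; the jlist/bh/sb checks are dropped):

struct toy_cn {
	unsigned long blocknr;
	int commit_left;		/* > 0: commit not finished yet */
	struct toy_cn *hprev;		/* newer entries on this hash chain */
	struct toy_cn *hnext;		/* older entries */
};

static int toy_can_dirty(const struct toy_cn *cn)
{
	const struct toy_cn *cur;

	/* a newer copy of the block always blocks us */
	for (cur = cn->hprev; cur; cur = cur->hprev)
		if (cur->blocknr == cn->blocknr)
			return 0;

	/* an older copy blocks us only until its commit finishes */
	for (cur = cn->hnext; cur; cur = cur->hnext)
		if (cur->blocknr == cn->blocknr && cur->commit_left > 0)
			return 0;

	return 1;
}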
@@ -3423,14 +3521,12 @@ int journal_end_sync(struct reiserfs_transaction_handle *th, | |||
3423 | if (journal->j_len == 0) { | 3521 | if (journal->j_len == 0) { |
3424 | reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), | 3522 | reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), |
3425 | 1); | 3523 | 1); |
3426 | journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); | 3524 | journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb)); |
3427 | } | 3525 | } |
3428 | return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); | 3526 | return do_journal_end(th, COMMIT_NOW | WAIT); |
3429 | } | 3527 | } |
3430 | 3528 | ||
3431 | /* | 3529 | /* writeback the pending async commits to disk */ |
3432 | ** writeback the pending async commits to disk | ||
3433 | */ | ||
3434 | static void flush_async_commits(struct work_struct *work) | 3530 | static void flush_async_commits(struct work_struct *work) |
3435 | { | 3531 | { |
3436 | struct reiserfs_journal *journal = | 3532 | struct reiserfs_journal *journal = |
@@ -3450,9 +3546,9 @@ static void flush_async_commits(struct work_struct *work) | |||
3450 | } | 3546 | } |
3451 | 3547 | ||
3452 | /* | 3548 | /* |
3453 | ** flushes any old transactions to disk | 3549 | * flushes any old transactions to disk |
3454 | ** ends the current transaction if it is too old | 3550 | * ends the current transaction if it is too old |
3455 | */ | 3551 | */ |
3456 | void reiserfs_flush_old_commits(struct super_block *sb) | 3552 | void reiserfs_flush_old_commits(struct super_block *sb) |
3457 | { | 3553 | { |
3458 | time_t now; | 3554 | time_t now; |
@@ -3460,48 +3556,53 @@ void reiserfs_flush_old_commits(struct super_block *sb) | |||
3460 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3556 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3461 | 3557 | ||
3462 | now = get_seconds(); | 3558 | now = get_seconds(); |
3463 | /* safety check so we don't flush while we are replaying the log during | 3559 | /* |
3560 | * safety check so we don't flush while we are replaying the log during | ||
3464 | * mount | 3561 | * mount |
3465 | */ | 3562 | */ |
3466 | if (list_empty(&journal->j_journal_list)) | 3563 | if (list_empty(&journal->j_journal_list)) |
3467 | return; | 3564 | return; |
3468 | 3565 | ||
3469 | /* check the current transaction. If there are no writers, and it is | 3566 | /* |
3567 | * check the current transaction. If there are no writers, and it is | ||
3470 | * too old, finish it, and force the commit blocks to disk | 3568 | * too old, finish it, and force the commit blocks to disk |
3471 | */ | 3569 | */ |
3472 | if (atomic_read(&journal->j_wcount) <= 0 && | 3570 | if (atomic_read(&journal->j_wcount) <= 0 && |
3473 | journal->j_trans_start_time > 0 && | 3571 | journal->j_trans_start_time > 0 && |
3474 | journal->j_len > 0 && | 3572 | journal->j_len > 0 && |
3475 | (now - journal->j_trans_start_time) > journal->j_max_trans_age) { | 3573 | (now - journal->j_trans_start_time) > journal->j_max_trans_age) { |
3476 | if (!journal_join(&th, sb, 1)) { | 3574 | if (!journal_join(&th, sb)) { |
3477 | reiserfs_prepare_for_journal(sb, | 3575 | reiserfs_prepare_for_journal(sb, |
3478 | SB_BUFFER_WITH_SB(sb), | 3576 | SB_BUFFER_WITH_SB(sb), |
3479 | 1); | 3577 | 1); |
3480 | journal_mark_dirty(&th, sb, | 3578 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb)); |
3481 | SB_BUFFER_WITH_SB(sb)); | ||
3482 | 3579 | ||
3483 | /* we're only being called from kreiserfsd, it makes no sense to do | 3580 | /* |
3484 | ** an async commit so that kreiserfsd can do it later | 3581 | * we're only being called from kreiserfsd, it makes |
3582 | * no sense to do an async commit so that kreiserfsd | ||
3583 | * can do it later | ||
3485 | */ | 3584 | */ |
3486 | do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); | 3585 | do_journal_end(&th, COMMIT_NOW | WAIT); |
3487 | } | 3586 | } |
3488 | } | 3587 | } |
3489 | } | 3588 | } |
3490 | 3589 | ||
3491 | /* | 3590 | /* |
3492 | ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit | 3591 | * returns 0 if do_journal_end should return right away, returns 1 if |
3493 | ** | 3592 | * do_journal_end should finish the commit |
3494 | ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all | 3593 | * |
3495 | ** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just | 3594 | * if the current transaction is too old, but still has writers, this will |
3496 | ** flushes the commit list and returns 0. | 3595 | * wait on j_join_wait until all the writers are done. By the time it |
3497 | ** | 3596 | * wakes up, the transaction it was called has already ended, so it just |
3498 | ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. | 3597 | * flushes the commit list and returns 0. |
3499 | ** | 3598 | * |
3500 | ** Note, we can't allow the journal_end to proceed while there are still writers in the log. | 3599 | * Won't batch when flush or commit_now is set. Also won't batch when |
3501 | */ | 3600 | * others are waiting on j_join_wait. |
3502 | static int check_journal_end(struct reiserfs_transaction_handle *th, | 3601 | * |
3503 | struct super_block *sb, unsigned long nblocks, | 3602 | * Note, we can't allow the journal_end to proceed while there are still |
3504 | int flags) | 3603 | * writers in the log. |
3604 | */ | ||
3605 | static int check_journal_end(struct reiserfs_transaction_handle *th, int flags) | ||
3505 | { | 3606 | { |
3506 | 3607 | ||
3507 | time_t now; | 3608 | time_t now; |
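For reference, the "too old" test in reiserfs_flush_old_commits() above reduces to one predicate: flush only when nobody is writing, a transaction was actually started and logged something, and it has outlived j_max_trans_age. Pulled out as plain C (the kernel evaluates these fields under its own locking):

static int toy_transaction_too_old(int wcount, long trans_start_time,
				   unsigned long len, long now, long max_age)
{
	return wcount <= 0 &&		/* no active writers */
	       trans_start_time > 0 &&	/* a transaction was started */
	       len > 0 &&		/* and it logged something */
	       (now - trans_start_time) > max_age;
}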
@@ -3509,6 +3610,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3509 | int commit_now = flags & COMMIT_NOW; | 3610 | int commit_now = flags & COMMIT_NOW; |
3510 | int wait_on_commit = flags & WAIT; | 3611 | int wait_on_commit = flags & WAIT; |
3511 | struct reiserfs_journal_list *jl; | 3612 | struct reiserfs_journal_list *jl; |
3613 | struct super_block *sb = th->t_super; | ||
3512 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3614 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3513 | 3615 | ||
3514 | BUG_ON(!th->t_trans_id); | 3616 | BUG_ON(!th->t_trans_id); |
@@ -3520,23 +3622,27 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3520 | } | 3622 | } |
3521 | 3623 | ||
3522 | journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); | 3624 | journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); |
3523 | if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ | 3625 | /* <= 0 is allowed. unmounting might not call begin */ |
3524 | atomic_dec(&(journal->j_wcount)); | 3626 | if (atomic_read(&journal->j_wcount) > 0) |
3525 | } | 3627 | atomic_dec(&journal->j_wcount); |
3526 | 3628 | ||
3527 | /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released | 3629 | /* |
3528 | ** will be dealt with by next transaction that actually writes something, but should be taken | 3630 | * BUG, deal with case where j_len is 0, but people previously |
3529 | ** care of in this trans | 3631 | * freed blocks that need to be released will be dealt with by next |
3632 | * transaction that actually writes something, but should be taken | ||
3633 | * care of in this trans | ||
3530 | */ | 3634 | */ |
3531 | BUG_ON(journal->j_len == 0); | 3635 | BUG_ON(journal->j_len == 0); |
3532 | 3636 | ||
3533 | /* if wcount > 0, and we are called to with flush or commit_now, | 3637 | /* |
3534 | ** we wait on j_join_wait. We will wake up when the last writer has | 3638 | * if wcount > 0, and we are called with flush or commit_now, |
3535 | ** finished the transaction, and started it on its way to the disk. | 3639 | * we wait on j_join_wait. We will wake up when the last writer has |
3536 | ** Then, we flush the commit or journal list, and just return 0 | 3640 | * finished the transaction, and started it on its way to the disk. |
3537 | ** because the rest of journal end was already done for this transaction. | 3641 | * Then, we flush the commit or journal list, and just return 0 |
3642 | * because the rest of journal end was already done for this | ||
3643 | * transaction. | ||
3538 | */ | 3644 | */ |
3539 | if (atomic_read(&(journal->j_wcount)) > 0) { | 3645 | if (atomic_read(&journal->j_wcount) > 0) { |
3540 | if (flush || commit_now) { | 3646 | if (flush || commit_now) { |
3541 | unsigned trans_id; | 3647 | unsigned trans_id; |
3542 | 3648 | ||
@@ -3544,27 +3650,30 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3544 | trans_id = jl->j_trans_id; | 3650 | trans_id = jl->j_trans_id; |
3545 | if (wait_on_commit) | 3651 | if (wait_on_commit) |
3546 | jl->j_state |= LIST_COMMIT_PENDING; | 3652 | jl->j_state |= LIST_COMMIT_PENDING; |
3547 | atomic_set(&(journal->j_jlock), 1); | 3653 | atomic_set(&journal->j_jlock, 1); |
3548 | if (flush) { | 3654 | if (flush) { |
3549 | journal->j_next_full_flush = 1; | 3655 | journal->j_next_full_flush = 1; |
3550 | } | 3656 | } |
3551 | unlock_journal(sb); | 3657 | unlock_journal(sb); |
3552 | 3658 | ||
3553 | /* sleep while the current transaction is still j_jlocked */ | 3659 | /* |
3660 | * sleep while the current transaction is | ||
3661 | * still j_jlocked | ||
3662 | */ | ||
3554 | while (journal->j_trans_id == trans_id) { | 3663 | while (journal->j_trans_id == trans_id) { |
3555 | if (atomic_read(&journal->j_jlock)) { | 3664 | if (atomic_read(&journal->j_jlock)) { |
3556 | queue_log_writer(sb); | 3665 | queue_log_writer(sb); |
3557 | } else { | 3666 | } else { |
3558 | lock_journal(sb); | 3667 | lock_journal(sb); |
3559 | if (journal->j_trans_id == trans_id) { | 3668 | if (journal->j_trans_id == trans_id) { |
3560 | atomic_set(&(journal->j_jlock), | 3669 | atomic_set(&journal->j_jlock, |
3561 | 1); | 3670 | 1); |
3562 | } | 3671 | } |
3563 | unlock_journal(sb); | 3672 | unlock_journal(sb); |
3564 | } | 3673 | } |
3565 | } | 3674 | } |
3566 | BUG_ON(journal->j_trans_id == trans_id); | 3675 | BUG_ON(journal->j_trans_id == trans_id); |
3567 | 3676 | ||
3568 | if (commit_now | 3677 | if (commit_now |
3569 | && journal_list_still_alive(sb, trans_id) | 3678 | && journal_list_still_alive(sb, trans_id) |
3570 | && wait_on_commit) { | 3679 | && wait_on_commit) { |
@@ -3584,7 +3693,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3584 | } | 3693 | } |
3585 | /* don't batch when someone is waiting on j_join_wait */ | 3694 | /* don't batch when someone is waiting on j_join_wait */ |
3586 | /* don't batch when syncing the commit or flushing the whole trans */ | 3695 | /* don't batch when syncing the commit or flushing the whole trans */ |
3587 | if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) | 3696 | if (!(journal->j_must_wait > 0) && !(atomic_read(&journal->j_jlock)) |
3588 | && !flush && !commit_now && (journal->j_len < journal->j_max_batch) | 3697 | && !flush && !commit_now && (journal->j_len < journal->j_max_batch) |
3589 | && journal->j_len_alloc < journal->j_max_batch | 3698 | && journal->j_len_alloc < journal->j_max_batch |
3590 | && journal->j_cnode_free > (journal->j_trans_max * 3)) { | 3699 | && journal->j_cnode_free > (journal->j_trans_max * 3)) { |
@@ -3602,19 +3711,22 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3602 | } | 3711 | } |
3603 | 3712 | ||
3604 | /* | 3713 | /* |
3605 | ** Does all the work that makes deleting blocks safe. | 3714 | * Does all the work that makes deleting blocks safe. |
3606 | ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. | 3715 | * when deleting a block marked BH_JNew, just remove it from the current |
3607 | ** | 3716 | * transaction, clean its buffer_head and move on. |
3608 | ** otherwise: | 3717 | * |
3609 | ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes | 3718 | * otherwise: |
3610 | ** before this transaction has finished. | 3719 | * set a bit for the block in the journal bitmap. That will prevent it from |
3611 | ** | 3720 | * being allocated for unformatted nodes before this transaction has finished. |
3612 | ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with | 3721 | * |
3613 | ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, | 3722 | * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. |
3614 | ** the block can't be reallocated yet. | 3723 | * That will prevent any old transactions with this block from trying to flush |
3615 | ** | 3724 | * to the real location. Since we aren't removing the cnode from the |
3616 | ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. | 3725 | * journal_list_hash, the block can't be reallocated yet. |
3617 | */ | 3726 | * |
3727 | * Then remove it from the current transaction, decrementing any counters and | ||
3728 | * filing it on the clean list. | ||
3729 | */ | ||
3618 | int journal_mark_freed(struct reiserfs_transaction_handle *th, | 3730 | int journal_mark_freed(struct reiserfs_transaction_handle *th, |
3619 | struct super_block *sb, b_blocknr_t blocknr) | 3731 | struct super_block *sb, b_blocknr_t blocknr) |
3620 | { | 3732 | { |
@@ -3637,7 +3749,10 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th, | |||
3637 | reiserfs_clean_and_file_buffer(bh); | 3749 | reiserfs_clean_and_file_buffer(bh); |
3638 | cleaned = remove_from_transaction(sb, blocknr, cleaned); | 3750 | cleaned = remove_from_transaction(sb, blocknr, cleaned); |
3639 | } else { | 3751 | } else { |
3640 | /* set the bit for this block in the journal bitmap for this transaction */ | 3752 | /* |
3753 | * set the bit for this block in the journal bitmap | ||
3754 | * for this transaction | ||
3755 | */ | ||
3641 | jb = journal->j_current_jl->j_list_bitmap; | 3756 | jb = journal->j_current_jl->j_list_bitmap; |
3642 | if (!jb) { | 3757 | if (!jb) { |
3643 | reiserfs_panic(sb, "journal-1702", | 3758 | reiserfs_panic(sb, "journal-1702", |
@@ -3653,17 +3768,22 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th, | |||
3653 | } | 3768 | } |
3654 | cleaned = remove_from_transaction(sb, blocknr, cleaned); | 3769 | cleaned = remove_from_transaction(sb, blocknr, cleaned); |
3655 | 3770 | ||
3656 | /* find all older transactions with this block, make sure they don't try to write it out */ | 3771 | /* |
3772 | * find all older transactions with this block, | ||
3773 | * make sure they don't try to write it out | ||
3774 | */ | ||
3657 | cn = get_journal_hash_dev(sb, journal->j_list_hash_table, | 3775 | cn = get_journal_hash_dev(sb, journal->j_list_hash_table, |
3658 | blocknr); | 3776 | blocknr); |
3659 | while (cn) { | 3777 | while (cn) { |
3660 | if (sb == cn->sb && blocknr == cn->blocknr) { | 3778 | if (sb == cn->sb && blocknr == cn->blocknr) { |
3661 | set_bit(BLOCK_FREED, &cn->state); | 3779 | set_bit(BLOCK_FREED, &cn->state); |
3662 | if (cn->bh) { | 3780 | if (cn->bh) { |
3781 | /* | ||
3782 | * remove_from_transaction will brelse | ||
3783 | * the buffer if it was in the current | ||
3784 | * trans | ||
3785 | */ | ||
3663 | if (!cleaned) { | 3786 | if (!cleaned) { |
3664 | /* remove_from_transaction will brelse the buffer if it was | ||
3665 | ** in the current trans | ||
3666 | */ | ||
3667 | clear_buffer_journal_dirty(cn-> | 3787 | clear_buffer_journal_dirty(cn-> |
3668 | bh); | 3788 | bh); |
3669 | clear_buffer_dirty(cn->bh); | 3789 | clear_buffer_dirty(cn->bh); |
@@ -3672,16 +3792,19 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th, | |||
3672 | cleaned = 1; | 3792 | cleaned = 1; |
3673 | put_bh(cn->bh); | 3793 | put_bh(cn->bh); |
3674 | if (atomic_read | 3794 | if (atomic_read |
3675 | (&(cn->bh->b_count)) < 0) { | 3795 | (&cn->bh->b_count) < 0) { |
3676 | reiserfs_warning(sb, | 3796 | reiserfs_warning(sb, |
3677 | "journal-2138", | 3797 | "journal-2138", |
3678 | "cn->bh->b_count < 0"); | 3798 | "cn->bh->b_count < 0"); |
3679 | } | 3799 | } |
3680 | } | 3800 | } |
3681 | if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ | 3801 | /* |
3682 | atomic_dec(& | 3802 | * since we are clearing the bh, |
3683 | (cn->jlist-> | 3803 | * we MUST dec nonzerolen |
3684 | j_nonzerolen)); | 3804 | */ |
3805 | if (cn->jlist) { | ||
3806 | atomic_dec(&cn->jlist-> | ||
3807 | j_nonzerolen); | ||
3685 | } | 3808 | } |
3686 | cn->bh = NULL; | 3809 | cn->bh = NULL; |
3687 | } | 3810 | } |
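The loop above boils down to: for every older hash entry of the freed block, set BLOCK_FREED and drop the bh, so no stale transaction flushes the block to its old location while the journal bitmap bit keeps it from being reallocated. A stripped-down sketch of that walk (one flag, one chain, toy types):

enum toy_state { TOY_LOGGED = 0, TOY_FREED = 1 };

struct toy_cn {
	unsigned long blocknr;
	enum toy_state state;
	void *bh;			/* buffer attachment, if any */
	struct toy_cn *hnext;		/* older entries on the chain */
};

static void toy_mark_freed(struct toy_cn *chain, unsigned long blocknr)
{
	struct toy_cn *cn;

	for (cn = chain; cn; cn = cn->hnext) {
		if (cn->blocknr != blocknr)
			continue;
		cn->state = TOY_FREED;	/* never flush to the old location */
		cn->bh = NULL;		/* the kernel also fixes refcounts
					 * and j_nonzerolen here */
	}
}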
@@ -3714,10 +3837,16 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id, | |||
3714 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3837 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3715 | int ret = 0; | 3838 | int ret = 0; |
3716 | 3839 | ||
3717 | /* is it from the current transaction, or from an unknown transaction? */ | 3840 | /* |
3841 | * is it from the current transaction, | ||
3842 | * or from an unknown transaction? | ||
3843 | */ | ||
3718 | if (id == journal->j_trans_id) { | 3844 | if (id == journal->j_trans_id) { |
3719 | jl = journal->j_current_jl; | 3845 | jl = journal->j_current_jl; |
3720 | /* try to let other writers come in and grow this transaction */ | 3846 | /* |
3847 | * try to let other writers come in and | ||
3848 | * grow this transaction | ||
3849 | */ | ||
3721 | let_transaction_grow(sb, id); | 3850 | let_transaction_grow(sb, id); |
3722 | if (journal->j_trans_id != id) { | 3851 | if (journal->j_trans_id != id) { |
3723 | goto flush_commit_only; | 3852 | goto flush_commit_only; |
@@ -3731,21 +3860,22 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id, | |||
3731 | if (journal->j_trans_id != id) { | 3860 | if (journal->j_trans_id != id) { |
3732 | reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), | 3861 | reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), |
3733 | 1); | 3862 | 1); |
3734 | journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); | 3863 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb)); |
3735 | ret = journal_end(&th, sb, 1); | 3864 | ret = journal_end(&th); |
3736 | goto flush_commit_only; | 3865 | goto flush_commit_only; |
3737 | } | 3866 | } |
3738 | 3867 | ||
3739 | ret = journal_end_sync(&th, sb, 1); | 3868 | ret = journal_end_sync(&th); |
3740 | if (!ret) | 3869 | if (!ret) |
3741 | ret = 1; | 3870 | ret = 1; |
3742 | 3871 | ||
3743 | } else { | 3872 | } else { |
3744 | /* this gets tricky, we have to make sure the journal list in | 3873 | /* |
3874 | * this gets tricky, we have to make sure the journal list in | ||
3745 | * the inode still exists. We know the list is still around | 3875 | * the inode still exists. We know the list is still around |
3746 | * if we've got a larger transaction id than the oldest list | 3876 | * if we've got a larger transaction id than the oldest list |
3747 | */ | 3877 | */ |
3748 | flush_commit_only: | 3878 | flush_commit_only: |
3749 | if (journal_list_still_alive(inode->i_sb, id)) { | 3879 | if (journal_list_still_alive(inode->i_sb, id)) { |
3750 | /* | 3880 | /* |
3751 | * we only set ret to 1 when we know for sure | 3881 | * we only set ret to 1 when we know for sure |
@@ -3768,7 +3898,8 @@ int reiserfs_commit_for_inode(struct inode *inode) | |||
3768 | unsigned int id = REISERFS_I(inode)->i_trans_id; | 3898 | unsigned int id = REISERFS_I(inode)->i_trans_id; |
3769 | struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; | 3899 | struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; |
3770 | 3900 | ||
3771 | /* for the whole inode, assume unset id means it was | 3901 | /* |
3902 | * for the whole inode, assume unset id means it was | ||
3772 | * changed in the current transaction. More conservative | 3903 | * changed in the current transaction. More conservative |
3773 | */ | 3904 | */ |
3774 | if (!id || !jl) { | 3905 | if (!id || !jl) { |
@@ -3806,12 +3937,11 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb, | |||
3806 | 3937 | ||
3807 | extern struct tree_balance *cur_tb; | 3938 | extern struct tree_balance *cur_tb; |
3808 | /* | 3939 | /* |
3809 | ** before we can change a metadata block, we have to make sure it won't | 3940 | * before we can change a metadata block, we have to make sure it won't |
3810 | ** be written to disk while we are altering it. So, we must: | 3941 | * be written to disk while we are altering it. So, we must: |
3811 | ** clean it | 3942 | * clean it |
3812 | ** wait on it. | 3943 | * wait on it. |
3813 | ** | 3944 | */ |
3814 | */ | ||
3815 | int reiserfs_prepare_for_journal(struct super_block *sb, | 3945 | int reiserfs_prepare_for_journal(struct super_block *sb, |
3816 | struct buffer_head *bh, int wait) | 3946 | struct buffer_head *bh, int wait) |
3817 | { | 3947 | { |
@@ -3832,19 +3962,18 @@ int reiserfs_prepare_for_journal(struct super_block *sb, | |||
3832 | } | 3962 | } |
3833 | 3963 | ||
3834 | /* | 3964 | /* |
3835 | ** long and ugly. If flush, will not return until all commit | 3965 | * long and ugly. If flush, will not return until all commit |
3836 | ** blocks and all real buffers in the trans are on disk. | 3966 | * blocks and all real buffers in the trans are on disk. |
3837 | ** If no_async, won't return until all commit blocks are on disk. | 3967 | * If no_async, won't return until all commit blocks are on disk. |
3838 | ** | 3968 | * |
3839 | ** keep reading, there are comments as you go along | 3969 | * keep reading, there are comments as you go along |
3840 | ** | 3970 | * |
3841 | ** If the journal is aborted, we just clean up. Things like flushing | 3971 | * If the journal is aborted, we just clean up. Things like flushing |
3842 | ** journal lists, etc just won't happen. | 3972 | * journal lists, etc just won't happen. |
3843 | */ | 3973 | */ |
3844 | static int do_journal_end(struct reiserfs_transaction_handle *th, | 3974 | static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) |
3845 | struct super_block *sb, unsigned long nblocks, | ||
3846 | int flags) | ||
3847 | { | 3975 | { |
3976 | struct super_block *sb = th->t_super; | ||
3848 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3977 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3849 | struct reiserfs_journal_cnode *cn, *next, *jl_cn; | 3978 | struct reiserfs_journal_cnode *cn, *next, *jl_cn; |
3850 | struct reiserfs_journal_cnode *last_cn = NULL; | 3979 | struct reiserfs_journal_cnode *last_cn = NULL; |
@@ -3866,9 +3995,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3866 | 3995 | ||
3867 | BUG_ON(th->t_refcount > 1); | 3996 | BUG_ON(th->t_refcount > 1); |
3868 | BUG_ON(!th->t_trans_id); | 3997 | BUG_ON(!th->t_trans_id); |
3998 | BUG_ON(!th->t_super); | ||
3869 | 3999 | ||
3870 | /* protect flush_older_commits from doing mistakes if the | 4000 | /* |
3871 | transaction ID counter gets overflowed. */ | 4001 | * protect flush_older_commits from doing mistakes if the |
4002 | * transaction ID counter gets overflowed. | ||
4003 | */ | ||
3872 | if (th->t_trans_id == ~0U) | 4004 | if (th->t_trans_id == ~0U) |
3873 | flags |= FLUSH_ALL | COMMIT_NOW | WAIT; | 4005 | flags |= FLUSH_ALL | COMMIT_NOW | WAIT; |
3874 | flush = flags & FLUSH_ALL; | 4006 | flush = flags & FLUSH_ALL; |
@@ -3879,7 +4011,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3879 | if (journal->j_len == 0) { | 4011 | if (journal->j_len == 0) { |
3880 | reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), | 4012 | reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), |
3881 | 1); | 4013 | 1); |
3882 | journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); | 4014 | journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb)); |
3883 | } | 4015 | } |
3884 | 4016 | ||
3885 | lock_journal(sb); | 4017 | lock_journal(sb); |
@@ -3892,10 +4024,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3892 | wait_on_commit = 1; | 4024 | wait_on_commit = 1; |
3893 | } | 4025 | } |
3894 | 4026 | ||
3895 | /* check_journal_end locks the journal, and unlocks if it does not return 1 | 4027 | /* |
3896 | ** it tells us if we should continue with the journal_end, or just return | 4028 | * check_journal_end locks the journal, and unlocks if it does |
4029 | * not return 1. It tells us whether we should continue with the ||
4030 | * journal_end, or just return | ||
3897 | */ | 4031 | */ |
3898 | if (!check_journal_end(th, sb, nblocks, flags)) { | 4032 | if (!check_journal_end(th, flags)) { |
3899 | reiserfs_schedule_old_flush(sb); | 4033 | reiserfs_schedule_old_flush(sb); |
3900 | wake_queued_writers(sb); | 4034 | wake_queued_writers(sb); |
3901 | reiserfs_async_progress_wait(sb); | 4035 | reiserfs_async_progress_wait(sb); |
@@ -3908,19 +4042,23 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3908 | } | 4042 | } |
3909 | 4043 | ||
3910 | /* | 4044 | /* |
3911 | ** j must wait means we have to flush the log blocks, and the real blocks for | 4045 | * j_must_wait means we have to flush the log blocks, and the |
3912 | ** this transaction | 4046 | * real blocks for this transaction |
3913 | */ | 4047 | */ |
3914 | if (journal->j_must_wait > 0) { | 4048 | if (journal->j_must_wait > 0) { |
3915 | flush = 1; | 4049 | flush = 1; |
3916 | } | 4050 | } |
3917 | #ifdef REISERFS_PREALLOCATE | 4051 | #ifdef REISERFS_PREALLOCATE |
3918 | /* quota ops might need to nest, setup the journal_info pointer for them | 4052 | /* |
3919 | * and raise the refcount so that it is > 0. */ | 4053 | * quota ops might need to nest, setup the journal_info pointer |
4054 | * for them and raise the refcount so that it is > 0. | ||
4055 | */ | ||
3920 | current->journal_info = th; | 4056 | current->journal_info = th; |
3921 | th->t_refcount++; | 4057 | th->t_refcount++; |
3922 | reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into | 4058 | |
3923 | * the transaction */ | 4059 | /* it should not involve new blocks into the transaction */ |
4060 | reiserfs_discard_all_prealloc(th); | ||
4061 | |||
3924 | th->t_refcount--; | 4062 | th->t_refcount--; |
3925 | current->journal_info = th->t_handle_save; | 4063 | current->journal_info = th->t_handle_save; |
3926 | #endif | 4064 | #endif |
@@ -3936,7 +4074,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3936 | memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); | 4074 | memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); |
3937 | set_desc_trans_id(desc, journal->j_trans_id); | 4075 | set_desc_trans_id(desc, journal->j_trans_id); |
3938 | 4076 | ||
3939 | /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ | 4077 | /* |
4078 | * setup commit block. Don't write (keep it clean too) this one | ||
4079 | * until after everyone else is written | ||
4080 | */ | ||
3940 | c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 4081 | c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + |
3941 | ((journal->j_start + journal->j_len + | 4082 | ((journal->j_start + journal->j_len + |
3942 | 1) % SB_ONDISK_JOURNAL_SIZE(sb))); | 4083 | 1) % SB_ONDISK_JOURNAL_SIZE(sb))); |
@@ -3948,7 +4089,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3948 | /* init this journal list */ | 4089 | /* init this journal list */ |
3949 | jl = journal->j_current_jl; | 4090 | jl = journal->j_current_jl; |
3950 | 4091 | ||
3951 | /* we lock the commit before doing anything because | 4092 | /* |
4093 | * we lock the commit before doing anything because | ||
3952 | * we want to make sure nobody tries to run flush_commit_list until | 4094 | * we want to make sure nobody tries to run flush_commit_list until |
3953 | * the new transaction is fully setup, and we've already flushed the | 4095 | * the new transaction is fully setup, and we've already flushed the |
3954 | * ordered bh list | 4096 | * ordered bh list |
@@ -3968,9 +4110,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3968 | atomic_set(&jl->j_commit_left, journal->j_len + 2); | 4110 | atomic_set(&jl->j_commit_left, journal->j_len + 2); |
3969 | jl->j_realblock = NULL; | 4111 | jl->j_realblock = NULL; |
3970 | 4112 | ||
3971 | /* The ENTIRE FOR LOOP MUST not cause schedule to occur. | 4113 | /* |
3972 | ** for each real block, add it to the journal list hash, | 4114 | * The ENTIRE FOR LOOP MUST not cause schedule to occur. |
3973 | ** copy into real block index array in the commit or desc block | 4115 | * for each real block, add it to the journal list hash, |
4116 | * copy into real block index array in the commit or desc block | ||
3974 | */ | 4117 | */ |
3975 | trans_half = journal_trans_half(sb->s_blocksize); | 4118 | trans_half = journal_trans_half(sb->s_blocksize); |
3976 | for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { | 4119 | for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { |
@@ -3989,9 +4132,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3989 | last_cn->next = jl_cn; | 4132 | last_cn->next = jl_cn; |
3990 | } | 4133 | } |
3991 | last_cn = jl_cn; | 4134 | last_cn = jl_cn; |
3992 | /* make sure the block we are trying to log is not a block | 4135 | /* |
3993 | of journal or reserved area */ | 4136 | * make sure the block we are trying to log |
3994 | 4137 | * is not a block of journal or reserved area | |
4138 | */ | ||
3995 | if (is_block_in_log_or_reserved_area | 4139 | if (is_block_in_log_or_reserved_area |
3996 | (sb, cn->bh->b_blocknr)) { | 4140 | (sb, cn->bh->b_blocknr)) { |
3997 | reiserfs_panic(sb, "journal-2332", | 4141 | reiserfs_panic(sb, "journal-2332", |
@@ -4021,19 +4165,26 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4021 | set_desc_trans_id(desc, journal->j_trans_id); | 4165 | set_desc_trans_id(desc, journal->j_trans_id); |
4022 | set_commit_trans_len(commit, journal->j_len); | 4166 | set_commit_trans_len(commit, journal->j_len); |
4023 | 4167 | ||
4024 | /* special check in case all buffers in the journal were marked for not logging */ | 4168 | /* |
4169 | * special check in case all buffers in the journal | ||
4170 | * were marked for not logging | ||
4171 | */ | ||
4025 | BUG_ON(journal->j_len == 0); | 4172 | BUG_ON(journal->j_len == 0); |
4026 | 4173 | ||
4027 | /* we're about to dirty all the log blocks, mark the description block | 4174 | /* |
4175 | * we're about to dirty all the log blocks, mark the description block | ||
4028 | * dirty now too. Don't mark the commit block dirty until all the | 4176 | * dirty now too. Don't mark the commit block dirty until all the |
4029 | * others are on disk | 4177 | * others are on disk |
4030 | */ | 4178 | */ |
4031 | mark_buffer_dirty(d_bh); | 4179 | mark_buffer_dirty(d_bh); |
4032 | 4180 | ||
4033 | /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ | 4181 | /* |
4182 | * first data block is j_start + 1, so add one to | ||
4183 | * cur_write_start wherever you use it | ||
4184 | */ | ||
4034 | cur_write_start = journal->j_start; | 4185 | cur_write_start = journal->j_start; |
4035 | cn = journal->j_first; | 4186 | cn = journal->j_first; |
4036 | jindex = 1; /* start at one so we don't get the desc again */ | 4187 | jindex = 1; /* start at one so we don't get the desc again */ |
4037 | while (cn) { | 4188 | while (cn) { |
4038 | clear_buffer_journal_new(cn->bh); | 4189 | clear_buffer_journal_new(cn->bh); |
4039 | /* copy all the real blocks into log area. dirty log blocks */ | 4190 | /* copy all the real blocks into log area. dirty log blocks */ |
@@ -4059,7 +4210,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4059 | set_buffer_journal_dirty(cn->bh); | 4210 | set_buffer_journal_dirty(cn->bh); |
4060 | clear_buffer_journaled(cn->bh); | 4211 | clear_buffer_journaled(cn->bh); |
4061 | } else { | 4212 | } else { |
4062 | /* JDirty cleared sometime during transaction. don't log this one */ | 4213 | /* |
4214 | * JDirty cleared sometime during transaction. | ||
4215 | * don't log this one | ||
4216 | */ | ||
4063 | reiserfs_warning(sb, "journal-2048", | 4217 | reiserfs_warning(sb, "journal-2048", |
4064 | "BAD, buffer in journal hash, " | 4218 | "BAD, buffer in journal hash, " |
4065 | "but not JDirty!"); | 4219 | "but not JDirty!"); |
@@ -4071,9 +4225,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4071 | reiserfs_cond_resched(sb); | 4225 | reiserfs_cond_resched(sb); |
4072 | } | 4226 | } |
4073 | 4227 | ||
4074 | /* we are done with both the c_bh and d_bh, but | 4228 | /* |
4075 | ** c_bh must be written after all other commit blocks, | 4229 | * we are done with both the c_bh and d_bh, but |
4076 | ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. | 4230 | * c_bh must be written after all other commit blocks, |
4231 | * so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. | ||
4077 | */ | 4232 | */ |
4078 | 4233 | ||
4079 | journal->j_current_jl = alloc_journal_list(sb); | 4234 | journal->j_current_jl = alloc_journal_list(sb); |
@@ -4088,7 +4243,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4088 | journal->j_start = | 4243 | journal->j_start = |
4089 | (journal->j_start + journal->j_len + | 4244 | (journal->j_start + journal->j_len + |
4090 | 2) % SB_ONDISK_JOURNAL_SIZE(sb); | 4245 | 2) % SB_ONDISK_JOURNAL_SIZE(sb); |
4091 | atomic_set(&(journal->j_wcount), 0); | 4246 | atomic_set(&journal->j_wcount, 0); |
4092 | journal->j_bcount = 0; | 4247 | journal->j_bcount = 0; |
4093 | journal->j_last = NULL; | 4248 | journal->j_last = NULL; |
4094 | journal->j_first = NULL; | 4249 | journal->j_first = NULL; |
@@ -4104,15 +4259,18 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4104 | journal->j_next_async_flush = 0; | 4259 | journal->j_next_async_flush = 0; |
4105 | init_journal_hash(sb); | 4260 | init_journal_hash(sb); |
4106 | 4261 | ||
4107 | // make sure reiserfs_add_jh sees the new current_jl before we | 4262 | /* |
4108 | // write out the tails | 4263 | * make sure reiserfs_add_jh sees the new current_jl before we |
4264 | * write out the tails | ||
4265 | */ | ||
4109 | smp_mb(); | 4266 | smp_mb(); |
4110 | 4267 | ||
4111 | /* tail conversion targets have to hit the disk before we end the | 4268 | /* |
4269 | * tail conversion targets have to hit the disk before we end the | ||
4112 | * transaction. Otherwise a later transaction might repack the tail | 4270 | * transaction. Otherwise a later transaction might repack the tail |
4113 | * before this transaction commits, leaving the data block unflushed and | 4271 | * before this transaction commits, leaving the data block unflushed |
4114 | * clean, if we crash before the later transaction commits, the data block | 4272 | * and clean, if we crash before the later transaction commits, the |
4115 | * is lost. | 4273 | * data block is lost. |
4116 | */ | 4274 | */ |
4117 | if (!list_empty(&jl->j_tail_bh_list)) { | 4275 | if (!list_empty(&jl->j_tail_bh_list)) { |
4118 | depth = reiserfs_write_unlock_nested(sb); | 4276 | depth = reiserfs_write_unlock_nested(sb); |
@@ -4123,24 +4281,27 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4123 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); | 4281 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); |
4124 | mutex_unlock(&jl->j_commit_mutex); | 4282 | mutex_unlock(&jl->j_commit_mutex); |
4125 | 4283 | ||
4126 | /* honor the flush wishes from the caller, simple commits can | 4284 | /* |
4127 | ** be done outside the journal lock, they are done below | 4285 | * honor the flush wishes from the caller, simple commits can |
4128 | ** | 4286 | * be done outside the journal lock, they are done below |
4129 | ** if we don't flush the commit list right now, we put it into | 4287 | * |
4130 | ** the work queue so the people waiting on the async progress work | 4288 | * if we don't flush the commit list right now, we put it into |
4131 | ** queue don't wait for this proc to flush journal lists and such. | 4289 | * the work queue so the people waiting on the async progress work |
4290 | * queue don't wait for this proc to flush journal lists and such. | ||
4132 | */ | 4291 | */ |
4133 | if (flush) { | 4292 | if (flush) { |
4134 | flush_commit_list(sb, jl, 1); | 4293 | flush_commit_list(sb, jl, 1); |
4135 | flush_journal_list(sb, jl, 1); | 4294 | flush_journal_list(sb, jl, 1); |
4136 | } else if (!(jl->j_state & LIST_COMMIT_PENDING)) | 4295 | } else if (!(jl->j_state & LIST_COMMIT_PENDING)) |
4137 | queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); | 4296 | queue_delayed_work(REISERFS_SB(sb)->commit_wq, |
4297 | &journal->j_work, HZ / 10); | ||
4138 | 4298 | ||
4139 | /* if the next transaction has any chance of wrapping, flush | 4299 | /* |
4140 | ** transactions that might get overwritten. If any journal lists are very | 4300 | * if the next transaction has any chance of wrapping, flush |
4141 | ** old flush them as well. | 4301 | * transactions that might get overwritten. If any journal lists |
4302 | * are very old flush them as well. | ||
4142 | */ | 4303 | */ |
4143 | first_jl: | 4304 | first_jl: |
4144 | list_for_each_safe(entry, safe, &journal->j_journal_list) { | 4305 | list_for_each_safe(entry, safe, &journal->j_journal_list) { |
4145 | temp_jl = JOURNAL_LIST_ENTRY(entry); | 4306 | temp_jl = JOURNAL_LIST_ENTRY(entry); |
4146 | if (journal->j_start <= temp_jl->j_start) { | 4307 | if (journal->j_start <= temp_jl->j_start) { |
@@ -4151,8 +4312,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4151 | } else if ((journal->j_start + | 4312 | } else if ((journal->j_start + |
4152 | journal->j_trans_max + 1) < | 4313 | journal->j_trans_max + 1) < |
4153 | SB_ONDISK_JOURNAL_SIZE(sb)) { | 4314 | SB_ONDISK_JOURNAL_SIZE(sb)) { |
4154 | /* if we don't cross into the next transaction and we don't | 4315 | /* |
4155 | * wrap, there is no way we can overlap any later transactions | 4316 | * if we don't cross into the next |
4317 | * transaction and we don't wrap, there is | ||
4318 | * no way we can overlap any later transactions | ||
4156 | * break now | 4319 | * break now |
4157 | */ | 4320 | */ |
4158 | break; | 4321 | break; |
@@ -4166,10 +4329,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4166 | flush_used_journal_lists(sb, temp_jl); | 4329 | flush_used_journal_lists(sb, temp_jl); |
4167 | goto first_jl; | 4330 | goto first_jl; |
4168 | } else { | 4331 | } else { |
4169 | /* we don't overlap anything from out start to the end of the | 4332 | * we don't overlap anything from our start |
4170 | * log, and our wrapped portion doesn't overlap anything at | 4333 | * we don't overlap anything from out start |
4171 | * the start of the log. We can break | 4334 | * to the end of the log, and our wrapped |
4172 | */ | 4335 | * portion doesn't overlap anything at |
4336 | * the start of the log. We can break | ||
4337 | */ | ||
4173 | break; | 4338 | break; |
4174 | } | 4339 | } |
4175 | } | 4340 | } |
@@ -4183,23 +4348,25 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4183 | "could not get a list bitmap"); | 4348 | "could not get a list bitmap"); |
4184 | } | 4349 | } |
4185 | 4350 | ||
4186 | atomic_set(&(journal->j_jlock), 0); | 4351 | atomic_set(&journal->j_jlock, 0); |
4187 | unlock_journal(sb); | 4352 | unlock_journal(sb); |
4188 | /* wake up any body waiting to join. */ | 4353 | /* wake up any body waiting to join. */ |
4189 | clear_bit(J_WRITERS_QUEUED, &journal->j_state); | 4354 | clear_bit(J_WRITERS_QUEUED, &journal->j_state); |
4190 | wake_up(&(journal->j_join_wait)); | 4355 | wake_up(&journal->j_join_wait); |
4191 | 4356 | ||
4192 | if (!flush && wait_on_commit && | 4357 | if (!flush && wait_on_commit && |
4193 | journal_list_still_alive(sb, commit_trans_id)) { | 4358 | journal_list_still_alive(sb, commit_trans_id)) { |
4194 | flush_commit_list(sb, jl, 1); | 4359 | flush_commit_list(sb, jl, 1); |
4195 | } | 4360 | } |
4196 | out: | 4361 | out: |
4197 | reiserfs_check_lock_depth(sb, "journal end2"); | 4362 | reiserfs_check_lock_depth(sb, "journal end2"); |
4198 | 4363 | ||
4199 | memset(th, 0, sizeof(*th)); | 4364 | memset(th, 0, sizeof(*th)); |
4200 | /* Re-set th->t_super, so we can properly keep track of how many | 4365 | /* |
4366 | * Re-set th->t_super, so we can properly keep track of how many | ||
4201 | * persistent transactions there are. We need to do this so if this | 4367 | * persistent transactions there are. We need to do this so if this |
4202 | * call is part of a failed restart_transaction, we can free it later */ | 4368 | * call is part of a failed restart_transaction, we can free it later |
4369 | */ | ||
4203 | th->t_super = sb; | 4370 | th->t_super = sb; |
4204 | 4371 | ||
4205 | return journal->j_errno; | 4372 | return journal->j_errno; |
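One detail of do_journal_end() worth isolating: when th->t_trans_id reaches ~0U, the code forces FLUSH_ALL | COMMIT_NOW | WAIT so flush_older_commits() never compares transaction ids across the 32-bit wrap. As a standalone predicate (the flag values here are illustrative, not the kernel's):

#include <stdint.h>

#define TOY_FLUSH_ALL	0x1
#define TOY_COMMIT_NOW	0x2
#define TOY_WAIT	0x4

static int toy_adjust_end_flags(uint32_t trans_id, int flags)
{
	/* about to wrap: force a full synchronous flush so no older
	 * transaction survives with a numerically larger id */
	if (trans_id == UINT32_MAX)	/* th->t_trans_id == ~0U */
		flags |= TOY_FLUSH_ALL | TOY_COMMIT_NOW | TOY_WAIT;
	return flags;
}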
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 79e5a8b4c226..d6744c8b24e1 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c | |||
@@ -8,46 +8,42 @@ | |||
8 | #include "reiserfs.h" | 8 | #include "reiserfs.h" |
9 | #include <linux/buffer_head.h> | 9 | #include <linux/buffer_head.h> |
10 | 10 | ||
11 | /* these are used in do_balance.c */ | 11 | /* |
12 | 12 | * copy copy_count entries from source directory item to dest buffer | |
13 | /* leaf_move_items | 13 | * (creating new item if needed) |
14 | leaf_shift_left | 14 | */ |
15 | leaf_shift_right | ||
16 | leaf_delete_items | ||
17 | leaf_insert_into_buf | ||
18 | leaf_paste_in_buffer | ||
19 | leaf_cut_from_buffer | ||
20 | leaf_paste_entries | ||
21 | */ | ||
22 | |||
23 | /* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ | ||
24 | static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | 15 | static void leaf_copy_dir_entries(struct buffer_info *dest_bi, |
25 | struct buffer_head *source, int last_first, | 16 | struct buffer_head *source, int last_first, |
26 | int item_num, int from, int copy_count) | 17 | int item_num, int from, int copy_count) |
27 | { | 18 | { |
28 | struct buffer_head *dest = dest_bi->bi_bh; | 19 | struct buffer_head *dest = dest_bi->bi_bh; |
29 | int item_num_in_dest; /* either the number of target item, | 20 | /* |
30 | or if we must create a new item, | 21 | * either the number of target item, or if we must create a |
31 | the number of the item we will | 22 | * new item, the number of the item we will create it next to |
32 | create it next to */ | 23 | */ |
24 | int item_num_in_dest; | ||
25 | |||
33 | struct item_head *ih; | 26 | struct item_head *ih; |
34 | struct reiserfs_de_head *deh; | 27 | struct reiserfs_de_head *deh; |
35 | int copy_records_len; /* length of all records in item to be copied */ | 28 | int copy_records_len; /* length of all records in item to be copied */ |
36 | char *records; | 29 | char *records; |
37 | 30 | ||
38 | ih = B_N_PITEM_HEAD(source, item_num); | 31 | ih = item_head(source, item_num); |
39 | 32 | ||
40 | RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); | 33 | RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); |
41 | 34 | ||
42 | /* length of all records to be copied and first byte of the last of them */ | 35 | /*
36 | * length of all records to be copied and first byte of | ||
37 | * the last of them | ||
38 | */ | ||
43 | deh = B_I_DEH(source, ih); | 39 | deh = B_I_DEH(source, ih); |
44 | if (copy_count) { | 40 | if (copy_count) { |
45 | copy_records_len = (from ? deh_location(&(deh[from - 1])) : | 41 | copy_records_len = (from ? deh_location(&deh[from - 1]) : |
46 | ih_item_len(ih)) - | 42 | ih_item_len(ih)) - |
47 | deh_location(&(deh[from + copy_count - 1])); | 43 | deh_location(&deh[from + copy_count - 1]); |
48 | records = | 44 | records = |
49 | source->b_data + ih_location(ih) + | 45 | source->b_data + ih_location(ih) + |
50 | deh_location(&(deh[from + copy_count - 1])); | 46 | deh_location(&deh[from + copy_count - 1]); |
51 | } else { | 47 | } else { |
52 | copy_records_len = 0; | 48 | copy_records_len = 0; |
53 | records = NULL; | 49 | records = NULL; |
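The copy_records_len arithmetic above leans on the directory item layout: entry heads sit at the front of the item while their records are packed from the item's end, so deh_location() shrinks as the entry index grows. A standalone sketch of the same computation with made-up offsets (plain ints, not the on-disk reiserfs_de_head):

	#include <stdio.h>

	/* stand-in for deh_location() of 4 entries; entry 0's record is last */
	static const int deh_loc[] = { 112, 96, 80, 64 };
	static const int item_len = 128;

	/* total record bytes for entries [from, from + copy_count) */
	static int records_len(int from, int copy_count)
	{
		int end = from ? deh_loc[from - 1] : item_len;

		return end - deh_loc[from + copy_count - 1];
	}

	int main(void)
	{
		/* entries 1 and 2 occupy bytes [80, 112): 32 bytes of records */
		printf("%d\n", records_len(1, 2));
		return 0;
	}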
@@ -59,12 +55,15 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
59 | LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) | 55 | LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) |
60 | - 1); | 56 | - 1); |
61 | 57 | ||
62 | /* if there are no items in dest or the first/last item in dest is not item of the same directory */ | 58 | /* |
59 | * if there are no items in dest or the first/last item in | ||
60 | * dest is not item of the same directory | ||
61 | */ | ||
63 | if ((item_num_in_dest == -1) || | 62 | if ((item_num_in_dest == -1) || |
64 | (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || | 63 | (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || |
65 | (last_first == LAST_TO_FIRST | 64 | (last_first == LAST_TO_FIRST |
66 | && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key, | 65 | && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key, |
67 | B_N_PKEY(dest, | 66 | leaf_key(dest, |
68 | item_num_in_dest)))) | 67 | item_num_in_dest)))) |
69 | { | 68 | { |
70 | /* create new item in dest */ | 69 | /* create new item in dest */ |
@@ -80,16 +79,22 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
80 | 79 | ||
81 | if (last_first == LAST_TO_FIRST) { | 80 | if (last_first == LAST_TO_FIRST) { |
82 | /* form key by the following way */ | 81 | /* form key by the following way */ |
83 | if (from < I_ENTRY_COUNT(ih)) { | 82 | if (from < ih_entry_count(ih)) { |
84 | set_le_ih_k_offset(&new_ih, | 83 | set_le_ih_k_offset(&new_ih, |
85 | deh_offset(&(deh[from]))); | 84 | deh_offset(&deh[from])); |
86 | /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */ | ||
87 | } else { | 85 | } else { |
88 | /* no entries will be copied to this item in this function */ | 86 | /* |
87 | * no entries will be copied to this | ||
88 | * item in this function | ||
89 | */ | ||
89 | set_le_ih_k_offset(&new_ih, U32_MAX); | 90 | set_le_ih_k_offset(&new_ih, U32_MAX); |
90 | /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ | 91 | /* |
92 | * this item is not yet valid, but we | ||
93 | * want I_IS_DIRECTORY_ITEM to return 1 | ||
94 | * for it, so we -1 | ||
95 | */ | ||
91 | } | 96 | } |
92 | set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), | 97 | set_le_key_k_type(KEY_FORMAT_3_5, &new_ih.ih_key, |
93 | TYPE_DIRENTRY); | 98 | TYPE_DIRENTRY); |
94 | } | 99 | } |
95 | 100 | ||
@@ -113,36 +118,44 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
113 | 118 | ||
114 | leaf_paste_entries(dest_bi, item_num_in_dest, | 119 | leaf_paste_entries(dest_bi, item_num_in_dest, |
115 | (last_first == | 120 | (last_first == |
116 | FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest, | 121 | FIRST_TO_LAST) ? ih_entry_count(item_head(dest, |
117 | item_num_in_dest)) | 122 | item_num_in_dest)) |
118 | : 0, copy_count, deh + from, records, | 123 | : 0, copy_count, deh + from, records, |
119 | DEH_SIZE * copy_count + copy_records_len); | 124 | DEH_SIZE * copy_count + copy_records_len); |
120 | } | 125 | } |
121 | 126 | ||
122 | /* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or | 127 | /* |
123 | part of it or nothing (see the return 0 below) from SOURCE to the end | 128 | * Copy the first (if last_first == FIRST_TO_LAST) or last |
124 | (if last_first) or beginning (!last_first) of the DEST */ | 129 | * (last_first == LAST_TO_FIRST) item or part of it or nothing |
130 | * (see the return 0 below) from SOURCE to the end (if last_first) | ||
131 | * or beginning (!last_first) of the DEST | ||
132 | */ | ||
125 | /* returns 1 if anything was copied, else 0 */ | 133 | /* returns 1 if anything was copied, else 0 */ |
126 | static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | 134 | static int leaf_copy_boundary_item(struct buffer_info *dest_bi, |
127 | struct buffer_head *src, int last_first, | 135 | struct buffer_head *src, int last_first, |
128 | int bytes_or_entries) | 136 | int bytes_or_entries) |
129 | { | 137 | { |
130 | struct buffer_head *dest = dest_bi->bi_bh; | 138 | struct buffer_head *dest = dest_bi->bi_bh; |
131 | int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ | 139 | /* number of items in the source and destination buffers */ |
140 | int dest_nr_item, src_nr_item; | ||
132 | struct item_head *ih; | 141 | struct item_head *ih; |
133 | struct item_head *dih; | 142 | struct item_head *dih; |
134 | 143 | ||
135 | dest_nr_item = B_NR_ITEMS(dest); | 144 | dest_nr_item = B_NR_ITEMS(dest); |
136 | 145 | ||
146 | /* | ||
147 | * if ( DEST is empty or first item of SOURCE and last item of | ||
148 | * DEST are the items of different objects or of different types ) | ||
149 | * then there is no need to treat this item differently from the | ||
150 | * other items that we copy, so we return | ||
151 | */ | ||
137 | if (last_first == FIRST_TO_LAST) { | 152 | if (last_first == FIRST_TO_LAST) { |
138 | /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects | 153 | ih = item_head(src, 0); |
139 | or of different types ) then there is no need to treat this item differently from the other items | 154 | dih = item_head(dest, dest_nr_item - 1); |
140 | that we copy, so we return */ | 155 | |
141 | ih = B_N_PITEM_HEAD(src, 0); | 156 | /* there is nothing to merge */ |
142 | dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1); | ||
143 | if (!dest_nr_item | 157 | if (!dest_nr_item |
144 | || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) | 158 | || (!op_is_left_mergeable(&ih->ih_key, src->b_size))) |
145 | /* there is nothing to merge */ | ||
146 | return 0; | 159 | return 0; |
147 | 160 | ||
148 | RFALSE(!ih_item_len(ih), | 161 | RFALSE(!ih_item_len(ih), |
@@ -157,8 +170,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
157 | return 1; | 170 | return 1; |
158 | } | 171 | } |
159 | 172 | ||
160 | /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST | 173 | /* |
161 | part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header | 174 | * copy part of the body of the first item of SOURCE |
175 | * to the end of the body of the last item of the DEST | ||
176 | * part defined by 'bytes_or_entries'; if bytes_or_entries | ||
177 | * == -1 copy whole body; don't create new item header | ||
162 | */ | 178 | */ |
163 | if (bytes_or_entries == -1) | 179 | if (bytes_or_entries == -1) |
164 | bytes_or_entries = ih_item_len(ih); | 180 | bytes_or_entries = ih_item_len(ih); |
@@ -176,11 +192,13 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
176 | } | 192 | } |
177 | #endif | 193 | #endif |
178 | 194 | ||
179 | /* merge first item (or its part) of src buffer with the last | 195 | /* |
180 | item of dest buffer. Both are of the same file */ | 196 | * merge first item (or its part) of src buffer with the last |
197 | * item of dest buffer. Both are of the same file | ||
198 | */ | ||
181 | leaf_paste_in_buffer(dest_bi, | 199 | leaf_paste_in_buffer(dest_bi, |
182 | dest_nr_item - 1, ih_item_len(dih), | 200 | dest_nr_item - 1, ih_item_len(dih), |
183 | bytes_or_entries, B_I_PITEM(src, ih), 0); | 201 | bytes_or_entries, ih_item_body(src, ih), 0); |
184 | 202 | ||
185 | if (is_indirect_le_ih(dih)) { | 203 | if (is_indirect_le_ih(dih)) { |
186 | RFALSE(get_ih_free_space(dih), | 204 | RFALSE(get_ih_free_space(dih), |
@@ -195,19 +213,23 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
195 | 213 | ||
196 | /* copy boundary item to right (last_first == LAST_TO_FIRST) */ | 214 | /* copy boundary item to right (last_first == LAST_TO_FIRST) */ |
197 | 215 | ||
198 | /* ( DEST is empty or last item of SOURCE and first item of DEST | 216 | /* |
199 | are the items of different object or of different types ) | 217 | * (DEST is empty or last item of SOURCE and first item of DEST |
218 | * are the items of different object or of different types) | ||
200 | */ | 219 | */ |
201 | src_nr_item = B_NR_ITEMS(src); | 220 | src_nr_item = B_NR_ITEMS(src); |
202 | ih = B_N_PITEM_HEAD(src, src_nr_item - 1); | 221 | ih = item_head(src, src_nr_item - 1); |
203 | dih = B_N_PITEM_HEAD(dest, 0); | 222 | dih = item_head(dest, 0); |
204 | 223 | ||
205 | if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size)) | 224 | if (!dest_nr_item || !op_is_left_mergeable(&dih->ih_key, src->b_size)) |
206 | return 0; | 225 | return 0; |
207 | 226 | ||
208 | if (is_direntry_le_ih(ih)) { | 227 | if (is_direntry_le_ih(ih)) { |
228 | /* | ||
229 | * bytes_or_entries = entries number in last | ||
230 | * item body of SOURCE | ||
231 | */ | ||
209 | if (bytes_or_entries == -1) | 232 | if (bytes_or_entries == -1) |
210 | /* bytes_or_entries = entries number in last item body of SOURCE */ | ||
211 | bytes_or_entries = ih_entry_count(ih); | 233 | bytes_or_entries = ih_entry_count(ih); |
212 | 234 | ||
213 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, | 235 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, |
@@ -217,9 +239,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
217 | return 1; | 239 | return 1; |
218 | } | 240 | } |
219 | 241 | ||
220 | /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; | 242 | /* |
221 | part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; change first item key of the DEST; | 243 | * copy part of the body of the last item of SOURCE to the
222 | don't create new item header | 244 | * begin of the body of the first item of the DEST; part defined
245 | * by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; | ||
246 | * change first item key of the DEST; don't create new item header | ||
223 | */ | 247 | */ |
224 | 248 | ||
225 | RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), | 249 | RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), |
@@ -270,15 +294,18 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
270 | } | 294 | } |
271 | 295 | ||
272 | leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries, | 296 | leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries, |
273 | B_I_PITEM(src, | 297 | ih_item_body(src, |
274 | ih) + ih_item_len(ih) - bytes_or_entries, | 298 | ih) + ih_item_len(ih) - bytes_or_entries, |
275 | 0); | 299 | 0); |
276 | return 1; | 300 | return 1; |
277 | } | 301 | } |
278 | 302 | ||
279 | /* copy cpy_num items from buffer src to buffer dest | 303 | /*
280 | * last_first == FIRST_TO_LAST means that we copy cpy_num items beginning from first-th item in src to tail of dest | 304 | * copy cpy_num items from buffer src to buffer dest
281 | * last_first == LAST_TO_FIRST means that we copy cpy_num items beginning from first-th item in src to head of dest | 305 | * last_first == FIRST_TO_LAST means that we copy cpy_num items beginning
306 | * from first-th item in src to tail of dest | ||
307 | * last_first == LAST_TO_FIRST means that we copy cpy_num items beginning | ||
308 | * from first-th item in src to head of dest | ||
282 | */ | 309 | */ |
283 | static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | 310 | static void leaf_copy_items_entirely(struct buffer_info *dest_bi, |
284 | struct buffer_head *src, int last_first, | 311 | struct buffer_head *src, int last_first, |
@@ -311,11 +338,14 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
311 | nr = blkh_nr_item(blkh); | 338 | nr = blkh_nr_item(blkh); |
312 | free_space = blkh_free_space(blkh); | 339 | free_space = blkh_free_space(blkh); |
313 | 340 | ||
314 | /* we will insert items before 0-th or nr-th item in dest buffer. It depends on the last_first parameter */ | 341 | /*
342 | * we will insert items before 0-th or nr-th item in dest buffer. | ||
343 | * It depends on the last_first parameter | ||
344 | */ | ||
315 | dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; | 345 | dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; |
316 | 346 | ||
317 | /* location of head of first new item */ | 347 | /* location of head of first new item */ |
318 | ih = B_N_PITEM_HEAD(dest, dest_before); | 348 | ih = item_head(dest, dest_before); |
319 | 349 | ||
320 | RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE, | 350 | RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE, |
321 | "vs-10140: not enough free space for headers %d (needed %d)", | 351 | "vs-10140: not enough free space for headers %d (needed %d)", |
@@ -325,7 +355,7 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
325 | memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE); | 355 | memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE); |
326 | 356 | ||
327 | /* copy item headers */ | 357 | /* copy item headers */ |
328 | memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE); | 358 | memcpy(ih, item_head(src, first), cpy_num * IH_SIZE); |
329 | 359 | ||
330 | free_space -= (IH_SIZE * cpy_num); | 360 | free_space -= (IH_SIZE * cpy_num); |
331 | set_blkh_free_space(blkh, free_space); | 361 | set_blkh_free_space(blkh, free_space); |
@@ -338,8 +368,8 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
338 | } | 368 | } |
339 | 369 | ||
340 | /* prepare space for items */ | 370 | /* prepare space for items */ |
341 | last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before])); | 371 | last_loc = ih_location(&ih[nr + cpy_num - 1 - dest_before]); |
342 | last_inserted_loc = ih_location(&(ih[cpy_num - 1])); | 372 | last_inserted_loc = ih_location(&ih[cpy_num - 1]); |
343 | 373 | ||
344 | /* check free space */ | 374 | /* check free space */ |
345 | RFALSE(free_space < j - last_inserted_loc, | 375 | RFALSE(free_space < j - last_inserted_loc, |
@@ -352,7 +382,8 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
352 | 382 | ||
353 | /* copy items */ | 383 | /* copy items */ |
354 | memcpy(dest->b_data + last_inserted_loc, | 384 | memcpy(dest->b_data + last_inserted_loc, |
355 | B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc); | 385 | item_body(src, (first + cpy_num - 1)), |
386 | j - last_inserted_loc); | ||
356 | 387 | ||
357 | /* sizes, item number */ | 388 | /* sizes, item number */ |
358 | set_blkh_nr_item(blkh, nr + cpy_num); | 389 | set_blkh_nr_item(blkh, nr + cpy_num); |
@@ -376,8 +407,10 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
376 | } | 407 | } |
377 | } | 408 | } |
378 | 409 | ||
379 | /* This function splits the (liquid) item into two items (useful when | 410 | /* |
380 | shifting part of an item into another node.) */ | 411 | * This function splits the (liquid) item into two items (useful when |
412 | * shifting part of an item into another node.) | ||
413 | */ | ||
381 | static void leaf_item_bottle(struct buffer_info *dest_bi, | 414 | static void leaf_item_bottle(struct buffer_info *dest_bi, |
382 | struct buffer_head *src, int last_first, | 415 | struct buffer_head *src, int last_first, |
383 | int item_num, int cpy_bytes) | 416 | int item_num, int cpy_bytes) |
@@ -389,17 +422,22 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
389 | "vs-10170: bytes == - 1 means: do not split item"); | 422 | "vs-10170: bytes == - 1 means: do not split item"); |
390 | 423 | ||
391 | if (last_first == FIRST_TO_LAST) { | 424 | if (last_first == FIRST_TO_LAST) { |
392 | /* if ( if item in position item_num in buffer SOURCE is directory item ) */ | 425 | /* |
393 | ih = B_N_PITEM_HEAD(src, item_num); | 426 | * if ( if item in position item_num in buffer SOURCE |
427 | * is directory item ) | ||
428 | */ | ||
429 | ih = item_head(src, item_num); | ||
394 | if (is_direntry_le_ih(ih)) | 430 | if (is_direntry_le_ih(ih)) |
395 | leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, | 431 | leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, |
396 | item_num, 0, cpy_bytes); | 432 | item_num, 0, cpy_bytes); |
397 | else { | 433 | else { |
398 | struct item_head n_ih; | 434 | struct item_head n_ih; |
399 | 435 | ||
400 | /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST | 436 | /* |
401 | part defined by 'cpy_bytes'; create new item header; change old item_header (????); | 437 | * copy part of the body of the item number 'item_num' |
402 | n_ih = new item_header; | 438 | * of SOURCE to the end of the DEST part defined by |
439 | * 'cpy_bytes'; create new item header; change old | ||
440 | * item_header (????); n_ih = new item_header; | ||
403 | */ | 441 | */ |
404 | memcpy(&n_ih, ih, IH_SIZE); | 442 | memcpy(&n_ih, ih, IH_SIZE); |
405 | put_ih_item_len(&n_ih, cpy_bytes); | 443 | put_ih_item_len(&n_ih, cpy_bytes); |
@@ -411,30 +449,36 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
411 | set_ih_free_space(&n_ih, 0); | 449 | set_ih_free_space(&n_ih, 0); |
412 | } | 450 | } |
413 | 451 | ||
414 | RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size), | 452 | RFALSE(op_is_left_mergeable(&ih->ih_key, src->b_size), |
415 | "vs-10190: bad mergeability of item %h", ih); | 453 | "vs-10190: bad mergeability of item %h", ih); |
416 | n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ | 454 | n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ |
417 | leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih, | 455 | leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih, |
418 | B_N_PITEM(src, item_num), 0); | 456 | item_body(src, item_num), 0); |
419 | } | 457 | } |
420 | } else { | 458 | } else { |
421 | /* if ( if item in position item_num in buffer SOURCE is directory item ) */ | 459 | /* |
422 | ih = B_N_PITEM_HEAD(src, item_num); | 460 | * if ( if item in position item_num in buffer |
461 | * SOURCE is directory item ) | ||
462 | */ | ||
463 | ih = item_head(src, item_num); | ||
423 | if (is_direntry_le_ih(ih)) | 464 | if (is_direntry_le_ih(ih)) |
424 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, | 465 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, |
425 | item_num, | 466 | item_num, |
426 | I_ENTRY_COUNT(ih) - cpy_bytes, | 467 | ih_entry_count(ih) - cpy_bytes, |
427 | cpy_bytes); | 468 | cpy_bytes); |
428 | else { | 469 | else { |
429 | struct item_head n_ih; | 470 | struct item_head n_ih; |
430 | 471 | ||
431 | /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST | 472 | /* |
432 | part defined by 'cpy_bytes'; create new item header; | 473 | * copy part of the body of the item number 'item_num' |
433 | n_ih = new item_header; | 474 | * of SOURCE to the begin of the DEST part defined by |
475 | * 'cpy_bytes'; create new item header; | ||
476 | * n_ih = new item_header; | ||
434 | */ | 477 | */ |
435 | memcpy(&n_ih, ih, SHORT_KEY_SIZE); | 478 | memcpy(&n_ih, ih, SHORT_KEY_SIZE); |
436 | 479 | ||
437 | n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ | 480 | /* Endian safe, both le */ |
481 | n_ih.ih_version = ih->ih_version; | ||
438 | 482 | ||
439 | if (is_direct_le_ih(ih)) { | 483 | if (is_direct_le_ih(ih)) { |
440 | set_le_ih_k_offset(&n_ih, | 484 | set_le_ih_k_offset(&n_ih, |
@@ -458,20 +502,22 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
458 | /* set item length */ | 502 | /* set item length */ |
459 | put_ih_item_len(&n_ih, cpy_bytes); | 503 | put_ih_item_len(&n_ih, cpy_bytes); |
460 | 504 | ||
461 | n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ | 505 | /* Endian safe, both le */ |
506 | n_ih.ih_version = ih->ih_version; | ||
462 | 507 | ||
463 | leaf_insert_into_buf(dest_bi, 0, &n_ih, | 508 | leaf_insert_into_buf(dest_bi, 0, &n_ih, |
464 | B_N_PITEM(src, | 509 | item_body(src, item_num) + |
465 | item_num) + | 510 | ih_item_len(ih) - cpy_bytes, 0); |
466 | ih_item_len(ih) - cpy_bytes, 0); | ||
467 | } | 511 | } |
468 | } | 512 | } |
469 | } | 513 | } |
470 | 514 | ||
471 | /* If cpy_bytes equals minus one then copy cpy_num whole items from SOURCE to DEST. | 515 | /*
472 | If cpy_bytes is not equal to minus one then copy cpy_num-1 whole items from SOURCE to DEST. | 516 | * If cpy_bytes equals minus one then copy cpy_num whole items from SOURCE
473 | From last item copy cpy_bytes bytes for regular item and cpy_bytes directory entries for | 517 | * to DEST. If cpy_bytes is not equal to minus one then copy cpy_num-1 whole
474 | directory item. */ | 518 | * items from SOURCE to DEST. From last item copy cpy_bytes bytes for regular
519 | * item and cpy_bytes directory entries for directory item. | ||
520 | */ | ||
475 | static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | 521 | static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, |
476 | int last_first, int cpy_num, int cpy_bytes) | 522 | int last_first, int cpy_num, int cpy_bytes) |
477 | { | 523 | { |
@@ -498,22 +544,34 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
498 | else | 544 | else |
499 | bytes = -1; | 545 | bytes = -1; |
500 | 546 | ||
501 | /* copy the first item or part of it or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ | 547 | /*
548 | * copy the first item or part of it or nothing to the end of | ||
549 | * the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) | ||
550 | */ | ||
502 | i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); | 551 | i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); |
503 | cpy_num -= i; | 552 | cpy_num -= i; |
504 | if (cpy_num == 0) | 553 | if (cpy_num == 0) |
505 | return i; | 554 | return i; |
506 | pos += i; | 555 | pos += i; |
507 | if (cpy_bytes == -1) | 556 | if (cpy_bytes == -1) |
508 | /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ | 557 | /* |
558 | * copy first cpy_num items starting from position | ||
559 | * 'pos' of SOURCE to end of DEST | ||
560 | */ | ||
509 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, | 561 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, |
510 | pos, cpy_num); | 562 | pos, cpy_num); |
511 | else { | 563 | else { |
512 | /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ | 564 | /* |
565 | * copy first cpy_num-1 items starting from position | ||
566 | * 'pos-1' of the SOURCE to the end of the DEST | ||
567 | */ | ||
513 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, | 568 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, |
514 | pos, cpy_num - 1); | 569 | pos, cpy_num - 1); |
515 | 570 | ||
516 | /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ | 571 | /* |
572 | * copy part of the item which number is | ||
573 | * cpy_num+pos-1 to the end of the DEST | ||
574 | */ | ||
517 | leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, | 575 | leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, |
518 | cpy_num + pos - 1, cpy_bytes); | 576 | cpy_num + pos - 1, cpy_bytes); |
519 | } | 577 | } |
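So the FIRST_TO_LAST path splits the work into at most three phases: a boundary merge (leaf_copy_boundary_item), cpy_num-1 whole items, and a partial tail copy (leaf_item_bottle). A toy planner, with hypothetical names, that just prints the same decomposition under those assumptions:

	#include <stdio.h>

	/* merged = return value of the boundary-item step (0 or 1) */
	static void plan_copy(int merged, int cpy_num, int cpy_bytes)
	{
		int pos = merged;

		cpy_num -= merged;
		if (cpy_num == 0)
			return;

		if (cpy_bytes == -1) {
			printf("copy items %d..%d entirely\n", pos,
			       pos + cpy_num - 1);
		} else {
			printf("copy items %d..%d entirely\n", pos,
			       pos + cpy_num - 2);
			printf("bottle %d bytes/entries of item %d\n",
			       cpy_bytes, pos + cpy_num - 1);
		}
	}

	int main(void)
	{
		/* boundary merged one item, then 2 whole items, then 100 bytes */
		plan_copy(1, 4, 100);
		return 0;
	}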
@@ -525,7 +583,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
525 | else | 583 | else |
526 | bytes = -1; | 584 | bytes = -1; |
527 | 585 | ||
528 | /* copy the last item or part of it or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ | 586 | /*
587 | * copy the last item or part of it or nothing to the | ||
588 | * begin of the DEST | ||
589 | * (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); | ||
590 | */ | ||
529 | i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); | 591 | i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); |
530 | 592 | ||
531 | cpy_num -= i; | 593 | cpy_num -= i; |
@@ -534,15 +596,24 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
534 | 596 | ||
535 | pos = src_nr_item - cpy_num - i; | 597 | pos = src_nr_item - cpy_num - i; |
536 | if (cpy_bytes == -1) { | 598 | if (cpy_bytes == -1) { |
537 | /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ | 599 | /* |
600 | * starting from position 'pos' copy last cpy_num | ||
601 | * items of SOURCE to begin of DEST | ||
602 | */ | ||
538 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, | 603 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, |
539 | pos, cpy_num); | 604 | pos, cpy_num); |
540 | } else { | 605 | } else { |
541 | /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ | 606 | /* |
607 | * copy last cpy_num-1 items starting from position | ||
608 | * 'pos+1' of the SOURCE to the begin of the DEST; | ||
609 | */ | ||
542 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, | 610 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, |
543 | pos + 1, cpy_num - 1); | 611 | pos + 1, cpy_num - 1); |
544 | 612 | ||
545 | /* copy part of the item which number is pos to the begin of the DEST */ | 613 | /* |
614 | * copy part of the item which number is pos to | ||
615 | * the begin of the DEST | ||
616 | */ | ||
546 | leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, | 617 | leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, |
547 | cpy_bytes); | 618 | cpy_bytes); |
548 | } | 619 | } |
@@ -550,9 +621,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
550 | return i; | 621 | return i; |
551 | } | 622 | } |
552 | 623 | ||
553 | /* there are types of copying: from S[0] to L[0], from S[0] to R[0], | 623 | /*
554 | from R[0] to L[0]. For each of these we have to define parent and | 624 | * there are types of copying: from S[0] to L[0], from S[0] to R[0],
555 | positions of destination and source buffers */ | 625 | * from R[0] to L[0]. For each of these we have to define parent and
627 | * positions of destination and source buffers | ||
628 | */ | ||
556 | static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, | 629 | static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, |
557 | struct buffer_info *dest_bi, | 630 | struct buffer_info *dest_bi, |
558 | struct buffer_info *src_bi, | 631 | struct buffer_info *src_bi, |
@@ -568,7 +641,9 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, | |||
568 | src_bi->tb = tb; | 641 | src_bi->tb = tb; |
569 | src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); | 642 | src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); |
570 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); | 643 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); |
571 | src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */ | 644 | |
645 | /* src->b_item_order */ | ||
646 | src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); | ||
572 | dest_bi->tb = tb; | 647 | dest_bi->tb = tb; |
573 | dest_bi->bi_bh = tb->L[0]; | 648 | dest_bi->bi_bh = tb->L[0]; |
574 | dest_bi->bi_parent = tb->FL[0]; | 649 | dest_bi->bi_parent = tb->FL[0]; |
@@ -633,8 +708,10 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, | |||
633 | shift_mode, src_bi->bi_bh, dest_bi->bi_bh); | 708 | shift_mode, src_bi->bi_bh, dest_bi->bi_bh); |
634 | } | 709 | } |
635 | 710 | ||
636 | /* copy mov_num items and mov_bytes of the (mov_num-1)th item to | 711 | /* |
637 | neighbor. Delete them from source */ | 712 | * copy mov_num items and mov_bytes of the (mov_num-1)th item to |
713 | * neighbor. Delete them from source | ||
714 | */ | ||
638 | int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, | 715 | int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, |
639 | int mov_bytes, struct buffer_head *Snew) | 716 | int mov_bytes, struct buffer_head *Snew) |
640 | { | 717 | { |
@@ -657,18 +734,24 @@ int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, | |||
657 | return ret_value; | 734 | return ret_value; |
658 | } | 735 | } |
659 | 736 | ||
660 | /* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) | 737 | /* |
661 | from S[0] to L[0] and replace the delimiting key */ | 738 | * Shift shift_num items (and shift_bytes of last shifted item if |
739 | * shift_bytes != -1) from S[0] to L[0] and replace the delimiting key | ||
740 | */ | ||
662 | int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) | 741 | int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) |
663 | { | 742 | { |
664 | struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); | 743 | struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); |
665 | int i; | 744 | int i; |
666 | 745 | ||
667 | /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ | 746 | /* |
747 | * move shift_num (and shift_bytes bytes) items from S[0] | ||
748 | * to left neighbor L[0] | ||
749 | */ | ||
668 | i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); | 750 | i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); |
669 | 751 | ||
670 | if (shift_num) { | 752 | if (shift_num) { |
671 | if (B_NR_ITEMS(S0) == 0) { /* number of items in S[0] == 0 */ | 753 | /* number of items in S[0] == 0 */ |
754 | if (B_NR_ITEMS(S0) == 0) { | ||
672 | 755 | ||
673 | RFALSE(shift_bytes != -1, | 756 | RFALSE(shift_bytes != -1, |
674 | "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", | 757 | "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", |
@@ -691,10 +774,10 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) | |||
691 | replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0); | 774 | replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0); |
692 | 775 | ||
693 | RFALSE((shift_bytes != -1 && | 776 | RFALSE((shift_bytes != -1 && |
694 | !(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0)) | 777 | !(is_direntry_le_ih(item_head(S0, 0)) |
695 | && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) && | 778 | && !ih_entry_count(item_head(S0, 0)))) && |
696 | (!op_is_left_mergeable | 779 | (!op_is_left_mergeable |
697 | (B_N_PKEY(S0, 0), S0->b_size)), | 780 | (leaf_key(S0, 0), S0->b_size)), |
698 | "vs-10280: item must be mergeable"); | 781 | "vs-10280: item must be mergeable"); |
699 | } | 782 | } |
700 | } | 783 | } |
@@ -704,13 +787,18 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) | |||
704 | 787 | ||
705 | /* CLEANING STOPPED HERE */ | 788 | /* CLEANING STOPPED HERE */ |
706 | 789 | ||
707 | /* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ | 790 | /* |
791 | * Shift shift_num (shift_bytes) items from S[0] to the right neighbor, | ||
792 | * and replace the delimiting key | ||
793 | */ | ||
708 | int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) | 794 | int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) |
709 | { | 795 | { |
710 | // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); | ||
711 | int ret_value; | 796 | int ret_value; |
712 | 797 | ||
713 | /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ | 798 | /* |
799 | * move shift_num (and shift_bytes) items from S[0] to | ||
800 | * right neighbor R[0] | ||
801 | */ | ||
714 | ret_value = | 802 | ret_value = |
715 | leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); | 803 | leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); |
716 | 804 | ||
@@ -725,12 +813,16 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) | |||
725 | 813 | ||
726 | static void leaf_delete_items_entirely(struct buffer_info *bi, | 814 | static void leaf_delete_items_entirely(struct buffer_info *bi, |
727 | int first, int del_num); | 815 | int first, int del_num); |
728 | /* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. | 816 | /* |
729 | If not: | 817 | * If del_bytes == -1, starting from position 'first' delete del_num
730 | If last_first == 0: starting from position 'first' delete del_num-1 items in whole. Delete part of body of | 818 | * items in whole in buffer CUR.
731 | the first item. Part defined by del_bytes. Don't delete first item header. | 819 | * If not:
732 | If last_first == 1: starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of | 820 | * If last_first == 0: starting from position 'first' delete del_num-1
733 | the last item. Part defined by del_bytes. Don't delete last item header. | 821 | * items in whole. Delete part of body of the first item. Part defined by
822 | * del_bytes. Don't delete first item header. | ||
823 | * If last_first == 1: starting from position 'first+1' delete del_num-1 | ||
824 | * items in whole. Delete part of body of the last item. Part defined by | ||
825 | * del_bytes. Don't delete last item header. | ||
734 | */ | 826 | */ |
735 | void leaf_delete_items(struct buffer_info *cur_bi, int last_first, | 827 | void leaf_delete_items(struct buffer_info *cur_bi, int last_first, |
736 | int first, int del_num, int del_bytes) | 828 | int first, int del_num, int del_bytes) |
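A compact way to read those cases is as a planner: del_bytes == -1 means whole items only, otherwise one boundary item survives with part of its body cut. A hypothetical sketch (invented names, mirroring the comment rather than the kernel structures):

	#include <stdio.h>

	static void plan_delete(int last_first, int first, int del_num,
				int del_bytes)
	{
		if (del_bytes == -1) {
			printf("delete items %d..%d entirely\n", first,
			       first + del_num - 1);
		} else if (last_first == 0) {	/* FIRST_TO_LAST */
			printf("delete items %d..%d entirely\n", first,
			       first + del_num - 2);
			printf("cut %d bytes/entries from the head of the next item\n",
			       del_bytes);
		} else {			/* LAST_TO_FIRST */
			printf("delete items %d..%d entirely\n", first + 1,
			       first + del_num - 1);
			printf("cut %d bytes/entries from the tail of item %d\n",
			       del_bytes, first);
		}
	}

	int main(void)
	{
		plan_delete(1, 2, 3, 64);	/* LAST_TO_FIRST example */
		return 0;
	}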
@@ -761,32 +853,43 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first, | |||
761 | leaf_delete_items_entirely(cur_bi, first, del_num); | 853 | leaf_delete_items_entirely(cur_bi, first, del_num); |
762 | else { | 854 | else { |
763 | if (last_first == FIRST_TO_LAST) { | 855 | if (last_first == FIRST_TO_LAST) { |
764 | /* delete del_num-1 items beginning from item in position first */ | 856 | /* |
857 | * delete del_num-1 items beginning from | ||
858 | * item in position first | ||
859 | */ | ||
765 | leaf_delete_items_entirely(cur_bi, first, del_num - 1); | 860 | leaf_delete_items_entirely(cur_bi, first, del_num - 1); |
766 | 861 | ||
767 | /* delete the part of the first item of the bh | 862 | /* |
768 | do not delete item header | 863 | * delete the part of the first item of the bh |
864 | * do not delete item header | ||
769 | */ | 865 | */ |
770 | leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); | 866 | leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); |
771 | } else { | 867 | } else { |
772 | struct item_head *ih; | 868 | struct item_head *ih; |
773 | int len; | 869 | int len; |
774 | 870 | ||
775 | /* delete del_num-1 items beginning from item in position first+1 */ | 871 | /* |
872 | * delete del_num-1 items beginning from | ||
873 | * item in position first+1 | ||
874 | */ | ||
776 | leaf_delete_items_entirely(cur_bi, first + 1, | 875 | leaf_delete_items_entirely(cur_bi, first + 1, |
777 | del_num - 1); | 876 | del_num - 1); |
778 | 877 | ||
779 | ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1); | 878 | ih = item_head(bh, B_NR_ITEMS(bh) - 1); |
780 | if (is_direntry_le_ih(ih)) | 879 | if (is_direntry_le_ih(ih)) |
781 | /* the last item is directory */ | 880 | /* the last item is directory */ |
782 | /* len = numbers of directory entries in this item */ | 881 | /* |
882 | * len = numbers of directory entries | ||
883 | * in this item | ||
884 | */ | ||
783 | len = ih_entry_count(ih); | 885 | len = ih_entry_count(ih); |
784 | else | 886 | else |
785 | /* len = body len of item */ | 887 | /* len = body len of item */ |
786 | len = ih_item_len(ih); | 888 | len = ih_item_len(ih); |
787 | 889 | ||
788 | /* delete the part of the last item of the bh | 890 | /* |
789 | do not delete item header | 891 | * delete the part of the last item of the bh |
892 | * do not delete item header | ||
790 | */ | 893 | */ |
791 | leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, | 894 | leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, |
792 | len - del_bytes, del_bytes); | 895 | len - del_bytes, del_bytes); |
@@ -820,10 +923,10 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before, | |||
820 | zeros_number, ih_item_len(inserted_item_ih)); | 923 | zeros_number, ih_item_len(inserted_item_ih)); |
821 | 924 | ||
822 | /* get item new item must be inserted before */ | 925 | /* get item new item must be inserted before */ |
823 | ih = B_N_PITEM_HEAD(bh, before); | 926 | ih = item_head(bh, before); |
824 | 927 | ||
825 | /* prepare space for the body of new item */ | 928 | /* prepare space for the body of new item */ |
826 | last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size; | 929 | last_loc = nr ? ih_location(&ih[nr - before - 1]) : bh->b_size; |
827 | unmoved_loc = before ? ih_location(ih - 1) : bh->b_size; | 930 | unmoved_loc = before ? ih_location(ih - 1) : bh->b_size; |
828 | 931 | ||
829 | memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih), | 932 | memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih), |
@@ -846,8 +949,8 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before, | |||
846 | 949 | ||
847 | /* change locations */ | 950 | /* change locations */ |
848 | for (i = before; i < nr + 1; i++) { | 951 | for (i = before; i < nr + 1; i++) { |
849 | unmoved_loc -= ih_item_len(&(ih[i - before])); | 952 | unmoved_loc -= ih_item_len(&ih[i - before]); |
850 | put_ih_location(&(ih[i - before]), unmoved_loc); | 953 | put_ih_location(&ih[i - before], unmoved_loc); |
851 | } | 954 | } |
852 | 955 | ||
853 | /* sizes, free space, item number */ | 956 | /* sizes, free space, item number */ |
@@ -867,8 +970,10 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before, | |||
867 | } | 970 | } |
868 | } | 971 | } |
869 | 972 | ||
870 | /* paste paste_size bytes to affected_item_num-th item. | 973 | /* |
871 | When item is a directory, this only prepares space for new entries */ | 974 | * paste paste_size bytes to affected_item_num-th item.
975 | * When item is a directory, this only prepares space for new entries | ||
976 | */ | ||
872 | void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, | 977 | void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, |
873 | int pos_in_item, int paste_size, | 978 | int pos_in_item, int paste_size, |
874 | const char *body, int zeros_number) | 979 | const char *body, int zeros_number) |
@@ -902,9 +1007,9 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, | |||
902 | #endif /* CONFIG_REISERFS_CHECK */ | 1007 | #endif /* CONFIG_REISERFS_CHECK */ |
903 | 1008 | ||
904 | /* item to be appended */ | 1009 | /* item to be appended */ |
905 | ih = B_N_PITEM_HEAD(bh, affected_item_num); | 1010 | ih = item_head(bh, affected_item_num); |
906 | 1011 | ||
907 | last_loc = ih_location(&(ih[nr - affected_item_num - 1])); | 1012 | last_loc = ih_location(&ih[nr - affected_item_num - 1]); |
908 | unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size; | 1013 | unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size; |
909 | 1014 | ||
910 | /* prepare space */ | 1015 | /* prepare space */ |
@@ -913,8 +1018,8 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, | |||
913 | 1018 | ||
914 | /* change locations */ | 1019 | /* change locations */ |
915 | for (i = affected_item_num; i < nr; i++) | 1020 | for (i = affected_item_num; i < nr; i++) |
916 | put_ih_location(&(ih[i - affected_item_num]), | 1021 | put_ih_location(&ih[i - affected_item_num], |
917 | ih_location(&(ih[i - affected_item_num])) - | 1022 | ih_location(&ih[i - affected_item_num]) - |
918 | paste_size); | 1023 | paste_size); |
919 | 1024 | ||
920 | if (body) { | 1025 | if (body) { |
@@ -957,10 +1062,12 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, | |||
957 | } | 1062 | } |
958 | } | 1063 | } |
959 | 1064 | ||
960 | /* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item | 1065 | /* |
961 | does not have free space, so it moves DEHs and remaining records as | 1066 | * cuts DEL_COUNT entries beginning from FROM-th entry. Directory item |
962 | necessary. Return value is size of removed part of directory item | 1067 | * does not have free space, so it moves DEHs and remaining records as |
963 | in bytes. */ | 1068 | * necessary. Return value is size of removed part of directory item |
1069 | * in bytes. | ||
1070 | */ | ||
964 | static int leaf_cut_entries(struct buffer_head *bh, | 1071 | static int leaf_cut_entries(struct buffer_head *bh, |
965 | struct item_head *ih, int from, int del_count) | 1072 | struct item_head *ih, int from, int del_count) |
966 | { | 1073 | { |
@@ -971,12 +1078,14 @@ static int leaf_cut_entries(struct buffer_head *bh, | |||
971 | int cut_records_len; /* length of all removed records */ | 1078 | int cut_records_len; /* length of all removed records */ |
972 | int i; | 1079 | int i; |
973 | 1080 | ||
974 | /* make sure, that item is directory and there are enough entries to | 1081 | /* |
975 | remove */ | 1082 | * make sure that item is directory and there are enough entries to |
1083 | * remove | ||
1084 | */ | ||
976 | RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); | 1085 | RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); |
977 | RFALSE(I_ENTRY_COUNT(ih) < from + del_count, | 1086 | RFALSE(ih_entry_count(ih) < from + del_count, |
978 | "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", | 1087 | "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", |
979 | I_ENTRY_COUNT(ih), from, del_count); | 1088 | ih_entry_count(ih), from, del_count); |
980 | 1089 | ||
981 | if (del_count == 0) | 1090 | if (del_count == 0) |
982 | return 0; | 1091 | return 0; |
@@ -987,22 +1096,24 @@ static int leaf_cut_entries(struct buffer_head *bh, | |||
987 | /* entry head array */ | 1096 | /* entry head array */ |
988 | deh = B_I_DEH(bh, ih); | 1097 | deh = B_I_DEH(bh, ih); |
989 | 1098 | ||
990 | /* first byte of remaining entries, those are BEFORE cut entries | 1099 | /* |
991 | (prev_record) and length of all removed records (cut_records_len) */ | 1100 | * first byte of remaining entries, those are BEFORE cut entries |
1101 | * (prev_record) and length of all removed records (cut_records_len) | ||
1102 | */ | ||
992 | prev_record_offset = | 1103 | prev_record_offset = |
993 | (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih)); | 1104 | (from ? deh_location(&deh[from - 1]) : ih_item_len(ih)); |
994 | cut_records_len = prev_record_offset /*from_record */ - | 1105 | cut_records_len = prev_record_offset /*from_record */ - |
995 | deh_location(&(deh[from + del_count - 1])); | 1106 | deh_location(&deh[from + del_count - 1]); |
996 | prev_record = item + prev_record_offset; | 1107 | prev_record = item + prev_record_offset; |
997 | 1108 | ||
998 | /* adjust locations of remaining entries */ | 1109 | /* adjust locations of remaining entries */ |
999 | for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--) | 1110 | for (i = ih_entry_count(ih) - 1; i > from + del_count - 1; i--) |
1000 | put_deh_location(&(deh[i]), | 1111 | put_deh_location(&deh[i], |
1001 | deh_location(&deh[i]) - | 1112 | deh_location(&deh[i]) - |
1002 | (DEH_SIZE * del_count)); | 1113 | (DEH_SIZE * del_count)); |
1003 | 1114 | ||
1004 | for (i = 0; i < from; i++) | 1115 | for (i = 0; i < from; i++) |
1005 | put_deh_location(&(deh[i]), | 1116 | put_deh_location(&deh[i], |
1006 | deh_location(&deh[i]) - (DEH_SIZE * del_count + | 1117 | deh_location(&deh[i]) - (DEH_SIZE * del_count + |
1007 | cut_records_len)); | 1118 | cut_records_len)); |
1008 | 1119 | ||
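The two adjustment loops differ because the survivors lose different amounts of space: entries indexed after the cut lose only the removed entry heads, while entries before the cut (whose records sit above the removed ones) also slide down over the removed records. A standalone toy of that bookkeeping, with made-up offsets and a stand-in DEH_SIZE:

	#include <stdio.h>

	#define DEH_SIZE 16	/* stand-in value, not the real on-disk size */

	int main(void)
	{
		/* record offsets of 4 entries; cut entry 1 (from=1, del_count=1) */
		int loc[] = { 112, 96, 80, 64 };
		int from = 1, del_count = 1, count = 4, i;
		int cut_records_len = loc[from - 1] - loc[from + del_count - 1];

		/* entries AFTER the cut lose only the removed entry heads */
		for (i = count - 1; i > from + del_count - 1; i--)
			loc[i] -= DEH_SIZE * del_count;

		/* entries BEFORE the cut also slide over the removed records */
		for (i = 0; i < from; i++)
			loc[i] -= DEH_SIZE * del_count + cut_records_len;

		for (i = 0; i < count; i++) {
			if (i >= from && i < from + del_count)
				continue;	/* this entry was removed */
			printf("entry %d record now at %d\n", i, loc[i]);
		}
		return 0;
	}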
@@ -1021,14 +1132,15 @@ static int leaf_cut_entries(struct buffer_head *bh, | |||
1021 | return DEH_SIZE * del_count + cut_records_len; | 1132 | return DEH_SIZE * del_count + cut_records_len; |
1022 | } | 1133 | } |
1023 | 1134 | ||
1024 | /* when cut item is part of regular file | 1135 | /* |
1025 | pos_in_item - first byte that must be cut | 1136 | * when cut item is part of regular file |
1026 | cut_size - number of bytes to be cut beginning from pos_in_item | 1137 | * pos_in_item - first byte that must be cut |
1027 | 1138 | * cut_size - number of bytes to be cut beginning from pos_in_item | |
1028 | when cut item is part of directory | 1139 | * |
1029 | pos_in_item - number of first deleted entry | 1140 | * when cut item is part of directory |
1030 | cut_size - count of deleted entries | 1141 | * pos_in_item - number of first deleted entry |
1031 | */ | 1142 | * cut_size - count of deleted entries |
1143 | */ | ||
1032 | void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | 1144 | void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, |
1033 | int pos_in_item, int cut_size) | 1145 | int pos_in_item, int cut_size) |
1034 | { | 1146 | { |
@@ -1043,7 +1155,7 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | |||
1043 | nr = blkh_nr_item(blkh); | 1155 | nr = blkh_nr_item(blkh); |
1044 | 1156 | ||
1045 | /* item head of truncated item */ | 1157 | /* item head of truncated item */ |
1046 | ih = B_N_PITEM_HEAD(bh, cut_item_num); | 1158 | ih = item_head(bh, cut_item_num); |
1047 | 1159 | ||
1048 | if (is_direntry_le_ih(ih)) { | 1160 | if (is_direntry_le_ih(ih)) { |
1049 | /* first cut entry () */ | 1161 | /* first cut entry () */ |
@@ -1055,7 +1167,6 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | |||
1055 | cut_item_num); | 1167 | cut_item_num); |
1056 | /* change item key by key of first entry in the item */ | 1168 | /* change item key by key of first entry in the item */ |
1057 | set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); | 1169 | set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); |
1058 | /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */ | ||
1059 | } | 1170 | } |
1060 | } else { | 1171 | } else { |
1061 | /* item is direct or indirect */ | 1172 | /* item is direct or indirect */ |
@@ -1089,7 +1200,7 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | |||
1089 | } | 1200 | } |
1090 | 1201 | ||
1091 | /* location of the last item */ | 1202 | /* location of the last item */ |
1092 | last_loc = ih_location(&(ih[nr - cut_item_num - 1])); | 1203 | last_loc = ih_location(&ih[nr - cut_item_num - 1]); |
1093 | 1204 | ||
1094 | /* location of the item, which is remaining at the same place */ | 1205 | /* location of the item, which is remaining at the same place */ |
1095 | unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size; | 1206 | unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size; |
@@ -1108,7 +1219,7 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | |||
1108 | 1219 | ||
1109 | /* change locations */ | 1220 | /* change locations */ |
1110 | for (i = cut_item_num; i < nr; i++) | 1221 | for (i = cut_item_num; i < nr; i++) |
1111 | put_ih_location(&(ih[i - cut_item_num]), | 1222 | put_ih_location(&ih[i - cut_item_num], |
1112 | ih_location(&ih[i - cut_item_num]) + cut_size); | 1223 | ih_location(&ih[i - cut_item_num]) + cut_size); |
1113 | 1224 | ||
1114 | /* size, free space */ | 1225 | /* size, free space */ |
@@ -1156,14 +1267,14 @@ static void leaf_delete_items_entirely(struct buffer_info *bi, | |||
1156 | return; | 1267 | return; |
1157 | } | 1268 | } |
1158 | 1269 | ||
1159 | ih = B_N_PITEM_HEAD(bh, first); | 1270 | ih = item_head(bh, first); |
1160 | 1271 | ||
1161 | /* location of unmovable item */ | 1272 | /* location of unmovable item */ |
1162 | j = (first == 0) ? bh->b_size : ih_location(ih - 1); | 1273 | j = (first == 0) ? bh->b_size : ih_location(ih - 1); |
1163 | 1274 | ||
1164 | /* delete items */ | 1275 | /* delete items */ |
1165 | last_loc = ih_location(&(ih[nr - 1 - first])); | 1276 | last_loc = ih_location(&ih[nr - 1 - first]); |
1166 | last_removed_loc = ih_location(&(ih[del_num - 1])); | 1277 | last_removed_loc = ih_location(&ih[del_num - 1]); |
1167 | 1278 | ||
1168 | memmove(bh->b_data + last_loc + j - last_removed_loc, | 1279 | memmove(bh->b_data + last_loc + j - last_removed_loc, |
1169 | bh->b_data + last_loc, last_removed_loc - last_loc); | 1280 | bh->b_data + last_loc, last_removed_loc - last_loc); |
@@ -1173,8 +1284,8 @@ static void leaf_delete_items_entirely(struct buffer_info *bi, | |||
1173 | 1284 | ||
1174 | /* change item location */ | 1285 | /* change item location */ |
1175 | for (i = first; i < nr - del_num; i++) | 1286 | for (i = first; i < nr - del_num; i++) |
1176 | put_ih_location(&(ih[i - first]), | 1287 | put_ih_location(&ih[i - first], |
1177 | ih_location(&(ih[i - first])) + (j - | 1288 | ih_location(&ih[i - first]) + (j - |
1178 | last_removed_loc)); | 1289 | last_removed_loc)); |
1179 | 1290 | ||
1180 | /* sizes, item number */ | 1291 | /* sizes, item number */ |
@@ -1195,7 +1306,10 @@ static void leaf_delete_items_entirely(struct buffer_info *bi, | |||
1195 | } | 1306 | } |
1196 | } | 1307 | } |
1197 | 1308 | ||
1198 | /* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ | 1309 | /* |
1310 | * paste new_entry_count entries (new_dehs, records) into position | ||
1311 | * before to item_num-th item | ||
1312 | */ | ||
1199 | void leaf_paste_entries(struct buffer_info *bi, | 1313 | void leaf_paste_entries(struct buffer_info *bi, |
1200 | int item_num, | 1314 | int item_num, |
1201 | int before, | 1315 | int before, |
@@ -1213,13 +1327,16 @@ void leaf_paste_entries(struct buffer_info *bi, | |||
1213 | if (new_entry_count == 0) | 1327 | if (new_entry_count == 0) |
1214 | return; | 1328 | return; |
1215 | 1329 | ||
1216 | ih = B_N_PITEM_HEAD(bh, item_num); | 1330 | ih = item_head(bh, item_num); |
1217 | 1331 | ||
1218 | /* make sure, that item is directory, and there are enough records in it */ | 1332 | /* |
1333 | * make sure, that item is directory, and there are enough | ||
1334 | * records in it | ||
1335 | */ | ||
1219 | RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); | 1336 | RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); |
1220 | RFALSE(I_ENTRY_COUNT(ih) < before, | 1337 | RFALSE(ih_entry_count(ih) < before, |
1221 | "10230: there are no entry we paste entries before. entry_count = %d, before = %d", | 1338 | "10230: there are no entry we paste entries before. entry_count = %d, before = %d", |
1222 | I_ENTRY_COUNT(ih), before); | 1339 | ih_entry_count(ih), before); |
1223 | 1340 | ||
1224 | /* first byte of dest item */ | 1341 | /* first byte of dest item */ |
1225 | item = bh->b_data + ih_location(ih); | 1342 | item = bh->b_data + ih_location(ih); |
@@ -1230,21 +1347,21 @@ void leaf_paste_entries(struct buffer_info *bi, | |||
1230 | /* new records will be pasted at this point */ | 1347 | /* new records will be pasted at this point */ |
1231 | insert_point = | 1348 | insert_point = |
1232 | item + | 1349 | item + |
1233 | (before ? deh_location(&(deh[before - 1])) | 1350 | (before ? deh_location(&deh[before - 1]) |
1234 | : (ih_item_len(ih) - paste_size)); | 1351 | : (ih_item_len(ih) - paste_size)); |
1235 | 1352 | ||
1236 | /* adjust locations of records that will be AFTER new records */ | 1353 | /* adjust locations of records that will be AFTER new records */ |
1237 | for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--) | 1354 | for (i = ih_entry_count(ih) - 1; i >= before; i--) |
1238 | put_deh_location(&(deh[i]), | 1355 | put_deh_location(&deh[i], |
1239 | deh_location(&(deh[i])) + | 1356 | deh_location(&deh[i]) + |
1240 | (DEH_SIZE * new_entry_count)); | 1357 | (DEH_SIZE * new_entry_count)); |
1241 | 1358 | ||
1242 | /* adjust locations of records that will be BEFORE new records */ | 1359 | /* adjust locations of records that will be BEFORE new records */ |
1243 | for (i = 0; i < before; i++) | 1360 | for (i = 0; i < before; i++) |
1244 | put_deh_location(&(deh[i]), | 1361 | put_deh_location(&deh[i], |
1245 | deh_location(&(deh[i])) + paste_size); | 1362 | deh_location(&deh[i]) + paste_size); |
1246 | 1363 | ||
1247 | old_entry_num = I_ENTRY_COUNT(ih); | 1364 | old_entry_num = ih_entry_count(ih); |
1248 | put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count); | 1365 | put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count); |
1249 | 1366 | ||
1250 | /* prepare space for pasted records */ | 1367 | /* prepare space for pasted records */ |
@@ -1266,10 +1383,10 @@ void leaf_paste_entries(struct buffer_info *bi, | |||
1266 | 1383 | ||
1267 | /* set locations of new records */ | 1384 | /* set locations of new records */ |
1268 | for (i = 0; i < new_entry_count; i++) { | 1385 | for (i = 0; i < new_entry_count; i++) { |
1269 | put_deh_location(&(deh[i]), | 1386 | put_deh_location(&deh[i], |
1270 | deh_location(&(deh[i])) + | 1387 | deh_location(&deh[i]) + |
1271 | (-deh_location | 1388 | (-deh_location |
1272 | (&(new_dehs[new_entry_count - 1])) + | 1389 | (&new_dehs[new_entry_count - 1]) + |
1273 | insert_point + DEH_SIZE * new_entry_count - | 1390 | insert_point + DEH_SIZE * new_entry_count - |
1274 | item)); | 1391 | item)); |
1275 | } | 1392 | } |
@@ -1277,28 +1394,26 @@ void leaf_paste_entries(struct buffer_info *bi, | |||
1277 | /* change item key if necessary (when we paste before 0-th entry) */ | 1394 | /* change item key if necessary (when we paste before 0-th entry) */
1278 | if (!before) { | 1395 | if (!before) { |
1279 | set_le_ih_k_offset(ih, deh_offset(new_dehs)); | 1396 | set_le_ih_k_offset(ih, deh_offset(new_dehs)); |
1280 | /* memcpy (&ih->ih_key.k_offset, | ||
1281 | &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ | ||
1282 | } | 1397 | } |
1283 | #ifdef CONFIG_REISERFS_CHECK | 1398 | #ifdef CONFIG_REISERFS_CHECK |
1284 | { | 1399 | { |
1285 | int prev, next; | 1400 | int prev, next; |
1286 | /* check record locations */ | 1401 | /* check record locations */ |
1287 | deh = B_I_DEH(bh, ih); | 1402 | deh = B_I_DEH(bh, ih); |
1288 | for (i = 0; i < I_ENTRY_COUNT(ih); i++) { | 1403 | for (i = 0; i < ih_entry_count(ih); i++) { |
1289 | next = | 1404 | next = |
1290 | (i < | 1405 | (i < |
1291 | I_ENTRY_COUNT(ih) - | 1406 | ih_entry_count(ih) - |
1292 | 1) ? deh_location(&(deh[i + 1])) : 0; | 1407 | 1) ? deh_location(&deh[i + 1]) : 0; |
1293 | prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0; | 1408 | prev = (i != 0) ? deh_location(&deh[i - 1]) : 0; |
1294 | 1409 | ||
1295 | if (prev && prev <= deh_location(&(deh[i]))) | 1410 | if (prev && prev <= deh_location(&deh[i])) |
1296 | reiserfs_error(sb_from_bi(bi), "vs-10240", | 1411 | reiserfs_error(sb_from_bi(bi), "vs-10240", |
1297 | "directory item (%h) " | 1412 | "directory item (%h) " |
1298 | "corrupted (prev %a, " | 1413 | "corrupted (prev %a, " |
1299 | "cur(%d) %a)", | 1414 | "cur(%d) %a)", |
1300 | ih, deh + i - 1, i, deh + i); | 1415 | ih, deh + i - 1, i, deh + i); |
1301 | if (next && next >= deh_location(&(deh[i]))) | 1416 | if (next && next >= deh_location(&deh[i])) |
1302 | reiserfs_error(sb_from_bi(bi), "vs-10250", | 1417 | reiserfs_error(sb_from_bi(bi), "vs-10250", |
1303 | "directory item (%h) " | 1418 | "directory item (%h) " |
1304 | "corrupted (cur(%d) %a, " | 1419 | "corrupted (cur(%d) %a, " |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index e825f8b63e6b..cd11358b10c7 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -22,8 +22,10 @@ | |||
22 | #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } | 22 | #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } |
23 | #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); | 23 | #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); |
24 | 24 | ||
25 | // directory item contains array of entry headers. This performs | 25 | /* |
26 | // binary search through that array | 26 | * directory item contains array of entry headers. This performs |
27 | * binary search through that array | ||
28 | */ | ||
27 | static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | 29 | static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) |
28 | { | 30 | { |
29 | struct item_head *ih = de->de_ih; | 31 | struct item_head *ih = de->de_ih; |
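
The INC_DIR_INODE_NLINK/DEC_DIR_INODE_NLINK macros kept as context above treat i_nlink == 1 on a directory as an "overflowed, uncountable" sentinel: once the count would pass REISERFS_LINK_MAX it is pinned at 1 and never bumped or dropped again. A standalone sketch of that policy (LINK_MAX here is a stand-in value, not necessarily the kernel's REISERFS_LINK_MAX):

#include <stdio.h>

#define LINK_MAX 0xffff	/* stand-in only; not the kernel constant */

/*
 * nlink == 1 on a reiserfs directory means "too many subdirectories to
 * count": once set it is neither incremented nor decremented, which is
 * exactly what the two macros above implement.
 */
static unsigned int inc_dir_nlink(unsigned int nlink)
{
	if (nlink == 1)
		return 1;		/* already overflowed: stays put */
	if (++nlink >= LINK_MAX)
		return 1;		/* overflow: collapse to sentinel */
	return nlink;
}

int main(void)
{
	printf("%u\n", inc_dir_nlink(2));		/* 3 */
	printf("%u\n", inc_dir_nlink(LINK_MAX - 1));	/* 1: sentinel */
	printf("%u\n", inc_dir_nlink(1));		/* 1: stays */
	return 0;
}
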
@@ -31,7 +33,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | |||
31 | int rbound, lbound, j; | 33 | int rbound, lbound, j; |
32 | 34 | ||
33 | lbound = 0; | 35 | lbound = 0; |
34 | rbound = I_ENTRY_COUNT(ih) - 1; | 36 | rbound = ih_entry_count(ih) - 1; |
35 | 37 | ||
36 | for (j = (rbound + lbound) / 2; lbound <= rbound; | 38 | for (j = (rbound + lbound) / 2; lbound <= rbound; |
37 | j = (rbound + lbound) / 2) { | 39 | j = (rbound + lbound) / 2) { |
@@ -43,7 +45,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | |||
43 | lbound = j + 1; | 45 | lbound = j + 1; |
44 | continue; | 46 | continue; |
45 | } | 47 | } |
46 | // this is not name found, but matched third key component | 48 | /* this is not name found, but matched third key component */ |
47 | de->de_entry_num = j; | 49 | de->de_entry_num = j; |
48 | return NAME_FOUND; | 50 | return NAME_FOUND; |
49 | } | 51 | } |
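
bin_search_in_dir_item() above is a plain binary search over the entry-header array, keyed on the third key component (the offset). A self-contained sketch with simplified types; the negative return encodes the insertion point that the kernel leaves in de->de_entry_num on NAME_NOT_FOUND:

#include <stdint.h>
#include <stdio.h>

/* toy stand-in for reiserfs_de_head; only the offset matters here */
struct deh { uint64_t offset; };

static int dir_item_bin_search(const struct deh *deh, int count, uint64_t off)
{
	int lbound = 0, rbound = count - 1;

	while (lbound <= rbound) {
		int j = (lbound + rbound) / 2;

		if (off < deh[j].offset)
			rbound = j - 1;
		else if (off > deh[j].offset)
			lbound = j + 1;
		else
			return j;	/* matched third key component */
	}
	return -lbound - 1;		/* not found; insertion point */
}

int main(void)
{
	struct deh item[4] = { { 10 }, { 20 }, { 30 }, { 40 } };

	printf("%d\n", dir_item_bin_search(item, 4, 30));	/* 2 */
	printf("%d\n", dir_item_bin_search(item, 4, 25));	/* -3 */
	return 0;
}
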
@@ -52,17 +54,21 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | |||
52 | return NAME_NOT_FOUND; | 54 | return NAME_NOT_FOUND; |
53 | } | 55 | } |
54 | 56 | ||
55 | // comment? maybe something like set de to point to what the path points to? | 57 | /* |
58 | * comment? maybe something like set de to point to what the path points to? | ||
59 | */ | ||
56 | static inline void set_de_item_location(struct reiserfs_dir_entry *de, | 60 | static inline void set_de_item_location(struct reiserfs_dir_entry *de, |
57 | struct treepath *path) | 61 | struct treepath *path) |
58 | { | 62 | { |
59 | de->de_bh = get_last_bh(path); | 63 | de->de_bh = get_last_bh(path); |
60 | de->de_ih = get_ih(path); | 64 | de->de_ih = tp_item_head(path); |
61 | de->de_deh = B_I_DEH(de->de_bh, de->de_ih); | 65 | de->de_deh = B_I_DEH(de->de_bh, de->de_ih); |
62 | de->de_item_num = PATH_LAST_POSITION(path); | 66 | de->de_item_num = PATH_LAST_POSITION(path); |
63 | } | 67 | } |
64 | 68 | ||
65 | // de_bh, de_ih, de_deh (points to first element of array), de_item_num is set | 69 | /* |
70 | * de_bh, de_ih, de_deh (points to first element of array), de_item_num is set | ||
71 | */ | ||
66 | inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) | 72 | inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) |
67 | { | 73 | { |
68 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; | 74 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; |
@@ -71,17 +77,17 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) | |||
71 | 77 | ||
72 | de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); | 78 | de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); |
73 | de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); | 79 | de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); |
74 | de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh); | 80 | de->de_name = ih_item_body(de->de_bh, de->de_ih) + deh_location(deh); |
75 | if (de->de_name[de->de_namelen - 1] == 0) | 81 | if (de->de_name[de->de_namelen - 1] == 0) |
76 | de->de_namelen = strlen(de->de_name); | 82 | de->de_namelen = strlen(de->de_name); |
77 | } | 83 | } |
78 | 84 | ||
79 | // what entry points to | 85 | /* what entry points to */ |
80 | static inline void set_de_object_key(struct reiserfs_dir_entry *de) | 86 | static inline void set_de_object_key(struct reiserfs_dir_entry *de) |
81 | { | 87 | { |
82 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); | 88 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); |
83 | de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); | 89 | de->de_dir_id = deh_dir_id(&de->de_deh[de->de_entry_num]); |
84 | de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); | 90 | de->de_objectid = deh_objectid(&de->de_deh[de->de_entry_num]); |
85 | } | 91 | } |
86 | 92 | ||
87 | static inline void store_de_entry_key(struct reiserfs_dir_entry *de) | 93 | static inline void store_de_entry_key(struct reiserfs_dir_entry *de) |
@@ -96,21 +102,20 @@ static inline void store_de_entry_key(struct reiserfs_dir_entry *de) | |||
96 | le32_to_cpu(de->de_ih->ih_key.k_dir_id); | 102 | le32_to_cpu(de->de_ih->ih_key.k_dir_id); |
97 | de->de_entry_key.on_disk_key.k_objectid = | 103 | de->de_entry_key.on_disk_key.k_objectid = |
98 | le32_to_cpu(de->de_ih->ih_key.k_objectid); | 104 | le32_to_cpu(de->de_ih->ih_key.k_objectid); |
99 | set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh)); | 105 | set_cpu_key_k_offset(&de->de_entry_key, deh_offset(deh)); |
100 | set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); | 106 | set_cpu_key_k_type(&de->de_entry_key, TYPE_DIRENTRY); |
101 | } | 107 | } |
102 | 108 | ||
103 | /* We assign a key to each directory item, and place multiple entries | 109 | /* |
104 | in a single directory item. A directory item has a key equal to the | 110 | * We assign a key to each directory item, and place multiple entries in a |
105 | key of the first directory entry in it. | 111 | * single directory item. A directory item has a key equal to the key of |
106 | 112 | * the first directory entry in it. | |
107 | This function first calls search_by_key, then, if item whose first | 113 | |
108 | entry matches is not found it looks for the entry inside directory | 114 | * This function first calls search_by_key, then, if item whose first entry |
109 | item found by search_by_key. Fills the path to the entry, and to the | 115 | * matches is not found it looks for the entry inside directory item found |
110 | entry position in the item | 116 | * by search_by_key. Fills the path to the entry, and to the entry position |
111 | 117 | * in the item | |
112 | */ | 118 | */ |
113 | |||
114 | /* The function is NOT SCHEDULE-SAFE! */ | 119 | /* The function is NOT SCHEDULE-SAFE! */ |
115 | int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | 120 | int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, |
116 | struct treepath *path, struct reiserfs_dir_entry *de) | 121 | struct treepath *path, struct reiserfs_dir_entry *de) |
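
set_de_name_and_namelen() in the hunk above locates a name at item body + deh_location and derives its length from the previous entry's location, because records are packed back-to-front; NUL padding is trimmed with strlen(). A simplified sketch of the same arithmetic (toy types; the kernel additionally subtracts SD_SIZE when the entry embeds stat data):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct deh { uint16_t location; };	/* toy entry head */

/*
 * Entry i's record runs from its own location up to the previous
 * entry's location (or the item end for entry 0); names are NUL-padded
 * to an alignment boundary, so a trailing 0 means "use strlen".
 */
static size_t entry_name(const char *item, size_t item_len,
			 const struct deh *deh, int i, const char **name)
{
	size_t end = i ? deh[i - 1].location : item_len;
	size_t len = end - deh[i].location;

	*name = item + deh[i].location;
	if ((*name)[len - 1] == '\0')
		len = strlen(*name);
	return len;
}

int main(void)
{
	/* one item holding two names, packed from the tail backwards */
	char item[16] = { 0 };
	struct deh deh[2] = { { 12 }, { 8 } };
	const char *n;
	size_t l;

	memcpy(item + 12, "foo", 3);
	memcpy(item + 8, "ab", 2);

	l = entry_name(item, sizeof(item), deh, 0, &n);
	printf("%.*s (%zu)\n", (int)l, n, l);	/* foo (3) */
	l = entry_name(item, sizeof(item), deh, 1, &n);
	printf("%.*s (%zu)\n", (int)l, n, l);	/* ab (2) */
	return 0;
}
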
@@ -144,7 +149,7 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | |||
144 | 149 | ||
145 | #ifdef CONFIG_REISERFS_CHECK | 150 | #ifdef CONFIG_REISERFS_CHECK |
146 | if (!is_direntry_le_ih(de->de_ih) || | 151 | if (!is_direntry_le_ih(de->de_ih) || |
147 | COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) { | 152 | COMP_SHORT_KEYS(&de->de_ih->ih_key, key)) { |
148 | print_block(de->de_bh, 0, -1, -1); | 153 | print_block(de->de_bh, 0, -1, -1); |
149 | reiserfs_panic(sb, "vs-7005", "found item %h is not directory " | 154 | reiserfs_panic(sb, "vs-7005", "found item %h is not directory " |
150 | "item or does not belong to the same directory " | 155 | "item or does not belong to the same directory " |
@@ -152,12 +157,17 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | |||
152 | } | 157 | } |
153 | #endif /* CONFIG_REISERFS_CHECK */ | 158 | #endif /* CONFIG_REISERFS_CHECK */ |
154 | 159 | ||
155 | /* binary search in directory item by third componen t of the | 160 | /* |
156 | key. sets de->de_entry_num of de */ | 161 | * binary search in directory item by third component of the |
162 | * key. sets de->de_entry_num of de | ||
163 | */ | ||
157 | retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); | 164 | retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); |
158 | path->pos_in_item = de->de_entry_num; | 165 | path->pos_in_item = de->de_entry_num; |
159 | if (retval != NAME_NOT_FOUND) { | 166 | if (retval != NAME_NOT_FOUND) { |
160 | // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set | 167 | /* |
168 | * ugly, but rename needs de_bh, de_deh, de_name, | ||
169 | * de_namelen, de_objectid set | ||
170 | */ | ||
161 | set_de_name_and_namelen(de); | 171 | set_de_name_and_namelen(de); |
162 | set_de_object_key(de); | 172 | set_de_object_key(de); |
163 | } | 173 | } |
@@ -166,11 +176,12 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | |||
166 | 176 | ||
167 | /* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ | 177 | /* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ |
168 | 178 | ||
169 | /* The third component is hashed, and you can choose from more than | 179 | /* |
170 | one hash function. Per directory hashes are not yet implemented | 180 | * The third component is hashed, and you can choose from more than |
171 | but are thought about. This function should be moved to hashes.c | 181 | * one hash function. Per directory hashes are not yet implemented |
172 | Jedi, please do so. -Hans */ | 182 | * but are thought about. This function should be moved to hashes.c |
173 | 183 | * Jedi, please do so. -Hans | |
184 | */ | ||
174 | static __u32 get_third_component(struct super_block *s, | 185 | static __u32 get_third_component(struct super_block *s, |
175 | const char *name, int len) | 186 | const char *name, int len) |
176 | { | 187 | { |
@@ -183,11 +194,13 @@ static __u32 get_third_component(struct super_block *s, | |||
183 | 194 | ||
184 | res = REISERFS_SB(s)->s_hash_function(name, len); | 195 | res = REISERFS_SB(s)->s_hash_function(name, len); |
185 | 196 | ||
186 | // take bits from 7-th to 30-th including both bounds | 197 | /* take bits from 7-th to 30-th including both bounds */ |
187 | res = GET_HASH_VALUE(res); | 198 | res = GET_HASH_VALUE(res); |
188 | if (res == 0) | 199 | if (res == 0) |
189 | // needed to have no names before "." and ".." those have hash | 200 | /* |
190 | // value == 0 and generation conters 1 and 2 accordingly | 201 | * needed to have no names before "." and ".." those have hash |
202 | * value == 0 and generation counters 1 and 2 respectively | ||
203 | */ | ||
191 | res = 128; | 204 | res = 128; |
192 | return res + MAX_GENERATION_NUMBER; | 205 | return res + MAX_GENERATION_NUMBER; |
193 | } | 206 | } |
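
The bit layout behind get_third_component(): the hash proper occupies bits 7..30 and the low 7 bits hold the generation number that disambiguates colliding names. A sketch assuming the usual reiserfs.h definitions:

#include <stdint.h>
#include <stdio.h>

/* assumed to match the usual reiserfs.h definitions */
#define MAX_GENERATION_NUMBER	 127
#define GET_HASH_VALUE(v)	 ((v) & 0x7fffff80u)	/* bits 7..30 */
#define GET_GENERATION_NUMBER(v) ((v) & 0x7fu)		/* bits 0..6  */

static uint32_t third_component(uint32_t raw_hash)
{
	uint32_t res = GET_HASH_VALUE(raw_hash);

	if (res == 0)
		res = 128;	/* "." and ".." own hash 0, generations 1, 2 */
	return res + MAX_GENERATION_NUMBER;	/* search from the top down */
}

int main(void)
{
	uint32_t off = third_component(0xdeadbeef);

	printf("hash=%#x gen=%u\n", (unsigned)GET_HASH_VALUE(off),
	       (unsigned)GET_GENERATION_NUMBER(off));
	return 0;
}
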
@@ -208,7 +221,7 @@ static int reiserfs_match(struct reiserfs_dir_entry *de, | |||
208 | 221 | ||
209 | /* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ | 222 | /* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ |
210 | 223 | ||
211 | /* used when hash collisions exist */ | 224 | /* used when hash collisions exist */ |
212 | 225 | ||
213 | static int linear_search_in_dir_item(struct cpu_key *key, | 226 | static int linear_search_in_dir_item(struct cpu_key *key, |
214 | struct reiserfs_dir_entry *de, | 227 | struct reiserfs_dir_entry *de, |
@@ -220,7 +233,7 @@ static int linear_search_in_dir_item(struct cpu_key *key, | |||
220 | 233 | ||
221 | i = de->de_entry_num; | 234 | i = de->de_entry_num; |
222 | 235 | ||
223 | if (i == I_ENTRY_COUNT(de->de_ih) || | 236 | if (i == ih_entry_count(de->de_ih) || |
224 | GET_HASH_VALUE(deh_offset(deh + i)) != | 237 | GET_HASH_VALUE(deh_offset(deh + i)) != |
225 | GET_HASH_VALUE(cpu_key_k_offset(key))) { | 238 | GET_HASH_VALUE(cpu_key_k_offset(key))) { |
226 | i--; | 239 | i--; |
@@ -232,43 +245,50 @@ static int linear_search_in_dir_item(struct cpu_key *key, | |||
232 | deh += i; | 245 | deh += i; |
233 | 246 | ||
234 | for (; i >= 0; i--, deh--) { | 247 | for (; i >= 0; i--, deh--) { |
248 | /* hash value does not match, no need to check whole name */ | ||
235 | if (GET_HASH_VALUE(deh_offset(deh)) != | 249 | if (GET_HASH_VALUE(deh_offset(deh)) != |
236 | GET_HASH_VALUE(cpu_key_k_offset(key))) { | 250 | GET_HASH_VALUE(cpu_key_k_offset(key))) { |
237 | // hash value does not match, no need to check whole name | ||
238 | return NAME_NOT_FOUND; | 251 | return NAME_NOT_FOUND; |
239 | } | 252 | } |
240 | 253 | ||
241 | /* mark, that this generation number is used */ | 254 | /* mark that this generation number is used */ |
242 | if (de->de_gen_number_bit_string) | 255 | if (de->de_gen_number_bit_string) |
243 | set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), | 256 | set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), |
244 | de->de_gen_number_bit_string); | 257 | de->de_gen_number_bit_string); |
245 | 258 | ||
246 | // calculate pointer to name and namelen | 259 | /* calculate pointer to name and namelen */ |
247 | de->de_entry_num = i; | 260 | de->de_entry_num = i; |
248 | set_de_name_and_namelen(de); | 261 | set_de_name_and_namelen(de); |
249 | 262 | ||
263 | /* | ||
264 | * de's de_name, de_namelen, de_recordlen are set. | ||
265 | * Fill the rest. | ||
266 | */ | ||
250 | if ((retval = | 267 | if ((retval = |
251 | reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { | 268 | reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { |
252 | // de's de_name, de_namelen, de_recordlen are set. Fill the rest: | ||
253 | 269 | ||
254 | // key of pointed object | 270 | /* key of pointed object */ |
255 | set_de_object_key(de); | 271 | set_de_object_key(de); |
256 | 272 | ||
257 | store_de_entry_key(de); | 273 | store_de_entry_key(de); |
258 | 274 | ||
259 | // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE | 275 | /* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */ |
260 | return retval; | 276 | return retval; |
261 | } | 277 | } |
262 | } | 278 | } |
263 | 279 | ||
264 | if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) | 280 | if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) |
265 | /* we have reached left most entry in the node. In common we | 281 | /* |
266 | have to go to the left neighbor, but if generation counter | 282 | * we have reached the leftmost entry in the node. Usually we |
267 | is 0 already, we know for sure, that there is no name with | 283 | * have to go to the left neighbor, but if generation counter |
268 | the same hash value */ | 284 | * is 0 already, we know for sure, that there is no name with |
269 | // FIXME: this work correctly only because hash value can not | 285 | * the same hash value |
270 | // be 0. Btw, in case of Yura's hash it is probably possible, | 286 | */ |
271 | // so, this is a bug | 287 | /* |
288 | * FIXME: this works correctly only because the hash value cannot | ||
289 | * be 0. Btw, in case of Yura's hash it is probably possible, | ||
290 | * so this is a bug | ||
291 | */ | ||
272 | return NAME_NOT_FOUND; | 292 | return NAME_NOT_FOUND; |
273 | 293 | ||
274 | RFALSE(de->de_item_num, | 294 | RFALSE(de->de_item_num, |
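
linear_search_in_dir_item() marks every generation number already used for the colliding hash in de->de_gen_number_bit_string, and reiserfs_add_entry() later takes the first free one. A plain-C stand-in for the DECLARE_BITMAP/set_bit/find_first_zero_bit combination (assumed semantics, not the kernel helpers themselves):

#include <stdint.h>
#include <stdio.h>

#define MAX_GEN 127	/* mirrors MAX_GENERATION_NUMBER */

static uint64_t used[(MAX_GEN + 1 + 63) / 64];

/* what set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), ...) does */
static void mark_generation(unsigned int g)
{
	used[g / 64] |= 1ull << (g % 64);
}

/* what find_first_zero_bit() does when picking the new name's generation */
static int first_free_generation(void)
{
	for (unsigned int g = 0; g <= MAX_GEN; g++)
		if (!(used[g / 64] & (1ull << (g % 64))))
			return g;
	return -1;	/* 128 colliding names: no generation left */
}

int main(void)
{
	mark_generation(0);
	mark_generation(1);
	printf("next free generation: %d\n", first_free_generation()); /* 2 */
	return 0;
}
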
@@ -277,8 +297,10 @@ static int linear_search_in_dir_item(struct cpu_key *key, | |||
277 | return GOTO_PREVIOUS_ITEM; | 297 | return GOTO_PREVIOUS_ITEM; |
278 | } | 298 | } |
279 | 299 | ||
280 | // may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND | 300 | /* |
281 | // FIXME: should add something like IOERROR | 301 | * may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND |
302 | * FIXME: should add something like IOERROR | ||
303 | */ | ||
282 | static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, | 304 | static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, |
283 | struct treepath *path_to_entry, | 305 | struct treepath *path_to_entry, |
284 | struct reiserfs_dir_entry *de) | 306 | struct reiserfs_dir_entry *de) |
@@ -307,13 +329,19 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, | |||
307 | retval = | 329 | retval = |
308 | linear_search_in_dir_item(&key_to_search, de, name, | 330 | linear_search_in_dir_item(&key_to_search, de, name, |
309 | namelen); | 331 | namelen); |
332 | /* | ||
333 | * there is no need to scan directory anymore. | ||
334 | * Given entry found or does not exist | ||
335 | */ | ||
310 | if (retval != GOTO_PREVIOUS_ITEM) { | 336 | if (retval != GOTO_PREVIOUS_ITEM) { |
311 | /* there is no need to scan directory anymore. Given entry found or does not exist */ | ||
312 | path_to_entry->pos_in_item = de->de_entry_num; | 337 | path_to_entry->pos_in_item = de->de_entry_num; |
313 | return retval; | 338 | return retval; |
314 | } | 339 | } |
315 | 340 | ||
316 | /* there is left neighboring item of this directory and given entry can be there */ | 341 | /* |
342 | * there is left neighboring item of this directory | ||
343 | * and given entry can be there | ||
344 | */ | ||
317 | set_cpu_key_k_offset(&key_to_search, | 345 | set_cpu_key_k_offset(&key_to_search, |
318 | le_ih_k_offset(de->de_ih) - 1); | 346 | le_ih_k_offset(de->de_ih) - 1); |
319 | pathrelse(path_to_entry); | 347 | pathrelse(path_to_entry); |
@@ -341,14 +369,16 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
341 | pathrelse(&path_to_entry); | 369 | pathrelse(&path_to_entry); |
342 | if (retval == NAME_FOUND) { | 370 | if (retval == NAME_FOUND) { |
343 | inode = reiserfs_iget(dir->i_sb, | 371 | inode = reiserfs_iget(dir->i_sb, |
344 | (struct cpu_key *)&(de.de_dir_id)); | 372 | (struct cpu_key *)&de.de_dir_id); |
345 | if (!inode || IS_ERR(inode)) { | 373 | if (!inode || IS_ERR(inode)) { |
346 | reiserfs_write_unlock(dir->i_sb); | 374 | reiserfs_write_unlock(dir->i_sb); |
347 | return ERR_PTR(-EACCES); | 375 | return ERR_PTR(-EACCES); |
348 | } | 376 | } |
349 | 377 | ||
350 | /* Propagate the private flag so we know we're | 378 | /* |
351 | * in the priv tree */ | 379 | * Propagate the private flag so we know we're |
380 | * in the priv tree | ||
381 | */ | ||
352 | if (IS_PRIVATE(dir)) | 382 | if (IS_PRIVATE(dir)) |
353 | inode->i_flags |= S_PRIVATE; | 383 | inode->i_flags |= S_PRIVATE; |
354 | } | 384 | } |
@@ -361,9 +391,9 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
361 | } | 391 | } |
362 | 392 | ||
363 | /* | 393 | /* |
364 | ** looks up the dentry of the parent directory for child. | 394 | * looks up the dentry of the parent directory for child. |
365 | ** taken from ext2_get_parent | 395 | * taken from ext2_get_parent |
366 | */ | 396 | */ |
367 | struct dentry *reiserfs_get_parent(struct dentry *child) | 397 | struct dentry *reiserfs_get_parent(struct dentry *child) |
368 | { | 398 | { |
369 | int retval; | 399 | int retval; |
@@ -384,7 +414,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child) | |||
384 | reiserfs_write_unlock(dir->i_sb); | 414 | reiserfs_write_unlock(dir->i_sb); |
385 | return ERR_PTR(-ENOENT); | 415 | return ERR_PTR(-ENOENT); |
386 | } | 416 | } |
387 | inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); | 417 | inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&de.de_dir_id); |
388 | reiserfs_write_unlock(dir->i_sb); | 418 | reiserfs_write_unlock(dir->i_sb); |
389 | 419 | ||
390 | return d_obtain_alias(inode); | 420 | return d_obtain_alias(inode); |
@@ -406,8 +436,13 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
406 | struct reiserfs_dir_entry de; | 436 | struct reiserfs_dir_entry de; |
407 | DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); | 437 | DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); |
408 | int gen_number; | 438 | int gen_number; |
409 | char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc | 439 | |
410 | if we create file with short name */ | 440 | /* |
441 | * 48 bytes now and we avoid kmalloc if we | ||
442 | * create a file with a short name | ||
443 | */ | ||
444 | char small_buf[32 + DEH_SIZE]; | ||
445 | |||
411 | char *buffer; | 446 | char *buffer; |
412 | int buflen, paste_size; | 447 | int buflen, paste_size; |
413 | int retval; | 448 | int retval; |
@@ -439,21 +474,30 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
439 | (get_inode_sd_version(dir) == | 474 | (get_inode_sd_version(dir) == |
440 | STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; | 475 | STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; |
441 | 476 | ||
442 | /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ | 477 | /* |
478 | * fill buffer : directory entry head, name[, dir objectid | , | ||
479 | * stat data | ,stat data, dir objectid ] | ||
480 | */ | ||
443 | deh = (struct reiserfs_de_head *)buffer; | 481 | deh = (struct reiserfs_de_head *)buffer; |
444 | deh->deh_location = 0; /* JDM Endian safe if 0 */ | 482 | deh->deh_location = 0; /* JDM Endian safe if 0 */ |
445 | put_deh_offset(deh, cpu_key_k_offset(&entry_key)); | 483 | put_deh_offset(deh, cpu_key_k_offset(&entry_key)); |
446 | deh->deh_state = 0; /* JDM Endian safe if 0 */ | 484 | deh->deh_state = 0; /* JDM Endian safe if 0 */ |
447 | /* put key (ino analog) to de */ | 485 | /* put key (ino analog) to de */ |
448 | deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */ | 486 | |
449 | deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */ | 487 | /* safe: k_dir_id is le */ |
488 | deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; | ||
489 | /* safe: k_objectid is le */ | ||
490 | deh->deh_objectid = INODE_PKEY(inode)->k_objectid; | ||
450 | 491 | ||
451 | /* copy name */ | 492 | /* copy name */ |
452 | memcpy((char *)(deh + 1), name, namelen); | 493 | memcpy((char *)(deh + 1), name, namelen); |
453 | /* pad with 0s to the 4 byte boundary */ | 494 | /* pad with 0s to the 4 byte boundary */ |
454 | padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); | 495 | padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); |
455 | 496 | ||
456 | /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ | 497 | /* |
498 | * entry is ready to be pasted into tree, set 'visibility' | ||
499 | * and 'stat data in entry' attributes | ||
500 | */ | ||
457 | mark_de_without_sd(deh); | 501 | mark_de_without_sd(deh); |
458 | visible ? mark_de_visible(deh) : mark_de_hidden(deh); | 502 | visible ? mark_de_visible(deh) : mark_de_hidden(deh); |
459 | 503 | ||
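
The buffer assembled in this hunk is the on-disk format of one directory entry: a fixed-size head, the name, then NUL padding. A compilable sketch under simplified assumptions (native-endian fields instead of the on-disk little-endian ones, 4-byte rounding per the comment above):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* simplified layout; the real reiserfs_de_head uses little-endian fields */
struct de_head {
	uint32_t offset;
	uint32_t dir_id;
	uint32_t objectid;
	uint16_t location;	/* 0 here; fixed up when pasted into the item */
	uint16_t state;
} __attribute__((packed));

#define DEH_SIZE	sizeof(struct de_head)
#define ROUND_UP4(x)	(((x) + 3u) & ~3u)	/* per the comment above */

/* lay out [de_head][name][NUL padding] the way the hunk above does */
static size_t build_entry(char *buf, uint32_t off, uint32_t dir_id,
			  uint32_t oid, const char *name, size_t namelen)
{
	struct de_head *deh = (struct de_head *)buf;

	deh->location = 0;		/* endian-safe while it is 0 */
	deh->state = 0;
	deh->offset = off;
	deh->dir_id = dir_id;		/* key of the pointed-to object */
	deh->objectid = oid;

	memcpy(buf + DEH_SIZE, name, namelen);
	memset(buf + DEH_SIZE + namelen, 0, ROUND_UP4(namelen) - namelen);
	return DEH_SIZE + ROUND_UP4(namelen);
}

int main(void)
{
	char buf[64];
	size_t paste_size = build_entry(buf, 0x12345, 4, 5, "notes", 5);

	printf("paste size: %zu\n", paste_size);	/* 16 + 8 = 24 */
	return 0;
}
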
@@ -499,7 +543,8 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
499 | /* update max-hash-collisions counter in reiserfs_sb_info */ | 543 | /* update max-hash-collisions counter in reiserfs_sb_info */ |
500 | PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); | 544 | PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); |
501 | 545 | ||
502 | if (gen_number != 0) { /* we need to re-search for the insertion point */ | 546 | /* we need to re-search for the insertion point */ |
547 | if (gen_number != 0) { | ||
503 | if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != | 548 | if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != |
504 | NAME_NOT_FOUND) { | 549 | NAME_NOT_FOUND) { |
505 | reiserfs_warning(dir->i_sb, "vs-7032", | 550 | reiserfs_warning(dir->i_sb, "vs-7032", |
@@ -527,18 +572,19 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
527 | dir->i_size += paste_size; | 572 | dir->i_size += paste_size; |
528 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; | 573 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; |
529 | if (!S_ISDIR(inode->i_mode) && visible) | 574 | if (!S_ISDIR(inode->i_mode) && visible) |
530 | // reiserfs_mkdir or reiserfs_rename will do that by itself | 575 | /* reiserfs_mkdir or reiserfs_rename will do that by itself */ |
531 | reiserfs_update_sd(th, dir); | 576 | reiserfs_update_sd(th, dir); |
532 | 577 | ||
533 | reiserfs_check_path(&path); | 578 | reiserfs_check_path(&path); |
534 | return 0; | 579 | return 0; |
535 | } | 580 | } |
536 | 581 | ||
537 | /* quota utility function, call if you've had to abort after calling | 582 | /* |
538 | ** new_inode_init, and have not called reiserfs_new_inode yet. | 583 | * quota utility function, call if you've had to abort after calling |
539 | ** This should only be called on inodes that do not have stat data | 584 | * new_inode_init, and have not called reiserfs_new_inode yet. |
540 | ** inserted into the tree yet. | 585 | * This should only be called on inodes that do not have stat data |
541 | */ | 586 | * inserted into the tree yet. |
587 | */ | ||
542 | static int drop_new_inode(struct inode *inode) | 588 | static int drop_new_inode(struct inode *inode) |
543 | { | 589 | { |
544 | dquot_drop(inode); | 590 | dquot_drop(inode); |
@@ -548,18 +594,23 @@ static int drop_new_inode(struct inode *inode) | |||
548 | return 0; | 594 | return 0; |
549 | } | 595 | } |
550 | 596 | ||
551 | /* utility function that does setup for reiserfs_new_inode. | 597 | /* |
552 | ** dquot_initialize needs lots of credits so it's better to have it | 598 | * utility function that does setup for reiserfs_new_inode. |
553 | ** outside of a transaction, so we had to pull some bits of | 599 | * dquot_initialize needs lots of credits so it's better to have it |
554 | ** reiserfs_new_inode out into this func. | 600 | * outside of a transaction, so we had to pull some bits of |
555 | */ | 601 | * reiserfs_new_inode out into this func. |
602 | */ | ||
556 | static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) | 603 | static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) |
557 | { | 604 | { |
558 | /* Make inode invalid - just in case we are going to drop it before | 605 | /* |
559 | * the initialization happens */ | 606 | * Make inode invalid - just in case we are going to drop it before |
607 | * the initialization happens | ||
608 | */ | ||
560 | INODE_PKEY(inode)->k_objectid = 0; | 609 | INODE_PKEY(inode)->k_objectid = 0; |
561 | /* the quota init calls have to know who to charge the quota to, so | 610 | |
562 | ** we have to set uid and gid here | 611 | /* |
612 | * the quota init calls have to know who to charge the quota to, so | ||
613 | * we have to set uid and gid here | ||
563 | */ | 614 | */ |
564 | inode_init_owner(inode, dir, mode); | 615 | inode_init_owner(inode, dir, mode); |
565 | dquot_initialize(inode); | 616 | dquot_initialize(inode); |
@@ -571,7 +622,10 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod | |||
571 | { | 622 | { |
572 | int retval; | 623 | int retval; |
573 | struct inode *inode; | 624 | struct inode *inode; |
574 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 625 | /* |
626 | * We need blocks for transaction + (user+group)*(quotas | ||
627 | * for new inode + update of quota for directory owner) | ||
628 | */ | ||
575 | int jbegin_count = | 629 | int jbegin_count = |
576 | JOURNAL_PER_BALANCE_CNT * 2 + | 630 | JOURNAL_PER_BALANCE_CNT * 2 + |
577 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | 631 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + |
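
The jbegin_count expression above is just worst-case accounting; a sketch of the same arithmetic, with the quota block counts as parameters since they depend on the mount (JOURNAL_PER_BALANCE_CNT assumed to be 3, as in reiserfs.h):

#include <stdio.h>

#define JOURNAL_PER_BALANCE_CNT 3	/* assumed value; see reiserfs.h */

/*
 * Worst case for create: two tree balancings plus, for both the user
 * and the group quota, the blocks to initialize the new inode's quota
 * and to update the directory owner's (0 each when quotas are off).
 */
static int create_credits(int quota_init_blocks, int quota_trans_blocks)
{
	return JOURNAL_PER_BALANCE_CNT * 2 +
	       2 * (quota_init_blocks + quota_trans_blocks);
}

int main(void)
{
	printf("no quotas: %d credits\n", create_credits(0, 0));	/* 6 */
	return 0;
}
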
@@ -618,7 +672,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod | |||
618 | int err; | 672 | int err; |
619 | drop_nlink(inode); | 673 | drop_nlink(inode); |
620 | reiserfs_update_sd(&th, inode); | 674 | reiserfs_update_sd(&th, inode); |
621 | err = journal_end(&th, dir->i_sb, jbegin_count); | 675 | err = journal_end(&th); |
622 | if (err) | 676 | if (err) |
623 | retval = err; | 677 | retval = err; |
624 | unlock_new_inode(inode); | 678 | unlock_new_inode(inode); |
@@ -630,9 +684,9 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod | |||
630 | 684 | ||
631 | unlock_new_inode(inode); | 685 | unlock_new_inode(inode); |
632 | d_instantiate(dentry, inode); | 686 | d_instantiate(dentry, inode); |
633 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 687 | retval = journal_end(&th); |
634 | 688 | ||
635 | out_failed: | 689 | out_failed: |
636 | reiserfs_write_unlock(dir->i_sb); | 690 | reiserfs_write_unlock(dir->i_sb); |
637 | return retval; | 691 | return retval; |
638 | } | 692 | } |
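
The journal_end() calls in this function also show the API change running through the whole patch: the transaction handle now carries its superblock, so the sb and block-count arguments are gone. A stubbed, compilable sketch of the resulting call pattern (the types here are stand-ins, not the kernel's):

#include <stdio.h>

/* stubbed stand-ins so the pattern compiles outside the kernel */
struct super_block { int dummy; };
struct reiserfs_transaction_handle { struct super_block *t_super; };

static int journal_begin(struct reiserfs_transaction_handle *th,
			 struct super_block *sb, int nblocks)
{
	th->t_super = sb;	/* the handle remembers its superblock... */
	(void)nblocks;
	return 0;
}

static int journal_end(struct reiserfs_transaction_handle *th)
{
	return th->t_super ? 0 : -1;	/* ...so end() needs no sb/count */
}

int main(void)
{
	struct super_block sb;
	struct reiserfs_transaction_handle th;
	int retval, err;

	retval = journal_begin(&th, &sb, 6);
	/* ... modify the tree ... */
	err = journal_end(&th);
	if (!retval && err)
		retval = err;	/* error paths fold end()'s result in */
	printf("retval = %d\n", retval);
	return 0;
}
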
@@ -644,7 +698,10 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode | |||
644 | struct inode *inode; | 698 | struct inode *inode; |
645 | struct reiserfs_transaction_handle th; | 699 | struct reiserfs_transaction_handle th; |
646 | struct reiserfs_security_handle security; | 700 | struct reiserfs_security_handle security; |
647 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 701 | /* |
702 | * We need blocks for transaction + (user+group)*(quotas | ||
703 | * for new inode + update of quota for directory owner) | ||
704 | */ | ||
648 | int jbegin_count = | 705 | int jbegin_count = |
649 | JOURNAL_PER_BALANCE_CNT * 3 + | 706 | JOURNAL_PER_BALANCE_CNT * 3 + |
650 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | 707 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + |
@@ -685,7 +742,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode | |||
685 | inode->i_op = &reiserfs_special_inode_operations; | 742 | inode->i_op = &reiserfs_special_inode_operations; |
686 | init_special_inode(inode, inode->i_mode, rdev); | 743 | init_special_inode(inode, inode->i_mode, rdev); |
687 | 744 | ||
688 | //FIXME: needed for block and char devices only | 745 | /* FIXME: needed for block and char devices only */ |
689 | reiserfs_update_sd(&th, inode); | 746 | reiserfs_update_sd(&th, inode); |
690 | 747 | ||
691 | reiserfs_update_inode_transaction(inode); | 748 | reiserfs_update_inode_transaction(inode); |
@@ -698,7 +755,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode | |||
698 | int err; | 755 | int err; |
699 | drop_nlink(inode); | 756 | drop_nlink(inode); |
700 | reiserfs_update_sd(&th, inode); | 757 | reiserfs_update_sd(&th, inode); |
701 | err = journal_end(&th, dir->i_sb, jbegin_count); | 758 | err = journal_end(&th); |
702 | if (err) | 759 | if (err) |
703 | retval = err; | 760 | retval = err; |
704 | unlock_new_inode(inode); | 761 | unlock_new_inode(inode); |
@@ -708,9 +765,9 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode | |||
708 | 765 | ||
709 | unlock_new_inode(inode); | 766 | unlock_new_inode(inode); |
710 | d_instantiate(dentry, inode); | 767 | d_instantiate(dentry, inode); |
711 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 768 | retval = journal_end(&th); |
712 | 769 | ||
713 | out_failed: | 770 | out_failed: |
714 | reiserfs_write_unlock(dir->i_sb); | 771 | reiserfs_write_unlock(dir->i_sb); |
715 | return retval; | 772 | return retval; |
716 | } | 773 | } |
@@ -721,7 +778,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
721 | struct inode *inode; | 778 | struct inode *inode; |
722 | struct reiserfs_transaction_handle th; | 779 | struct reiserfs_transaction_handle th; |
723 | struct reiserfs_security_handle security; | 780 | struct reiserfs_security_handle security; |
724 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 781 | /* |
782 | * We need blocks for transaction + (user+group)*(quotas | ||
783 | * for new inode + update of quota for directory owner) | ||
784 | */ | ||
725 | int jbegin_count = | 785 | int jbegin_count = |
726 | JOURNAL_PER_BALANCE_CNT * 3 + | 786 | JOURNAL_PER_BALANCE_CNT * 3 + |
727 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | 787 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + |
@@ -730,7 +790,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
730 | dquot_initialize(dir); | 790 | dquot_initialize(dir); |
731 | 791 | ||
732 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 792 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
733 | /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ | 793 | /* |
794 | * set flag that new packing locality created and new blocks | ||
795 | * for the content of that directory are not displaced yet | ||
796 | */ | ||
734 | REISERFS_I(dir)->new_packing_locality = 1; | 797 | REISERFS_I(dir)->new_packing_locality = 1; |
735 | #endif | 798 | #endif |
736 | mode = S_IFDIR | mode; | 799 | mode = S_IFDIR | mode; |
@@ -754,8 +817,9 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
754 | goto out_failed; | 817 | goto out_failed; |
755 | } | 818 | } |
756 | 819 | ||
757 | /* inc the link count now, so another writer doesn't overflow it while | 820 | /* |
758 | ** we sleep later on. | 821 | * inc the link count now, so another writer doesn't overflow |
822 | * it while we sleep later on. | ||
759 | */ | 823 | */ |
760 | INC_DIR_INODE_NLINK(dir) | 824 | INC_DIR_INODE_NLINK(dir) |
761 | 825 | ||
@@ -774,7 +838,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
774 | inode->i_op = &reiserfs_dir_inode_operations; | 838 | inode->i_op = &reiserfs_dir_inode_operations; |
775 | inode->i_fop = &reiserfs_dir_operations; | 839 | inode->i_fop = &reiserfs_dir_operations; |
776 | 840 | ||
777 | // note, _this_ add_entry will not update dir's stat data | 841 | /* note, _this_ add_entry will not update dir's stat data */ |
778 | retval = | 842 | retval = |
779 | reiserfs_add_entry(&th, dir, dentry->d_name.name, | 843 | reiserfs_add_entry(&th, dir, dentry->d_name.name, |
780 | dentry->d_name.len, inode, 1 /*visible */ ); | 844 | dentry->d_name.len, inode, 1 /*visible */ ); |
@@ -783,19 +847,19 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
783 | clear_nlink(inode); | 847 | clear_nlink(inode); |
784 | DEC_DIR_INODE_NLINK(dir); | 848 | DEC_DIR_INODE_NLINK(dir); |
785 | reiserfs_update_sd(&th, inode); | 849 | reiserfs_update_sd(&th, inode); |
786 | err = journal_end(&th, dir->i_sb, jbegin_count); | 850 | err = journal_end(&th); |
787 | if (err) | 851 | if (err) |
788 | retval = err; | 852 | retval = err; |
789 | unlock_new_inode(inode); | 853 | unlock_new_inode(inode); |
790 | iput(inode); | 854 | iput(inode); |
791 | goto out_failed; | 855 | goto out_failed; |
792 | } | 856 | } |
793 | // the above add_entry did not update dir's stat data | 857 | /* the above add_entry did not update dir's stat data */ |
794 | reiserfs_update_sd(&th, dir); | 858 | reiserfs_update_sd(&th, dir); |
795 | 859 | ||
796 | unlock_new_inode(inode); | 860 | unlock_new_inode(inode); |
797 | d_instantiate(dentry, inode); | 861 | d_instantiate(dentry, inode); |
798 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 862 | retval = journal_end(&th); |
799 | out_failed: | 863 | out_failed: |
800 | reiserfs_write_unlock(dir->i_sb); | 864 | reiserfs_write_unlock(dir->i_sb); |
801 | return retval; | 865 | return retval; |
@@ -803,10 +867,11 @@ out_failed: | |||
803 | 867 | ||
804 | static inline int reiserfs_empty_dir(struct inode *inode) | 868 | static inline int reiserfs_empty_dir(struct inode *inode) |
805 | { | 869 | { |
806 | /* we can cheat because an old format dir cannot have | 870 | /* |
807 | ** EMPTY_DIR_SIZE, and a new format dir cannot have | 871 | * we can cheat because an old format dir cannot have |
808 | ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, | 872 | * EMPTY_DIR_SIZE, and a new format dir cannot have |
809 | ** regardless of disk format version, the directory is empty. | 873 | * EMPTY_DIR_SIZE_V1. So, if the inode is either size, |
874 | * regardless of disk format version, the directory is empty. | ||
810 | */ | 875 | */ |
811 | if (inode->i_size != EMPTY_DIR_SIZE && | 876 | if (inode->i_size != EMPTY_DIR_SIZE && |
812 | inode->i_size != EMPTY_DIR_SIZE_V1) { | 877 | inode->i_size != EMPTY_DIR_SIZE_V1) { |
@@ -824,10 +889,12 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
824 | INITIALIZE_PATH(path); | 889 | INITIALIZE_PATH(path); |
825 | struct reiserfs_dir_entry de; | 890 | struct reiserfs_dir_entry de; |
826 | 891 | ||
827 | /* we will be doing 2 balancings and update 2 stat data, we change quotas | 892 | /* |
828 | * of the owner of the directory and of the owner of the parent directory. | 893 | * we will be doing 2 balancings and update 2 stat data, we |
829 | * The quota structure is possibly deleted only on last iput => outside | 894 | * change quotas of the owner of the directory and of the owner |
830 | * of this transaction */ | 895 | * of the parent directory. The quota structure is possibly |
896 | * deleted only on last iput => outside of this transaction | ||
897 | */ | ||
831 | jbegin_count = | 898 | jbegin_count = |
832 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + | 899 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + |
833 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); | 900 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); |
@@ -856,8 +923,9 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
856 | reiserfs_update_inode_transaction(dir); | 923 | reiserfs_update_inode_transaction(dir); |
857 | 924 | ||
858 | if (de.de_objectid != inode->i_ino) { | 925 | if (de.de_objectid != inode->i_ino) { |
859 | // FIXME: compare key of an object and a key found in the | 926 | /* |
860 | // entry | 927 | * FIXME: compare key of an object and a key found in the entry |
928 | */ | ||
861 | retval = -EIO; | 929 | retval = -EIO; |
862 | goto end_rmdir; | 930 | goto end_rmdir; |
863 | } | 931 | } |
@@ -867,7 +935,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
867 | } | 935 | } |
868 | 936 | ||
869 | /* cut entry from dir directory */ | 937 | /* cut entry from dir directory */ |
870 | retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, /* page */ | 938 | retval = reiserfs_cut_from_item(&th, &path, &de.de_entry_key, |
939 | dir, NULL, /* page */ | ||
871 | 0 /*new file size - not used here */ ); | 940 | 0 /*new file size - not used here */ ); |
872 | if (retval < 0) | 941 | if (retval < 0) |
873 | goto end_rmdir; | 942 | goto end_rmdir; |
@@ -888,18 +957,20 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
888 | /* prevent empty directory from getting lost */ | 957 | /* prevent empty directory from getting lost */ |
889 | add_save_link(&th, inode, 0 /* not truncate */ ); | 958 | add_save_link(&th, inode, 0 /* not truncate */ ); |
890 | 959 | ||
891 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 960 | retval = journal_end(&th); |
892 | reiserfs_check_path(&path); | 961 | reiserfs_check_path(&path); |
893 | out_rmdir: | 962 | out_rmdir: |
894 | reiserfs_write_unlock(dir->i_sb); | 963 | reiserfs_write_unlock(dir->i_sb); |
895 | return retval; | 964 | return retval; |
896 | 965 | ||
897 | end_rmdir: | 966 | end_rmdir: |
898 | /* we must release path, because we did not call | 967 | /* |
899 | reiserfs_cut_from_item, or reiserfs_cut_from_item does not | 968 | * we must release path, because we did not call |
900 | release path if operation was not complete */ | 969 | * reiserfs_cut_from_item, or reiserfs_cut_from_item does not |
970 | * release path if operation was not complete | ||
971 | */ | ||
901 | pathrelse(&path); | 972 | pathrelse(&path); |
902 | err = journal_end(&th, dir->i_sb, jbegin_count); | 973 | err = journal_end(&th); |
903 | reiserfs_write_unlock(dir->i_sb); | 974 | reiserfs_write_unlock(dir->i_sb); |
904 | return err ? err : retval; | 975 | return err ? err : retval; |
905 | } | 976 | } |
@@ -918,10 +989,13 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) | |||
918 | 989 | ||
919 | inode = dentry->d_inode; | 990 | inode = dentry->d_inode; |
920 | 991 | ||
921 | /* in this transaction we can be doing at max two balancings and update | 992 | /* |
922 | * two stat datas, we change quotas of the owner of the directory and of | 993 | * in this transaction we can be doing at max two balancings and |
923 | * the owner of the parent directory. The quota structure is possibly | 994 | * update two stat datas, we change quotas of the owner of the |
924 | * deleted only on iput => outside of this transaction */ | 995 | * directory and of the owner of the parent directory. The quota |
996 | * structure is possibly deleted only on iput => outside of | ||
997 | * this transaction | ||
998 | */ | ||
925 | jbegin_count = | 999 | jbegin_count = |
926 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + | 1000 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + |
927 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); | 1001 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); |
@@ -946,8 +1020,9 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) | |||
946 | reiserfs_update_inode_transaction(dir); | 1020 | reiserfs_update_inode_transaction(dir); |
947 | 1021 | ||
948 | if (de.de_objectid != inode->i_ino) { | 1022 | if (de.de_objectid != inode->i_ino) { |
949 | // FIXME: compare key of an object and a key found in the | 1023 | /* |
950 | // entry | 1024 | * FIXME: compare key of an object and a key found in the entry |
1025 | */ | ||
951 | retval = -EIO; | 1026 | retval = -EIO; |
952 | goto end_unlink; | 1027 | goto end_unlink; |
953 | } | 1028 | } |
@@ -968,7 +1043,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) | |||
968 | savelink = inode->i_nlink; | 1043 | savelink = inode->i_nlink; |
969 | 1044 | ||
970 | retval = | 1045 | retval = |
971 | reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, | 1046 | reiserfs_cut_from_item(&th, &path, &de.de_entry_key, dir, NULL, |
972 | 0); | 1047 | 0); |
973 | if (retval < 0) { | 1048 | if (retval < 0) { |
974 | inc_nlink(inode); | 1049 | inc_nlink(inode); |
@@ -985,18 +1060,18 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) | |||
985 | /* prevent file from getting lost */ | 1060 | /* prevent file from getting lost */ |
986 | add_save_link(&th, inode, 0 /* not truncate */ ); | 1061 | add_save_link(&th, inode, 0 /* not truncate */ ); |
987 | 1062 | ||
988 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 1063 | retval = journal_end(&th); |
989 | reiserfs_check_path(&path); | 1064 | reiserfs_check_path(&path); |
990 | reiserfs_write_unlock(dir->i_sb); | 1065 | reiserfs_write_unlock(dir->i_sb); |
991 | return retval; | 1066 | return retval; |
992 | 1067 | ||
993 | end_unlink: | 1068 | end_unlink: |
994 | pathrelse(&path); | 1069 | pathrelse(&path); |
995 | err = journal_end(&th, dir->i_sb, jbegin_count); | 1070 | err = journal_end(&th); |
996 | reiserfs_check_path(&path); | 1071 | reiserfs_check_path(&path); |
997 | if (err) | 1072 | if (err) |
998 | retval = err; | 1073 | retval = err; |
999 | out_unlink: | 1074 | out_unlink: |
1000 | reiserfs_write_unlock(dir->i_sb); | 1075 | reiserfs_write_unlock(dir->i_sb); |
1001 | return retval; | 1076 | return retval; |
1002 | } | 1077 | } |
@@ -1011,7 +1086,10 @@ static int reiserfs_symlink(struct inode *parent_dir, | |||
1011 | struct reiserfs_transaction_handle th; | 1086 | struct reiserfs_transaction_handle th; |
1012 | struct reiserfs_security_handle security; | 1087 | struct reiserfs_security_handle security; |
1013 | int mode = S_IFLNK | S_IRWXUGO; | 1088 | int mode = S_IFLNK | S_IRWXUGO; |
1014 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 1089 | /* |
1090 | * We need blocks for transaction + (user+group)*(quotas for | ||
1091 | * new inode + update of quota for directory owner) | ||
1092 | */ | ||
1015 | int jbegin_count = | 1093 | int jbegin_count = |
1016 | JOURNAL_PER_BALANCE_CNT * 3 + | 1094 | JOURNAL_PER_BALANCE_CNT * 3 + |
1017 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + | 1095 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + |
@@ -1070,17 +1148,13 @@ static int reiserfs_symlink(struct inode *parent_dir, | |||
1070 | inode->i_op = &reiserfs_symlink_inode_operations; | 1148 | inode->i_op = &reiserfs_symlink_inode_operations; |
1071 | inode->i_mapping->a_ops = &reiserfs_address_space_operations; | 1149 | inode->i_mapping->a_ops = &reiserfs_address_space_operations; |
1072 | 1150 | ||
1073 | // must be sure this inode is written with this transaction | ||
1074 | // | ||
1075 | //reiserfs_update_sd (&th, inode, READ_BLOCKS); | ||
1076 | |||
1077 | retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, | 1151 | retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, |
1078 | dentry->d_name.len, inode, 1 /*visible */ ); | 1152 | dentry->d_name.len, inode, 1 /*visible */ ); |
1079 | if (retval) { | 1153 | if (retval) { |
1080 | int err; | 1154 | int err; |
1081 | drop_nlink(inode); | 1155 | drop_nlink(inode); |
1082 | reiserfs_update_sd(&th, inode); | 1156 | reiserfs_update_sd(&th, inode); |
1083 | err = journal_end(&th, parent_dir->i_sb, jbegin_count); | 1157 | err = journal_end(&th); |
1084 | if (err) | 1158 | if (err) |
1085 | retval = err; | 1159 | retval = err; |
1086 | unlock_new_inode(inode); | 1160 | unlock_new_inode(inode); |
@@ -1090,8 +1164,8 @@ static int reiserfs_symlink(struct inode *parent_dir, | |||
1090 | 1164 | ||
1091 | unlock_new_inode(inode); | 1165 | unlock_new_inode(inode); |
1092 | d_instantiate(dentry, inode); | 1166 | d_instantiate(dentry, inode); |
1093 | retval = journal_end(&th, parent_dir->i_sb, jbegin_count); | 1167 | retval = journal_end(&th); |
1094 | out_failed: | 1168 | out_failed: |
1095 | reiserfs_write_unlock(parent_dir->i_sb); | 1169 | reiserfs_write_unlock(parent_dir->i_sb); |
1096 | return retval; | 1170 | return retval; |
1097 | } | 1171 | } |
@@ -1102,7 +1176,10 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1102 | int retval; | 1176 | int retval; |
1103 | struct inode *inode = old_dentry->d_inode; | 1177 | struct inode *inode = old_dentry->d_inode; |
1104 | struct reiserfs_transaction_handle th; | 1178 | struct reiserfs_transaction_handle th; |
1105 | /* We need blocks for transaction + update of quotas for the owners of the directory */ | 1179 | /* |
1180 | * We need blocks for transaction + update of quotas for | ||
1181 | * the owners of the directory | ||
1182 | */ | ||
1106 | int jbegin_count = | 1183 | int jbegin_count = |
1107 | JOURNAL_PER_BALANCE_CNT * 3 + | 1184 | JOURNAL_PER_BALANCE_CNT * 3 + |
1108 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); | 1185 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); |
@@ -1111,7 +1188,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1111 | 1188 | ||
1112 | reiserfs_write_lock(dir->i_sb); | 1189 | reiserfs_write_lock(dir->i_sb); |
1113 | if (inode->i_nlink >= REISERFS_LINK_MAX) { | 1190 | if (inode->i_nlink >= REISERFS_LINK_MAX) { |
1114 | //FIXME: sd_nlink is 32 bit for new files | 1191 | /* FIXME: sd_nlink is 32 bit for new files */ |
1115 | reiserfs_write_unlock(dir->i_sb); | 1192 | reiserfs_write_unlock(dir->i_sb); |
1116 | return -EMLINK; | 1193 | return -EMLINK; |
1117 | } | 1194 | } |
@@ -1137,7 +1214,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1137 | if (retval) { | 1214 | if (retval) { |
1138 | int err; | 1215 | int err; |
1139 | drop_nlink(inode); | 1216 | drop_nlink(inode); |
1140 | err = journal_end(&th, dir->i_sb, jbegin_count); | 1217 | err = journal_end(&th); |
1141 | reiserfs_write_unlock(dir->i_sb); | 1218 | reiserfs_write_unlock(dir->i_sb); |
1142 | return err ? err : retval; | 1219 | return err ? err : retval; |
1143 | } | 1220 | } |
@@ -1147,7 +1224,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1147 | 1224 | ||
1148 | ihold(inode); | 1225 | ihold(inode); |
1149 | d_instantiate(dentry, inode); | 1226 | d_instantiate(dentry, inode); |
1150 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 1227 | retval = journal_end(&th); |
1151 | reiserfs_write_unlock(dir->i_sb); | 1228 | reiserfs_write_unlock(dir->i_sb); |
1152 | return retval; | 1229 | return retval; |
1153 | } | 1230 | } |
@@ -1158,9 +1235,9 @@ static int de_still_valid(const char *name, int len, | |||
1158 | { | 1235 | { |
1159 | struct reiserfs_dir_entry tmp = *de; | 1236 | struct reiserfs_dir_entry tmp = *de; |
1160 | 1237 | ||
1161 | // recalculate pointer to name and name length | 1238 | /* recalculate pointer to name and name length */ |
1162 | set_de_name_and_namelen(&tmp); | 1239 | set_de_name_and_namelen(&tmp); |
1163 | // FIXME: could check more | 1240 | /* FIXME: could check more */ |
1164 | if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) | 1241 | if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) |
1165 | return 0; | 1242 | return 0; |
1166 | return 1; | 1243 | return 1; |
@@ -1217,14 +1294,16 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1217 | unsigned long savelink = 1; | 1294 | unsigned long savelink = 1; |
1218 | struct timespec ctime; | 1295 | struct timespec ctime; |
1219 | 1296 | ||
1220 | /* three balancings: (1) old name removal, (2) new name insertion | 1297 | /* |
1221 | and (3) maybe "save" link insertion | 1298 | * three balancings: (1) old name removal, (2) new name insertion |
1222 | stat data updates: (1) old directory, | 1299 | * and (3) maybe "save" link insertion |
1223 | (2) new directory and (3) maybe old object stat data (when it is | 1300 | * stat data updates: (1) old directory, |
1224 | directory) and (4) maybe stat data of object to which new entry | 1301 | * (2) new directory and (3) maybe old object stat data (when it is |
1225 | pointed initially and (5) maybe block containing ".." of | 1302 | * directory) and (4) maybe stat data of object to which new entry |
1226 | renamed directory | 1303 | * pointed initially and (5) maybe block containing ".." of |
1227 | quota updates: two parent directories */ | 1304 | * renamed directory |
1305 | * quota updates: two parent directories | ||
1306 | */ | ||
1228 | jbegin_count = | 1307 | jbegin_count = |
1229 | JOURNAL_PER_BALANCE_CNT * 3 + 5 + | 1308 | JOURNAL_PER_BALANCE_CNT * 3 + 5 + |
1230 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); | 1309 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); |
@@ -1235,8 +1314,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1235 | old_inode = old_dentry->d_inode; | 1314 | old_inode = old_dentry->d_inode; |
1236 | new_dentry_inode = new_dentry->d_inode; | 1315 | new_dentry_inode = new_dentry->d_inode; |
1237 | 1316 | ||
1238 | // make sure, that oldname still exists and points to an object we | 1317 | /* |
1239 | // are going to rename | 1318 | * make sure that oldname still exists and points to an object we |
1319 | * are going to rename | ||
1320 | */ | ||
1240 | old_de.de_gen_number_bit_string = NULL; | 1321 | old_de.de_gen_number_bit_string = NULL; |
1241 | reiserfs_write_lock(old_dir->i_sb); | 1322 | reiserfs_write_lock(old_dir->i_sb); |
1242 | retval = | 1323 | retval = |
@@ -1256,10 +1337,11 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1256 | 1337 | ||
1257 | old_inode_mode = old_inode->i_mode; | 1338 | old_inode_mode = old_inode->i_mode; |
1258 | if (S_ISDIR(old_inode_mode)) { | 1339 | if (S_ISDIR(old_inode_mode)) { |
1259 | // make sure, that directory being renamed has correct ".." | 1340 | /* |
1260 | // and that its new parent directory has not too many links | 1341 | * make sure that directory being renamed has correct ".." |
1261 | // already | 1342 | * and that its new parent directory has not too many links |
1262 | 1343 | * already | |
1344 | */ | ||
1263 | if (new_dentry_inode) { | 1345 | if (new_dentry_inode) { |
1264 | if (!reiserfs_empty_dir(new_dentry_inode)) { | 1346 | if (!reiserfs_empty_dir(new_dentry_inode)) { |
1265 | reiserfs_write_unlock(old_dir->i_sb); | 1347 | reiserfs_write_unlock(old_dir->i_sb); |
@@ -1267,8 +1349,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1267 | } | 1349 | } |
1268 | } | 1350 | } |
1269 | 1351 | ||
1270 | /* directory is renamed, its parent directory will be changed, | 1352 | /* |
1271 | ** so find ".." entry | 1353 | * directory is renamed, its parent directory will be changed, |
1354 | * so find ".." entry | ||
1272 | */ | 1355 | */ |
1273 | dot_dot_de.de_gen_number_bit_string = NULL; | 1356 | dot_dot_de.de_gen_number_bit_string = NULL; |
1274 | retval = | 1357 | retval = |
@@ -1303,7 +1386,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1303 | "new entry is found, new inode == 0"); | 1386 | "new entry is found, new inode == 0"); |
1304 | } | 1387 | } |
1305 | } else if (retval) { | 1388 | } else if (retval) { |
1306 | int err = journal_end(&th, old_dir->i_sb, jbegin_count); | 1389 | int err = journal_end(&th); |
1307 | reiserfs_write_unlock(old_dir->i_sb); | 1390 | reiserfs_write_unlock(old_dir->i_sb); |
1308 | return err ? err : retval; | 1391 | return err ? err : retval; |
1309 | } | 1392 | } |
@@ -1311,8 +1394,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1311 | reiserfs_update_inode_transaction(old_dir); | 1394 | reiserfs_update_inode_transaction(old_dir); |
1312 | reiserfs_update_inode_transaction(new_dir); | 1395 | reiserfs_update_inode_transaction(new_dir); |
1313 | 1396 | ||
1314 | /* this makes it so an fsync on an open fd for the old name will | 1397 | /* |
1315 | ** commit the rename operation | 1398 | * this makes it so an fsync on an open fd for the old name will |
1399 | * commit the rename operation | ||
1316 | */ | 1400 | */ |
1317 | reiserfs_update_inode_transaction(old_inode); | 1401 | reiserfs_update_inode_transaction(old_inode); |
1318 | 1402 | ||
@@ -1320,38 +1404,45 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1320 | reiserfs_update_inode_transaction(new_dentry_inode); | 1404 | reiserfs_update_inode_transaction(new_dentry_inode); |
1321 | 1405 | ||
1322 | while (1) { | 1406 | while (1) { |
1323 | // look for old name using corresponding entry key (found by reiserfs_find_entry) | 1407 | /* |
1408 | * look for old name using corresponding entry key | ||
1409 | * (found by reiserfs_find_entry) | ||
1410 | */ | ||
1324 | if ((retval = | 1411 | if ((retval = |
1325 | search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, | 1412 | search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, |
1326 | &old_entry_path, | 1413 | &old_entry_path, |
1327 | &old_de)) != NAME_FOUND) { | 1414 | &old_de)) != NAME_FOUND) { |
1328 | pathrelse(&old_entry_path); | 1415 | pathrelse(&old_entry_path); |
1329 | journal_end(&th, old_dir->i_sb, jbegin_count); | 1416 | journal_end(&th); |
1330 | reiserfs_write_unlock(old_dir->i_sb); | 1417 | reiserfs_write_unlock(old_dir->i_sb); |
1331 | return -EIO; | 1418 | return -EIO; |
1332 | } | 1419 | } |
1333 | 1420 | ||
1334 | copy_item_head(&old_entry_ih, get_ih(&old_entry_path)); | 1421 | copy_item_head(&old_entry_ih, tp_item_head(&old_entry_path)); |
1335 | 1422 | ||
1336 | reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); | 1423 | reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); |
1337 | 1424 | ||
1338 | // look for new name by reiserfs_find_entry | 1425 | /* look for new name by reiserfs_find_entry */ |
1339 | new_de.de_gen_number_bit_string = NULL; | 1426 | new_de.de_gen_number_bit_string = NULL; |
1340 | retval = | 1427 | retval = |
1341 | reiserfs_find_entry(new_dir, new_dentry->d_name.name, | 1428 | reiserfs_find_entry(new_dir, new_dentry->d_name.name, |
1342 | new_dentry->d_name.len, &new_entry_path, | 1429 | new_dentry->d_name.len, &new_entry_path, |
1343 | &new_de); | 1430 | &new_de); |
1344 | // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from | 1431 | /* |
1345 | // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. | 1432 | * reiserfs_add_entry should not return IO_ERROR, |
1433 | * because it is called with essentially same parameters from | ||
1434 | * reiserfs_add_entry above, and we'll catch any i/o errors | ||
1435 | * before we get here. | ||
1436 | */ | ||
1346 | if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { | 1437 | if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { |
1347 | pathrelse(&new_entry_path); | 1438 | pathrelse(&new_entry_path); |
1348 | pathrelse(&old_entry_path); | 1439 | pathrelse(&old_entry_path); |
1349 | journal_end(&th, old_dir->i_sb, jbegin_count); | 1440 | journal_end(&th); |
1350 | reiserfs_write_unlock(old_dir->i_sb); | 1441 | reiserfs_write_unlock(old_dir->i_sb); |
1351 | return -EIO; | 1442 | return -EIO; |
1352 | } | 1443 | } |
1353 | 1444 | ||
1354 | copy_item_head(&new_entry_ih, get_ih(&new_entry_path)); | 1445 | copy_item_head(&new_entry_ih, tp_item_head(&new_entry_path)); |
1355 | 1446 | ||
1356 | reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1); | 1447 | reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1); |
1357 | 1448 | ||
@@ -1364,28 +1455,32 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1364 | pathrelse(&dot_dot_entry_path); | 1455 | pathrelse(&dot_dot_entry_path); |
1365 | pathrelse(&new_entry_path); | 1456 | pathrelse(&new_entry_path); |
1366 | pathrelse(&old_entry_path); | 1457 | pathrelse(&old_entry_path); |
1367 | journal_end(&th, old_dir->i_sb, jbegin_count); | 1458 | journal_end(&th); |
1368 | reiserfs_write_unlock(old_dir->i_sb); | 1459 | reiserfs_write_unlock(old_dir->i_sb); |
1369 | return -EIO; | 1460 | return -EIO; |
1370 | } | 1461 | } |
1371 | copy_item_head(&dot_dot_ih, | 1462 | copy_item_head(&dot_dot_ih, |
1372 | get_ih(&dot_dot_entry_path)); | 1463 | tp_item_head(&dot_dot_entry_path)); |
1373 | // node containing ".." gets into transaction | 1464 | /* node containing ".." gets into transaction */ |
1374 | reiserfs_prepare_for_journal(old_inode->i_sb, | 1465 | reiserfs_prepare_for_journal(old_inode->i_sb, |
1375 | dot_dot_de.de_bh, 1); | 1466 | dot_dot_de.de_bh, 1); |
1376 | } | 1467 | } |
1377 | /* we should check seals here, not do | 1468 | /* |
1378 | this stuff, yes? Then, having | 1469 | * we should check seals here, not do |
1379 | gathered everything into RAM we | 1470 | * this stuff, yes? Then, having |
1380 | should lock the buffers, yes? -Hans */ | 1471 | * gathered everything into RAM we |
1381 | /* probably. our rename needs to hold more | 1472 | * should lock the buffers, yes? -Hans |
1382 | ** than one path at once. The seals would | 1473 | */ |
1383 | ** have to be written to deal with multi-path | 1474 | /* |
1384 | ** issues -chris | 1475 | * probably. our rename needs to hold more |
1476 | * than one path at once. The seals would | ||
1477 | * have to be written to deal with multi-path | ||
1478 | * issues -chris | ||
1385 | */ | 1479 | */ |
1386 | /* sanity checking before doing the rename - avoid races many | 1480 | /* |
1387 | ** of the above checks could have scheduled. We have to be | 1481 | * sanity checking before doing the rename - avoid races many |
1388 | ** sure our items haven't been shifted by another process. | 1482 | * of the above checks could have scheduled. We have to be |
1483 | * sure our items haven't been shifted by another process. | ||
1389 | */ | 1484 | */ |
1390 | if (item_moved(&new_entry_ih, &new_entry_path) || | 1485 | if (item_moved(&new_entry_ih, &new_entry_path) || |
1391 | !entry_points_to_object(new_dentry->d_name.name, | 1486 | !entry_points_to_object(new_dentry->d_name.name, |
@@ -1430,24 +1525,28 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1430 | break; | 1525 | break; |
1431 | } | 1526 | } |
1432 | 1527 | ||
1433 | /* ok, all the changes can be done in one fell swoop when we | 1528 | /* |
1434 | have claimed all the buffers needed. */ | 1529 | * ok, all the changes can be done in one fell swoop when we |
1530 | * have claimed all the buffers needed. | ||
1531 | */ | ||
1435 | 1532 | ||
1436 | mark_de_visible(new_de.de_deh + new_de.de_entry_num); | 1533 | mark_de_visible(new_de.de_deh + new_de.de_entry_num); |
1437 | set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); | 1534 | set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); |
1438 | journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh); | 1535 | journal_mark_dirty(&th, new_de.de_bh); |
1439 | 1536 | ||
1440 | mark_de_hidden(old_de.de_deh + old_de.de_entry_num); | 1537 | mark_de_hidden(old_de.de_deh + old_de.de_entry_num); |
1441 | journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh); | 1538 | journal_mark_dirty(&th, old_de.de_bh); |
1442 | ctime = CURRENT_TIME_SEC; | 1539 | ctime = CURRENT_TIME_SEC; |
1443 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 1540 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
1444 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 1541 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
1445 | /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of | 1542 | /* |
1446 | renamed object */ | 1543 | * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch |
1544 | * which adds ctime update of renamed object | ||
1545 | */ | ||
1447 | old_inode->i_ctime = ctime; | 1546 | old_inode->i_ctime = ctime; |
1448 | 1547 | ||
1449 | if (new_dentry_inode) { | 1548 | if (new_dentry_inode) { |
1450 | // adjust link number of the victim | 1549 | /* adjust link number of the victim */ |
1451 | if (S_ISDIR(new_dentry_inode->i_mode)) { | 1550 | if (S_ISDIR(new_dentry_inode->i_mode)) { |
1452 | clear_nlink(new_dentry_inode); | 1551 | clear_nlink(new_dentry_inode); |
1453 | } else { | 1552 | } else { |
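The retry loop that ends here is worth seeing in isolation: reiserfs_rename() optimistically looks up every entry it needs, pins the buffers for the journal, and only leaves the loop once the re-check (item_moved()/entry_points_to_object()) proves that no concurrent balance shifted the items; every modification then happens after the loop. A minimal standalone sketch of that gather/verify/retry shape, with invented stand-ins rather than the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

struct entry {
	int generation;			/* stands in for a copied item head */
};

/* stands in for search_by_entry_key()/reiserfs_find_entry() */
static bool lookup(struct entry *e)
{
	e->generation = 42;
	return true;
}

/* stands in for item_moved()/entry_points_to_object() */
static bool still_valid(const struct entry *e)
{
	return e->generation == 42;
}

static int do_rename(void)
{
	struct entry old_e, new_e;

	while (1) {
		/* gather: look everything up and pin its buffers */
		if (!lookup(&old_e) || !lookup(&new_e))
			return -1;	/* the kernel returns -EIO here */

		/* verify: did another process shift our items? */
		if (still_valid(&old_e) && still_valid(&new_e))
			break;		/* no: safe to modify everything */

		/* yes: drop what we hold and try again */
	}

	/* "one fell swoop": all edits happen only after the loop */
	return 0;
}

int main(void)
{
	printf("rename: %d\n", do_rename());
	return 0;
}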
@@ -1460,25 +1559,32 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1460 | if (S_ISDIR(old_inode_mode)) { | 1559 | if (S_ISDIR(old_inode_mode)) { |
1461 | /* adjust ".." of renamed directory */ | 1560 | /* adjust ".." of renamed directory */ |
1462 | set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); | 1561 | set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); |
1463 | journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); | 1562 | journal_mark_dirty(&th, dot_dot_de.de_bh); |
1464 | 1563 | ||
1564 | /* | ||
1565 | * there (in new_dir) was no directory, so it got new link | ||
1566 | * (".." of renamed directory) | ||
1567 | */ | ||
1465 | if (!new_dentry_inode) | 1568 | if (!new_dentry_inode) |
1466 | /* there (in new_dir) was no directory, so it got new link | ||
1467 | (".." of renamed directory) */ | ||
1468 | INC_DIR_INODE_NLINK(new_dir); | 1569 | INC_DIR_INODE_NLINK(new_dir); |
1469 | 1570 | ||
1470 | /* old directory lost one link - ".. " of renamed directory */ | 1571 | /* old directory lost one link - ".." of renamed directory */ |
1471 | DEC_DIR_INODE_NLINK(old_dir); | 1572 | DEC_DIR_INODE_NLINK(old_dir); |
1472 | } | 1573 | } |
1473 | // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse | 1574 | /* |
1575 | * looks like in 2.3.99pre3 brelse is atomic. | ||
1576 | * so we can use pathrelse | ||
1577 | */ | ||
1474 | pathrelse(&new_entry_path); | 1578 | pathrelse(&new_entry_path); |
1475 | pathrelse(&dot_dot_entry_path); | 1579 | pathrelse(&dot_dot_entry_path); |
1476 | 1580 | ||
1477 | // FIXME: this reiserfs_cut_from_item's return value may screw up | 1581 | /* |
1478 | // anybody, but it will panic if will not be able to find the | 1582 | * FIXME: this reiserfs_cut_from_item's return value may screw up |
1479 | // entry. This needs one more clean up | 1583 | * anybody, but it will panic if it is not able to find the |
1584 | * entry. This needs one more cleanup | ||
1585 | */ | ||
1480 | if (reiserfs_cut_from_item | 1586 | if (reiserfs_cut_from_item |
1481 | (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, | 1587 | (&th, &old_entry_path, &old_de.de_entry_key, old_dir, NULL, |
1482 | 0) < 0) | 1588 | 0) < 0) |
1483 | reiserfs_error(old_dir->i_sb, "vs-7060", | 1589 | reiserfs_error(old_dir->i_sb, "vs-7060", |
1484 | "couldn't not cut old name. Fsck later?"); | 1590 | "couldn't not cut old name. Fsck later?"); |
@@ -1496,16 +1602,13 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1496 | reiserfs_update_sd(&th, new_dentry_inode); | 1602 | reiserfs_update_sd(&th, new_dentry_inode); |
1497 | } | 1603 | } |
1498 | 1604 | ||
1499 | retval = journal_end(&th, old_dir->i_sb, jbegin_count); | 1605 | retval = journal_end(&th); |
1500 | reiserfs_write_unlock(old_dir->i_sb); | 1606 | reiserfs_write_unlock(old_dir->i_sb); |
1501 | return retval; | 1607 | return retval; |
1502 | } | 1608 | } |
1503 | 1609 | ||
1504 | /* | 1610 | /* directories can handle most operations... */ |
1505 | * directories can handle most operations... | ||
1506 | */ | ||
1507 | const struct inode_operations reiserfs_dir_inode_operations = { | 1611 | const struct inode_operations reiserfs_dir_inode_operations = { |
1508 | //&reiserfs_dir_operations, /* default_file_ops */ | ||
1509 | .create = reiserfs_create, | 1612 | .create = reiserfs_create, |
1510 | .lookup = reiserfs_lookup, | 1613 | .lookup = reiserfs_lookup, |
1511 | .link = reiserfs_link, | 1614 | .link = reiserfs_link, |
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index f732d6a5251d..99a5d5dae46a 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/time.h> | 7 | #include <linux/time.h> |
8 | #include "reiserfs.h" | 8 | #include "reiserfs.h" |
9 | 9 | ||
10 | // find where objectid map starts | 10 | /* find where objectid map starts */ |
11 | #define objectid_map(s,rs) (old_format_only (s) ? \ | 11 | #define objectid_map(s,rs) (old_format_only (s) ? \ |
12 | (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ | 12 | (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ |
13 | (__le32 *)((rs) + 1)) | 13 | (__le32 *)((rs) + 1)) |
@@ -20,7 +20,7 @@ static void check_objectid_map(struct super_block *s, __le32 * map) | |||
20 | reiserfs_panic(s, "vs-15010", "map corrupted: %lx", | 20 | reiserfs_panic(s, "vs-15010", "map corrupted: %lx", |
21 | (long unsigned int)le32_to_cpu(map[0])); | 21 | (long unsigned int)le32_to_cpu(map[0])); |
22 | 22 | ||
23 | // FIXME: add something else here | 23 | /* FIXME: add something else here */ |
24 | } | 24 | } |
25 | 25 | ||
26 | #else | 26 | #else |
@@ -29,19 +29,21 @@ static void check_objectid_map(struct super_block *s, __le32 * map) | |||
29 | } | 29 | } |
30 | #endif | 30 | #endif |
31 | 31 | ||
32 | /* When we allocate objectids we allocate the first unused objectid. | 32 | /* |
33 | Each sequence of objectids in use (the odd sequences) is followed | 33 | * When we allocate objectids we allocate the first unused objectid. |
34 | by a sequence of objectids not in use (the even sequences). We | 34 | * Each sequence of objectids in use (the odd sequences) is followed |
35 | only need to record the last objectid in each of these sequences | 35 | * by a sequence of objectids not in use (the even sequences). We |
36 | (both the odd and even sequences) in order to fully define the | 36 | * only need to record the last objectid in each of these sequences |
37 | boundaries of the sequences. A consequence of allocating the first | 37 | * (both the odd and even sequences) in order to fully define the |
38 | objectid not in use is that under most conditions this scheme is | 38 | * boundaries of the sequences. A consequence of allocating the first |
39 | extremely compact. The exception is immediately after a sequence | 39 | * objectid not in use is that under most conditions this scheme is |
40 | of operations which deletes a large number of objects of | 40 | * extremely compact. The exception is immediately after a sequence |
41 | non-sequential objectids, and even then it will become compact | 41 | * of operations which deletes a large number of objects of |
42 | again as soon as more objects are created. Note that many | 42 | * non-sequential objectids, and even then it will become compact |
43 | interesting optimizations of layout could result from complicating | 43 | * again as soon as more objects are created. Note that many |
44 | objectid assignment, but we have deferred making them for now. */ | 44 | * interesting optimizations of layout could result from complicating |
45 | * objectid assignment, but we have deferred making them for now. | ||
46 | */ | ||
45 | 47 | ||
46 | /* get unique object identifier */ | 48 | /* get unique object identifier */ |
47 | __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) | 49 | __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) |
@@ -64,26 +66,30 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) | |||
64 | return 0; | 66 | return 0; |
65 | } | 67 | } |
66 | 68 | ||
67 | /* This incrementation allocates the first unused objectid. That | 69 | /* |
68 | is to say, the first entry on the objectid map is the first | 70 | * This incrementation allocates the first unused objectid. That |
69 | unused objectid, and by incrementing it we use it. See below | 71 | * is to say, the first entry on the objectid map is the first |
70 | where we check to see if we eliminated a sequence of unused | 72 | * unused objectid, and by incrementing it we use it. See below |
71 | objectids.... */ | 73 | * where we check to see if we eliminated a sequence of unused |
74 | * objectids.... | ||
75 | */ | ||
72 | map[1] = cpu_to_le32(unused_objectid + 1); | 76 | map[1] = cpu_to_le32(unused_objectid + 1); |
73 | 77 | ||
74 | /* Now we check to see if we eliminated the last remaining member of | 78 | /* |
75 | the first even sequence (and can eliminate the sequence by | 79 | * Now we check to see if we eliminated the last remaining member of |
76 | eliminating its last objectid from oids), and can collapse the | 80 | * the first even sequence (and can eliminate the sequence by |
77 | first two odd sequences into one sequence. If so, then the net | 81 | * eliminating its last objectid from oids), and can collapse the |
78 | result is to eliminate a pair of objectids from oids. We do this | 82 | * first two odd sequences into one sequence. If so, then the net |
79 | by shifting the entire map to the left. */ | 83 | * result is to eliminate a pair of objectids from oids. We do this |
84 | * by shifting the entire map to the left. | ||
85 | */ | ||
80 | if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { | 86 | if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { |
81 | memmove(map + 1, map + 3, | 87 | memmove(map + 1, map + 3, |
82 | (sb_oid_cursize(rs) - 3) * sizeof(__u32)); | 88 | (sb_oid_cursize(rs) - 3) * sizeof(__u32)); |
83 | set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); | 89 | set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); |
84 | } | 90 | } |
85 | 91 | ||
86 | journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); | 92 | journal_mark_dirty(th, SB_BUFFER_WITH_SB(s)); |
87 | return unused_objectid; | 93 | return unused_objectid; |
88 | } | 94 | } |
89 | 95 | ||
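The boundary encoding described above can be exercised outside the kernel. In the userspace model below (starting map contents invented; le32 handling, journaling and bounds checks dropped), map[] stores run boundaries: ids in [map[0], map[1]) are in use, ids in [map[1], map[2]) are free, and so on, so map[1] is always the first unused objectid:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t map[16] = { 1, 3, 7, 10 };	/* used: 1-2 and 7-9 */
static int cursize = 4;

static uint32_t get_unused_objectid(void)
{
	uint32_t id = map[1];		/* first unused objectid */

	map[1] = id + 1;		/* ...is now in use */

	/*
	 * The free run [map[1], map[2]) just closed: collapse the two
	 * neighbouring used runs by shifting the map left, exactly as
	 * the memmove() in the hunk above does.
	 */
	if (cursize > 2 && map[1] == map[2]) {
		memmove(map + 1, map + 3, (cursize - 3) * sizeof(uint32_t));
		cursize -= 2;
	}
	return id;
}

int main(void)
{
	/* hands out 3, 4, 5, 6, then jumps to 10 once the runs merge */
	for (int i = 0; i < 5; i++)
		printf("got %u, map size now %d\n",
		       get_unused_objectid(), cursize);
	return 0;
}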
@@ -97,30 +103,33 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, | |||
97 | int i = 0; | 103 | int i = 0; |
98 | 104 | ||
99 | BUG_ON(!th->t_trans_id); | 105 | BUG_ON(!th->t_trans_id); |
100 | //return; | 106 | /*return; */ |
101 | check_objectid_map(s, map); | 107 | check_objectid_map(s, map); |
102 | 108 | ||
103 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); | 109 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); |
104 | journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); | 110 | journal_mark_dirty(th, SB_BUFFER_WITH_SB(s)); |
105 | 111 | ||
106 | /* start at the beginning of the objectid map (i = 0) and go to | 112 | /* |
107 | the end of it (i = disk_sb->s_oid_cursize). Linear search is | 113 | * start at the beginning of the objectid map (i = 0) and go to |
108 | what we use, though it is possible that binary search would be | 114 | * the end of it (i = disk_sb->s_oid_cursize). Linear search is |
109 | more efficient after performing lots of deletions (which is | 115 | * what we use, though it is possible that binary search would be |
110 | when oids is large.) We only check even i's. */ | 116 | * more efficient after performing lots of deletions (which is |
117 | * when oids is large.) We only check even i's. | ||
118 | */ | ||
111 | while (i < sb_oid_cursize(rs)) { | 119 | while (i < sb_oid_cursize(rs)) { |
112 | if (objectid_to_release == le32_to_cpu(map[i])) { | 120 | if (objectid_to_release == le32_to_cpu(map[i])) { |
113 | /* This incrementation unallocates the objectid. */ | 121 | /* This incrementation unallocates the objectid. */ |
114 | //map[i]++; | ||
115 | le32_add_cpu(&map[i], 1); | 122 | le32_add_cpu(&map[i], 1); |
116 | 123 | ||
117 | /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ | 124 | /* |
125 | * Did we unallocate the last member of an | ||
126 | * odd sequence, and can shrink oids? | ||
127 | */ | ||
118 | if (map[i] == map[i + 1]) { | 128 | if (map[i] == map[i + 1]) { |
119 | /* shrink objectid map */ | 129 | /* shrink objectid map */ |
120 | memmove(map + i, map + i + 2, | 130 | memmove(map + i, map + i + 2, |
121 | (sb_oid_cursize(rs) - i - | 131 | (sb_oid_cursize(rs) - i - |
122 | 2) * sizeof(__u32)); | 132 | 2) * sizeof(__u32)); |
123 | //disk_sb->s_oid_cursize -= 2; | ||
124 | set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); | 133 | set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); |
125 | 134 | ||
126 | RFALSE(sb_oid_cursize(rs) < 2 || | 135 | RFALSE(sb_oid_cursize(rs) < 2 || |
@@ -135,14 +144,19 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, | |||
135 | objectid_to_release < le32_to_cpu(map[i + 1])) { | 144 | objectid_to_release < le32_to_cpu(map[i + 1])) { |
136 | /* size of objectid map is not changed */ | 145 | /* size of objectid map is not changed */ |
137 | if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { | 146 | if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { |
138 | //objectid_map[i+1]--; | ||
139 | le32_add_cpu(&map[i + 1], -1); | 147 | le32_add_cpu(&map[i + 1], -1); |
140 | return; | 148 | return; |
141 | } | 149 | } |
142 | 150 | ||
143 | /* JDM comparing two little-endian values for equality -- safe */ | 151 | /* |
152 | * JDM comparing two little-endian values for | ||
153 | * equality -- safe | ||
154 | */ | ||
155 | /* | ||
156 | * objectid map must be expanded, but | ||
157 | * there is no space | ||
158 | */ | ||
144 | if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { | 159 | if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { |
145 | /* objectid map must be expanded, but there is no space */ | ||
146 | PROC_INFO_INC(s, leaked_oid); | 160 | PROC_INFO_INC(s, leaked_oid); |
147 | return; | 161 | return; |
148 | } | 162 | } |
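A companion sketch for the release path, in the same userspace model (simplified: the run-merge the kernel performs when le32_add_cpu() makes map[i] catch up with map[i+1] is omitted, and MAXSIZE stands in for sb_oid_maxsize). Freeing an id either moves a run boundary or splits a used run; a split needs two spare map slots, which is exactly the case above where the id is deliberately leaked instead:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAXSIZE 6

static uint32_t map[MAXSIZE] = { 1, 10 };	/* ids 1-9 in use */
static int cursize = 2;

static void release_objectid(uint32_t id)
{
	for (int i = 0; i < cursize; i += 2) {
		if (id == map[i]) {		/* first id of a used run */
			map[i]++;		/* boundary moves right */
			return;
		}
		if (id > map[i] && id < map[i + 1]) {
			if (id + 1 == map[i + 1]) {	/* last id of run */
				map[i + 1]--;
				return;
			}
			if (cursize == MAXSIZE) {	/* no room: leak */
				printf("leaked oid %u\n", id);
				return;
			}
			/* split the run: insert boundaries id, id + 1 */
			memmove(map + i + 3, map + i + 1,
				(cursize - i - 1) * sizeof(uint32_t));
			map[i + 1] = id;
			map[i + 2] = id + 1;
			cursize += 2;
			return;
		}
	}
}

int main(void)
{
	release_objectid(5);	/* splits 1-9 into 1-4 and 6-9 */
	for (int i = 0; i < cursize; i++)
		printf("map[%d] = %u\n", i, map[i]);
	return 0;
}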
@@ -178,8 +192,9 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s) | |||
178 | new_objectid_map = (__le32 *) (disk_sb + 1); | 192 | new_objectid_map = (__le32 *) (disk_sb + 1); |
179 | 193 | ||
180 | if (cur_size > new_size) { | 194 | if (cur_size > new_size) { |
181 | /* mark everyone used that was listed as free at the end of the objectid | 195 | /* |
182 | ** map | 196 | * mark everyone used that was listed as free at |
197 | * the end of the objectid map | ||
183 | */ | 198 | */ |
184 | objectid_map[new_size - 1] = objectid_map[cur_size - 1]; | 199 | objectid_map[new_size - 1] = objectid_map[cur_size - 1]; |
185 | set_sb_oid_cursize(disk_sb, new_size); | 200 | set_sb_oid_cursize(disk_sb, new_size); |
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 54944d5a4a6e..c9b47e91baf8 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c | |||
@@ -172,18 +172,19 @@ static char *is_there_reiserfs_struct(char *fmt, int *what) | |||
172 | return k; | 172 | return k; |
173 | } | 173 | } |
174 | 174 | ||
175 | /* debugging reiserfs we used to print out a lot of different | 175 | /* |
176 | variables, like keys, item headers, buffer heads etc. Values of | 176 | * debugging reiserfs we used to print out a lot of different |
177 | most fields matter. So it took a long time just to write | 177 | * variables, like keys, item headers, buffer heads etc. Values of |
178 | appropriative printk. With this reiserfs_warning you can use format | 178 | * most fields matter. So it took a long time just to write |
179 | specification for complex structures like you used to do with | 179 | * the appropriate printk. With this reiserfs_warning you can use format |
180 | printfs for integers, doubles and pointers. For instance, to print | 180 | * specification for complex structures like you used to do with |
181 | out key structure you have to write just: | 181 | * printfs for integers, doubles and pointers. For instance, to print |
182 | reiserfs_warning ("bad key %k", key); | 182 | * out key structure you have to write just: |
183 | instead of | 183 | * reiserfs_warning ("bad key %k", key); |
184 | printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, | 184 | * instead of |
185 | key->k_offset, key->k_uniqueness); | 185 | * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, |
186 | */ | 186 | * key->k_offset, key->k_uniqueness); |
187 | */ | ||
187 | static DEFINE_SPINLOCK(error_lock); | 188 | static DEFINE_SPINLOCK(error_lock); |
188 | static void prepare_error_buf(const char *fmt, va_list args) | 189 | static void prepare_error_buf(const char *fmt, va_list args) |
189 | { | 190 | { |
@@ -243,15 +244,16 @@ static void prepare_error_buf(const char *fmt, va_list args) | |||
243 | 244 | ||
244 | } | 245 | } |
245 | 246 | ||
246 | /* in addition to usual conversion specifiers this accepts reiserfs | 247 | /* |
247 | specific conversion specifiers: | 248 | * in addition to usual conversion specifiers this accepts reiserfs |
248 | %k to print little endian key, | 249 | * specific conversion specifiers: |
249 | %K to print cpu key, | 250 | * %k to print little endian key, |
250 | %h to print item_head, | 251 | * %K to print cpu key, |
251 | %t to print directory entry | 252 | * %h to print item_head, |
252 | %z to print block head (arg must be struct buffer_head * | 253 | * %t to print directory entry |
253 | %b to print buffer_head | 254 | * %z to print block head (arg must be struct buffer_head * |
253 | %b to print buffer_head | 254 | * %z to print block head (arg must be struct buffer_head *) |
256 | */ | ||
255 | 257 | ||
256 | #define do_reiserfs_warning(fmt)\ | 258 | #define do_reiserfs_warning(fmt)\ |
257 | {\ | 259 | {\ |
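What the extra specifiers buy call sites is easiest to see in a toy model. The sketch below is a deliberately simplified userspace rendering of the idea: it expands only %k, uses an invented two-field key, and does none of the argument bookkeeping the real prepare_error_buf() does:

#include <stdarg.h>
#include <stdio.h>

struct key {
	unsigned int dirid, objectid;
};

/* expands only %k; every other character is copied through untouched */
static void my_warning(const char *fmt, ...)
{
	char buf[256];
	size_t used = 0;
	va_list args;

	va_start(args, fmt);
	while (*fmt && used < sizeof(buf) - 1) {
		if (fmt[0] == '%' && fmt[1] == 'k') {
			const struct key *k = va_arg(args, const struct key *);

			used += snprintf(buf + used, sizeof(buf) - used,
					 "[%u %u]", k->dirid, k->objectid);
			fmt += 2;
		} else {
			buf[used++] = *fmt++;
		}
	}
	va_end(args);
	if (used > sizeof(buf) - 1)
		used = sizeof(buf) - 1;
	buf[used] = '\0';
	fprintf(stderr, "REISERFS warning: %s\n", buf);
}

int main(void)
{
	struct key k = { .dirid = 1, .objectid = 2 };

	my_warning("bad key %k", &k);	/* -> REISERFS warning: bad key [1 2] */
	return 0;
}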
@@ -304,50 +306,52 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) | |||
304 | #endif | 306 | #endif |
305 | } | 307 | } |
306 | 308 | ||
307 | /* The format: | 309 | /* |
308 | 310 | * The format: | |
309 | maintainer-errorid: [function-name:] message | 311 | * |
310 | 312 | * maintainer-errorid: [function-name:] message | |
311 | where errorid is unique to the maintainer and function-name is | 313 | * |
312 | optional, is recommended, so that anyone can easily find the bug | 314 | * where errorid is unique to the maintainer and function-name is |
313 | with a simple grep for the short to type string | 315 | * optional, is recommended, so that anyone can easily find the bug |
314 | maintainer-errorid. Don't bother with reusing errorids, there are | 316 | * with a simple grep for the short to type string |
315 | lots of numbers out there. | 317 | * maintainer-errorid. Don't bother with reusing errorids, there are |
316 | 318 | * lots of numbers out there. | |
317 | Example: | 319 | * |
318 | 320 | * Example: | |
319 | reiserfs_panic( | 321 | * |
320 | p_sb, "reiser-29: reiserfs_new_blocknrs: " | 322 | * reiserfs_panic( |
321 | "one of search_start or rn(%d) is equal to MAX_B_NUM," | 323 | * p_sb, "reiser-29: reiserfs_new_blocknrs: " |
322 | "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", | 324 | * "one of search_start or rn(%d) is equal to MAX_B_NUM," |
323 | rn, bh | 325 | * "which means that we are optimizing location based on the " |
324 | ); | 326 | * "bogus location of a temp buffer (%p).", |
325 | 327 | * rn, bh | |
326 | Regular panic()s sometimes clear the screen before the message can | 328 | * ); |
327 | be read, thus the need for the while loop. | 329 | * |
328 | 330 | * Regular panic()s sometimes clear the screen before the message can | |
329 | Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it | 331 | * be read, thus the need for the while loop. |
330 | pointless complexity): | 332 | * |
331 | 333 | * Numbering scheme for panic used by Vladimir and Anatoly (Hans completely | |
332 | panics in reiserfs.h have numbers from 1000 to 1999 | 334 | * ignores this scheme, and considers it pointless complexity): |
333 | super.c 2000 to 2999 | 335 | * |
334 | preserve.c (unused) 3000 to 3999 | 336 | * panics in reiserfs.h have numbers from 1000 to 1999 |
335 | bitmap.c 4000 to 4999 | 337 | * super.c 2000 to 2999 |
336 | stree.c 5000 to 5999 | 338 | * preserve.c (unused) 3000 to 3999 |
337 | prints.c 6000 to 6999 | 339 | * bitmap.c 4000 to 4999 |
338 | namei.c 7000 to 7999 | 340 | * stree.c 5000 to 5999 |
339 | fix_nodes.c 8000 to 8999 | 341 | * prints.c 6000 to 6999 |
340 | dir.c 9000 to 9999 | 342 | * namei.c 7000 to 7999 |
341 | lbalance.c 10000 to 10999 | 343 | * fix_nodes.c 8000 to 8999 |
342 | ibalance.c 11000 to 11999 not ready | 344 | * dir.c 9000 to 9999 |
343 | do_balan.c 12000 to 12999 | 345 | * lbalance.c 10000 to 10999 |
344 | inode.c 13000 to 13999 | 346 | * ibalance.c 11000 to 11999 not ready |
345 | file.c 14000 to 14999 | 347 | * do_balan.c 12000 to 12999 |
346 | objectid.c 15000 - 15999 | 348 | * inode.c 13000 to 13999 |
347 | buffer.c 16000 - 16999 | 349 | * file.c 14000 to 14999 |
348 | symlink.c 17000 - 17999 | 350 | * objectid.c 15000 - 15999 |
349 | 351 | * buffer.c 16000 - 16999 | |
350 | . */ | 352 | * symlink.c 17000 - 17999 |
353 | * | ||
354 | */ | ||
351 | 355 | ||
352 | void __reiserfs_panic(struct super_block *sb, const char *id, | 356 | void __reiserfs_panic(struct super_block *sb, const char *id, |
353 | const char *function, const char *fmt, ...) | 357 | const char *function, const char *fmt, ...) |
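The scheme is already on display elsewhere in this patch: the check_objectid_map() hunk earlier combines the "vs-" maintainer prefix with an id from the objectid.c range (15000 - 15999):

	reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
		       (long unsigned int)le32_to_cpu(map[0]));

so a bug report that quotes nothing but the id (found with, say, grep -rn '"vs-15010"' fs/reiserfs) pins down the exact call site without a stack trace.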
@@ -411,9 +415,11 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) | |||
411 | reiserfs_abort_journal(sb, errno); | 415 | reiserfs_abort_journal(sb, errno); |
412 | } | 416 | } |
413 | 417 | ||
414 | /* this prints internal nodes (4 keys/items in line) (dc_number, | 418 | /* |
415 | dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, | 419 | * this prints internal nodes (4 keys/items in line) (dc_number, |
416 | dc_size)...*/ | 420 | * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, |
421 | * dc_size)... | ||
422 | */ | ||
417 | static int print_internal(struct buffer_head *bh, int first, int last) | 423 | static int print_internal(struct buffer_head *bh, int first, int last) |
418 | { | 424 | { |
419 | struct reiserfs_key *key; | 425 | struct reiserfs_key *key; |
@@ -439,7 +445,7 @@ static int print_internal(struct buffer_head *bh, int first, int last) | |||
439 | dc = B_N_CHILD(bh, from); | 445 | dc = B_N_CHILD(bh, from); |
440 | reiserfs_printk("PTR %d: %y ", from, dc); | 446 | reiserfs_printk("PTR %d: %y ", from, dc); |
441 | 447 | ||
442 | for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to; | 448 | for (i = from, key = internal_key(bh, from), dc++; i < to; |
443 | i++, key++, dc++) { | 449 | i++, key++, dc++) { |
444 | reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); | 450 | reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); |
445 | if (i && i % 4 == 0) | 451 | if (i && i % 4 == 0) |
@@ -463,7 +469,7 @@ static int print_leaf(struct buffer_head *bh, int print_mode, int first, | |||
463 | check_leaf(bh); | 469 | check_leaf(bh); |
464 | 470 | ||
465 | blkh = B_BLK_HEAD(bh); | 471 | blkh = B_BLK_HEAD(bh); |
466 | ih = B_N_PITEM_HEAD(bh, 0); | 472 | ih = item_head(bh, 0); |
467 | nr = blkh_nr_item(blkh); | 473 | nr = blkh_nr_item(blkh); |
468 | 474 | ||
469 | printk | 475 | printk |
@@ -496,7 +502,7 @@ static int print_leaf(struct buffer_head *bh, int print_mode, int first, | |||
496 | ("-------------------------------------------------------------------------------\n"); | 502 | ("-------------------------------------------------------------------------------\n"); |
497 | reiserfs_printk("|%2d| %h |\n", i, ih); | 503 | reiserfs_printk("|%2d| %h |\n", i, ih); |
498 | if (print_mode & PRINT_LEAF_ITEMS) | 504 | if (print_mode & PRINT_LEAF_ITEMS) |
499 | op_print_item(ih, B_I_PITEM(bh, ih)); | 505 | op_print_item(ih, ih_item_body(bh, ih)); |
500 | } | 506 | } |
501 | 507 | ||
502 | printk | 508 | printk |
@@ -543,9 +549,11 @@ static int print_super_block(struct buffer_head *bh) | |||
543 | printk("Block count %u\n", sb_block_count(rs)); | 549 | printk("Block count %u\n", sb_block_count(rs)); |
544 | printk("Blocksize %d\n", sb_blocksize(rs)); | 550 | printk("Blocksize %d\n", sb_blocksize(rs)); |
545 | printk("Free blocks %u\n", sb_free_blocks(rs)); | 551 | printk("Free blocks %u\n", sb_free_blocks(rs)); |
546 | // FIXME: this would be confusing if | 552 | /* |
547 | // someone stores reiserfs super block in some data block ;) | 553 | * FIXME: this would be confusing if |
554 | * someone stores reiserfs super block in some data block ;) | ||
548 | // skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); | 555 | // skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); |
556 | */ | ||
549 | skipped = bh->b_blocknr; | 557 | skipped = bh->b_blocknr; |
550 | data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - | 558 | data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - |
551 | (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + | 559 | (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + |
@@ -581,8 +589,8 @@ static int print_desc_block(struct buffer_head *bh) | |||
581 | 589 | ||
582 | return 0; | 590 | return 0; |
583 | } | 591 | } |
584 | 592 | /* ..., int print_mode, int first, int last) */ | |
585 | void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last) | 593 | void print_block(struct buffer_head *bh, ...) |
586 | { | 594 | { |
587 | va_list args; | 595 | va_list args; |
588 | int mode, first, last; | 596 | int mode, first, last; |
@@ -644,11 +652,11 @@ void store_print_tb(struct tree_balance *tb) | |||
644 | "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", | 652 | "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", |
645 | h, | 653 | h, |
646 | (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), | 654 | (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), |
647 | (tbSh) ? atomic_read(&(tbSh->b_count)) : -1, | 655 | (tbSh) ? atomic_read(&tbSh->b_count) : -1, |
648 | (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), | 656 | (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), |
649 | (tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1, | 657 | (tb->L[h]) ? atomic_read(&tb->L[h]->b_count) : -1, |
650 | (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), | 658 | (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), |
651 | (tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1, | 659 | (tb->R[h]) ? atomic_read(&tb->R[h]->b_count) : -1, |
652 | (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), | 660 | (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), |
653 | (tb->FL[h]) ? (long long)(tb->FL[h]-> | 661 | (tb->FL[h]) ? (long long)(tb->FL[h]-> |
654 | b_blocknr) : (-1LL), | 662 | b_blocknr) : (-1LL), |
@@ -665,9 +673,9 @@ void store_print_tb(struct tree_balance *tb) | |||
665 | "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" | 673 | "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" |
666 | "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", | 674 | "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", |
667 | tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], | 675 | tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], |
668 | tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes, | 676 | tb->rbytes, tb->blknum[0], tb->s0num, tb->snum[0], |
669 | tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], | 677 | tb->sbytes[0], tb->snum[1], tb->sbytes[1], |
670 | tb->rkey[0]); | 678 | tb->cur_blknum, tb->lkey[0], tb->rkey[0]); |
671 | 679 | ||
672 | /* this prints balance parameters for non-leaf levels */ | 680 | /* this prints balance parameters for non-leaf levels */ |
673 | h = 0; | 681 | h = 0; |
@@ -690,7 +698,7 @@ void store_print_tb(struct tree_balance *tb) | |||
690 | "%p (%llu %d)%s", tb->FEB[i], | 698 | "%p (%llu %d)%s", tb->FEB[i], |
691 | tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> | 699 | tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> |
692 | b_blocknr : 0ULL, | 700 | b_blocknr : 0ULL, |
693 | tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0, | 701 | tb->FEB[i] ? atomic_read(&tb->FEB[i]->b_count) : 0, |
694 | (i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", "); | 702 | (i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", "); |
695 | 703 | ||
696 | sprintf(print_tb_buf + strlen(print_tb_buf), | 704 | sprintf(print_tb_buf + strlen(print_tb_buf), |
@@ -744,8 +752,8 @@ void check_leaf(struct buffer_head *bh) | |||
744 | if (!bh) | 752 | if (!bh) |
745 | return; | 753 | return; |
746 | check_leaf_block_head(bh); | 754 | check_leaf_block_head(bh); |
747 | for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) | 755 | for (i = 0, ih = item_head(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) |
748 | op_check_item(ih, B_I_PITEM(bh, ih)); | 756 | op_check_item(ih, ih_item_body(bh, ih)); |
749 | } | 757 | } |
750 | 758 | ||
751 | void check_internal(struct buffer_head *bh) | 759 | void check_internal(struct buffer_head *bh) |
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 83d4eac8059a..bf53888c7f59 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details | 2 | * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for |
3 | * licensing and copyright details | ||
3 | */ | 4 | */ |
4 | 5 | ||
5 | #include <linux/reiserfs_fs.h> | 6 | #include <linux/reiserfs_fs.h> |
@@ -23,52 +24,73 @@ | |||
23 | 24 | ||
24 | struct reiserfs_journal_list; | 25 | struct reiserfs_journal_list; |
25 | 26 | ||
26 | /** bitmasks for i_flags field in reiserfs-specific part of inode */ | 27 | /* bitmasks for i_flags field in reiserfs-specific part of inode */ |
27 | typedef enum { | 28 | typedef enum { |
28 | /** this says what format of key do all items (but stat data) of | 29 | /* |
29 | an object have. If this is set, that format is 3.6 otherwise | 30 | * this says what format of key all items (but stat data) of |
30 | - 3.5 */ | 31 | * an object have. If this is set, the format is 3.6, otherwise 3.5 |
32 | */ | ||
31 | i_item_key_version_mask = 0x0001, | 33 | i_item_key_version_mask = 0x0001, |
32 | /** If this is unset, object has 3.5 stat data, otherwise, it has | 34 | |
33 | 3.6 stat data with 64bit size, 32bit nlink etc. */ | 35 | /* |
36 | * If this is unset, object has 3.5 stat data, otherwise, | ||
37 | * it has 3.6 stat data with 64bit size, 32bit nlink etc. | ||
38 | */ | ||
34 | i_stat_data_version_mask = 0x0002, | 39 | i_stat_data_version_mask = 0x0002, |
35 | /** file might need tail packing on close */ | 40 | |
41 | /* file might need tail packing on close */ | ||
36 | i_pack_on_close_mask = 0x0004, | 42 | i_pack_on_close_mask = 0x0004, |
37 | /** don't pack tail of file */ | 43 | |
44 | /* don't pack tail of file */ | ||
38 | i_nopack_mask = 0x0008, | 45 | i_nopack_mask = 0x0008, |
39 | /** If those is set, "safe link" was created for this file during | 46 | |
40 | truncate or unlink. Safe link is used to avoid leakage of disk | 47 | /* |
41 | space on crash with some files open, but unlinked. */ | 48 | * If either of these is set, "safe link" was created for this |
49 | * file during truncate or unlink. Safe link is used to avoid | ||
50 | * leakage of disk space on crash with some files open, but unlinked. | ||
51 | */ | ||
42 | i_link_saved_unlink_mask = 0x0010, | 52 | i_link_saved_unlink_mask = 0x0010, |
43 | i_link_saved_truncate_mask = 0x0020, | 53 | i_link_saved_truncate_mask = 0x0020, |
54 | |||
44 | i_has_xattr_dir = 0x0040, | 55 | i_has_xattr_dir = 0x0040, |
45 | i_data_log = 0x0080, | 56 | i_data_log = 0x0080, |
46 | } reiserfs_inode_flags; | 57 | } reiserfs_inode_flags; |
47 | 58 | ||
48 | struct reiserfs_inode_info { | 59 | struct reiserfs_inode_info { |
49 | __u32 i_key[4]; /* key is still 4 32 bit integers */ | 60 | __u32 i_key[4]; /* key is still 4 32 bit integers */ |
50 | /** transient inode flags that are never stored on disk. Bitmasks | 61 | |
51 | for this field are defined above. */ | 62 | /* |
63 | * transient inode flags that are never stored on disk. Bitmasks | ||
64 | * for this field are defined above. | ||
65 | */ | ||
52 | __u32 i_flags; | 66 | __u32 i_flags; |
53 | 67 | ||
54 | __u32 i_first_direct_byte; // offset of first byte stored in direct item. | 68 | /* offset of first byte stored in direct item. */ |
69 | __u32 i_first_direct_byte; | ||
55 | 70 | ||
56 | /* copy of persistent inode flags read from sd_attrs. */ | 71 | /* copy of persistent inode flags read from sd_attrs. */ |
57 | __u32 i_attrs; | 72 | __u32 i_attrs; |
58 | 73 | ||
59 | int i_prealloc_block; /* first unused block of a sequence of unused blocks */ | 74 | /* first unused block of a sequence of unused blocks */ |
75 | int i_prealloc_block; | ||
60 | int i_prealloc_count; /* length of that sequence */ | 76 | int i_prealloc_count; /* length of that sequence */ |
61 | struct list_head i_prealloc_list; /* per-transaction list of inodes which | ||
62 | * have preallocated blocks */ | ||
63 | 77 | ||
64 | unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks | 78 | /* per-transaction list of inodes which have preallocated blocks */ |
65 | * for the contents of this directory should be | 79 | struct list_head i_prealloc_list; |
66 | * displaced */ | ||
67 | 80 | ||
68 | /* we use these for fsync or O_SYNC to decide which transaction | 81 | /* |
69 | ** needs to be committed in order for this inode to be properly | 82 | * new_packing_locality is created; new blocks for the contents |
70 | ** flushed */ | 83 | * of this directory should be displaced |
84 | */ | ||
85 | unsigned new_packing_locality:1; | ||
86 | |||
87 | /* | ||
88 | * we use these for fsync or O_SYNC to decide which transaction | ||
89 | * needs to be committed in order for this inode to be properly | ||
90 | * flushed | ||
91 | */ | ||
71 | unsigned int i_trans_id; | 92 | unsigned int i_trans_id; |
93 | |||
72 | struct reiserfs_journal_list *i_jl; | 94 | struct reiserfs_journal_list *i_jl; |
73 | atomic_t openers; | 95 | atomic_t openers; |
74 | struct mutex tailpack; | 96 | struct mutex tailpack; |
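How the first two masks are consumed is not visible in this hunk; the sketch below is a from-memory rendering of the accessor pattern used further down this header (names indicative, not quoted from the patch), showing that the transient flags are only ever tested or set through REISERFS_I(inode)->i_flags:

static inline int get_inode_item_key_version(struct inode *inode)
{
	return (REISERFS_I(inode)->i_flags & i_item_key_version_mask)
	    ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5;
}

static inline void set_inode_item_key_version(struct inode *inode,
					      int version)
{
	if (version == KEY_FORMAT_3_6)
		REISERFS_I(inode)->i_flags |= i_item_key_version_mask;
	else
		REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask;
}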
@@ -82,9 +104,10 @@ typedef enum { | |||
82 | reiserfs_attrs_cleared = 0x00000001, | 104 | reiserfs_attrs_cleared = 0x00000001, |
83 | } reiserfs_super_block_flags; | 105 | } reiserfs_super_block_flags; |
84 | 106 | ||
85 | /* struct reiserfs_super_block accessors/mutators | 107 | /* |
86 | * since this is a disk structure, it will always be in | 108 | * struct reiserfs_super_block accessors/mutators since this is a disk |
87 | * little endian format. */ | 109 | * structure, it will always be in little endian format. |
110 | */ | ||
88 | #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) | 111 | #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) |
89 | #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) | 112 | #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) |
90 | #define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) | 113 | #define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) |
@@ -152,48 +175,61 @@ typedef enum { | |||
152 | 175 | ||
153 | /* LOGGING -- */ | 176 | /* LOGGING -- */ |
154 | 177 | ||
155 | /* These all interelate for performance. | 178 | /* |
156 | ** | 179 | * These all interrelate for performance. |
157 | ** If the journal block count is smaller than n transactions, you lose speed. | 180 | * |
158 | ** I don't know what n is yet, I'm guessing 8-16. | 181 | * If the journal block count is smaller than n transactions, you lose speed. |
159 | ** | 182 | * I don't know what n is yet, I'm guessing 8-16. |
160 | ** typical transaction size depends on the application, how often fsync is | 183 | * |
161 | ** called, and how many metadata blocks you dirty in a 30 second period. | 184 | * typical transaction size depends on the application, how often fsync is |
162 | ** The more small files (<16k) you use, the larger your transactions will | 185 | * called, and how many metadata blocks you dirty in a 30 second period. |
163 | ** be. | 186 | * The more small files (<16k) you use, the larger your transactions will |
164 | ** | 187 | * be. |
165 | ** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal | 188 | * |
166 | ** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough | 189 | * If your journal fills faster than dirty buffers get flushed to disk, it |
167 | ** to prevent wrapping before dirty meta blocks get to disk. | 190 | * must flush them before allowing the journal to wrap, which slows things |
168 | ** | 191 | * down. If you need high speed meta data updates, the journal should be |
169 | ** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal | 192 | * big enough to prevent wrapping before dirty meta blocks get to disk. |
170 | ** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. | 193 | * |
171 | ** | 194 | * If the batch max is smaller than the transaction max, you'll waste space |
172 | ** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. | 195 | * at the end of the journal because journal_end sets the next transaction |
173 | ** | 196 | * to start at 0 if the next transaction has any chance of wrapping. |
174 | */ | 197 | * |
198 | * The larger the batch max age, the better the speed, and the more meta | ||
199 | * data changes you'll lose after a crash. | ||
200 | */ | ||
175 | 201 | ||
176 | /* don't mess with these for a while */ | 202 | /* don't mess with these for a while */ |
177 | /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ | 203 | /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ |
178 | #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ | 204 | #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ |
179 | #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ | 205 | #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ |
180 | #define JOURNAL_HASH_SIZE 8192 | 206 | #define JOURNAL_HASH_SIZE 8192 |
181 | #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ | 207 | |
182 | 208 | /* number of copies of the bitmaps to have floating. Must be >= 2 */ | |
183 | /* One of these for every block in every transaction | 209 | #define JOURNAL_NUM_BITMAPS 5 |
184 | ** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a | 210 | |
185 | ** hash of all the in memory transactions. | 211 | /* |
186 | ** next and prev are used by the current transaction (journal_hash). | 212 | * One of these for every block in every transaction |
187 | ** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash | 213 | * Each one is in two hash tables. First, a hash of the current transaction, |
188 | ** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging | 214 | * and after journal_end, a hash of all the in memory transactions. |
189 | ** to a given transaction. | 215 | * next and prev are used by the current transaction (journal_hash). |
190 | */ | 216 | * hnext and hprev are used by journal_list_hash. If a block is in more |
217 | * than one transaction, the journal_list_hash links it in multiple times. | ||
218 | * This allows flush_journal_list to remove just the cnode belonging to a | ||
219 | * given transaction. | ||
220 | */ | ||
191 | struct reiserfs_journal_cnode { | 221 | struct reiserfs_journal_cnode { |
192 | struct buffer_head *bh; /* real buffer head */ | 222 | struct buffer_head *bh; /* real buffer head */ |
193 | struct super_block *sb; /* dev of real buffer head */ | 223 | struct super_block *sb; /* dev of real buffer head */ |
194 | __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ | 224 | |
225 | /* block number of real buffer head, == 0 when buffer on disk */ | ||
226 | __u32 blocknr; | ||
227 | |||
195 | unsigned long state; | 228 | unsigned long state; |
196 | struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ | 229 | |
230 | /* journal list this cnode lives in */ | ||
231 | struct reiserfs_journal_list *jlist; | ||
232 | |||
197 | struct reiserfs_journal_cnode *next; /* next in transaction list */ | 233 | struct reiserfs_journal_cnode *next; /* next in transaction list */ |
198 | struct reiserfs_journal_cnode *prev; /* prev in transaction list */ | 234 | struct reiserfs_journal_cnode *prev; /* prev in transaction list */ |
199 | struct reiserfs_journal_cnode *hprev; /* prev in hash list */ | 235 | struct reiserfs_journal_cnode *hprev; /* prev in hash list */ |
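The double linkage called out in this comment, one cnode simultaneously on the current-transaction hash via next/prev and on the all-transactions hash via hnext/hprev, reduces to two independent pairs of link pointers in one node. A compact userspace sketch (hash buckets reduced to single lists for brevity; field names mirror the struct):

#include <stddef.h>
#include <stdio.h>

struct cnode {
	unsigned long blocknr;
	struct cnode *next, *prev;	/* journal_hash chain */
	struct cnode *hnext, *hprev;	/* journal_list_hash chain */
};

static struct cnode *journal_hash_head;	/* current transaction */
static struct cnode *list_hash_head;	/* all in-memory transactions */

static void link_both(struct cnode *cn)
{
	cn->next = journal_hash_head;
	if (journal_hash_head)
		journal_hash_head->prev = cn;
	journal_hash_head = cn;

	cn->hnext = list_hash_head;
	if (list_hash_head)
		list_hash_head->hprev = cn;
	list_hash_head = cn;
}

/* flush_journal_list-style removal touches only the h-links */
static void unlink_from_list_hash(struct cnode *cn)
{
	if (cn->hprev)
		cn->hprev->hnext = cn->hnext;
	else
		list_hash_head = cn->hnext;
	if (cn->hnext)
		cn->hnext->hprev = cn->hprev;
	cn->hnext = NULL;
	cn->hprev = NULL;
}

int main(void)
{
	struct cnode a = { .blocknr = 100 };
	struct cnode b = { .blocknr = 200 };

	link_both(&a);
	link_both(&b);
	unlink_from_list_hash(&b);

	/* b has left the list hash but is still newest in journal_hash */
	printf("journal_hash head: %lu\n", journal_hash_head->blocknr);
	printf("list_hash head:    %lu\n", list_hash_head->blocknr);
	return 0;
}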
@@ -212,18 +248,22 @@ struct reiserfs_list_bitmap { | |||
212 | }; | 248 | }; |
213 | 249 | ||
214 | /* | 250 | /* |
215 | ** one of these for each transaction. The most important part here is the j_realblock. | 251 | * one of these for each transaction. The most important part here is the |
216 | ** this list of cnodes is used to hash all the blocks in all the commits, to mark all the | 252 | * j_realblock. this list of cnodes is used to hash all the blocks in all |
217 | ** real buffer heads dirty once all the commits hit the disk, | 253 | * the commits, to mark all the real buffer heads dirty once all the commits |
218 | ** and to make sure every real block in a transaction is on disk before allowing the log area | 254 | * hit the disk, and to make sure every real block in a transaction is on |
219 | ** to be overwritten */ | 255 | * disk before allowing the log area to be overwritten |
256 | */ | ||
220 | struct reiserfs_journal_list { | 257 | struct reiserfs_journal_list { |
221 | unsigned long j_start; | 258 | unsigned long j_start; |
222 | unsigned long j_state; | 259 | unsigned long j_state; |
223 | unsigned long j_len; | 260 | unsigned long j_len; |
224 | atomic_t j_nonzerolen; | 261 | atomic_t j_nonzerolen; |
225 | atomic_t j_commit_left; | 262 | atomic_t j_commit_left; |
226 | atomic_t j_older_commits_done; /* all commits older than this on disk */ | 263 | |
264 | /* all commits older than this on disk */ | ||
265 | atomic_t j_older_commits_done; | ||
266 | |||
227 | struct mutex j_commit_mutex; | 267 | struct mutex j_commit_mutex; |
228 | unsigned int j_trans_id; | 268 | unsigned int j_trans_id; |
229 | time_t j_timestamp; | 269 | time_t j_timestamp; |
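The sizing rules in the LOGGING comment above, together with this struct's guarantee that no log area is overwritten before its real blocks reach disk, reduce to simple arithmetic. A throwaway check; every figure below is invented for illustration, and the comment itself only guesses "n" at 8-16:

#include <stdio.h>

int main(void)
{
	/* all numbers are assumptions, purely for illustration */
	unsigned int journal_blocks = 8192;	/* size of the log area */
	unsigned int avg_trans_blocks = 600;	/* workload-dependent */
	unsigned int fit = journal_blocks / avg_trans_blocks;

	/*
	 * the comment's rule of thumb: too few resident transactions
	 * means the log wraps while dirty buffers are still unflushed
	 */
	printf("%u transactions fit before wrapping: %s\n", fit,
	       fit >= 8 ? "ok" : "journal probably too small");
	return 0;
}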
@@ -234,11 +274,15 @@ struct reiserfs_journal_list { | |||
234 | /* time ordered list of all active transactions */ | 274 | /* time ordered list of all active transactions */ |
235 | struct list_head j_list; | 275 | struct list_head j_list; |
236 | 276 | ||
237 | /* time ordered list of all transactions we haven't tried to flush yet */ | 277 | /* |
278 | * time ordered list of all transactions we haven't tried | ||
279 | * to flush yet | ||
280 | */ | ||
238 | struct list_head j_working_list; | 281 | struct list_head j_working_list; |
239 | 282 | ||
240 | /* list of tail conversion targets in need of flush before commit */ | 283 | /* list of tail conversion targets in need of flush before commit */ |
241 | struct list_head j_tail_bh_list; | 284 | struct list_head j_tail_bh_list; |
285 | |||
242 | /* list of data=ordered buffers in need of flush before commit */ | 286 | /* list of data=ordered buffers in need of flush before commit */ |
243 | struct list_head j_bh_list; | 287 | struct list_head j_bh_list; |
244 | int j_refcount; | 288 | int j_refcount; |
@@ -246,46 +290,83 @@ struct reiserfs_journal_list { | |||
246 | 290 | ||
247 | struct reiserfs_journal { | 291 | struct reiserfs_journal { |
248 | struct buffer_head **j_ap_blocks; /* journal blocks on disk */ | 292 | struct buffer_head **j_ap_blocks; /* journal blocks on disk */ |
249 | struct reiserfs_journal_cnode *j_last; /* newest journal block */ | 293 | /* newest journal block */ |
250 | struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ | 294 | struct reiserfs_journal_cnode *j_last; |
295 | |||
296 | /* oldest journal block. start here for traverse */ | ||
297 | struct reiserfs_journal_cnode *j_first; | ||
251 | 298 | ||
252 | struct block_device *j_dev_bd; | 299 | struct block_device *j_dev_bd; |
253 | fmode_t j_dev_mode; | 300 | fmode_t j_dev_mode; |
254 | int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ | 301 | |
302 | /* first block on s_dev of reserved area journal */ | ||
303 | int j_1st_reserved_block; | ||
255 | 304 | ||
256 | unsigned long j_state; | 305 | unsigned long j_state; |
257 | unsigned int j_trans_id; | 306 | unsigned int j_trans_id; |
258 | unsigned long j_mount_id; | 307 | unsigned long j_mount_id; |
259 | unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ | 308 | |
309 | /* start of current waiting commit (index into j_ap_blocks) */ | ||
310 | unsigned long j_start; | ||
260 | unsigned long j_len; /* length of current waiting commit */ | 311 | unsigned long j_len; /* length of current waiting commit */ |
261 | unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ | 312 | |
313 | /* number of buffers requested by journal_begin() */ | ||
314 | unsigned long j_len_alloc; | ||
315 | |||
262 | atomic_t j_wcount; /* count of writers for current commit */ | 316 | atomic_t j_wcount; /* count of writers for current commit */ |
263 | unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ | 317 | |
264 | unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ | 318 | /* batch count. allows turning X transactions into 1 */ |
265 | unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ | 319 | unsigned long j_bcount; |
320 | |||
321 | /* first unflushed transactions offset */ | ||
322 | unsigned long j_first_unflushed_offset; | ||
323 | |||
324 | /* last fully flushed journal timestamp */ | ||
325 | unsigned j_last_flush_trans_id; | ||
326 | |||
266 | struct buffer_head *j_header_bh; | 327 | struct buffer_head *j_header_bh; |
267 | 328 | ||
268 | time_t j_trans_start_time; /* time this transaction started */ | 329 | time_t j_trans_start_time; /* time this transaction started */ |
269 | struct mutex j_mutex; | 330 | struct mutex j_mutex; |
270 | struct mutex j_flush_mutex; | 331 | struct mutex j_flush_mutex; |
271 | wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ | 332 | |
272 | atomic_t j_jlock; /* lock for j_join_wait */ | 333 | /* wait for current transaction to finish before starting new one */ |
334 | wait_queue_head_t j_join_wait; | ||
335 | |||
336 | atomic_t j_jlock; /* lock for j_join_wait */ | ||
273 | int j_list_bitmap_index; /* number of next list bitmap to use */ | 337 | int j_list_bitmap_index; /* number of next list bitmap to use */ |
274 | int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */ | 338 | |
275 | int j_next_full_flush; /* next journal_end will flush all journal list */ | 339 | /* no more journal begins allowed. MUST sleep on j_join_wait */ |
276 | int j_next_async_flush; /* next journal_end will flush all async commits */ | 340 | int j_must_wait; |
341 | |||
342 | /* next journal_end will flush all journal list */ | ||
343 | int j_next_full_flush; | ||
344 | |||
345 | /* next journal_end will flush all async commits */ | ||
346 | int j_next_async_flush; | ||
277 | 347 | ||
278 | int j_cnode_used; /* number of cnodes on the used list */ | 348 | int j_cnode_used; /* number of cnodes on the used list */ |
279 | int j_cnode_free; /* number of cnodes on the free list */ | 349 | int j_cnode_free; /* number of cnodes on the free list */ |
280 | 350 | ||
281 | unsigned int j_trans_max; /* max number of blocks in a transaction. */ | 351 | /* max number of blocks in a transaction. */ |
282 | unsigned int j_max_batch; /* max number of blocks to batch into a trans */ | 352 | unsigned int j_trans_max; |
283 | unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ | 353 | |
284 | unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ | 354 | /* max number of blocks to batch into a trans */ |
285 | unsigned int j_default_max_commit_age; /* the default for the max commit age */ | 355 | unsigned int j_max_batch; |
356 | |||
357 | /* in seconds, how old can an async commit be */ | ||
358 | unsigned int j_max_commit_age; | ||
359 | |||
360 | /* in seconds, how old can a transaction be */ | ||
361 | unsigned int j_max_trans_age; | ||
362 | |||
363 | /* the default for the max commit age */ | ||
364 | unsigned int j_default_max_commit_age; | ||
286 | 365 | ||
287 | struct reiserfs_journal_cnode *j_cnode_free_list; | 366 | struct reiserfs_journal_cnode *j_cnode_free_list; |
288 | struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ | 367 | |
368 | /* orig pointer returned from vmalloc */ | ||
369 | struct reiserfs_journal_cnode *j_cnode_free_orig; | ||
289 | 370 | ||
290 | struct reiserfs_journal_list *j_current_jl; | 371 | struct reiserfs_journal_list *j_current_jl; |
291 | int j_free_bitmap_nodes; | 372 | int j_free_bitmap_nodes; |
@@ -306,14 +387,21 @@ struct reiserfs_journal { | |||
306 | 387 | ||
307 | /* list of all active transactions */ | 388 | /* list of all active transactions */ |
308 | struct list_head j_journal_list; | 389 | struct list_head j_journal_list; |
390 | |||
309 | /* lists that haven't been touched by writeback attempts */ | 391 | /* lists that haven't been touched by writeback attempts */ |
310 | struct list_head j_working_list; | 392 | struct list_head j_working_list; |
311 | 393 | ||
312 | struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ | 394 | /* hash table for real buffer heads in current trans */ |
313 | struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ | 395 | struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; |
314 | struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all | 396 | |
315 | the transactions */ | 397 | /* hash table for all the real buffer heads in all the transactions */ |
316 | struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */ | 398 | struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; |
399 | |||
400 | /* array of bitmaps to record the deleted blocks */ | ||
401 | struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; | ||
402 | |||
403 | /* list of inodes which have preallocated blocks */ | ||
404 | struct list_head j_prealloc_list; | ||
317 | int j_persistent_trans; | 405 | int j_persistent_trans; |
318 | unsigned long j_max_trans_size; | 406 | unsigned long j_max_trans_size; |
319 | unsigned long j_max_batch_size; | 407 | unsigned long j_max_batch_size; |
@@ -328,11 +416,12 @@ struct reiserfs_journal { | |||
328 | 416 | ||
329 | enum journal_state_bits { | 417 | enum journal_state_bits { |
330 | J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ | 418 | J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ |
331 | J_WRITERS_QUEUED, /* set when log is full due to too many writers */ | 419 | J_WRITERS_QUEUED, /* set when log is full due to too many writers */ |
332 | J_ABORTED, /* set when log is aborted */ | 420 | J_ABORTED, /* set when log is aborted */ |
333 | }; | 421 | }; |
334 | 422 | ||
335 | #define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ | 423 | /* ick. magic string to find desc blocks in the journal */ |
424 | #define JOURNAL_DESC_MAGIC "ReIsErLB" | ||
336 | 425 | ||
337 | typedef __u32(*hashf_t) (const signed char *, int); | 426 | typedef __u32(*hashf_t) (const signed char *, int); |
338 | 427 | ||
@@ -364,7 +453,10 @@ typedef struct reiserfs_proc_info_data { | |||
364 | stat_cnt_t leaked_oid; | 453 | stat_cnt_t leaked_oid; |
365 | stat_cnt_t leaves_removable; | 454 | stat_cnt_t leaves_removable; |
366 | 455 | ||
367 | /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */ | 456 | /* |
457 | * balances per level. | ||
458 | * Use explicit 5 as MAX_HEIGHT is not visible yet. | ||
459 | */ | ||
368 | stat_cnt_t balance_at[5]; /* XXX */ | 460 | stat_cnt_t balance_at[5]; /* XXX */ |
369 | /* sbk == search_by_key */ | 461 | /* sbk == search_by_key */ |
370 | stat_cnt_t sbk_read_at[5]; /* XXX */ | 462 | stat_cnt_t sbk_read_at[5]; /* XXX */ |
@@ -416,47 +508,75 @@ typedef struct reiserfs_proc_info_data { | |||
416 | 508 | ||
417 | /* reiserfs union of in-core super block data */ | 509 | /* reiserfs union of in-core super block data */ |
418 | struct reiserfs_sb_info { | 510 | struct reiserfs_sb_info { |
419 | struct buffer_head *s_sbh; /* Buffer containing the super block */ | 511 | /* Buffer containing the super block */ |
420 | /* both the comment and the choice of | 512 | struct buffer_head *s_sbh; |
421 | name are unclear for s_rs -Hans */ | 513 | |
422 | struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ | 514 | /* Pointer to the on-disk super block in the buffer */ |
515 | struct reiserfs_super_block *s_rs; | ||
423 | struct reiserfs_bitmap_info *s_ap_bitmap; | 516 | struct reiserfs_bitmap_info *s_ap_bitmap; |
424 | struct reiserfs_journal *s_journal; /* pointer to journal information */ | 517 | |
518 | /* pointer to journal information */ | ||
519 | struct reiserfs_journal *s_journal; | ||
520 | |||
425 | unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ | 521 | unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ |
426 | 522 | ||
427 | /* Serialize writers access, replace the old bkl */ | 523 | /* Serialize writers access, replace the old bkl */ |
428 | struct mutex lock; | 524 | struct mutex lock; |
525 | |||
429 | /* Owner of the lock (can be recursive) */ | 526 | /* Owner of the lock (can be recursive) */ |
430 | struct task_struct *lock_owner; | 527 | struct task_struct *lock_owner; |
528 | |||
431 | /* Depth of the lock, start from -1 like the bkl */ | 529 | /* Depth of the lock, start from -1 like the bkl */ |
432 | int lock_depth; | 530 | int lock_depth; |
433 | 531 | ||
532 | struct workqueue_struct *commit_wq; | ||
533 | |||
434 | /* Comment? -Hans */ | 534 | /* Comment? -Hans */ |
435 | void (*end_io_handler) (struct buffer_head *, int); | 535 | void (*end_io_handler) (struct buffer_head *, int); |
436 | hashf_t s_hash_function; /* pointer to function which is used | 536 | |
437 | to sort names in directory. Set on | 537 | /* |
438 | mount */ | 538 | * pointer to function which is used to sort names in directory. |
439 | unsigned long s_mount_opt; /* reiserfs's mount options are set | 539 | * Set on mount |
440 | here (currently - NOTAIL, NOLOG, | 540 | */ |
441 | REPLAYONLY) */ | 541 | hashf_t s_hash_function; |
442 | 542 | ||
443 | struct { /* This is a structure that describes block allocator options */ | 543 | /* reiserfs's mount options are set here */ |
444 | unsigned long bits; /* Bitfield for enable/disable kind of options */ | 544 | unsigned long s_mount_opt; |
445 | unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ | 545 | |
546 | /* This is a structure that describes block allocator options */ | ||
547 | struct { | ||
548 | /* Bitfield for enable/disable kind of options */ | ||
549 | unsigned long bits; | ||
550 | |||
551 | /* | ||
552 | * size starting from which we consider a file | ||
553 | * to be a large one (in blocks) | ||
554 | */ | ||
555 | unsigned long large_file_size; | ||
556 | |||
446 | int border; /* percentage of disk, border takes */ | 557 | int border; /* percentage of disk, border takes */ |
447 | int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ | 558 | |
448 | int preallocsize; /* Number of blocks we try to prealloc when file | 559 | /* |
449 | reaches preallocmin size (in blocks) or | 560 | * Minimal file size (in blocks) starting |
450 | prealloc_list is empty. */ | 561 | * from which we do preallocations |
562 | */ | ||
563 | int preallocmin; | ||
564 | |||
565 | /* | ||
566 | * Number of blocks we try to prealloc when file | ||
567 | * reaches preallocmin size (in blocks) or prealloc_list | ||
568 | * is empty. | ||
569 | */ | ||
570 | int preallocsize; | ||
451 | } s_alloc_options; | 571 | } s_alloc_options; |
452 | 572 | ||
453 | /* Comment? -Hans */ | 573 | /* Comment? -Hans */ |
454 | wait_queue_head_t s_wait; | 574 | wait_queue_head_t s_wait; |
455 | /* To be obsoleted soon by per buffer seals.. -Hans */ | 575 | /* increased by one every time the tree gets re-balanced */ |
456 | atomic_t s_generation_counter; // increased by one every time the | 576 | atomic_t s_generation_counter; |
457 | // tree gets re-balanced | 577 | |
458 | unsigned long s_properties; /* File system properties. Currently holds | 578 | /* File system properties. Currently holds on-disk FS format */ |
459 | on-disk FS format */ | 579 | unsigned long s_properties; |
460 | 580 | ||
461 | /* session statistics */ | 581 | /* session statistics */ |
462 | int s_disk_reads; | 582 | int s_disk_reads; |
@@ -469,14 +589,23 @@ struct reiserfs_sb_info { | |||
469 | int s_bmaps_without_search; | 589 | int s_bmaps_without_search; |
470 | int s_direct2indirect; | 590 | int s_direct2indirect; |
471 | int s_indirect2direct; | 591 | int s_indirect2direct; |
472 | /* set up when it's ok for reiserfs_read_inode2() to read from | 592 | |
473 | disk inode with nlink==0. Currently this is only used during | 593 | /* |
474 | finish_unfinished() processing at mount time */ | 594 | * set up when it's ok for reiserfs_read_inode2() to read from |
595 | * disk inode with nlink==0. Currently this is only used during | ||
596 | * finish_unfinished() processing at mount time | ||
597 | */ | ||
475 | int s_is_unlinked_ok; | 598 | int s_is_unlinked_ok; |
599 | |||
476 | reiserfs_proc_info_data_t s_proc_info_data; | 600 | reiserfs_proc_info_data_t s_proc_info_data; |
477 | struct proc_dir_entry *procdir; | 601 | struct proc_dir_entry *procdir; |
478 | int reserved_blocks; /* amount of blocks reserved for further allocations */ | 602 | |
479 | spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ | 603 | /* amount of blocks reserved for further allocations */ |
604 | int reserved_blocks; | ||
605 | |||
606 | |||
607 | /* this lock is now only used to protect the reserved_blocks variable */ | ||
608 | spinlock_t bitmap_lock; | ||
480 | struct dentry *priv_root; /* root of /.reiserfs_priv */ | 609 | struct dentry *priv_root; /* root of /.reiserfs_priv */ |
481 | struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ | 610 | struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ |
482 | int j_errno; | 611 | int j_errno; |
@@ -492,14 +621,13 @@ struct reiserfs_sb_info { | |||
492 | char *s_jdev; /* Stored jdev for mount option showing */ | 621 | char *s_jdev; /* Stored jdev for mount option showing */ |
493 | #ifdef CONFIG_REISERFS_CHECK | 622 | #ifdef CONFIG_REISERFS_CHECK |
494 | 623 | ||
495 | struct tree_balance *cur_tb; /* | 624 | /* |
496 | * Detects whether more than one | 625 | * Detects whether more than one copy of tb exists per superblock |
497 | * copy of tb exists per superblock | 626 | * as a means of checking whether do_balance is executing |
498 | * as a means of checking whether | 627 | * concurrently against another tree reader/writer on a same |
499 | * do_balance is executing concurrently | 628 | * mount point. |
500 | * against another tree reader/writer | 629 | */ |
501 | * on a same mount point. | 630 | struct tree_balance *cur_tb; |
502 | */ | ||
503 | #endif | 631 | #endif |
504 | }; | 632 | }; |
505 | 633 | ||
@@ -508,25 +636,36 @@ struct reiserfs_sb_info { | |||
508 | #define REISERFS_3_6 1 | 636 | #define REISERFS_3_6 1 |
509 | #define REISERFS_OLD_FORMAT 2 | 637 | #define REISERFS_OLD_FORMAT 2 |
510 | 638 | ||
511 | enum reiserfs_mount_options { | ||
512 | /* Mount options */ | 639 | /* Mount options */ |
513 | REISERFS_LARGETAIL, /* large tails will be created in a session */ | 640 | enum reiserfs_mount_options { |
514 | REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ | 641 | /* large tails will be created in a session */ |
515 | REPLAYONLY, /* replay journal and return 0. Use by fsck */ | 642 | REISERFS_LARGETAIL, |
516 | REISERFS_CONVERT, /* -o conv: causes conversion of old | 643 | /* |
517 | format super block to the new | 644 | * small (for files less than block size) tails will |
518 | format. If not specified - old | 645 | * be created in a session |
519 | partition will be dealt with in a | 646 | */ |
520 | manner of 3.5.x */ | 647 | REISERFS_SMALLTAIL, |
521 | 648 | ||
522 | /* -o hash={tea, rupasov, r5, detect} is meant for properly mounting | 649 | /* replay journal and return 0. Used by fsck */ |
523 | ** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option | 650 | REPLAYONLY, |
524 | ** is not required. If the normal autodection code can't determine which | 651 | |
525 | ** hash to use (because both hashes had the same value for a file) | 652 | /* |
526 | ** use this option to force a specific hash. It won't allow you to override | 653 | * -o conv: causes conversion of old format super block to the |
527 | ** the existing hash on the FS, so if you have a tea hash disk, and mount | 654 | * new format. If not specified - old partition will be dealt |
528 | ** with -o hash=rupasov, the mount will fail. | 655 | * with in a manner of 3.5.x |
529 | */ | 656 | */ |
657 | REISERFS_CONVERT, | ||
658 | |||
659 | /* | ||
660 | * -o hash={tea, rupasov, r5, detect} is meant for properly mounting | ||
661 | * reiserfs disks from 3.5.19 or earlier. 99% of the time, this | ||
662 | * option is not required. If the normal autodetection code can't | ||
663 | * determine which hash to use (because both hashes had the same | ||
664 | * value for a file) use this option to force a specific hash. | ||
665 | * It won't allow you to override the existing hash on the FS, so | ||
666 | * if you have a tea hash disk, and mount with -o hash=rupasov, | ||
667 | * the mount will fail. | ||
668 | */ | ||
530 | FORCE_TEA_HASH, /* try to force tea hash on mount */ | 669 | FORCE_TEA_HASH, /* try to force tea hash on mount */ |
531 | FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ | 670 | FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ |
532 | FORCE_R5_HASH, /* try to force rupasov hash on mount */ | 671 | FORCE_R5_HASH, /* try to force r5 hash on mount */ |
@@ -536,9 +675,11 @@ enum reiserfs_mount_options { | |||
536 | REISERFS_DATA_ORDERED, | 675 | REISERFS_DATA_ORDERED, |
537 | REISERFS_DATA_WRITEBACK, | 676 | REISERFS_DATA_WRITEBACK, |
538 | 677 | ||
539 | /* used for testing experimental features, makes benchmarking new | 678 | /* |
540 | features with and without more convenient, should never be used by | 679 | * used for testing experimental features, makes benchmarking new |
541 | users in any code shipped to users (ideally) */ | 680 | * features with and without more convenient, should never be used by |
681 | * users in any code shipped to users (ideally) | ||
682 | */ | ||
542 | 683 | ||
543 | REISERFS_NO_BORDER, | 684 | REISERFS_NO_BORDER, |
544 | REISERFS_NO_UNHASHED_RELOCATION, | 685 | REISERFS_NO_UNHASHED_RELOCATION, |
@@ -705,28 +846,28 @@ static inline void reiserfs_cond_resched(struct super_block *s) | |||
705 | 846 | ||
706 | struct fid; | 847 | struct fid; |
707 | 848 | ||
708 | /* in reading the #defines, it may help to understand that they employ | 849 | /* |
709 | the following abbreviations: | 850 | * in reading the #defines, it may help to understand that they employ |
710 | 851 | * the following abbreviations: | |
711 | B = Buffer | 852 | * |
712 | I = Item header | 853 | * B = Buffer |
713 | H = Height within the tree (should be changed to LEV) | 854 | * I = Item header |
714 | N = Number of the item in the node | 855 | * H = Height within the tree (should be changed to LEV) |
715 | STAT = stat data | 856 | * N = Number of the item in the node |
716 | DEH = Directory Entry Header | 857 | * STAT = stat data |
717 | EC = Entry Count | 858 | * DEH = Directory Entry Header |
718 | E = Entry number | 859 | * EC = Entry Count |
719 | UL = Unsigned Long | 860 | * E = Entry number |
720 | BLKH = BLocK Header | 861 | * UL = Unsigned Long |
721 | UNFM = UNForMatted node | 862 | * BLKH = BLocK Header |
722 | DC = Disk Child | 863 | * UNFM = UNForMatted node |
723 | P = Path | 864 | * DC = Disk Child |
724 | 865 | * P = Path | |
725 | These #defines are named by concatenating these abbreviations, | 866 | * |
726 | where first comes the arguments, and last comes the return value, | 867 | * These #defines are named by concatenating these abbreviations, |
727 | of the macro. | 868 | * where first comes the arguments, and last comes the return value, |
728 | 869 | * of the macro. | |
729 | */ | 870 | */ |
730 | 871 | ||
731 | #define USE_INODE_GENERATION_COUNTER | 872 | #define USE_INODE_GENERATION_COUNTER |
732 | 873 | ||
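
To see how these abbreviations compose, take the accessor that maps a Buffer and an item Number to a Pointer to the Item Head: arguments first, return value last. A sketch along the lines of this header's own definition (treat the exact body as illustrative):

        /* B (buffer) and N (item number) in, pointer to item_head out */
        #define B_N_PITEM_HEAD(bh, item_num) \
                ((struct item_head *)((bh)->b_data + BLKH_SIZE) + (item_num))
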
@@ -737,14 +878,17 @@ struct fid; | |||
737 | /* n must be power of 2 */ | 878 | /* n must be power of 2 */ |
738 | #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) | 879 | #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) |
739 | 880 | ||
740 | // to be ok for alpha and others we have to align structures to 8 byte | 881 | /* |
741 | // boundary. | 882 | * to be ok for alpha and others we have to align structures to 8 byte |
742 | // FIXME: do not change 4 by anything else: there is code which relies on that | 883 | * boundary. |
884 | * FIXME: do not change 4 by anything else: there is code which relies on that | ||
885 | */ | ||
743 | #define ROUND_UP(x) _ROUND_UP(x,8LL) | 886 | #define ROUND_UP(x) _ROUND_UP(x,8LL) |
744 | 887 | ||
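
The masking in _ROUND_UP only works because n is a power of two; a standalone userspace check of the identity (illustrative values):

        #include <assert.h>

        #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
        #define ROUND_UP(x) _ROUND_UP(x,8LL)

        int main(void)
        {
                assert(ROUND_UP(0)  == 0);
                assert(ROUND_UP(1)  == 8);   /* rounds up to the 8-byte boundary */
                assert(ROUND_UP(8)  == 8);   /* already aligned: unchanged */
                assert(ROUND_UP(13) == 16);
                return 0;
        }
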
745 | /* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug | 888 | /* |
746 | ** messages. | 889 | * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug |
747 | */ | 890 | * messages. |
891 | */ | ||
748 | #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ | 892 | #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ |
749 | 893 | ||
750 | void __reiserfs_warning(struct super_block *s, const char *id, | 894 | void __reiserfs_warning(struct super_block *s, const char *id, |
@@ -753,7 +897,7 @@ void __reiserfs_warning(struct super_block *s, const char *id, | |||
753 | __reiserfs_warning(s, id, __func__, fmt, ##args) | 897 | __reiserfs_warning(s, id, __func__, fmt, ##args) |
754 | /* assertions handling */ | 898 | /* assertions handling */ |
755 | 899 | ||
756 | /** always check a condition and panic if it's false. */ | 900 | /* always check a condition and panic if it's false. */ |
757 | #define __RASSERT(cond, scond, format, args...) \ | 901 | #define __RASSERT(cond, scond, format, args...) \ |
758 | do { \ | 902 | do { \ |
759 | if (!(cond)) \ | 903 | if (!(cond)) \ |
@@ -776,35 +920,48 @@ do { \ | |||
776 | * Disk Data Structures | 920 | * Disk Data Structures |
777 | */ | 921 | */ |
778 | 922 | ||
779 | /***************************************************************************/ | 923 | /*************************************************************************** |
780 | /* SUPER BLOCK */ | 924 | * SUPER BLOCK * |
781 | /***************************************************************************/ | 925 | ***************************************************************************/ |
782 | 926 | ||
783 | /* | 927 | /* |
784 | * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs | 928 | * Structure of super block on disk, a version of which in RAM is often |
785 | * the version in RAM is part of a larger structure containing fields never written to disk. | 929 | * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger |
930 | * structure containing fields never written to disk. | ||
786 | */ | 931 | */ |
787 | #define UNSET_HASH 0 // read_super will guess about, what hash names | 932 | #define UNSET_HASH 0 /* Detect hash on disk */ |
788 | // in directories were sorted with | ||
789 | #define TEA_HASH 1 | 933 | #define TEA_HASH 1 |
790 | #define YURA_HASH 2 | 934 | #define YURA_HASH 2 |
791 | #define R5_HASH 3 | 935 | #define R5_HASH 3 |
792 | #define DEFAULT_HASH R5_HASH | 936 | #define DEFAULT_HASH R5_HASH |
793 | 937 | ||
794 | struct journal_params { | 938 | struct journal_params { |
795 | __le32 jp_journal_1st_block; /* where does journal start from on its | 939 | /* where does journal start from on its device */ |
796 | * device */ | 940 | __le32 jp_journal_1st_block; |
797 | __le32 jp_journal_dev; /* journal device st_rdev */ | 941 | |
798 | __le32 jp_journal_size; /* size of the journal */ | 942 | /* journal device st_rdev */ |
799 | __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */ | 943 | __le32 jp_journal_dev; |
800 | __le32 jp_journal_magic; /* random value made on fs creation (this | 944 | |
801 | * was sb_journal_block_count) */ | 945 | /* size of the journal */ |
802 | __le32 jp_journal_max_batch; /* max number of blocks to batch into a | 946 | __le32 jp_journal_size; |
803 | * trans */ | 947 | |
804 | __le32 jp_journal_max_commit_age; /* in seconds, how old can an async | 948 | /* max number of blocks in a transaction. */ |
805 | * commit be */ | 949 | __le32 jp_journal_trans_max; |
806 | __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction | 950 | |
807 | * be */ | 951 | /* |
952 | * random value made on fs creation | ||
953 | * (this was sb_journal_block_count) | ||
954 | */ | ||
955 | __le32 jp_journal_magic; | ||
956 | |||
957 | /* max number of blocks to batch into a trans */ | ||
958 | __le32 jp_journal_max_batch; | ||
959 | |||
960 | /* in seconds, how old can an async commit be */ | ||
961 | __le32 jp_journal_max_commit_age; | ||
962 | |||
963 | /* in seconds, how old can a transaction be */ | ||
964 | __le32 jp_journal_max_trans_age; | ||
808 | }; | 965 | }; |
809 | 966 | ||
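
Every journal_params field is little-endian on disk, so kernel code reads them through a byte-order conversion; a hypothetical accessor in the usual pattern (the name is illustrative, not this header's API):

        static inline unsigned int jp_trans_max(const struct journal_params *jp)
        {
                /* __le32 on disk -> host byte order */
                return le32_to_cpu(jp->jp_journal_trans_max);
        }
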
810 | /* this is the super from 3.5.X, where X >= 10 */ | 967 | /* this is the super from 3.5.X, where X >= 10 */ |
@@ -814,26 +971,48 @@ struct reiserfs_super_block_v1 { | |||
814 | __le32 s_root_block; /* root block number */ | 971 | __le32 s_root_block; /* root block number */ |
815 | struct journal_params s_journal; | 972 | struct journal_params s_journal; |
816 | __le16 s_blocksize; /* block size */ | 973 | __le16 s_blocksize; /* block size */ |
817 | __le16 s_oid_maxsize; /* max size of object id array, see | 974 | |
818 | * get_objectid() commentary */ | 975 | /* max size of object id array, see get_objectid() commentary */ |
976 | __le16 s_oid_maxsize; | ||
819 | __le16 s_oid_cursize; /* current size of object id array */ | 977 | __le16 s_oid_cursize; /* current size of object id array */ |
820 | __le16 s_umount_state; /* this is set to 1 when filesystem was | 978 | |
821 | * umounted, to 2 - when not */ | 979 | /* this is set to 1 when filesystem was umounted, to 2 - when not */ |
822 | char s_magic[10]; /* reiserfs magic string indicates that | 980 | __le16 s_umount_state; |
823 | * file system is reiserfs: | 981 | |
824 | * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ | 982 | /* |
825 | __le16 s_fs_state; /* it is set to used by fsck to mark which | 983 | * reiserfs magic string indicates that file system is reiserfs: |
826 | * phase of rebuilding is done */ | 984 | * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" |
827 | __le32 s_hash_function_code; /* indicate, what hash function is being use | 985 | */ |
828 | * to sort names in a directory*/ | 986 | char s_magic[10]; |
987 | |||
988 | /* | ||
989 | * it is set by fsck to mark which | ||
990 | * phase of rebuilding is done | ||
991 | */ | ||
992 | __le16 s_fs_state; | ||
993 | /* | ||
994 | * indicates which hash function is being used | ||
995 | * to sort names in a directory | ||
996 | */ | ||
997 | __le32 s_hash_function_code; | ||
829 | __le16 s_tree_height; /* height of disk tree */ | 998 | __le16 s_tree_height; /* height of disk tree */ |
830 | __le16 s_bmap_nr; /* amount of bitmap blocks needed to address | 999 | |
831 | * each block of file system */ | 1000 | /* |
832 | __le16 s_version; /* this field is only reliable on filesystem | 1001 | * amount of bitmap blocks needed to address |
833 | * with non-standard journal */ | 1002 | * each block of file system |
834 | __le16 s_reserved_for_journal; /* size in blocks of journal area on main | 1003 | */ |
835 | * device, we need to keep after | 1004 | __le16 s_bmap_nr; |
836 | * making fs with non-standard journal */ | 1005 | |
1006 | /* | ||
1007 | * this field is only reliable on filesystem with non-standard journal | ||
1008 | */ | ||
1009 | __le16 s_version; | ||
1010 | |||
1011 | /* | ||
1012 | * size in blocks of journal area on main device, we need to | ||
1013 | * keep after making fs with non-standard journal | ||
1014 | */ | ||
1015 | __le16 s_reserved_for_journal; | ||
837 | } __attribute__ ((__packed__)); | 1016 | } __attribute__ ((__packed__)); |
838 | 1017 | ||
839 | #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) | 1018 | #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) |
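
Since the struct is packed, SB_SIZE_V1 is stable across architectures; a standalone C11 sanity check, assuming the historical 76-byte v1 layout:

        _Static_assert(sizeof(struct reiserfs_super_block_v1) == 76,
                       "on-disk v1 super block layout must not change");
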
@@ -842,17 +1021,21 @@ struct reiserfs_super_block_v1 { | |||
842 | struct reiserfs_super_block { | 1021 | struct reiserfs_super_block { |
843 | struct reiserfs_super_block_v1 s_v1; | 1022 | struct reiserfs_super_block_v1 s_v1; |
844 | __le32 s_inode_generation; | 1023 | __le32 s_inode_generation; |
845 | __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ | 1024 | |
1025 | /* Right now used only by inode-attributes, if enabled */ | ||
1026 | __le32 s_flags; | ||
1027 | |||
846 | unsigned char s_uuid[16]; /* filesystem unique identifier */ | 1028 | unsigned char s_uuid[16]; /* filesystem unique identifier */ |
847 | unsigned char s_label[16]; /* filesystem volume label */ | 1029 | unsigned char s_label[16]; /* filesystem volume label */ |
848 | __le16 s_mnt_count; /* Count of mounts since last fsck */ | 1030 | __le16 s_mnt_count; /* Count of mounts since last fsck */ |
849 | __le16 s_max_mnt_count; /* Maximum mounts before check */ | 1031 | __le16 s_max_mnt_count; /* Maximum mounts before check */ |
850 | __le32 s_lastcheck; /* Timestamp of last fsck */ | 1032 | __le32 s_lastcheck; /* Timestamp of last fsck */ |
851 | __le32 s_check_interval; /* Interval between checks */ | 1033 | __le32 s_check_interval; /* Interval between checks */ |
852 | char s_unused[76]; /* zero filled by mkreiserfs and | 1034 | |
853 | * reiserfs_convert_objectid_map_v1() | 1035 | /* |
854 | * so any additions must be updated | 1036 | * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1() |
855 | * there as well. */ | 1037 | * so any additions must be updated there as well. */ |
1038 | char s_unused[76]; | ||
856 | } __attribute__ ((__packed__)); | 1039 | } __attribute__ ((__packed__)); |
857 | 1040 | ||
858 | #define SB_SIZE (sizeof(struct reiserfs_super_block)) | 1041 | #define SB_SIZE (sizeof(struct reiserfs_super_block)) |
@@ -860,7 +1043,7 @@ struct reiserfs_super_block { | |||
860 | #define REISERFS_VERSION_1 0 | 1043 | #define REISERFS_VERSION_1 0 |
861 | #define REISERFS_VERSION_2 2 | 1044 | #define REISERFS_VERSION_2 2 |
862 | 1045 | ||
863 | // on-disk super block fields converted to cpu form | 1046 | /* on-disk super block fields converted to cpu form */ |
864 | #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) | 1047 | #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) |
865 | #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) | 1048 | #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) |
866 | #define SB_BLOCKSIZE(s) \ | 1049 | #define SB_BLOCKSIZE(s) \ |
@@ -915,11 +1098,13 @@ int is_reiserfs_3_5(struct reiserfs_super_block *rs); | |||
915 | int is_reiserfs_3_6(struct reiserfs_super_block *rs); | 1098 | int is_reiserfs_3_6(struct reiserfs_super_block *rs); |
916 | int is_reiserfs_jr(struct reiserfs_super_block *rs); | 1099 | int is_reiserfs_jr(struct reiserfs_super_block *rs); |
917 | 1100 | ||
918 | /* ReiserFS leaves the first 64k unused, so that partition labels have | 1101 | /* |
919 | enough space. If someone wants to write a fancy bootloader that | 1102 | * ReiserFS leaves the first 64k unused, so that partition labels have |
920 | needs more than 64k, let us know, and this will be increased in size. | 1103 | * enough space. If someone wants to write a fancy bootloader that |
921 | This number must be larger than than the largest block size on any | 1104 | * needs more than 64k, let us know, and this will be increased in size. |
922 | platform, or code will break. -Hans */ | 1105 | * This number must be larger than the largest block size on any |
1106 | * platform, or code will break. -Hans | ||
1107 | */ | ||
923 | #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) | 1108 | #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) |
924 | #define REISERFS_FIRST_BLOCK unused_define | 1109 | #define REISERFS_FIRST_BLOCK unused_define |
925 | #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES | 1110 | #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES |
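
As a worked example of the 64k reservation (illustrative arithmetic, standalone C):

        #include <stdio.h>

        #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)

        int main(void)
        {
                unsigned int blocksize = 4096;
                /* 65536 / 4096 = 16: the super block sits in block 16 */
                printf("super block at block %u\n",
                       REISERFS_DISK_OFFSET_IN_BYTES / blocksize);
                return 0;
        }
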
@@ -944,8 +1129,7 @@ struct unfm_nodeinfo { | |||
944 | unsigned short unfm_freespace; | 1129 | unsigned short unfm_freespace; |
945 | }; | 1130 | }; |
946 | 1131 | ||
947 | /* there are two formats of keys: 3.5 and 3.6 | 1132 | /* there are two formats of keys: 3.5 and 3.6 */ |
948 | */ | ||
949 | #define KEY_FORMAT_3_5 0 | 1133 | #define KEY_FORMAT_3_5 0 |
950 | #define KEY_FORMAT_3_6 1 | 1134 | #define KEY_FORMAT_3_6 1 |
951 | 1135 | ||
@@ -963,8 +1147,10 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) | |||
963 | return sb->s_fs_info; | 1147 | return sb->s_fs_info; |
964 | } | 1148 | } |
965 | 1149 | ||
966 | /* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 | 1150 | /* |
967 | * which overflows on large file systems. */ | 1151 | * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 |
1152 | * which overflows on large file systems. | ||
1153 | */ | ||
968 | static inline __u32 reiserfs_bmap_count(struct super_block *sb) | 1154 | static inline __u32 reiserfs_bmap_count(struct super_block *sb) |
969 | { | 1155 | { |
970 | return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; | 1156 | return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; |
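
The expression is a ceil-division by the number of blocks one bitmap block can describe; restated standalone with illustrative numbers:

        /* one bitmap block covers blocksize * 8 blocks, so e.g. a
         * 1,000,000-block fs with 4096-byte blocks needs
         * (1000000 - 1) / 32768 + 1 = 31 bitmap blocks */
        static unsigned int bmap_count(unsigned int blocks, unsigned int blocksize)
        {
                return (blocks - 1) / (blocksize * 8) + 1;
        }
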
@@ -975,8 +1161,10 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
975 | return bmap_nr > ((1LL << 16) - 1); | 1161 | return bmap_nr > ((1LL << 16) - 1); |
976 | } | 1162 | } |
977 | 1163 | ||
978 | /** this says about version of key of all items (but stat data) the | 1164 | /* |
979 | object consists of */ | 1165 | * this says about version of key of all items (but stat data) the |
1166 | * object consists of | ||
1167 | */ | ||
980 | #define get_inode_item_key_version( inode ) \ | 1168 | #define get_inode_item_key_version( inode ) \ |
981 | ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) | 1169 | ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) |
982 | 1170 | ||
@@ -995,16 +1183,18 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
995 | else \ | 1183 | else \ |
996 | REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) | 1184 | REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) |
997 | 1185 | ||
998 | /* This is an aggressive tail suppression policy, I am hoping it | 1186 | /* |
999 | improves our benchmarks. The principle behind it is that percentage | 1187 | * This is an aggressive tail suppression policy, I am hoping it |
1000 | space saving is what matters, not absolute space saving. This is | 1188 | * improves our benchmarks. The principle behind it is that percentage |
1001 | non-intuitive, but it helps to understand it if you consider that the | 1189 | * space saving is what matters, not absolute space saving. This is |
1002 | cost to access 4 blocks is not much more than the cost to access 1 | 1190 | * non-intuitive, but it helps to understand it if you consider that the |
1003 | block, if you have to do a seek and rotate. A tail risks a | 1191 | * cost to access 4 blocks is not much more than the cost to access 1 |
1004 | non-linear disk access that is significant as a percentage of total | 1192 | * block, if you have to do a seek and rotate. A tail risks a |
1005 | time cost for a 4 block file and saves an amount of space that is | 1193 | * non-linear disk access that is significant as a percentage of total |
1006 | less significant as a percentage of space, or so goes the hypothesis. | 1194 | * time cost for a 4 block file and saves an amount of space that is |
1007 | -Hans */ | 1195 | * less significant as a percentage of space, or so goes the hypothesis. |
1196 | * -Hans | ||
1197 | */ | ||
1008 | #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ | 1198 | #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ |
1009 | (\ | 1199 | (\ |
1010 | (!(n_tail_size)) || \ | 1200 | (!(n_tail_size)) || \ |
@@ -1018,10 +1208,11 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
1018 | ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ | 1208 | ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ |
1019 | ) | 1209 | ) |
1020 | 1210 | ||
1021 | /* Another strategy for tails, this one means only create a tail if all the | 1211 | /* |
1022 | file would fit into one DIRECT item. | 1212 | * Another strategy for tails, this one means only create a tail if all the |
1023 | Primary intention for this one is to increase performance by decreasing | 1213 | * file would fit into one DIRECT item. |
1024 | seeking. | 1214 | * Primary intention for this one is to increase performance by decreasing |
1215 | * seeking. | ||
1025 | */ | 1216 | */ |
1026 | #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ | 1217 | #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ |
1027 | (\ | 1218 | (\ |
@@ -1035,23 +1226,21 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
1035 | #define REISERFS_VALID_FS 1 | 1226 | #define REISERFS_VALID_FS 1 |
1036 | #define REISERFS_ERROR_FS 2 | 1227 | #define REISERFS_ERROR_FS 2 |
1037 | 1228 | ||
1038 | // | 1229 | /* |
1039 | // there are 5 item types currently | 1230 | * there are 5 item types currently |
1040 | // | 1231 | */ |
1041 | #define TYPE_STAT_DATA 0 | 1232 | #define TYPE_STAT_DATA 0 |
1042 | #define TYPE_INDIRECT 1 | 1233 | #define TYPE_INDIRECT 1 |
1043 | #define TYPE_DIRECT 2 | 1234 | #define TYPE_DIRECT 2 |
1044 | #define TYPE_DIRENTRY 3 | 1235 | #define TYPE_DIRENTRY 3 |
1045 | #define TYPE_MAXTYPE 3 | 1236 | #define TYPE_MAXTYPE 3 |
1046 | #define TYPE_ANY 15 // FIXME: comment is required | 1237 | #define TYPE_ANY 15 /* FIXME: comment is required */ |
1047 | 1238 | ||
1048 | /***************************************************************************/ | 1239 | /*************************************************************************** |
1049 | /* KEY & ITEM HEAD */ | 1240 | * KEY & ITEM HEAD * |
1050 | /***************************************************************************/ | 1241 | ***************************************************************************/ |
1051 | 1242 | ||
1052 | // | 1243 | /* directories use this key as well as old files */ |
1053 | // directories use this key as well as old files | ||
1054 | // | ||
1055 | struct offset_v1 { | 1244 | struct offset_v1 { |
1056 | __le32 k_offset; | 1245 | __le32 k_offset; |
1057 | __le32 k_uniqueness; | 1246 | __le32 k_uniqueness; |
@@ -1084,11 +1273,14 @@ static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) | |||
1084 | v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); | 1273 | v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); |
1085 | } | 1274 | } |
1086 | 1275 | ||
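
The cpu_to_le64(15ULL << 60) mask preserves the four type bits while the low 60 bits carry the offset; a hedged sketch of the packing this implies (plain integers, endianness left out):

        /* illustrative only: low 60 bits = offset, high 4 bits = type */
        static inline unsigned long long pack_offset_v2(unsigned long long offset,
                                                        unsigned int type)
        {
                return (offset & ((1ULL << 60) - 1)) |
                       ((unsigned long long)type << 60);
        }
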
1087 | /* Key of an item determines its location in the S+tree, and | 1276 | /* |
1088 | is composed of 4 components */ | 1277 | * Key of an item determines its location in the S+tree, and |
1278 | * is composed of 4 components | ||
1279 | */ | ||
1089 | struct reiserfs_key { | 1280 | struct reiserfs_key { |
1090 | __le32 k_dir_id; /* packing locality: by default parent | 1281 | /* packing locality: by default parent directory object id */ |
1091 | directory object id */ | 1282 | __le32 k_dir_id; |
1283 | |||
1092 | __le32 k_objectid; /* object identifier */ | 1284 | __le32 k_objectid; /* object identifier */ |
1093 | union { | 1285 | union { |
1094 | struct offset_v1 k_offset_v1; | 1286 | struct offset_v1 k_offset_v1; |
@@ -1097,8 +1289,8 @@ struct reiserfs_key { | |||
1097 | } __attribute__ ((__packed__)); | 1289 | } __attribute__ ((__packed__)); |
1098 | 1290 | ||
1099 | struct in_core_key { | 1291 | struct in_core_key { |
1100 | __u32 k_dir_id; /* packing locality: by default parent | 1292 | /* packing locality: by default parent directory object id */ |
1101 | directory object id */ | 1293 | __u32 k_dir_id; |
1102 | __u32 k_objectid; /* object identifier */ | 1294 | __u32 k_objectid; /* object identifier */ |
1103 | __u64 k_offset; | 1295 | __u64 k_offset; |
1104 | __u8 k_type; | 1296 | __u8 k_type; |
@@ -1107,14 +1299,16 @@ struct in_core_key { | |||
1107 | struct cpu_key { | 1299 | struct cpu_key { |
1108 | struct in_core_key on_disk_key; | 1300 | struct in_core_key on_disk_key; |
1109 | int version; | 1301 | int version; |
1110 | int key_length; /* 3 in all cases but direct2indirect and | 1302 | /* 3 in all cases but direct2indirect and indirect2direct conversion */ |
1111 | indirect2direct conversion */ | 1303 | int key_length; |
1112 | }; | 1304 | }; |
1113 | 1305 | ||
1114 | /* Our function for comparing keys can compare keys of different | 1306 | /* |
1115 | lengths. It takes as a parameter the length of the keys it is to | 1307 | * Our function for comparing keys can compare keys of different |
1116 | compare. These defines are used in determining what is to be passed | 1308 | * lengths. It takes as a parameter the length of the keys it is to |
1117 | to it as that parameter. */ | 1309 | * compare. These defines are used in determining what is to be passed |
1310 | * to it as that parameter. | ||
1311 | */ | ||
1118 | #define REISERFS_FULL_KEY_LEN 4 | 1312 | #define REISERFS_FULL_KEY_LEN 4 |
1119 | #define REISERFS_SHORT_KEY_LEN 2 | 1313 | #define REISERFS_SHORT_KEY_LEN 2 |
1120 | 1314 | ||
@@ -1143,40 +1337,52 @@ struct cpu_key { | |||
1143 | #define POSITION_FOUND 1 | 1337 | #define POSITION_FOUND 1 |
1144 | #define POSITION_NOT_FOUND 0 | 1338 | #define POSITION_NOT_FOUND 0 |
1145 | 1339 | ||
1146 | // return values for reiserfs_find_entry and search_by_entry_key | 1340 | /* return values for reiserfs_find_entry and search_by_entry_key */ |
1147 | #define NAME_FOUND 1 | 1341 | #define NAME_FOUND 1 |
1148 | #define NAME_NOT_FOUND 0 | 1342 | #define NAME_NOT_FOUND 0 |
1149 | #define GOTO_PREVIOUS_ITEM 2 | 1343 | #define GOTO_PREVIOUS_ITEM 2 |
1150 | #define NAME_FOUND_INVISIBLE 3 | 1344 | #define NAME_FOUND_INVISIBLE 3 |
1151 | 1345 | ||
1152 | /* Everything in the filesystem is stored as a set of items. The | 1346 | /* |
1153 | item head contains the key of the item, its free space (for | 1347 | * Everything in the filesystem is stored as a set of items. The |
1154 | indirect items) and specifies the location of the item itself | 1348 | * item head contains the key of the item, its free space (for |
1155 | within the block. */ | 1349 | * indirect items) and specifies the location of the item itself |
1350 | * within the block. | ||
1351 | */ | ||
1156 | 1352 | ||
1157 | struct item_head { | 1353 | struct item_head { |
1158 | /* Everything in the tree is found by searching for it based on | 1354 | /* |
1159 | * its key.*/ | 1355 | * Everything in the tree is found by searching for it based on |
1356 | * its key. | ||
1357 | */ | ||
1160 | struct reiserfs_key ih_key; | 1358 | struct reiserfs_key ih_key; |
1161 | union { | 1359 | union { |
1162 | /* The free space in the last unformatted node of an | 1360 | /* |
1163 | indirect item if this is an indirect item. This | 1361 | * The free space in the last unformatted node of an |
1164 | equals 0xFFFF iff this is a direct item or stat data | 1362 | * indirect item if this is an indirect item. This |
1165 | item. Note that the key, not this field, is used to | 1363 | * equals 0xFFFF iff this is a direct item or stat data |
1166 | determine the item type, and thus which field this | 1364 | * item. Note that the key, not this field, is used to |
1167 | union contains. */ | 1365 | * determine the item type, and thus which field this |
1366 | * union contains. | ||
1367 | */ | ||
1168 | __le16 ih_free_space_reserved; | 1368 | __le16 ih_free_space_reserved; |
1169 | /* Iff this is a directory item, this field equals the | 1369 | |
1170 | number of directory entries in the directory item. */ | 1370 | /* |
1371 | * Iff this is a directory item, this field equals the | ||
1372 | * number of directory entries in the directory item. | ||
1373 | */ | ||
1171 | __le16 ih_entry_count; | 1374 | __le16 ih_entry_count; |
1172 | } __attribute__ ((__packed__)) u; | 1375 | } __attribute__ ((__packed__)) u; |
1173 | __le16 ih_item_len; /* total size of the item body */ | 1376 | __le16 ih_item_len; /* total size of the item body */ |
1174 | __le16 ih_item_location; /* an offset to the item body | 1377 | |
1175 | * within the block */ | 1378 | /* an offset to the item body within the block */ |
1176 | __le16 ih_version; /* 0 for all old items, 2 for new | 1379 | __le16 ih_item_location; |
1177 | ones. Highest bit is set by fsck | 1380 | |
1178 | temporary, cleaned after all | 1381 | /* |
1179 | done */ | 1382 | * 0 for all old items, 2 for new ones. Highest bit is set by fsck |
1383 | * temporarily, cleaned after all is done | ||
1384 | */ | ||
1385 | __le16 ih_version; | ||
1180 | } __attribute__ ((__packed__)); | 1386 | } __attribute__ ((__packed__)); |
1181 | /* size of item header */ | 1387 | /* size of item header */ |
1182 | #define IH_SIZE (sizeof(struct item_head)) | 1388 | #define IH_SIZE (sizeof(struct item_head)) |
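
Because ih_item_location is a byte offset within the block, resolving an item body from its header is a single addition; a minimal sketch (this header's accessor macros do the equivalent):

        /* b_data points at the start of the formatted node */
        static inline void *item_body_sketch(const struct buffer_head *bh,
                                             const struct item_head *ih)
        {
                return bh->b_data + le16_to_cpu(ih->ih_item_location);
        }
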
@@ -1198,27 +1404,24 @@ struct item_head { | |||
1198 | #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) | 1404 | #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) |
1199 | #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) | 1405 | #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) |
1200 | 1406 | ||
1201 | /* these operate on indirect items, where you've got an array of ints | 1407 | /* |
1202 | ** at a possibly unaligned location. These are a noop on ia32 | 1408 | * these operate on indirect items, where you've got an array of ints |
1203 | ** | 1409 | * at a possibly unaligned location. These are a noop on ia32 |
1204 | ** p is the array of __u32, i is the index into the array, v is the value | 1410 | * |
1205 | ** to store there. | 1411 | * p is the array of __u32, i is the index into the array, v is the value |
1206 | */ | 1412 | * to store there. |
1413 | */ | ||
1207 | #define get_block_num(p, i) get_unaligned_le32((p) + (i)) | 1414 | #define get_block_num(p, i) get_unaligned_le32((p) + (i)) |
1208 | #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) | 1415 | #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) |
1209 | 1416 | ||
1210 | // | 1417 | /* in old version uniqueness field shows key type */ |
1211 | // in old version uniqueness field shows key type | ||
1212 | // | ||
1213 | #define V1_SD_UNIQUENESS 0 | 1418 | #define V1_SD_UNIQUENESS 0 |
1214 | #define V1_INDIRECT_UNIQUENESS 0xfffffffe | 1419 | #define V1_INDIRECT_UNIQUENESS 0xfffffffe |
1215 | #define V1_DIRECT_UNIQUENESS 0xffffffff | 1420 | #define V1_DIRECT_UNIQUENESS 0xffffffff |
1216 | #define V1_DIRENTRY_UNIQUENESS 500 | 1421 | #define V1_DIRENTRY_UNIQUENESS 500 |
1217 | #define V1_ANY_UNIQUENESS 555 // FIXME: comment is required | 1422 | #define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */ |
1218 | 1423 | ||
1219 | // | 1424 | /* here are conversion routines */ |
1220 | // here are conversion routines | ||
1221 | // | ||
1222 | static inline int uniqueness2type(__u32 uniqueness) CONSTF; | 1425 | static inline int uniqueness2type(__u32 uniqueness) CONSTF; |
1223 | static inline int uniqueness2type(__u32 uniqueness) | 1426 | static inline int uniqueness2type(__u32 uniqueness) |
1224 | { | 1427 | { |
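
The body of uniqueness2type() falls between these hunks; a hedged reconstruction of the v1 mapping implied by the defines above:

        static inline int uniqueness2type_sketch(__u32 uniqueness)
        {
                switch (uniqueness) {
                case V1_SD_UNIQUENESS:       return TYPE_STAT_DATA;
                case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT;
                case V1_DIRECT_UNIQUENESS:   return TYPE_DIRECT;
                case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY;
                default:                     return TYPE_ANY;
                }
        }
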
@@ -1255,11 +1458,11 @@ static inline __u32 type2uniqueness(int type) | |||
1255 | } | 1458 | } |
1256 | } | 1459 | } |
1257 | 1460 | ||
1258 | // | 1461 | /* |
1259 | // key is pointer to on disk key which is stored in le, result is cpu, | 1462 | * key is pointer to on disk key which is stored in le, result is cpu, |
1260 | // there is no way to get version of object from key, so, provide | 1463 | * there is no way to get version of object from key, so, provide |
1261 | // version to these defines | 1464 | * version to these defines |
1262 | // | 1465 | */ |
1263 | static inline loff_t le_key_k_offset(int version, | 1466 | static inline loff_t le_key_k_offset(int version, |
1264 | const struct reiserfs_key *key) | 1467 | const struct reiserfs_key *key) |
1265 | { | 1468 | { |
@@ -1275,9 +1478,11 @@ static inline loff_t le_ih_k_offset(const struct item_head *ih) | |||
1275 | 1478 | ||
1276 | static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) | 1479 | static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) |
1277 | { | 1480 | { |
1278 | return (version == KEY_FORMAT_3_5) ? | 1481 | if (version == KEY_FORMAT_3_5) { |
1279 | uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) : | 1482 | loff_t val = le32_to_cpu(key->u.k_offset_v1.k_uniqueness); |
1280 | offset_v2_k_type(&(key->u.k_offset_v2)); | 1483 | return uniqueness2type(val); |
1484 | } else | ||
1485 | return offset_v2_k_type(&(key->u.k_offset_v2)); | ||
1281 | } | 1486 | } |
1282 | 1487 | ||
1283 | static inline loff_t le_ih_k_type(const struct item_head *ih) | 1488 | static inline loff_t le_ih_k_type(const struct item_head *ih) |
@@ -1288,8 +1493,22 @@ static inline loff_t le_ih_k_type(const struct item_head *ih) | |||
1288 | static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, | 1493 | static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, |
1289 | loff_t offset) | 1494 | loff_t offset) |
1290 | { | 1495 | { |
1291 | (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */ | 1496 | if (version == KEY_FORMAT_3_5) |
1292 | (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset)); | 1497 | key->u.k_offset_v1.k_offset = cpu_to_le32(offset); |
1498 | else | ||
1499 | set_offset_v2_k_offset(&key->u.k_offset_v2, offset); | ||
1500 | } | ||
1501 | |||
1502 | static inline void add_le_key_k_offset(int version, struct reiserfs_key *key, | ||
1503 | loff_t offset) | ||
1504 | { | ||
1505 | set_le_key_k_offset(version, key, | ||
1506 | le_key_k_offset(version, key) + offset); | ||
1507 | } | ||
1508 | |||
1509 | static inline void add_le_ih_k_offset(struct item_head *ih, loff_t offset) | ||
1510 | { | ||
1511 | add_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); | ||
1293 | } | 1512 | } |
1294 | 1513 | ||
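
The two helpers added above fold the usual read-modify-write of a key offset into one call; a hypothetical caller advancing an item's key by one filesystem block would write:

        /* hypothetical usage; sb->s_blocksize stands in for the caller's size */
        add_le_ih_k_offset(ih, sb->s_blocksize);
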
1295 | static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) | 1514 | static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) |
@@ -1300,10 +1519,11 @@ static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) | |||
1300 | static inline void set_le_key_k_type(int version, struct reiserfs_key *key, | 1519 | static inline void set_le_key_k_type(int version, struct reiserfs_key *key, |
1301 | int type) | 1520 | int type) |
1302 | { | 1521 | { |
1303 | (version == KEY_FORMAT_3_5) ? | 1522 | if (version == KEY_FORMAT_3_5) { |
1304 | (void)(key->u.k_offset_v1.k_uniqueness = | 1523 | type = type2uniqueness(type); |
1305 | cpu_to_le32(type2uniqueness(type))) | 1524 | key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type); |
1306 | : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type)); | 1525 | } else |
1526 | set_offset_v2_k_type(&key->u.k_offset_v2, type); | ||
1307 | } | 1527 | } |
1308 | 1528 | ||
1309 | static inline void set_le_ih_k_type(struct item_head *ih, int type) | 1529 | static inline void set_le_ih_k_type(struct item_head *ih, int type) |
@@ -1331,9 +1551,7 @@ static inline int is_statdata_le_key(int version, struct reiserfs_key *key) | |||
1331 | return le_key_k_type(version, key) == TYPE_STAT_DATA; | 1551 | return le_key_k_type(version, key) == TYPE_STAT_DATA; |
1332 | } | 1552 | } |
1333 | 1553 | ||
1334 | // | 1554 | /* item header has version. */ |
1335 | // item header has version. | ||
1336 | // | ||
1337 | static inline int is_direntry_le_ih(struct item_head *ih) | 1555 | static inline int is_direntry_le_ih(struct item_head *ih) |
1338 | { | 1556 | { |
1339 | return is_direntry_le_key(ih_version(ih), &ih->ih_key); | 1557 | return is_direntry_le_key(ih_version(ih), &ih->ih_key); |
@@ -1354,9 +1572,7 @@ static inline int is_statdata_le_ih(struct item_head *ih) | |||
1354 | return is_statdata_le_key(ih_version(ih), &ih->ih_key); | 1572 | return is_statdata_le_key(ih_version(ih), &ih->ih_key); |
1355 | } | 1573 | } |
1356 | 1574 | ||
1357 | // | 1575 | /* key is pointer to cpu key, result is cpu */ |
1358 | // key is pointer to cpu key, result is cpu | ||
1359 | // | ||
1360 | static inline loff_t cpu_key_k_offset(const struct cpu_key *key) | 1576 | static inline loff_t cpu_key_k_offset(const struct cpu_key *key) |
1361 | { | 1577 | { |
1362 | return key->on_disk_key.k_offset; | 1578 | return key->on_disk_key.k_offset; |
@@ -1407,7 +1623,7 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key) | |||
1407 | 1623 | ||
1408 | extern struct reiserfs_key root_key; | 1624 | extern struct reiserfs_key root_key; |
1409 | 1625 | ||
1410 | /* | 1626 | /* |
1411 | * Picture represents a leaf of the S+tree | 1627 | * Picture represents a leaf of the S+tree |
1412 | * ______________________________________________________ | 1628 | * ______________________________________________________ |
1413 | * | | Array of | | | | 1629 | * | | Array of | | | |
@@ -1416,15 +1632,19 @@ extern struct reiserfs_key root_key; | |||
1416 | * |______|_______________|___________________|___________| | 1632 | * |______|_______________|___________________|___________| |
1417 | */ | 1633 | */ |
1418 | 1634 | ||
1419 | /* Header of a disk block. More precisely, header of a formatted leaf | 1635 | /* |
1420 | or internal node, and not the header of an unformatted node. */ | 1636 | * Header of a disk block. More precisely, header of a formatted leaf |
1637 | * or internal node, and not the header of an unformatted node. | ||
1638 | */ | ||
1421 | struct block_head { | 1639 | struct block_head { |
1422 | __le16 blk_level; /* Level of a block in the tree. */ | 1640 | __le16 blk_level; /* Level of a block in the tree. */ |
1423 | __le16 blk_nr_item; /* Number of keys/items in a block. */ | 1641 | __le16 blk_nr_item; /* Number of keys/items in a block. */ |
1424 | __le16 blk_free_space; /* Block free space in bytes. */ | 1642 | __le16 blk_free_space; /* Block free space in bytes. */ |
1425 | __le16 blk_reserved; | 1643 | __le16 blk_reserved; |
1426 | /* dump this in v4/planA */ | 1644 | /* dump this in v4/planA */ |
1427 | struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ | 1645 | |
1646 | /* kept only for compatibility */ | ||
1647 | struct reiserfs_key blk_right_delim_key; | ||
1428 | }; | 1648 | }; |
1429 | 1649 | ||
1430 | #define BLKH_SIZE (sizeof(struct block_head)) | 1650 | #define BLKH_SIZE (sizeof(struct block_head)) |
@@ -1439,18 +1659,20 @@ struct block_head { | |||
1439 | #define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) | 1659 | #define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) |
1440 | #define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) | 1660 | #define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) |
1441 | 1661 | ||
1662 | /* values for blk_level field of the struct block_head */ | ||
1663 | |||
1442 | /* | 1664 | /* |
1443 | * values for blk_level field of the struct block_head | 1665 | * When node gets removed from the tree its blk_level is set to FREE_LEVEL. |
1666 | * It is then used to see whether the node is still in the tree | ||
1444 | */ | 1667 | */ |
1445 | 1668 | #define FREE_LEVEL 0 | |
1446 | #define FREE_LEVEL 0 /* when node gets removed from the tree its | ||
1447 | blk_level is set to FREE_LEVEL. It is then | ||
1448 | used to see whether the node is still in the | ||
1449 | tree */ | ||
1450 | 1669 | ||
1451 | #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ | 1670 | #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ |
1452 | 1671 | ||
1453 | /* Given the buffer head of a formatted node, resolve to the block head of that node. */ | 1672 | /* |
1673 | * Given the buffer head of a formatted node, resolve to the | ||
1674 | * block head of that node. | ||
1675 | */ | ||
1454 | #define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) | 1676 | #define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) |
1455 | /* Number of items that are in buffer. */ | 1677 | /* Number of items that are in buffer. */ |
1456 | #define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) | 1678 | #define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) |
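
Combining FREE_LEVEL with the accessors above, the liveness test for a formatted node reduces to one comparison; a minimal sketch:

        /* a node removed from the tree has blk_level reset to FREE_LEVEL */
        static inline int node_in_tree_sketch(const struct buffer_head *bh)
        {
                return B_LEVEL(bh) != FREE_LEVEL;
        }
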
@@ -1471,14 +1693,14 @@ struct block_head { | |||
1471 | #define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ | 1693 | #define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ |
1472 | && B_LEVEL(bh) <= MAX_HEIGHT) | 1694 | && B_LEVEL(bh) <= MAX_HEIGHT) |
1473 | 1695 | ||
1474 | /***************************************************************************/ | 1696 | /*************************************************************************** |
1475 | /* STAT DATA */ | 1697 | * STAT DATA * |
1476 | /***************************************************************************/ | 1698 | ***************************************************************************/ |
1477 | 1699 | ||
1478 | // | 1700 | /* |
1479 | // old stat data is 32 bytes long. We are going to distinguish new one by | 1701 | * old stat data is 32 bytes long. We are going to distinguish new one by |
1480 | // different size | 1702 | * different size |
1481 | // | 1703 | */ |
1482 | struct stat_data_v1 { | 1704 | struct stat_data_v1 { |
1483 | __le16 sd_mode; /* file type, permissions */ | 1705 | __le16 sd_mode; /* file type, permissions */ |
1484 | __le16 sd_nlink; /* number of hard links */ | 1706 | __le16 sd_nlink; /* number of hard links */ |
@@ -1487,20 +1709,25 @@ struct stat_data_v1 { | |||
1487 | __le32 sd_size; /* file size */ | 1709 | __le32 sd_size; /* file size */ |
1488 | __le32 sd_atime; /* time of last access */ | 1710 | __le32 sd_atime; /* time of last access */ |
1489 | __le32 sd_mtime; /* time file was last modified */ | 1711 | __le32 sd_mtime; /* time file was last modified */ |
1490 | __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ | 1712 | |
1713 | /* | ||
1714 | * time inode (stat data) was last changed | ||
1715 | * (except changes to sd_atime and sd_mtime) | ||
1716 | */ | ||
1717 | __le32 sd_ctime; | ||
1491 | union { | 1718 | union { |
1492 | __le32 sd_rdev; | 1719 | __le32 sd_rdev; |
1493 | __le32 sd_blocks; /* number of blocks file uses */ | 1720 | __le32 sd_blocks; /* number of blocks file uses */ |
1494 | } __attribute__ ((__packed__)) u; | 1721 | } __attribute__ ((__packed__)) u; |
1495 | __le32 sd_first_direct_byte; /* first byte of file which is stored | 1722 | |
1496 | in a direct item: except that if it | 1723 | /* |
1497 | equals 1 it is a symlink and if it | 1724 | * first byte of file which is stored in a direct item: except that if |
1498 | equals ~(__u32)0 there is no | 1725 | * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no |
1499 | direct item. The existence of this | 1726 | * direct item. The existence of this field really grates on me. |
1500 | field really grates on me. Let's | 1727 | * Let's replace it with a macro based on sd_size and our tail |
1501 | replace it with a macro based on | 1728 | * suppression policy. Someday. -Hans |
1502 | sd_size and our tail suppression | 1729 | */ |
1503 | policy. Someday. -Hans */ | 1730 | __le32 sd_first_direct_byte; |
1504 | } __attribute__ ((__packed__)); | 1731 | } __attribute__ ((__packed__)); |
1505 | 1732 | ||
1506 | #define SD_V1_SIZE (sizeof(struct stat_data_v1)) | 1733 | #define SD_V1_SIZE (sizeof(struct stat_data_v1)) |
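
The introductory comment pins the old stat data at 32 bytes; since the struct is packed, that claim can be asserted at build time (standalone C11 sketch):

        _Static_assert(sizeof(struct stat_data_v1) == 32,
                       "v1 stat data must stay 32 bytes");
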
@@ -1532,8 +1759,10 @@ struct stat_data_v1 { | |||
1532 | 1759 | ||
1533 | /* inode flags stored in sd_attrs (nee sd_reserved) */ | 1760 | /* inode flags stored in sd_attrs (nee sd_reserved) */ |
1534 | 1761 | ||
1535 | /* we want common flags to have the same values as in ext2, | 1762 | /* |
1536 | so chattr(1) will work without problems */ | 1763 | * we want common flags to have the same values as in ext2, |
1764 | * so chattr(1) will work without problems | ||
1765 | */ | ||
1537 | #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL | 1766 | #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL |
1538 | #define REISERFS_APPEND_FL FS_APPEND_FL | 1767 | #define REISERFS_APPEND_FL FS_APPEND_FL |
1539 | #define REISERFS_SYNC_FL FS_SYNC_FL | 1768 | #define REISERFS_SYNC_FL FS_SYNC_FL |
@@ -1553,8 +1782,10 @@ struct stat_data_v1 { | |||
1553 | REISERFS_COMPR_FL | \ | 1782 | REISERFS_COMPR_FL | \ |
1554 | REISERFS_NOTAIL_FL ) | 1783 | REISERFS_NOTAIL_FL ) |
1555 | 1784 | ||
1556 | /* Stat Data on disk (reiserfs version of UFS disk inode minus the | 1785 | /* |
1557 | address blocks) */ | 1786 | * Stat Data on disk (reiserfs version of UFS disk inode minus the |
1787 | * address blocks) | ||
1788 | */ | ||
1558 | struct stat_data { | 1789 | struct stat_data { |
1559 | __le16 sd_mode; /* file type, permissions */ | 1790 | __le16 sd_mode; /* file type, permissions */ |
1560 | __le16 sd_attrs; /* persistent inode flags */ | 1791 | __le16 sd_attrs; /* persistent inode flags */ |
@@ -1564,25 +1795,20 @@ struct stat_data { | |||
1564 | __le32 sd_gid; /* group */ | 1795 | __le32 sd_gid; /* group */ |
1565 | __le32 sd_atime; /* time of last access */ | 1796 | __le32 sd_atime; /* time of last access */ |
1566 | __le32 sd_mtime; /* time file was last modified */ | 1797 | __le32 sd_mtime; /* time file was last modified */ |
1567 | __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ | 1798 | |
1799 | /* | ||
1800 | * time inode (stat data) was last changed | ||
1801 | * (except changes to sd_atime and sd_mtime) | ||
1802 | */ | ||
1803 | __le32 sd_ctime; | ||
1568 | __le32 sd_blocks; | 1804 | __le32 sd_blocks; |
1569 | union { | 1805 | union { |
1570 | __le32 sd_rdev; | 1806 | __le32 sd_rdev; |
1571 | __le32 sd_generation; | 1807 | __le32 sd_generation; |
1572 | //__le32 sd_first_direct_byte; | ||
1573 | /* first byte of file which is stored in a | ||
1574 | direct item: except that if it equals 1 | ||
1575 | it is a symlink and if it equals | ||
1576 | ~(__u32)0 there is no direct item. The | ||
1577 | existence of this field really grates | ||
1578 | on me. Let's replace it with a macro | ||
1579 | based on sd_size and our tail | ||
1580 | suppression policy? */ | ||
1581 | } __attribute__ ((__packed__)) u; | 1808 | } __attribute__ ((__packed__)) u; |
1582 | } __attribute__ ((__packed__)); | 1809 | } __attribute__ ((__packed__)); |
1583 | // | 1810 | |
1584 | // this is 44 bytes long | 1811 | /* this is 44 bytes long */ |
1585 | // | ||
1586 | #define SD_SIZE (sizeof(struct stat_data)) | 1812 | #define SD_SIZE (sizeof(struct stat_data)) |
1587 | #define SD_V2_SIZE SD_SIZE | 1813 | #define SD_V2_SIZE SD_SIZE |
1588 | #define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) | 1814 | #define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) |
@@ -1613,48 +1839,61 @@ struct stat_data { | |||
1613 | #define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) | 1839 | #define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) |
1614 | #define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) | 1840 | #define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) |
1615 | 1841 | ||
1616 | /***************************************************************************/ | 1842 | /*************************************************************************** |
1617 | /* DIRECTORY STRUCTURE */ | 1843 | * DIRECTORY STRUCTURE * |
1618 | /***************************************************************************/ | 1844 | ***************************************************************************/ |
1619 | /* | 1845 | /* |
1620 | Picture represents the structure of directory items | 1846 | * Picture represents the structure of directory items |
1621 | ________________________________________________ | 1847 | * ________________________________________________ |
1622 | | Array of | | | | | | | 1848 | * | Array of | | | | | | |
1623 | | directory |N-1| N-2 | .... | 1st |0th| | 1849 | * | directory |N-1| N-2 | .... | 1st |0th| |
1624 | | entry headers | | | | | | | 1850 | * | entry headers | | | | | | |
1625 | |_______________|___|_____|________|_______|___| | 1851 | * |_______________|___|_____|________|_______|___| |
1626 | <---- directory entries ------> | 1852 | * <---- directory entries ------> |
1627 | 1853 | * | |
1628 | First directory item has k_offset component 1. We store "." and ".." | 1854 | * First directory item has k_offset component 1. We store "." and ".." |
1629 | in one item, always, we never split "." and ".." into differing | 1855 | * in one item, always, we never split "." and ".." into differing |
1630 | items. This makes, among other things, the code for removing | 1856 | * items. This makes, among other things, the code for removing |
1631 | directories simpler. */ | 1857 | * directories simpler. |
1858 | */ | ||
1632 | #define SD_OFFSET 0 | 1859 | #define SD_OFFSET 0 |
1633 | #define SD_UNIQUENESS 0 | 1860 | #define SD_UNIQUENESS 0 |
1634 | #define DOT_OFFSET 1 | 1861 | #define DOT_OFFSET 1 |
1635 | #define DOT_DOT_OFFSET 2 | 1862 | #define DOT_DOT_OFFSET 2 |
1636 | #define DIRENTRY_UNIQUENESS 500 | 1863 | #define DIRENTRY_UNIQUENESS 500 |
1637 | 1864 | ||
1638 | /* */ | ||
1639 | #define FIRST_ITEM_OFFSET 1 | 1865 | #define FIRST_ITEM_OFFSET 1 |
1640 | 1866 | ||
1641 | /* | 1867 | /* |
1642 | Q: How to get key of object pointed to by entry from entry? | 1868 | * Q: How to get key of object pointed to by entry from entry? |
1643 | 1869 | * | |
1644 | A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key | 1870 | * A: Each directory entry has its header. This header has deh_dir_id |
1645 | of object, entry points to */ | 1871 | * and deh_objectid fields; these are the key of the object the entry points to |
1872 | */ | ||
1646 | 1873 | ||
1647 | /* NOT IMPLEMENTED: | 1874 | /* |
1648 | Directory will someday contain stat data of object */ | 1875 | * NOT IMPLEMENTED: |
1876 | * Directory will someday contain stat data of object | ||
1877 | */ | ||
1649 | 1878 | ||
1650 | struct reiserfs_de_head { | 1879 | struct reiserfs_de_head { |
1651 | __le32 deh_offset; /* third component of the directory entry key */ | 1880 | __le32 deh_offset; /* third component of the directory entry key */ |
1652 | __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced | 1881 | |
1653 | by directory entry */ | 1882 | /* |
1654 | __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ | 1883 | * objectid of the parent directory of the object that is referenced |
1884 | * by the directory entry | ||
1885 | */ | ||
1886 | __le32 deh_dir_id; | ||
1887 | |||
1888 | /* objectid of the object that is referenced by the directory entry */ | ||
1889 | __le32 deh_objectid; | ||
1655 | __le16 deh_location; /* offset of name in the whole item */ | 1890 | __le16 deh_location; /* offset of name in the whole item */ |
1656 | __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether | 1891 | |
1657 | entry is hidden (unlinked) */ | 1892 | /* |
1893 | * whether 1) the entry contains stat data (for future), and | ||
1894 | * 2) the entry is hidden (unlinked) | ||
1895 | */ | ||
1896 | __le16 deh_state; | ||
1658 | } __attribute__ ((__packed__)); | 1897 | } __attribute__ ((__packed__)); |
1659 | #define DEH_SIZE sizeof(struct reiserfs_de_head) | 1898 | #define DEH_SIZE sizeof(struct reiserfs_de_head) |
1660 | #define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) | 1899 | #define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) |
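To put the Q/A above in code: the key of the object an entry references is read straight out of the entry head. A minimal sketch (deh_dir_id() and deh_objectid() are assumed to be defined alongside deh_offset() here; i picks the entry within the item):

	struct reiserfs_de_head *deh = B_I_DEH(bh, ih) + i;

	__u32 dirid = deh_dir_id(deh);        /* objectid of the parent directory */
	__u32 objectid = deh_objectid(deh);   /* objectid of the referenced object */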
@@ -1684,9 +1923,11 @@ struct reiserfs_de_head { | |||
1684 | # define ADDR_UNALIGNED_BITS (3) | 1923 | # define ADDR_UNALIGNED_BITS (3) |
1685 | #endif | 1924 | #endif |
1686 | 1925 | ||
1687 | /* These are only used to manipulate deh_state. | 1926 | /* |
1927 | * These are only used to manipulate deh_state. | ||
1688 | * Because of this, we'll use the ext2_ bit routines, | 1928 | * Because of this, we'll use the ext2_ bit routines, |
1689 | * since they are little endian */ | 1929 | * since they are little endian |
1930 | */ | ||
1690 | #ifdef ADDR_UNALIGNED_BITS | 1931 | #ifdef ADDR_UNALIGNED_BITS |
1691 | 1932 | ||
1692 | # define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) | 1933 | # define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) |
@@ -1721,46 +1962,16 @@ extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, | |||
1721 | extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, | 1962 | extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, |
1722 | __le32 par_dirid, __le32 par_objid); | 1963 | __le32 par_dirid, __le32 par_objid); |
1723 | 1964 | ||
1724 | /* array of the entry headers */ | 1965 | /* two entries per block (at least) */ |
1725 | /* get item body */ | ||
1726 | #define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) ) | ||
1727 | #define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih))) | ||
1728 | |||
1729 | /* length of the directory entry in directory item. This define | ||
1730 | calculates length of i-th directory entry using directory entry | ||
1731 | locations from dir entry head. When it calculates length of 0-th | ||
1732 | directory entry, it uses length of whole item in place of entry | ||
1733 | location of the non-existent following entry in the calculation. | ||
1734 | See picture above.*/ | ||
1735 | /* | ||
1736 | #define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ | ||
1737 | ((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh)))) | ||
1738 | */ | ||
1739 | static inline int entry_length(const struct buffer_head *bh, | ||
1740 | const struct item_head *ih, int pos_in_item) | ||
1741 | { | ||
1742 | struct reiserfs_de_head *deh; | ||
1743 | |||
1744 | deh = B_I_DEH(bh, ih) + pos_in_item; | ||
1745 | if (pos_in_item) | ||
1746 | return deh_location(deh - 1) - deh_location(deh); | ||
1747 | |||
1748 | return ih_item_len(ih) - deh_location(deh); | ||
1749 | } | ||
1750 | |||
1751 | /* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */ | ||
1752 | #define I_ENTRY_COUNT(ih) (ih_entry_count((ih))) | ||
1753 | |||
1754 | /* name by bh, ih and entry_num */ | ||
1755 | #define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num)))) | ||
1756 | |||
1757 | // two entries per block (at least) | ||
1758 | #define REISERFS_MAX_NAME(block_size) 255 | 1966 | #define REISERFS_MAX_NAME(block_size) 255 |
1759 | 1967 | ||
1760 | /* this structure is used for operations on directory entries. It is | 1968 | /* |
1761 | not a disk structure. */ | 1969 | * this structure is used for operations on directory entries. It is |
1762 | /* When reiserfs_find_entry or search_by_entry_key find directory | 1970 | * not a disk structure. |
1763 | entry, they return filled reiserfs_dir_entry structure */ | 1971 | * |
1972 | * When reiserfs_find_entry or search_by_entry_key find directory | ||
1973 | * entry, they return filled reiserfs_dir_entry structure | ||
1974 | */ | ||
1764 | struct reiserfs_dir_entry { | 1975 | struct reiserfs_dir_entry { |
1765 | struct buffer_head *de_bh; | 1976 | struct buffer_head *de_bh; |
1766 | int de_item_num; | 1977 | int de_item_num; |
@@ -1778,10 +1989,14 @@ struct reiserfs_dir_entry { | |||
1778 | struct cpu_key de_entry_key; | 1989 | struct cpu_key de_entry_key; |
1779 | }; | 1990 | }; |
1780 | 1991 | ||
1781 | /* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ | 1992 | /* |
1993 | * these defines are useful when a particular member of | ||
1994 | * a reiserfs_dir_entry is needed | ||
1995 | */ | ||
1782 | 1996 | ||
1783 | /* pointer to file name, stored in entry */ | 1997 | /* pointer to file name, stored in entry */ |
1784 | #define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh)) | 1998 | #define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ |
1999 | (ih_item_body(bh, ih) + deh_location(deh)) | ||
1785 | 2000 | ||
1786 | /* length of name */ | 2001 | /* length of name */ |
1787 | #define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ | 2002 | #define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ |
@@ -1804,11 +2019,13 @@ struct reiserfs_dir_entry { | |||
1804 | * |______|_______________|___________________|___________| | 2019 | * |______|_______________|___________________|___________| |
1805 | */ | 2020 | */ |
1806 | 2021 | ||
1807 | /***************************************************************************/ | 2022 | /*************************************************************************** |
1808 | /* DISK CHILD */ | 2023 | * DISK CHILD * |
1809 | /***************************************************************************/ | 2024 | ***************************************************************************/ |
1810 | /* Disk child pointer: The pointer from an internal node of the tree | 2025 | /* |
1811 | to a node that is on disk. */ | 2026 | * Disk child pointer: |
2027 | * The pointer from an internal node of the tree to a node that is on disk. | ||
2028 | */ | ||
1812 | struct disk_child { | 2029 | struct disk_child { |
1813 | __le32 dc_block_number; /* Disk child's block number. */ | 2030 | __le32 dc_block_number; /* Disk child's block number. */ |
1814 | __le16 dc_size; /* Disk child's used space. */ | 2031 | __le16 dc_size; /* Disk child's used space. */ |
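A disk child slot of an internal node is read through the dc_* accessors; a small sketch, assuming the B_N_CHILD() lookup helper defined elsewhere in this header:

	struct disk_child *dc = B_N_CHILD(bh, pos);      /* pos-th pointer slot */

	b_blocknr_t child_blocknr = dc_block_number(dc); /* on-disk block number */
	int child_used = dc_size(dc);                    /* bytes used in the child */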
@@ -1841,47 +2058,66 @@ struct disk_child { | |||
1841 | #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) | 2058 | #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) |
1842 | #define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) | 2059 | #define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) |
1843 | 2060 | ||
1844 | /***************************************************************************/ | 2061 | /*************************************************************************** |
1845 | /* PATH STRUCTURES AND DEFINES */ | 2062 | * PATH STRUCTURES AND DEFINES * |
1846 | /***************************************************************************/ | 2063 | ***************************************************************************/ |
1847 | 2064 | ||
1848 | /* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the | 2065 | /* |
1849 | key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it | 2066 | * search_by_key fills up the path from the root to the leaf as it descends |
1850 | does not find them in the cache it reads them from disk. For each node search_by_key finds using | 2067 | * the tree looking for the key. It uses reiserfs_bread to try to find |
1851 | reiserfs_bread it then uses bin_search to look through that node. bin_search will find the | 2068 | * buffers in the cache given their block number. If it does not find |
1852 | position of the block_number of the next node if it is looking through an internal node. If it | 2069 | * finds using reiserfs_bread, it then uses bin_search to look through that |
1853 | is looking through a leaf node bin_search will find the position of the item which has key either | 2070 | * finds using reiserfs_bread it then uses bin_search to look through that |
1854 | equal to given key, or which is the maximal key less than the given key. */ | 2071 | * node. bin_search will find the position of the block_number of the next |
2072 | * node if it is looking through an internal node. If it is looking through | ||
2073 | * a leaf node bin_search will find the position of the item which has key | ||
2074 | * either equal to given key, or which is the maximal key less than the | ||
2075 | * given key. | ||
2076 | */ | ||
1855 | 2077 | ||
1856 | struct path_element { | 2078 | struct path_element { |
1857 | struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ | 2079 | /* Pointer to the buffer at the path in the tree. */ |
1858 | int pe_position; /* Position in the tree node which is placed in the */ | 2080 | struct buffer_head *pe_buffer; |
1859 | /* buffer above. */ | 2081 | /* Position in the tree node which is placed in the buffer above. */ |
2082 | int pe_position; | ||
1860 | }; | 2083 | }; |
1861 | 2084 | ||
1862 | #define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ | 2085 | /* |
1863 | #define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ | 2086 | * maximal height of a tree. don't change this without |
1864 | #define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ | 2087 | * changing JOURNAL_PER_BALANCE_CNT |
1865 | 2088 | */ | |
1866 | #define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ | 2089 | #define MAX_HEIGHT 5 |
1867 | #define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ | 2090 | |
1868 | 2091 | /* Must be equal to MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ | |
1869 | /* We need to keep track of who the ancestors of nodes are. When we | 2092 | #define EXTENDED_MAX_HEIGHT 7 |
1870 | perform a search we record which nodes were visited while | 2093 | |
1871 | descending the tree looking for the node we searched for. This list | 2094 | /* Must be equal to at least 2. */ |
1872 | of nodes is called the path. This information is used while | 2095 | #define FIRST_PATH_ELEMENT_OFFSET 2 |
1873 | performing balancing. Note that this path information may become | 2096 | |
1874 | invalid, and this means we must check it when using it to see if it | 2097 | /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ |
1875 | is still valid. You'll need to read search_by_key and the comments | 2098 | #define ILLEGAL_PATH_ELEMENT_OFFSET 1 |
1876 | in it, especially about decrement_counters_in_path(), to understand | 2099 | |
1877 | this structure. | 2100 | /* this MUST be MAX_HEIGHT + 1. See about FEB below */ |
1878 | 2101 | #define MAX_FEB_SIZE 6 | |
1879 | Paths make the code so much harder to work with and debug.... An | 2102 | |
1880 | enormous number of bugs are due to them, and trying to write or modify | 2103 | /* |
1881 | code that uses them just makes my head hurt. They are based on an | 2104 | * We need to keep track of who the ancestors of nodes are. When we |
1882 | excessive effort to avoid disturbing the precious VFS code.:-( The | 2105 | * perform a search we record which nodes were visited while |
1883 | gods only know how we are going to SMP the code that uses them. | 2106 | * descending the tree looking for the node we searched for. This list |
1884 | znodes are the way! */ | 2107 | * of nodes is called the path. This information is used while |
2108 | * performing balancing. Note that this path information may become | ||
2109 | * invalid, and this means we must check it when using it to see if it | ||
2110 | * is still valid. You'll need to read search_by_key and the comments | ||
2111 | * in it, especially about decrement_counters_in_path(), to understand | ||
2112 | * this structure. | ||
2113 | * | ||
2114 | * Paths make the code so much harder to work with and debug.... An | ||
2115 | * enormous number of bugs are due to them, and trying to write or modify | ||
2116 | * code that uses them just makes my head hurt. They are based on an | ||
2117 | * excessive effort to avoid disturbing the precious VFS code.:-( The | ||
2118 | * gods only know how we are going to SMP the code that uses them. | ||
2119 | * znodes are the way! | ||
2120 | */ | ||
1885 | 2121 | ||
1886 | #define PATH_READA 0x1 /* do read ahead */ | 2122 | #define PATH_READA 0x1 /* do read ahead */ |
1887 | #define PATH_READA_BACK 0x2 /* read backwards */ | 2123 | #define PATH_READA_BACK 0x2 /* read backwards */ |
@@ -1889,7 +2125,8 @@ znodes are the way! */ | |||
1889 | struct treepath { | 2125 | struct treepath { |
1890 | int path_length; /* Length of the array above. */ | 2126 | int path_length; /* Length of the array above. */ |
1891 | int reada; | 2127 | int reada; |
1892 | struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ | 2128 | /* Array of the path elements. */ |
2129 | struct path_element path_elements[EXTENDED_MAX_HEIGHT]; | ||
1893 | int pos_in_item; | 2130 | int pos_in_item; |
1894 | }; | 2131 | }; |
1895 | 2132 | ||
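For orientation, the usual lifetime of a treepath looks roughly like this (a hedged sketch; key is a cpu_key built by the caller, sb is the super block, and error handling is elided):

	INITIALIZE_PATH(path);  /* path_length = ILLEGAL_PATH_ELEMENT_OFFSET */

	if (search_by_key(sb, &key, &path, DISK_LEAF_NODE_LEVEL) == ITEM_FOUND) {
		struct buffer_head *bh = PATH_PLAST_BUFFER(&path);
		int pos = PATH_LAST_POSITION(&path);
		/* read the item at (bh, pos) while the path is still valid */
	}
	pathrelse(&path);       /* drop the buffer references taken by the search */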
@@ -1908,41 +2145,124 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} | |||
1908 | #define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) | 2145 | #define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) |
1909 | 2146 | ||
1910 | #define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) | 2147 | #define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) |
1911 | /* you know, to the person who didn't | 2148 | |
1912 | write this the macro name does not | 2149 | /* |
1913 | at first suggest what it does. | 2150 | * you know, to the person who didn't write this the macro name does not |
1914 | Maybe POSITION_FROM_PATH_END? Or | 2151 | * at first suggest what it does. Maybe POSITION_FROM_PATH_END? Or |
1915 | maybe we should just focus on | 2152 | * maybe we should just focus on dumping paths... -Hans |
1916 | dumping paths... -Hans */ | 2153 | */ |
1917 | #define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) | 2154 | #define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) |
1918 | 2155 | ||
1919 | #define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)) | 2156 | /* |
2157 | * in do_balance leaf has h == 0 in contrast with path structure, | ||
2158 | * where root has level == 0. That is why we need these defines | ||
2159 | */ | ||
2160 | |||
2161 | /* tb->S[h] */ | ||
2162 | #define PATH_H_PBUFFER(path, h) \ | ||
2163 | PATH_OFFSET_PBUFFER(path, path->path_length - (h)) | ||
2164 | |||
2165 | /* tb->F[h] or tb->S[0]->b_parent */ | ||
2166 | #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1) | ||
2167 | |||
2168 | #define PATH_H_POSITION(path, h) \ | ||
2169 | PATH_OFFSET_POSITION(path, path->path_length - (h)) | ||
1920 | 2170 | ||
1921 | /* in do_balance leaf has h == 0 in contrast with path structure, | 2171 | /* tb->S[h]->b_item_order */ |
1922 | where root has level == 0. That is why we need these defines */ | 2172 | #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) |
1923 | #define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */ | ||
1924 | #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ | ||
1925 | #define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) | ||
1926 | #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ | ||
1927 | 2173 | ||
1928 | #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) | 2174 | #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) |
1929 | 2175 | ||
2176 | static inline void *reiserfs_node_data(const struct buffer_head *bh) | ||
2177 | { | ||
2178 | return bh->b_data + sizeof(struct block_head); | ||
2179 | } | ||
2180 | |||
2181 | /* get key from internal node */ | ||
2182 | static inline struct reiserfs_key *internal_key(struct buffer_head *bh, | ||
2183 | int item_num) | ||
2184 | { | ||
2185 | struct reiserfs_key *key = reiserfs_node_data(bh); | ||
2186 | |||
2187 | return &key[item_num]; | ||
2188 | } | ||
2189 | |||
2190 | /* get the item header from leaf node */ | ||
2191 | static inline struct item_head *item_head(const struct buffer_head *bh, | ||
2192 | int item_num) | ||
2193 | { | ||
2194 | struct item_head *ih = reiserfs_node_data(bh); | ||
2195 | |||
2196 | return &ih[item_num]; | ||
2197 | } | ||
2198 | |||
2199 | /* get the key from leaf node */ | ||
2200 | static inline struct reiserfs_key *leaf_key(const struct buffer_head *bh, | ||
2201 | int item_num) | ||
2202 | { | ||
2203 | return &item_head(bh, item_num)->ih_key; | ||
2204 | } | ||
2205 | |||
2206 | static inline void *ih_item_body(const struct buffer_head *bh, | ||
2207 | const struct item_head *ih) | ||
2208 | { | ||
2209 | return bh->b_data + ih_location(ih); | ||
2210 | } | ||
2211 | |||
2212 | /* get item body from leaf node */ | ||
2213 | static inline void *item_body(const struct buffer_head *bh, int item_num) | ||
2214 | { | ||
2215 | return ih_item_body(bh, item_head(bh, item_num)); | ||
2216 | } | ||
2217 | |||
2218 | static inline struct item_head *tp_item_head(const struct treepath *path) | ||
2219 | { | ||
2220 | return item_head(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); | ||
2221 | } | ||
2222 | |||
2223 | static inline void *tp_item_body(const struct treepath *path) | ||
2224 | { | ||
2225 | return item_body(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); | ||
2226 | } | ||
2227 | |||
1930 | #define get_last_bh(path) PATH_PLAST_BUFFER(path) | 2228 | #define get_last_bh(path) PATH_PLAST_BUFFER(path) |
1931 | #define get_ih(path) PATH_PITEM_HEAD(path) | ||
1932 | #define get_item_pos(path) PATH_LAST_POSITION(path) | 2229 | #define get_item_pos(path) PATH_LAST_POSITION(path) |
1933 | #define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path))) | ||
1934 | #define item_moved(ih,path) comp_items(ih, path) | 2230 | #define item_moved(ih,path) comp_items(ih, path) |
1935 | #define path_changed(ih,path) comp_items (ih, path) | 2231 | #define path_changed(ih,path) comp_items (ih, path) |
1936 | 2232 | ||
1937 | /***************************************************************************/ | 2233 | /* array of the entry headers */ |
1938 | /* MISC */ | 2234 | /* (located at the start of the item body) */ |
1939 | /***************************************************************************/ | 2235 | #define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) |
2236 | |||
2237 | /* | ||
2238 | * length of the directory entry in directory item. This define | ||
2239 | * calculates length of i-th directory entry using directory entry | ||
2240 | * locations from dir entry head. When it calculates length of 0-th | ||
2241 | * directory entry, it uses length of whole item in place of entry | ||
2242 | * location of the non-existent following entry in the calculation. | ||
2243 | * See picture above. | ||
2244 | */ | ||
2245 | static inline int entry_length(const struct buffer_head *bh, | ||
2246 | const struct item_head *ih, int pos_in_item) | ||
2247 | { | ||
2248 | struct reiserfs_de_head *deh; | ||
2249 | |||
2250 | deh = B_I_DEH(bh, ih) + pos_in_item; | ||
2251 | if (pos_in_item) | ||
2252 | return deh_location(deh - 1) - deh_location(deh); | ||
2253 | |||
2254 | return ih_item_len(ih) - deh_location(deh); | ||
2255 | } | ||
2256 | |||
2257 | /*************************************************************************** | ||
2258 | * MISC * | ||
2259 | ***************************************************************************/ | ||
1940 | 2260 | ||
1941 | /* Size of pointer to the unformatted node. */ | 2261 | /* Size of pointer to the unformatted node. */ |
1942 | #define UNFM_P_SIZE (sizeof(unp_t)) | 2262 | #define UNFM_P_SIZE (sizeof(unp_t)) |
1943 | #define UNFM_P_SHIFT 2 | 2263 | #define UNFM_P_SHIFT 2 |
1944 | 2264 | ||
1945 | // in in-core inode key is stored on le form | 2265 | /* in in-core inode key is stored on le form */ |
1946 | #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) | 2266 | #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) |
1947 | 2267 | ||
1948 | #define MAX_UL_INT 0xffffffff | 2268 | #define MAX_UL_INT 0xffffffff |
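The inline helpers introduced above replace the old macro family, so a post-search access pattern now reads (sketch, assuming the search succeeded and found a directory item):

	struct item_head *ih = tp_item_head(&path); /* formerly PATH_PITEM_HEAD/get_ih */
	void *body = tp_item_body(&path);           /* formerly get_item */

	/* length of the 0-th entry, bounded by the whole-item length */
	int len0 = entry_length(get_last_bh(&path), ih, 0);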
@@ -1958,7 +2278,6 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
1958 | return (loff_t) ((~(__u64) 0) >> 4); | 2278 | return (loff_t) ((~(__u64) 0) >> 4); |
1959 | } | 2279 | } |
1960 | 2280 | ||
1961 | /*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ | ||
1962 | #define MAX_KEY_OBJECTID MAX_UL_INT | 2281 | #define MAX_KEY_OBJECTID MAX_UL_INT |
1963 | 2282 | ||
1964 | #define MAX_B_NUM MAX_UL_INT | 2283 | #define MAX_B_NUM MAX_UL_INT |
@@ -1967,9 +2286,12 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
1967 | /* the purpose is to detect overflow of an unsigned short */ | 2286 | /* the purpose is to detect overflow of an unsigned short */ |
1968 | #define REISERFS_LINK_MAX (MAX_US_INT - 1000) | 2287 | #define REISERFS_LINK_MAX (MAX_US_INT - 1000) |
1969 | 2288 | ||
1970 | /* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ | 2289 | /* |
1971 | #define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ | 2290 | * The following defines are used in reiserfs_insert_item |
1972 | #define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ | 2291 | * and reiserfs_append_item |
2292 | */ | ||
2293 | #define REISERFS_KERNEL_MEM 0 /* kernel memory mode */ | ||
2294 | #define REISERFS_USER_MEM 1 /* user memory mode */ | ||
1973 | 2295 | ||
1974 | #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) | 2296 | #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) |
1975 | #define get_generation(s) atomic_read (&fs_generation(s)) | 2297 | #define get_generation(s) atomic_read (&fs_generation(s)) |
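get_generation() and fs_changed() support the standard revalidation pattern: sample the counter before blocking, and re-check afterwards (a hedged sketch):

	int gen = get_generation(sb);

	/* anything that may sleep or drop the write lock goes here */

	if (fs_changed(gen, sb)) {
		/* the tree was rebalanced meanwhile; any saved treepath
		 * may be stale, so repeat search_by_key */
	}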
@@ -1981,46 +2303,65 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
1981 | __fs_changed(gen, s); \ | 2303 | __fs_changed(gen, s); \ |
1982 | }) | 2304 | }) |
1983 | 2305 | ||
1984 | /***************************************************************************/ | 2306 | /*************************************************************************** |
1985 | /* FIXATE NODES */ | 2307 | * FIXATE NODES * |
1986 | /***************************************************************************/ | 2308 | ***************************************************************************/ |
1987 | 2309 | ||
1988 | #define VI_TYPE_LEFT_MERGEABLE 1 | 2310 | #define VI_TYPE_LEFT_MERGEABLE 1 |
1989 | #define VI_TYPE_RIGHT_MERGEABLE 2 | 2311 | #define VI_TYPE_RIGHT_MERGEABLE 2 |
1990 | 2312 | ||
1991 | /* To make any changes in the tree we always first find node, that | 2313 | /* |
1992 | contains item to be changed/deleted or place to insert a new | 2314 | * To make any changes in the tree we always first find node, that |
1993 | item. We call this node S. To do balancing we need to decide what | 2315 | * contains item to be changed/deleted or place to insert a new |
1994 | we will shift to left/right neighbor, or to a new node, where new | 2316 | * item. We call this node S. To do balancing we need to decide what |
1995 | item will be etc. To make this analysis simpler we build virtual | 2317 | * we will shift to left/right neighbor, or to a new node, where new |
1996 | node. Virtual node is an array of items, that will replace items of | 2318 | * item will be etc. To make this analysis simpler we build virtual |
1997 | node S. (For instance if we are going to delete an item, virtual | 2319 | * node. Virtual node is an array of items, that will replace items of |
1998 | node does not contain it). Virtual node keeps information about | 2320 | * node S. (For instance if we are going to delete an item, virtual |
1999 | item sizes and types, mergeability of first and last items, sizes | 2321 | * node does not contain it). Virtual node keeps information about |
2000 | of all entries in directory item. We use this array of items when | 2322 | * item sizes and types, mergeability of first and last items, sizes |
2001 | calculating what we can shift to neighbors and how many nodes we | 2323 | * of all entries in directory item. We use this array of items when |
2002 | have to have if we do not any shiftings, if we shift to left/right | 2324 | * calculating what we can shift to neighbors and how many nodes we |
2003 | neighbor or to both. */ | 2325 | * have to have if we do no shifting, if we shift to left/right |
2326 | * neighbor or to both. | ||
2327 | */ | ||
2004 | struct virtual_item { | 2328 | struct virtual_item { |
2005 | int vi_index; // index in the array of item operations | 2329 | int vi_index; /* index in the array of item operations */ |
2006 | unsigned short vi_type; // left/right mergeability | 2330 | unsigned short vi_type; /* left/right mergeability */ |
2007 | unsigned short vi_item_len; /* length of item that it will have after balancing */ | 2331 | |
2332 | /* length of item that it will have after balancing */ | ||
2333 | unsigned short vi_item_len; | ||
2334 | |||
2008 | struct item_head *vi_ih; | 2335 | struct item_head *vi_ih; |
2009 | const char *vi_item; // body of item (old or new) | 2336 | const char *vi_item; /* body of item (old or new) */ |
2010 | const void *vi_new_data; // 0 always but paste mode | 2337 | const void *vi_new_data; /* 0 always but paste mode */ |
2011 | void *vi_uarea; // item specific area | 2338 | void *vi_uarea; /* item specific area */ |
2012 | }; | 2339 | }; |
2013 | 2340 | ||
2014 | struct virtual_node { | 2341 | struct virtual_node { |
2015 | char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ | 2342 | /* this is a pointer to the free space in the buffer */ |
2343 | char *vn_free_ptr; | ||
2344 | |||
2016 | unsigned short vn_nr_item; /* number of items in virtual node */ | 2345 | unsigned short vn_nr_item; /* number of items in virtual node */ |
2017 | short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ | 2346 | |
2018 | short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ | 2347 | /* |
2348 | * size the node would have if it had unlimited size | ||
2349 | * and no balancing were performed | ||
2350 | */ | ||
2351 | short vn_size; | ||
2352 | |||
2353 | /* mode of balancing (paste, insert, delete, cut) */ | ||
2354 | short vn_mode; | ||
2355 | |||
2019 | short vn_affected_item_num; | 2356 | short vn_affected_item_num; |
2020 | short vn_pos_in_item; | 2357 | short vn_pos_in_item; |
2021 | struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ | 2358 | |
2359 | /* item header of inserted item, 0 for other modes */ | ||
2360 | struct item_head *vn_ins_ih; | ||
2022 | const void *vn_data; | 2361 | const void *vn_data; |
2023 | struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */ | 2362 | |
2363 | /* array of items (including a new one, excluding item to be deleted) */ | ||
2364 | struct virtual_item *vn_vi; | ||
2024 | }; | 2365 | }; |
2025 | 2366 | ||
2026 | /* used by directory items when creating virtual nodes */ | 2367 | /* used by directory items when creating virtual nodes */ |
@@ -2030,22 +2371,25 @@ struct direntry_uarea { | |||
2030 | __u16 entry_sizes[1]; | 2371 | __u16 entry_sizes[1]; |
2031 | } __attribute__ ((__packed__)); | 2372 | } __attribute__ ((__packed__)); |
2032 | 2373 | ||
2033 | /***************************************************************************/ | 2374 | /*************************************************************************** |
2034 | /* TREE BALANCE */ | 2375 | * TREE BALANCE * |
2035 | /***************************************************************************/ | 2376 | ***************************************************************************/ |
2036 | 2377 | ||
2037 | /* This temporary structure is used in tree balance algorithms, and | 2378 | /* |
2038 | constructed as we go to the extent that its various parts are | 2379 | * This temporary structure is used in tree balance algorithms, and |
2039 | needed. It contains arrays of nodes that can potentially be | 2380 | * constructed as we go to the extent that its various parts are |
2040 | involved in the balancing of node S, and parameters that define how | 2381 | * needed. It contains arrays of nodes that can potentially be |
2041 | each of the nodes must be balanced. Note that in these algorithms | 2382 | * involved in the balancing of node S, and parameters that define how |
2042 | for balancing the worst case is to need to balance the current node | 2383 | * each of the nodes must be balanced. Note that in these algorithms |
2043 | S and the left and right neighbors and all of their parents plus | 2384 | * for balancing the worst case is to need to balance the current node |
2044 | create a new node. We implement S1 balancing for the leaf nodes | 2385 | * S and the left and right neighbors and all of their parents plus |
2045 | and S0 balancing for the internal nodes (S1 and S0 are defined in | 2386 | * create a new node. We implement S1 balancing for the leaf nodes |
2046 | our papers.)*/ | 2387 | * and S0 balancing for the internal nodes (S1 and S0 are defined in |
2388 | * our papers.) | ||
2389 | */ | ||
2047 | 2390 | ||
2048 | #define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ | 2391 | /* size of the array of buffers to free at end of do_balance */ |
2392 | #define MAX_FREE_BLOCK 7 | ||
2049 | 2393 | ||
2050 | /* maximum number of FEB blocknrs on a single level */ | 2394 | /* maximum number of FEB blocknrs on a single level */ |
2051 | #define MAX_AMOUNT_NEEDED 2 | 2395 | #define MAX_AMOUNT_NEEDED 2 |
@@ -2057,64 +2401,144 @@ struct tree_balance { | |||
2057 | struct super_block *tb_sb; | 2401 | struct super_block *tb_sb; |
2058 | struct reiserfs_transaction_handle *transaction_handle; | 2402 | struct reiserfs_transaction_handle *transaction_handle; |
2059 | struct treepath *tb_path; | 2403 | struct treepath *tb_path; |
2060 | struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ | 2404 | |
2061 | struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ | 2405 | /* array of left neighbors of nodes in the path */ |
2062 | struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ | 2406 | struct buffer_head *L[MAX_HEIGHT]; |
2063 | struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ | 2407 | |
2064 | struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ | 2408 | /* array of right neighbors of nodes in the path */ |
2065 | struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ | 2409 | struct buffer_head *R[MAX_HEIGHT]; |
2066 | 2410 | ||
2067 | struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals | 2411 | /* array of fathers of the left neighbors */ |
2068 | cur_blknum. */ | 2412 | struct buffer_head *FL[MAX_HEIGHT]; |
2413 | |||
2414 | /* array of fathers of the right neighbors */ | ||
2415 | struct buffer_head *FR[MAX_HEIGHT]; | ||
2416 | /* array of common parents of center node and its left neighbor */ | ||
2417 | struct buffer_head *CFL[MAX_HEIGHT]; | ||
2418 | |||
2419 | /* array of common parents of center node and its right neighbor */ | ||
2420 | struct buffer_head *CFR[MAX_HEIGHT]; | ||
2421 | |||
2422 | /* | ||
2423 | * array of empty buffers. Number of buffers in array equals | ||
2424 | * cur_blknum. | ||
2425 | */ | ||
2426 | struct buffer_head *FEB[MAX_FEB_SIZE]; | ||
2069 | struct buffer_head *used[MAX_FEB_SIZE]; | 2427 | struct buffer_head *used[MAX_FEB_SIZE]; |
2070 | struct buffer_head *thrown[MAX_FEB_SIZE]; | 2428 | struct buffer_head *thrown[MAX_FEB_SIZE]; |
2071 | int lnum[MAX_HEIGHT]; /* array of number of items which must be | 2429 | |
2072 | shifted to the left in order to balance the | 2430 | /* |
2073 | current node; for leaves includes item that | 2431 | * array of number of items which must be shifted to the left in |
2074 | will be partially shifted; for internal | 2432 | * order to balance the current node; for leaves includes item that |
2075 | nodes, it is the number of child pointers | 2433 | * will be partially shifted; for internal nodes, it is the number |
2076 | rather than items. It includes the new item | 2434 | * of child pointers rather than items. It includes the new item |
2077 | being created. The code sometimes subtracts | 2435 | * being created. The code sometimes subtracts one to get the |
2078 | one to get the number of wholly shifted | 2436 | * number of wholly shifted items for other purposes. |
2079 | items for other purposes. */ | 2437 | */ |
2080 | int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ | 2438 | int lnum[MAX_HEIGHT]; |
2081 | int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and | 2439 | |
2082 | S[h] to its item number within the node CFL[h] */ | 2440 | /* substitute right for left in comment above */ |
2083 | int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ | 2441 | int rnum[MAX_HEIGHT]; |
2084 | int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from | 2442 | |
2085 | S[h]. A negative value means removing. */ | 2443 | /* |
2086 | int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after | 2444 | * array indexed by height h mapping the key delimiting L[h] and |
2087 | balancing on the level h of the tree. If 0 then S is | 2445 | * S[h] to its item number within the node CFL[h] |
2088 | being deleted, if 1 then S is remaining and no new nodes | 2446 | */ |
2089 | are being created, if 2 or 3 then 1 or 2 new nodes is | 2447 | int lkey[MAX_HEIGHT]; |
2090 | being created */ | 2448 | |
2449 | /* substitute r for l in comment above */ | ||
2450 | int rkey[MAX_HEIGHT]; | ||
2451 | |||
2452 | /* | ||
2453 | * the number of bytes by which we are trying to add or remove from | ||
2454 | * S[h]. A negative value means removing. | ||
2455 | */ | ||
2456 | int insert_size[MAX_HEIGHT]; | ||
2457 | |||
2458 | /* | ||
2459 | * number of nodes that will replace node S[h] after balancing | ||
2460 | * on the level h of the tree. If 0 then S is being deleted, | ||
2461 | * if 1 then S is remaining and no new nodes are being created, | ||
2462 | * if 2 or 3 then 1 or 2 new nodes are being created | ||
2463 | */ | ||
2464 | int blknum[MAX_HEIGHT]; | ||
2091 | 2465 | ||
2092 | /* fields that are used only for balancing leaves of the tree */ | 2466 | /* fields that are used only for balancing leaves of the tree */ |
2093 | int cur_blknum; /* number of empty blocks having been already allocated */ | 2467 | |
2094 | int s0num; /* number of items that fall into left most node when S[0] splits */ | 2468 | /* number of empty blocks having been already allocated */ |
2095 | int s1num; /* number of items that fall into first new node when S[0] splits */ | 2469 | int cur_blknum; |
2096 | int s2num; /* number of items that fall into second new node when S[0] splits */ | 2470 | |
2097 | int lbytes; /* number of bytes which can flow to the left neighbor from the left */ | 2471 | /* number of items that fall into left most node when S[0] splits */ |
2098 | /* most liquid item that cannot be shifted from S[0] entirely */ | 2472 | int s0num; |
2099 | /* if -1 then nothing will be partially shifted */ | 2473 | |
2100 | int rbytes; /* number of bytes which will flow to the right neighbor from the right */ | 2474 | /* |
2101 | /* most liquid item that cannot be shifted from S[0] entirely */ | 2475 | * number of bytes which can flow to the left neighbor from the left |
2102 | /* if -1 then nothing will be partially shifted */ | 2476 | * most liquid item that cannot be shifted from S[0] entirely |
2103 | int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ | 2477 | * if -1 then nothing will be partially shifted |
2104 | /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ | 2478 | */ |
2105 | int s2bytes; | 2479 | int lbytes; |
2106 | struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ | 2480 | |
2107 | char *vn_buf; /* kmalloced memory. Used to create | 2481 | /* |
2108 | virtual node and keep map of | 2482 | * number of bytes which will flow to the right neighbor from the right |
2109 | dirtied bitmap blocks */ | 2483 | * most liquid item that cannot be shifted from S[0] entirely |
2484 | * if -1 then nothing will be partially shifted | ||
2485 | */ | ||
2486 | int rbytes; | ||
2487 | |||
2488 | |||
2489 | /* | ||
2490 | * index into the array of item headers in | ||
2491 | * S[0] of the affected item | ||
2492 | */ | ||
2493 | int item_pos; | ||
2494 | |||
2495 | /* new nodes allocated to hold what could not fit into S */ | ||
2496 | struct buffer_head *S_new[2]; | ||
2497 | |||
2498 | /* | ||
2499 | * number of items that will be placed into nodes in S_new | ||
2500 | * when S[0] splits | ||
2501 | */ | ||
2502 | int snum[2]; | ||
2503 | |||
2504 | /* | ||
2505 | * number of bytes which flow to nodes in S_new when S[0] splits | ||
2506 | * note: if S[0] splits into 3 nodes, then items do not need to be cut | ||
2507 | */ | ||
2508 | int sbytes[2]; | ||
2509 | |||
2510 | int pos_in_item; | ||
2511 | int zeroes_num; | ||
2512 | |||
2513 | /* | ||
2514 | * buffers which are to be freed after do_balance finishes | ||
2515 | * by unfix_nodes | ||
2516 | */ | ||
2517 | struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; | ||
2518 | |||
2519 | /* | ||
2520 | * kmalloced memory. Used to create virtual node and keep | ||
2521 | * map of dirtied bitmap blocks | ||
2522 | */ | ||
2523 | char *vn_buf; | ||
2524 | |||
2110 | int vn_buf_size; /* size of the vn_buf */ | 2525 | int vn_buf_size; /* size of the vn_buf */ |
2111 | struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */ | ||
2112 | 2526 | ||
2113 | int fs_gen; /* saved value of `reiserfs_generation' counter | 2527 | /* VN starts after bitmap of bitmap blocks */ |
2114 | see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ | 2528 | struct virtual_node *tb_vn; |
2529 | |||
2530 | /* | ||
2531 | * saved value of `reiserfs_generation' counter see | ||
2532 | * FILESYSTEM_CHANGED() macro in reiserfs_fs.h | ||
2533 | */ | ||
2534 | int fs_gen; | ||
2535 | |||
2115 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 2536 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
2116 | struct in_core_key key; /* key pointer, to pass to block allocator or | 2537 | /* |
2117 | another low-level subsystem */ | 2538 | * key pointer, to pass to block allocator or |
2539 | * another low-level subsystem | ||
2540 | */ | ||
2541 | struct in_core_key key; | ||
2118 | #endif | 2542 | #endif |
2119 | }; | 2543 | }; |
2120 | 2544 | ||
@@ -2122,20 +2546,24 @@ struct tree_balance { | |||
2122 | 2546 | ||
2123 | /* When inserting an item. */ | 2547 | /* When inserting an item. */ |
2124 | #define M_INSERT 'i' | 2548 | #define M_INSERT 'i' |
2125 | /* When inserting into (directories only) or appending onto an already | 2549 | /* |
2126 | existent item. */ | 2550 | * When inserting into (directories only) or appending onto an already |
2551 | * existent item. | ||
2552 | */ | ||
2127 | #define M_PASTE 'p' | 2553 | #define M_PASTE 'p' |
2128 | /* When deleting an item. */ | 2554 | /* When deleting an item. */ |
2129 | #define M_DELETE 'd' | 2555 | #define M_DELETE 'd' |
2130 | /* When truncating an item or removing an entry from a (directory) item. */ | 2556 | /* When truncating an item or removing an entry from a (directory) item. */ |
2131 | #define M_CUT 'c' | 2557 | #define M_CUT 'c' |
2132 | 2558 | ||
2133 | /* used when balancing on leaf level skipped (in reiserfsck) */ | 2559 | /* used when balancing on leaf level skipped (in reiserfsck) */ |
2134 | #define M_INTERNAL 'n' | 2560 | #define M_INTERNAL 'n' |
2135 | 2561 | ||
2136 | /* When further balancing is not needed, then do_balance does not need | 2562 | /* |
2137 | to be called. */ | 2563 | * When further balancing is not needed, then do_balance does not need |
2138 | #define M_SKIP_BALANCING 's' | 2564 | * to be called. |
2565 | */ | ||
2566 | #define M_SKIP_BALANCING 's' | ||
2139 | #define M_CONVERT 'v' | 2567 | #define M_CONVERT 'v' |
2140 | 2568 | ||
2141 | /* modes of leaf_move_items */ | 2569 | /* modes of leaf_move_items */ |
@@ -2148,8 +2576,10 @@ struct tree_balance { | |||
2148 | #define FIRST_TO_LAST 0 | 2576 | #define FIRST_TO_LAST 0 |
2149 | #define LAST_TO_FIRST 1 | 2577 | #define LAST_TO_FIRST 1 |
2150 | 2578 | ||
2151 | /* used in do_balance for passing parent of node information that has | 2579 | /* |
2152 | been gotten from tb struct */ | 2580 | * used in do_balance for passing parent of node information that has |
2581 | * been gotten from tb struct | ||
2582 | */ | ||
2153 | struct buffer_info { | 2583 | struct buffer_info { |
2154 | struct tree_balance *tb; | 2584 | struct tree_balance *tb; |
2155 | struct buffer_head *bi_bh; | 2585 | struct buffer_head *bi_bh; |
@@ -2167,20 +2597,24 @@ static inline struct super_block *sb_from_bi(struct buffer_info *bi) | |||
2167 | return bi ? sb_from_tb(bi->tb) : NULL; | 2597 | return bi ? sb_from_tb(bi->tb) : NULL; |
2168 | } | 2598 | } |
2169 | 2599 | ||
2170 | /* there are 4 types of items: stat data, directory item, indirect, direct. | 2600 | /* |
2171 | +-------------------+------------+--------------+------------+ | 2601 | * there are 4 types of items: stat data, directory item, indirect, direct. |
2172 | | | k_offset | k_uniqueness | mergeable? | | 2602 | * +-------------------+------------+--------------+------------+ |
2173 | +-------------------+------------+--------------+------------+ | 2603 | * | | k_offset | k_uniqueness | mergeable? | |
2174 | | stat data | 0 | 0 | no | | 2604 | * +-------------------+------------+--------------+------------+ |
2175 | +-------------------+------------+--------------+------------+ | 2605 | * | stat data | 0 | 0 | no | |
2176 | | 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | | 2606 | * +-------------------+------------+--------------+------------+ |
2177 | | non 1st directory | hash value | | yes | | 2607 | * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no | |
2178 | | item | | | | | 2608 | * | non 1st directory | hash value | UNIQUENESS | yes | |
2179 | +-------------------+------------+--------------+------------+ | 2609 | * | item | | | | |
2180 | | indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object | 2610 | * +-------------------+------------+--------------+------------+ |
2181 | +-------------------+------------+--------------+------------+ | 2611 | * | indirect item | offset + 1 |TYPE_INDIRECT | [1] | |
2182 | | direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object | 2612 | * +-------------------+------------+--------------+------------+ |
2183 | +-------------------+------------+--------------+------------+ | 2613 | * | direct item | offset + 1 |TYPE_DIRECT | [2] | |
2614 | * +-------------------+------------+--------------+------------+ | ||
2615 | * | ||
2616 | * [1] if this is not the first indirect item of the object | ||
2617 | * [2] if this is not the first direct item of the object | ||
2184 | */ | 2618 | */ |
2185 | 2619 | ||
2186 | struct item_operations { | 2620 | struct item_operations { |
@@ -2219,49 +2653,43 @@ extern struct item_operations *item_ops[TYPE_ANY + 1]; | |||
2219 | /* number of blocks pointed to by the indirect item */ | 2653 | /* number of blocks pointed to by the indirect item */ |
2220 | #define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) | 2654 | #define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) |
2221 | 2655 | ||
2222 | /* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ | 2656 | /* |
2657 | * the used space within the unformatted node corresponding | ||
2658 | * to pos within the item pointed to by ih | ||
2659 | */ | ||
2223 | #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) | 2660 | #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) |
2224 | 2661 | ||
2225 | /* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ | 2662 | /* |
2226 | 2663 | * number of bytes contained by the direct item or the | |
2227 | /* get the item header */ | 2664 | * unformatted nodes the indirect item points to |
2228 | #define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) | 2665 | */ |
2229 | |||
2230 | /* get key */ | ||
2231 | #define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) ) | ||
2232 | |||
2233 | /* get the key */ | ||
2234 | #define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) ) | ||
2235 | |||
2236 | /* get item body */ | ||
2237 | #define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num)))) | ||
2238 | |||
2239 | /* get the stat data by the buffer header and the item order */ | ||
2240 | #define B_N_STAT_DATA(bh,nr) \ | ||
2241 | ( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) ) | ||
2242 | 2666 | ||
2243 | /* following defines use reiserfs buffer header and item header */ | 2667 | /* following defines use reiserfs buffer header and item header */ |
2244 | 2668 | ||
2245 | /* get stat-data */ | 2669 | /* get stat-data */ |
2246 | #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) | 2670 | #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) |
2247 | 2671 | ||
2248 | // this is 3976 for size==4096 | 2672 | /* this is 3976 for size==4096 */ |
2249 | #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) | 2673 | #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) |
2250 | 2674 | ||
2251 | /* indirect items consist of entries which contain blocknrs, pos | 2675 | /* |
2252 | indicates which entry, and B_I_POS_UNFM_POINTER resolves to the | 2676 | * indirect items consist of entries which contain blocknrs, pos |
2253 | blocknr contained by the entry pos points to */ | 2677 | * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the |
2254 | #define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) | 2678 | * blocknr contained by the entry pos points to |
2255 | #define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) | 2679 | */ |
2680 | #define B_I_POS_UNFM_POINTER(bh, ih, pos) \ | ||
2681 | le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) | ||
2682 | #define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ | ||
2683 | (*(((unp_t *)ih_item_body(bh, ih)) + (pos)) = cpu_to_le32(val)) | ||
2256 | 2684 | ||
2257 | struct reiserfs_iget_args { | 2685 | struct reiserfs_iget_args { |
2258 | __u32 objectid; | 2686 | __u32 objectid; |
2259 | __u32 dirid; | 2687 | __u32 dirid; |
2260 | }; | 2688 | }; |
2261 | 2689 | ||
2262 | /***************************************************************************/ | 2690 | /*************************************************************************** |
2263 | /* FUNCTION DECLARATIONS */ | 2691 | * FUNCTION DECLARATIONS * |
2264 | /***************************************************************************/ | 2692 | ***************************************************************************/ |
2265 | 2693 | ||
2266 | #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) | 2694 | #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) |
2267 | 2695 | ||
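With the reworked macros above, one slot of an indirect item is read and written as a little-endian block pointer; roughly (illustrative sketch):

	/* block number held in slot 'pos' of the indirect item at bh/ih */
	__u32 blocknr = B_I_POS_UNFM_POINTER(bh, ih, pos);

	/* clear the slot, i.e. turn that position into a hole */
	PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, 0);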
@@ -2273,7 +2701,10 @@ struct reiserfs_iget_args { | |||
2273 | /* first block written in a commit. */ | 2701 | /* first block written in a commit. */ |
2274 | struct reiserfs_journal_desc { | 2702 | struct reiserfs_journal_desc { |
2275 | __le32 j_trans_id; /* id of commit */ | 2703 | __le32 j_trans_id; /* id of commit */ |
2276 | __le32 j_len; /* length of commit. len +1 is the commit block */ | 2704 | |
2705 | /* length of commit. len +1 is the commit block */ | ||
2706 | __le32 j_len; | ||
2707 | |||
2277 | __le32 j_mount_id; /* mount id of this trans */ | 2708 | __le32 j_mount_id; /* mount id of this trans */ |
2278 | __le32 j_realblock[1]; /* real locations for each block */ | 2709 | __le32 j_realblock[1]; /* real locations for each block */ |
2279 | }; | 2710 | }; |
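Since len + 1 is the commit block, log replay steps from a description block to its commit block with wrap-around arithmetic along these lines (a sketch of the arithmetic only; get_desc_trans_len() and SB_ONDISK_JOURNAL_SIZE() come from elsewhere in the reiserfs headers):

	unsigned long commit_offset =
		(desc_offset + get_desc_trans_len(desc) + 1) %
		SB_ONDISK_JOURNAL_SIZE(sb);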
@@ -2300,22 +2731,35 @@ struct reiserfs_journal_commit { | |||
2300 | #define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) | 2731 | #define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) |
2301 | #define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) | 2732 | #define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) |
2302 | 2733 | ||
2303 | /* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the | 2734 | /* |
2304 | ** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, | 2735 | * this header block gets written whenever a transaction is considered |
2305 | ** and this transaction does not need to be replayed. | 2736 | * fully flushed, and is more recent than the last fully flushed transaction. |
2306 | */ | 2737 | * fully flushed means all the log blocks and all the real blocks are on |
2738 | * disk, and this transaction does not need to be replayed. | ||
2739 | */ | ||
2307 | struct reiserfs_journal_header { | 2740 | struct reiserfs_journal_header { |
2308 | __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ | 2741 | /* id of last fully flushed transaction */ |
2309 | __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ | 2742 | __le32 j_last_flush_trans_id; |
2743 | |||
2744 | /* offset in the log of where to start replay after a crash */ | ||
2745 | __le32 j_first_unflushed_offset; | ||
2746 | |||
2310 | __le32 j_mount_id; | 2747 | __le32 j_mount_id; |
2311 | /* 12 */ struct journal_params jh_journal; | 2748 | /* 12 */ struct journal_params jh_journal; |
2312 | }; | 2749 | }; |
2313 | 2750 | ||
2314 | /* biggest tunable defines are right here */ | 2751 | /* biggest tunable defines are right here */ |
2315 | #define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ | 2752 | #define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ |
2316 | #define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ | 2753 | |
2754 | /* biggest possible single transaction, don't change for now (8/3/99) */ | ||
2755 | #define JOURNAL_TRANS_MAX_DEFAULT 1024 | ||
2317 | #define JOURNAL_TRANS_MIN_DEFAULT 256 | 2756 | #define JOURNAL_TRANS_MIN_DEFAULT 256 |
2318 | #define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ | 2757 | |
2758 | /* | ||
2759 | * max blocks to batch into one transaction, | ||
2760 | * don't make this any bigger than 900 | ||
2761 | */ | ||
2762 | #define JOURNAL_MAX_BATCH_DEFAULT 900 | ||
2319 | #define JOURNAL_MIN_RATIO 2 | 2763 | #define JOURNAL_MIN_RATIO 2 |
2320 | #define JOURNAL_MAX_COMMIT_AGE 30 | 2764 | #define JOURNAL_MAX_COMMIT_AGE 30 |
2321 | #define JOURNAL_MAX_TRANS_AGE 30 | 2765 | #define JOURNAL_MAX_TRANS_AGE 30 |
@@ -2340,16 +2784,18 @@ struct reiserfs_journal_header { | |||
2340 | #define REISERFS_QUOTA_DEL_BLOCKS(s) 0 | 2784 | #define REISERFS_QUOTA_DEL_BLOCKS(s) 0 |
2341 | #endif | 2785 | #endif |
2342 | 2786 | ||
2343 | /* both of these can be as low as 1, or as high as you want. The min is the | 2787 | /* |
2344 | ** number of 4k bitmap nodes preallocated on mount. New nodes are allocated | 2788 | * both of these can be as low as 1, or as high as you want. The min is the |
2345 | ** as needed, and released when transactions are committed. On release, if | 2789 | * number of 4k bitmap nodes preallocated on mount. New nodes are allocated |
2346 | ** the current number of nodes is > max, the node is freed, otherwise, | 2790 | * as needed, and released when transactions are committed. On release, if |
2347 | ** it is put on a free list for faster use later. | 2791 | * the current number of nodes is > max, the node is freed, otherwise, |
2792 | * it is put on a free list for faster use later. | ||
2348 | */ | 2793 | */ |
2349 | #define REISERFS_MIN_BITMAP_NODES 10 | 2794 | #define REISERFS_MIN_BITMAP_NODES 10 |
2350 | #define REISERFS_MAX_BITMAP_NODES 100 | 2795 | #define REISERFS_MAX_BITMAP_NODES 100 |
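The release policy in the comment above is a bounded free list: free past the cap, recycle below it. A self-contained sketch of the idea with hypothetical names (the real allocator lives in journal.c and is more involved):

        #include <stdlib.h>

        #define MAX_NODES 100   /* mirrors REISERFS_MAX_BITMAP_NODES */

        struct bnode {
                struct bnode *next;
                char data[4096];        /* the 4k bitmap payload */
        };

        static struct bnode *free_list; /* nodes parked for fast reuse */
        static int nr_nodes;            /* total nodes currently allocated */

        static void release_bnode(struct bnode *bn)
        {
                if (nr_nodes > MAX_NODES) {
                        nr_nodes--;             /* over the cap: give it back */
                        free(bn);
                } else {
                        bn->next = free_list;   /* under the cap: recycle */
                        free_list = bn;
                }
        }

        int main(void)
        {
                struct bnode *bn = calloc(1, sizeof(*bn));

                if (!bn)
                        return 1;
                nr_nodes++;
                release_bnode(bn);      /* lands on free_list, not free() */
                return 0;
        }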
2351 | 2796 | ||
2352 | #define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ | 2797 | /* these are based on journal hash size of 8192 */ |
2798 | #define JBH_HASH_SHIFT 13 | ||
2353 | #define JBH_HASH_MASK 8191 | 2799 | #define JBH_HASH_MASK 8191 |
2354 | 2800 | ||
2355 | #define _jhashfn(sb,block) \ | 2801 | #define _jhashfn(sb,block) \ |
@@ -2357,7 +2803,7 @@ struct reiserfs_journal_header { | |||
2357 | (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) | 2803 | (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) |
2358 | #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) | 2804 | #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) |
2359 | 2805 | ||
2360 | // We need these to make journal.c code more readable | 2806 | /* We need these to make journal.c code more readable */ |
2361 | #define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) | 2807 | #define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) |
2362 | #define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) | 2808 | #define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) |
2363 | #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) | 2809 | #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) |
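The bucket selection in _jhashfn folds the block number against itself at three shifts, and journal_hash then masks the result down to the 8192-entry table. The block-mixing term, lifted out as a standalone function (the superblock contribution to the hash is omitted here for brevity):

        #include <stdio.h>

        #define JBH_HASH_SHIFT  13
        #define JBH_HASH_MASK   8191    /* journal hash table has 8192 slots */

        static unsigned long jbucket(unsigned long block)
        {
                unsigned long h = (block << (JBH_HASH_SHIFT - 6)) ^
                                  (block >> 13) ^
                                  (block << (JBH_HASH_SHIFT - 12));
                return h & JBH_HASH_MASK;       /* what journal_hash() does */
        }

        int main(void)
        {
                /* neighbouring blocks should scatter across buckets */
                for (unsigned long b = 4096; b < 4100; b++)
                        printf("block %lu -> bucket %lu\n", b, jbucket(b));
                return 0;
        }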
@@ -2365,12 +2811,14 @@ struct reiserfs_journal_header { | |||
2365 | enum reiserfs_bh_state_bits { | 2811 | enum reiserfs_bh_state_bits { |
2366 | BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ | 2812 | BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ |
2367 | BH_JDirty_wait, | 2813 | BH_JDirty_wait, |
2368 | BH_JNew, /* disk block was taken off free list before | 2814 | /* |
2369 | * being in a finished transaction, or | 2815 | * disk block was taken off free list before being in a |
2370 | * written to disk. Can be reused immed. */ | 2816 | * finished transaction, or written to disk. Can be reused immed. |
2817 | */ | ||
2818 | BH_JNew, | ||
2371 | BH_JPrepared, | 2819 | BH_JPrepared, |
2372 | BH_JRestore_dirty, | 2820 | BH_JRestore_dirty, |
2373 | BH_JTest, // debugging only will go away | 2821 | BH_JTest, /* debugging only, will go away */ |
2374 | }; | 2822 | }; |
2375 | 2823 | ||
2376 | BUFFER_FNS(JDirty, journaled); | 2824 | BUFFER_FNS(JDirty, journaled); |
@@ -2386,27 +2834,36 @@ TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); | |||
2386 | BUFFER_FNS(JTest, journal_test); | 2834 | BUFFER_FNS(JTest, journal_test); |
2387 | TAS_BUFFER_FNS(JTest, journal_test); | 2835 | TAS_BUFFER_FNS(JTest, journal_test); |
2388 | 2836 | ||
2389 | /* | 2837 | /* transaction handle which is passed around for all journal calls */ |
2390 | ** transaction handle which is passed around for all journal calls | ||
2391 | */ | ||
2392 | struct reiserfs_transaction_handle { | 2838 | struct reiserfs_transaction_handle { |
2393 | struct super_block *t_super; /* super for this FS when journal_begin was | 2839 | /* |
2394 | called. saves calls to reiserfs_get_super | 2840 | * super for this FS when journal_begin was called. Saves calls to |
2395 | also used by nested transactions to make | 2841 | * reiserfs_get_super. Also used by nested transactions to make |
2396 | sure they are nesting on the right FS | 2842 | * sure they are nesting on the right FS. _must_ be first |
2397 | _must_ be first in the handle | 2843 | * in the handle. |
2398 | */ | 2844 | */ |
2845 | struct super_block *t_super; | ||
2846 | |||
2399 | int t_refcount; | 2847 | int t_refcount; |
2400 | int t_blocks_logged; /* number of blocks this writer has logged */ | 2848 | int t_blocks_logged; /* number of blocks this writer has logged */ |
2401 | int t_blocks_allocated; /* number of blocks this writer allocated */ | 2849 | int t_blocks_allocated; /* number of blocks this writer allocated */ |
2402 | unsigned int t_trans_id; /* sanity check, equals the current trans id */ | 2850 | |
2851 | /* sanity check, equals the current trans id */ | ||
2852 | unsigned int t_trans_id; | ||
2853 | |||
2403 | void *t_handle_save; /* save existing current->journal_info */ | 2854 | void *t_handle_save; /* save existing current->journal_info */ |
2404 | unsigned displace_new_blocks:1; /* if new block allocation occurres, that block | 2855 | |
2405 | should be displaced from others */ | 2856 | /* |
2857 | * if new block allocation occurs, that block | ||
2858 | * should be displaced from others | ||
2859 | */ | ||
2860 | unsigned displace_new_blocks:1; | ||
2861 | |||
2406 | struct list_head t_list; | 2862 | struct list_head t_list; |
2407 | }; | 2863 | }; |
2408 | 2864 | ||
2409 | /* used to keep track of ordered and tail writes, attached to the buffer | 2865 | /* |
2866 | * used to keep track of ordered and tail writes, attached to the buffer | ||
2410 | * head through b_journal_head. | 2867 | * head through b_journal_head. |
2411 | */ | 2868 | */ |
2412 | struct reiserfs_jh { | 2869 | struct reiserfs_jh { |
@@ -2419,7 +2876,7 @@ void reiserfs_free_jh(struct buffer_head *bh); | |||
2419 | int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); | 2876 | int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); |
2420 | int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); | 2877 | int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); |
2421 | int journal_mark_dirty(struct reiserfs_transaction_handle *, | 2878 | int journal_mark_dirty(struct reiserfs_transaction_handle *, |
2422 | struct super_block *, struct buffer_head *bh); | 2879 | struct buffer_head *bh); |
2423 | 2880 | ||
2424 | static inline int reiserfs_file_data_log(struct inode *inode) | 2881 | static inline int reiserfs_file_data_log(struct inode *inode) |
2425 | { | 2882 | { |
@@ -2469,10 +2926,8 @@ int journal_init(struct super_block *, const char *j_dev_name, int old_format, | |||
2469 | int journal_release(struct reiserfs_transaction_handle *, struct super_block *); | 2926 | int journal_release(struct reiserfs_transaction_handle *, struct super_block *); |
2470 | int journal_release_error(struct reiserfs_transaction_handle *, | 2927 | int journal_release_error(struct reiserfs_transaction_handle *, |
2471 | struct super_block *); | 2928 | struct super_block *); |
2472 | int journal_end(struct reiserfs_transaction_handle *, struct super_block *, | 2929 | int journal_end(struct reiserfs_transaction_handle *); |
2473 | unsigned long); | 2930 | int journal_end_sync(struct reiserfs_transaction_handle *); |
2474 | int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, | ||
2475 | unsigned long); | ||
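The dropped superblock and block-count arguments ripple through every caller this patch touches; the resize.c hunks further down show it concretely, since the handle already knows its filesystem via t_super. The pattern, lifted in spirit from those hunks:

        /* before: the superblock and a block count rode along on each call */
        journal_mark_dirty(&th, s, bh);
        err = journal_end(&th, s, 10);

        /* after: the handle's t_super already names the filesystem */
        journal_mark_dirty(&th, bh);
        err = journal_end(&th);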
2476 | int journal_mark_freed(struct reiserfs_transaction_handle *, | 2931 | int journal_mark_freed(struct reiserfs_transaction_handle *, |
2477 | struct super_block *, b_blocknr_t blocknr); | 2932 | struct super_block *, b_blocknr_t blocknr); |
2478 | int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); | 2933 | int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); |
@@ -2481,7 +2936,7 @@ int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, | |||
2481 | int journal_begin(struct reiserfs_transaction_handle *, | 2936 | int journal_begin(struct reiserfs_transaction_handle *, |
2482 | struct super_block *sb, unsigned long); | 2937 | struct super_block *sb, unsigned long); |
2483 | int journal_join_abort(struct reiserfs_transaction_handle *, | 2938 | int journal_join_abort(struct reiserfs_transaction_handle *, |
2484 | struct super_block *sb, unsigned long); | 2939 | struct super_block *sb); |
2485 | void reiserfs_abort_journal(struct super_block *sb, int errno); | 2940 | void reiserfs_abort_journal(struct super_block *sb, int errno); |
2486 | void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); | 2941 | void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); |
2487 | int reiserfs_allocate_list_bitmaps(struct super_block *s, | 2942 | int reiserfs_allocate_list_bitmaps(struct super_block *s, |
@@ -2503,20 +2958,18 @@ int B_IS_IN_TREE(const struct buffer_head *); | |||
2503 | extern void copy_item_head(struct item_head *to, | 2958 | extern void copy_item_head(struct item_head *to, |
2504 | const struct item_head *from); | 2959 | const struct item_head *from); |
2505 | 2960 | ||
2506 | // first key is in cpu form, second - le | 2961 | /* first key is in cpu form, second - le */ |
2507 | extern int comp_short_keys(const struct reiserfs_key *le_key, | 2962 | extern int comp_short_keys(const struct reiserfs_key *le_key, |
2508 | const struct cpu_key *cpu_key); | 2963 | const struct cpu_key *cpu_key); |
2509 | extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); | 2964 | extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); |
2510 | 2965 | ||
2511 | // both are in le form | 2966 | /* both are in le form */ |
2512 | extern int comp_le_keys(const struct reiserfs_key *, | 2967 | extern int comp_le_keys(const struct reiserfs_key *, |
2513 | const struct reiserfs_key *); | 2968 | const struct reiserfs_key *); |
2514 | extern int comp_short_le_keys(const struct reiserfs_key *, | 2969 | extern int comp_short_le_keys(const struct reiserfs_key *, |
2515 | const struct reiserfs_key *); | 2970 | const struct reiserfs_key *); |
2516 | 2971 | ||
2517 | // | 2972 | /* get key version from on disk key - kludge */ |
2518 | // get key version from on disk key - kludge | ||
2519 | // | ||
2520 | static inline int le_key_version(const struct reiserfs_key *key) | 2973 | static inline int le_key_version(const struct reiserfs_key *key) |
2521 | { | 2974 | { |
2522 | int type; | 2975 | int type; |
@@ -2593,12 +3046,12 @@ void padd_item(char *item, int total_length, int length); | |||
2593 | 3046 | ||
2594 | /* inode.c */ | 3047 | /* inode.c */ |
2595 | /* args for the create parameter of reiserfs_get_block */ | 3048 | /* args for the create parameter of reiserfs_get_block */ |
2596 | #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ | 3049 | #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ |
2597 | #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ | 3050 | #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ |
2598 | #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ | 3051 | #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ |
2599 | #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ | 3052 | #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ |
2600 | #define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ | 3053 | #define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ |
2601 | #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ | 3054 | #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ |
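Apart from GET_BLOCK_NO_CREATE (zero), these are single bits, so callers OR them together and the callee tests them individually. A hedged, self-contained sketch of both sides (the dispatch function is invented; only the flag values come from this header):

        #define GET_BLOCK_NO_CREATE     0
        #define GET_BLOCK_CREATE        1
        #define GET_BLOCK_NO_HOLE       2
        #define GET_BLOCK_READ_DIRECT   4
        #define GET_BLOCK_NO_IMUX       8
        #define GET_BLOCK_NO_DANGLE     16

        /* hypothetical callee keyed off individual bits */
        static int get_block_mode(int create)
        {
                if (!(create & GET_BLOCK_CREATE))
                        return 0;       /* lookup only: no new blocks or tail conversion */
                if (create & GET_BLOCK_NO_IMUX)
                        return 1;       /* i_mutex not held: skip preallocation */
                return 2;               /* normal allocating path */
        }

        int main(void)
        {
                /* allocate, but do not leave a transaction running afterwards */
                int mode = get_block_mode(GET_BLOCK_CREATE | GET_BLOCK_NO_DANGLE);

                return mode == 2 ? 0 : 1;
        }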
2602 | 3055 | ||
2603 | void reiserfs_read_locked_inode(struct inode *inode, | 3056 | void reiserfs_read_locked_inode(struct inode *inode, |
2604 | struct reiserfs_iget_args *args); | 3057 | struct reiserfs_iget_args *args); |
@@ -2797,25 +3250,49 @@ struct buffer_head *get_FEB(struct tree_balance *); | |||
2797 | 3250 | ||
2798 | /* bitmap.c */ | 3251 | /* bitmap.c */ |
2799 | 3252 | ||
2800 | /* structure contains hints for block allocator, and it is a container for | 3253 | /* |
2801 | * arguments, such as node, search path, transaction_handle, etc. */ | 3254 | * structure contains hints for block allocator, and it is a container for |
3255 | * arguments, such as node, search path, transaction_handle, etc. | ||
3256 | */ | ||
2802 | struct __reiserfs_blocknr_hint { | 3257 | struct __reiserfs_blocknr_hint { |
2803 | struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ | 3258 | /* inode passed to allocator, if we allocate unf. nodes */ |
3259 | struct inode *inode; | ||
3260 | |||
2804 | sector_t block; /* file offset, in blocks */ | 3261 | sector_t block; /* file offset, in blocks */ |
2805 | struct in_core_key key; | 3262 | struct in_core_key key; |
2806 | struct treepath *path; /* search path, used by allocator to deternine search_start by | 3263 | |
2807 | * various ways */ | 3264 | /* |
2808 | struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and | 3265 | * search path, used by allocator to determine search_start by |
2809 | * bitmap blocks changes */ | 3266 | * various ways |
3267 | */ | ||
3268 | struct treepath *path; | ||
3269 | |||
3270 | /* | ||
3271 | * transaction handle is needed to log super blocks | ||
3272 | * and bitmap block changes | ||
3273 | */ | ||
3274 | struct reiserfs_transaction_handle *th; | ||
3275 | |||
2810 | b_blocknr_t beg, end; | 3276 | b_blocknr_t beg, end; |
2811 | b_blocknr_t search_start; /* a field used to transfer search start value (block number) | 3277 | |
2812 | * between different block allocator procedures | 3278 | /* |
2813 | * (determine_search_start() and others) */ | 3279 | * a field used to transfer search start value (block number) |
2814 | int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed | 3280 | * between different block allocator procedures |
2815 | * function that do actual allocation */ | 3281 | * (determine_search_start() and others) |
2816 | 3282 | */ | |
2817 | unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for | 3283 | b_blocknr_t search_start; |
2818 | * formatted/unformatted blocks with/without preallocation */ | 3284 | |
3285 | /* | ||
3286 | * is set in determine_prealloc_size() function, | ||
3287 | * used by the underlying functions that do the actual allocation | ||
3288 | */ | ||
3289 | int prealloc_size; | ||
3290 | |||
3291 | /* | ||
3292 | * the allocator uses different policies for getting disk | ||
3293 | * space for formatted/unformatted blocks with/without preallocation | ||
3294 | */ | ||
3295 | unsigned formatted_node:1; | ||
2819 | unsigned preallocate:1; | 3296 | unsigned preallocate:1; |
2820 | }; | 3297 | }; |
2821 | 3298 | ||
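Since the structure is only an argument container, callers typically zero it and fill in whatever hints they actually have. A simplified, compilable mirror of that idiom (stand-in types; the real struct is the one above):

        #include <string.h>

        /* cut-down stand-in for __reiserfs_blocknr_hint */
        struct blocknr_hint {
                void *inode;            /* set when allocating unformatted nodes */
                unsigned long block;    /* file offset, in blocks */
                void *th;               /* handle, so bitmap changes can be logged */
                unsigned formatted_node:1;
                unsigned preallocate:1;
        };

        static void init_hint(struct blocknr_hint *h, void *inode, void *th,
                              unsigned long file_block)
        {
                memset(h, 0, sizeof(*h));       /* hints not given stay zero */
                h->inode = inode;
                h->block = file_block;
                h->th = th;
                h->preallocate = 1;     /* unformatted data: ask to preallocate */
        }

        int main(void)
        {
                struct blocknr_hint h;

                init_hint(&h, 0, 0, 42);
                return h.preallocate ? 0 : 1;
        }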
@@ -2909,13 +3386,15 @@ __u32 r5_hash(const signed char *msg, int len); | |||
2909 | #define reiserfs_test_le_bit test_bit_le | 3386 | #define reiserfs_test_le_bit test_bit_le |
2910 | #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le | 3387 | #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le |
2911 | 3388 | ||
2912 | /* sometimes reiserfs_truncate may require to allocate few new blocks | 3389 | /* |
2913 | to perform indirect2direct conversion. People probably used to | 3390 | * sometimes reiserfs_truncate may need to allocate a few new blocks |
2914 | think, that truncate should work without problems on a filesystem | 3391 | * to perform indirect2direct conversion. People probably used to |
2915 | without free disk space. They may complain that they can not | 3392 | * think that truncate should work without problems on a filesystem |
2916 | truncate due to lack of free disk space. This spare space allows us | 3393 | * without free disk space. They may complain that they cannot |
2917 | to not worry about it. 500 is probably too much, but it should be | 3394 | * truncate due to lack of free disk space. This spare space allows us |
2918 | absolutely safe */ | 3395 | * to not worry about it. 500 is probably too much, but it should be |
3396 | * absolutely safe | ||
3397 | */ | ||
2919 | #define SPARE_SPACE 500 | 3398 | #define SPARE_SPACE 500 |
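The reserve only matters at the admission check: ordinary allocations must leave SPARE_SPACE blocks behind so a later truncate can still do its indirect2direct conversion. A hypothetical check in that spirit (not the kernel's actual accounting):

        #define SPARE_SPACE 500

        /* would this allocation leave enough spare blocks for truncate? */
        static int can_allocate(unsigned long free_blocks, unsigned long want,
                                int for_truncate)
        {
                unsigned long reserve = for_truncate ? 0 : SPARE_SPACE;

                return free_blocks >= want + reserve;
        }

        int main(void)
        {
                /* 600 free: a 200-block normal allocation must be refused,
                 * the same allocation on behalf of truncate goes through */
                return can_allocate(600, 200, 0) == 0 &&
                       can_allocate(600, 200, 1) == 1 ? 0 : 1;
        }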
2920 | 3399 | ||
2921 | /* prototypes from ioctl.c */ | 3400 | /* prototypes from ioctl.c */ |
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index a4ef5cd606eb..6052d323bc9a 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c | |||
@@ -53,8 +53,10 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
53 | } | 53 | } |
54 | bforget(bh); | 54 | bforget(bh); |
55 | 55 | ||
56 | /* old disk layout detection; those partitions can be mounted, but | 56 | /* |
57 | * cannot be resized */ | 57 | * old disk layout detection; those partitions can be mounted, but |
58 | * cannot be resized | ||
59 | */ | ||
58 | if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size | 60 | if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size |
59 | != REISERFS_DISK_OFFSET_IN_BYTES) { | 61 | != REISERFS_DISK_OFFSET_IN_BYTES) { |
60 | printk | 62 | printk |
@@ -86,12 +88,14 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
86 | ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); | 88 | ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); |
87 | return -ENOMEM; | 89 | return -ENOMEM; |
88 | } | 90 | } |
89 | /* the new journal bitmaps are zero filled, now we copy in the bitmap | 91 | /* |
90 | ** node pointers from the old journal bitmap structs, and then | 92 | * the new journal bitmaps are zero filled, now we copy in |
91 | ** transfer the new data structures into the journal struct. | 93 | * the bitmap node pointers from the old journal bitmap |
92 | ** | 94 | * structs, and then transfer the new data structures |
93 | ** using the copy_size var below allows this code to work for | 95 | * into the journal struct. |
94 | ** both shrinking and expanding the FS. | 96 | * |
97 | * using the copy_size var below allows this code to work for | ||
98 | * both shrinking and expanding the FS. | ||
95 | */ | 99 | */ |
96 | copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; | 100 | copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; |
97 | copy_size = | 101 | copy_size = |
@@ -101,36 +105,45 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
101 | jb = SB_JOURNAL(s)->j_list_bitmap + i; | 105 | jb = SB_JOURNAL(s)->j_list_bitmap + i; |
102 | memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); | 106 | memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); |
103 | 107 | ||
104 | /* just in case vfree schedules on us, copy the new | 108 | /* |
105 | ** pointer into the journal struct before freeing the | 109 | * just in case vfree schedules on us, copy the new |
106 | ** old one | 110 | * pointer into the journal struct before freeing the |
111 | * old one | ||
107 | */ | 112 | */ |
108 | node_tmp = jb->bitmaps; | 113 | node_tmp = jb->bitmaps; |
109 | jb->bitmaps = jbitmap[i].bitmaps; | 114 | jb->bitmaps = jbitmap[i].bitmaps; |
110 | vfree(node_tmp); | 115 | vfree(node_tmp); |
111 | } | 116 | } |
112 | 117 | ||
113 | /* allocate additional bitmap blocks, reallocate array of bitmap | 118 | /* |
114 | * block pointers */ | 119 | * allocate additional bitmap blocks, reallocate |
120 | * array of bitmap block pointers | ||
121 | */ | ||
115 | bitmap = | 122 | bitmap = |
116 | vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); | 123 | vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); |
117 | if (!bitmap) { | 124 | if (!bitmap) { |
118 | /* Journal bitmaps are still supersized, but the memory isn't | 125 | /* |
119 | * leaked, so I guess it's ok */ | 126 | * Journal bitmaps are still supersized, but the |
127 | * memory isn't leaked, so I guess it's ok | ||
128 | */ | ||
120 | printk("reiserfs_resize: unable to allocate memory.\n"); | 129 | printk("reiserfs_resize: unable to allocate memory.\n"); |
121 | return -ENOMEM; | 130 | return -ENOMEM; |
122 | } | 131 | } |
123 | for (i = 0; i < bmap_nr; i++) | 132 | for (i = 0; i < bmap_nr; i++) |
124 | bitmap[i] = old_bitmap[i]; | 133 | bitmap[i] = old_bitmap[i]; |
125 | 134 | ||
126 | /* This doesn't go through the journal, but it doesn't have to. | 135 | /* |
127 | * The changes are still atomic: We're synced up when the journal | 136 | * This doesn't go through the journal, but it doesn't have to. |
128 | * transaction begins, and the new bitmaps don't matter if the | 137 | * The changes are still atomic: We're synced up when the |
129 | * transaction fails. */ | 138 | * journal transaction begins, and the new bitmaps don't |
139 | * matter if the transaction fails. | ||
140 | */ | ||
130 | for (i = bmap_nr; i < bmap_nr_new; i++) { | 141 | for (i = bmap_nr; i < bmap_nr_new; i++) { |
131 | int depth; | 142 | int depth; |
132 | /* don't use read_bitmap_block since it will cache | 143 | /* |
133 | * the uninitialized bitmap */ | 144 | * don't use read_bitmap_block since it will cache |
145 | * the uninitialized bitmap | ||
146 | */ | ||
134 | depth = reiserfs_write_unlock_nested(s); | 147 | depth = reiserfs_write_unlock_nested(s); |
135 | bh = sb_bread(s, i * s->s_blocksize * 8); | 148 | bh = sb_bread(s, i * s->s_blocksize * 8); |
136 | reiserfs_write_lock_nested(s, depth); | 149 | reiserfs_write_lock_nested(s, depth); |
@@ -147,7 +160,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
147 | depth = reiserfs_write_unlock_nested(s); | 160 | depth = reiserfs_write_unlock_nested(s); |
148 | sync_dirty_buffer(bh); | 161 | sync_dirty_buffer(bh); |
149 | reiserfs_write_lock_nested(s, depth); | 162 | reiserfs_write_lock_nested(s, depth); |
150 | // update bitmap_info stuff | 163 | /* update bitmap_info stuff */ |
151 | bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; | 164 | bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; |
152 | brelse(bh); | 165 | brelse(bh); |
153 | } | 166 | } |
@@ -156,9 +169,11 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
156 | vfree(old_bitmap); | 169 | vfree(old_bitmap); |
157 | } | 170 | } |
158 | 171 | ||
159 | /* begin transaction, if there was an error, it's fine. Yes, we have | 172 | /* |
173 | * begin transaction, if there was an error, it's fine. Yes, we have | ||
160 | * incorrect bitmaps now, but none of it is ever going to touch the | 174 | * incorrect bitmaps now, but none of it is ever going to touch the |
161 | * disk anyway. */ | 175 | * disk anyway. |
176 | */ | ||
162 | err = journal_begin(&th, s, 10); | 177 | err = journal_begin(&th, s, 10); |
163 | if (err) | 178 | if (err) |
164 | return err; | 179 | return err; |
@@ -167,7 +182,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
167 | info = SB_AP_BITMAP(s) + bmap_nr - 1; | 182 | info = SB_AP_BITMAP(s) + bmap_nr - 1; |
168 | bh = reiserfs_read_bitmap_block(s, bmap_nr - 1); | 183 | bh = reiserfs_read_bitmap_block(s, bmap_nr - 1); |
169 | if (!bh) { | 184 | if (!bh) { |
170 | int jerr = journal_end(&th, s, 10); | 185 | int jerr = journal_end(&th); |
171 | if (jerr) | 186 | if (jerr) |
172 | return jerr; | 187 | return jerr; |
173 | return -EIO; | 188 | return -EIO; |
@@ -178,14 +193,14 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
178 | reiserfs_clear_le_bit(i, bh->b_data); | 193 | reiserfs_clear_le_bit(i, bh->b_data); |
179 | info->free_count += s->s_blocksize * 8 - block_r; | 194 | info->free_count += s->s_blocksize * 8 - block_r; |
180 | 195 | ||
181 | journal_mark_dirty(&th, s, bh); | 196 | journal_mark_dirty(&th, bh); |
182 | brelse(bh); | 197 | brelse(bh); |
183 | 198 | ||
184 | /* Correct new last bitmap block - It may not be full */ | 199 | /* Correct new last bitmap block - It may not be full */ |
185 | info = SB_AP_BITMAP(s) + bmap_nr_new - 1; | 200 | info = SB_AP_BITMAP(s) + bmap_nr_new - 1; |
186 | bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); | 201 | bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); |
187 | if (!bh) { | 202 | if (!bh) { |
188 | int jerr = journal_end(&th, s, 10); | 203 | int jerr = journal_end(&th); |
189 | if (jerr) | 204 | if (jerr) |
190 | return jerr; | 205 | return jerr; |
191 | return -EIO; | 206 | return -EIO; |
@@ -194,7 +209,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
194 | reiserfs_prepare_for_journal(s, bh, 1); | 209 | reiserfs_prepare_for_journal(s, bh, 1); |
195 | for (i = block_r_new; i < s->s_blocksize * 8; i++) | 210 | for (i = block_r_new; i < s->s_blocksize * 8; i++) |
196 | reiserfs_set_le_bit(i, bh->b_data); | 211 | reiserfs_set_le_bit(i, bh->b_data); |
197 | journal_mark_dirty(&th, s, bh); | 212 | journal_mark_dirty(&th, bh); |
198 | brelse(bh); | 213 | brelse(bh); |
199 | 214 | ||
200 | info->free_count -= s->s_blocksize * 8 - block_r_new; | 215 | info->free_count -= s->s_blocksize * 8 - block_r_new; |
@@ -207,8 +222,8 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
207 | PUT_SB_BLOCK_COUNT(s, block_count_new); | 222 | PUT_SB_BLOCK_COUNT(s, block_count_new); |
208 | PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); | 223 | PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); |
209 | 224 | ||
210 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 225 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); |
211 | 226 | ||
212 | SB_JOURNAL(s)->j_must_wait = 1; | 227 | SB_JOURNAL(s)->j_must_wait = 1; |
213 | return journal_end(&th, s, 10); | 228 | return journal_end(&th); |
214 | } | 229 | } |
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 615cd9ab7940..dd44468edc2b 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c | |||
@@ -8,46 +8,6 @@ | |||
8 | * Pereslavl-Zalessky Russia | 8 | * Pereslavl-Zalessky Russia |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | ||
12 | * This file contains functions dealing with S+tree | ||
13 | * | ||
14 | * B_IS_IN_TREE | ||
15 | * copy_item_head | ||
16 | * comp_short_keys | ||
17 | * comp_keys | ||
18 | * comp_short_le_keys | ||
19 | * le_key2cpu_key | ||
20 | * comp_le_keys | ||
21 | * bin_search | ||
22 | * get_lkey | ||
23 | * get_rkey | ||
24 | * key_in_buffer | ||
25 | * decrement_bcount | ||
26 | * reiserfs_check_path | ||
27 | * pathrelse_and_restore | ||
28 | * pathrelse | ||
29 | * search_by_key_reada | ||
30 | * search_by_key | ||
31 | * search_for_position_by_key | ||
32 | * comp_items | ||
33 | * prepare_for_direct_item | ||
34 | * prepare_for_direntry_item | ||
35 | * prepare_for_delete_or_cut | ||
36 | * calc_deleted_bytes_number | ||
37 | * init_tb_struct | ||
38 | * padd_item | ||
39 | * reiserfs_delete_item | ||
40 | * reiserfs_delete_solid_item | ||
41 | * reiserfs_delete_object | ||
42 | * maybe_indirect_to_direct | ||
43 | * indirect_to_direct_roll_back | ||
44 | * reiserfs_cut_from_item | ||
45 | * truncate_directory | ||
46 | * reiserfs_do_truncate | ||
47 | * reiserfs_paste_into_item | ||
48 | * reiserfs_insert_item | ||
49 | */ | ||
50 | |||
51 | #include <linux/time.h> | 11 | #include <linux/time.h> |
52 | #include <linux/string.h> | 12 | #include <linux/string.h> |
53 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
@@ -65,21 +25,21 @@ inline int B_IS_IN_TREE(const struct buffer_head *bh) | |||
65 | return (B_LEVEL(bh) != FREE_LEVEL); | 25 | return (B_LEVEL(bh) != FREE_LEVEL); |
66 | } | 26 | } |
67 | 27 | ||
68 | // | 28 | /* to get item head in le form */ |
69 | // to gets item head in le form | ||
70 | // | ||
71 | inline void copy_item_head(struct item_head *to, | 29 | inline void copy_item_head(struct item_head *to, |
72 | const struct item_head *from) | 30 | const struct item_head *from) |
73 | { | 31 | { |
74 | memcpy(to, from, IH_SIZE); | 32 | memcpy(to, from, IH_SIZE); |
75 | } | 33 | } |
76 | 34 | ||
77 | /* k1 is pointer to on-disk structure which is stored in little-endian | 35 | /* |
78 | form. k2 is pointer to cpu variable. For key of items of the same | 36 | * k1 is pointer to on-disk structure which is stored in little-endian |
79 | object this returns 0. | 37 | * form. k2 is pointer to cpu variable. For key of items of the same |
80 | Returns: -1 if key1 < key2 | 38 | * object this returns 0. |
81 | 0 if key1 == key2 | 39 | * Returns: -1 if key1 < key2 |
82 | 1 if key1 > key2 */ | 40 | * 0 if key1 == key2 |
41 | * 1 if key1 > key2 | ||
42 | */ | ||
83 | inline int comp_short_keys(const struct reiserfs_key *le_key, | 43 | inline int comp_short_keys(const struct reiserfs_key *le_key, |
84 | const struct cpu_key *cpu_key) | 44 | const struct cpu_key *cpu_key) |
85 | { | 45 | { |
@@ -97,11 +57,13 @@ inline int comp_short_keys(const struct reiserfs_key *le_key, | |||
97 | return 0; | 57 | return 0; |
98 | } | 58 | } |
99 | 59 | ||
100 | /* k1 is pointer to on-disk structure which is stored in little-endian | 60 | /* |
101 | form. k2 is pointer to cpu variable. | 61 | * k1 is pointer to on-disk structure which is stored in little-endian |
102 | Compare keys using all 4 key fields. | 62 | * form. k2 is pointer to cpu variable. |
103 | Returns: -1 if key1 < key2 0 | 63 | * Compare keys using all 4 key fields. |
104 | if key1 = key2 1 if key1 > key2 */ | 64 | * Returns: -1 if key1 < key2 |
65 | * 0 if key1 == key2, 1 if key1 > key2 | ||
66 | */ | ||
105 | static inline int comp_keys(const struct reiserfs_key *le_key, | 67 | static inline int comp_keys(const struct reiserfs_key *le_key, |
106 | const struct cpu_key *cpu_key) | 68 | const struct cpu_key *cpu_key) |
107 | { | 69 | { |
@@ -155,15 +117,17 @@ inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) | |||
155 | to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); | 117 | to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); |
156 | to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); | 118 | to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); |
157 | 119 | ||
158 | // find out version of the key | 120 | /* find out version of the key */ |
159 | version = le_key_version(from); | 121 | version = le_key_version(from); |
160 | to->version = version; | 122 | to->version = version; |
161 | to->on_disk_key.k_offset = le_key_k_offset(version, from); | 123 | to->on_disk_key.k_offset = le_key_k_offset(version, from); |
162 | to->on_disk_key.k_type = le_key_k_type(version, from); | 124 | to->on_disk_key.k_type = le_key_k_type(version, from); |
163 | } | 125 | } |
164 | 126 | ||
165 | // this does not say which one is bigger, it only returns 1 if keys | 127 | /* |
166 | // are not equal, 0 otherwise | 128 | * this does not say which one is bigger, it only returns 1 if keys |
129 | * are not equal, 0 otherwise | ||
130 | */ | ||
167 | inline int comp_le_keys(const struct reiserfs_key *k1, | 131 | inline int comp_le_keys(const struct reiserfs_key *k1, |
168 | const struct reiserfs_key *k2) | 132 | const struct reiserfs_key *k2) |
169 | { | 133 | { |
@@ -177,24 +141,27 @@ inline int comp_le_keys(const struct reiserfs_key *k1, | |||
177 | * *pos = number of the searched element if found, else the * | 141 | * *pos = number of the searched element if found, else the * |
178 | * number of the first element that is larger than key. * | 142 | * number of the first element that is larger than key. * |
179 | **************************************************************************/ | 143 | **************************************************************************/ |
180 | /* For those not familiar with binary search: lbound is the leftmost item that it | 144 | /* |
181 | could be, rbound the rightmost item that it could be. We examine the item | 145 | * For those not familiar with binary search: lbound is the leftmost item |
182 | halfway between lbound and rbound, and that tells us either that we can increase | 146 | * that it could be, rbound the rightmost item that it could be. We examine |
183 | lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that | 147 | * the item halfway between lbound and rbound, and that tells us either |
184 | there are no possible items, and we have not found it. With each examination we | 148 | * that we can increase lbound, or decrease rbound, or that we have found it, |
185 | cut the number of possible items it could be by one more than half rounded down, | 149 | * or if lbound <= rbound that there are no possible items, and we have not |
186 | or we find it. */ | 150 | * found it. With each examination we cut the number of possible items it |
151 | * could be by one more than half rounded down, or we find it. | ||
152 | */ | ||
187 | static inline int bin_search(const void *key, /* Key to search for. */ | 153 | static inline int bin_search(const void *key, /* Key to search for. */ |
188 | const void *base, /* First item in the array. */ | 154 | const void *base, /* First item in the array. */ |
189 | int num, /* Number of items in the array. */ | 155 | int num, /* Number of items in the array. */ |
190 | int width, /* Item size in the array. | 156 | /* |
191 | searched. Lest the reader be | 157 | * Item size in the array searched. Lest the |
192 | confused, note that this is crafted | 158 | * reader be confused, note that this is crafted |
193 | as a general function, and when it | 159 | * as a general function, and when it is applied |
194 | is applied specifically to the array | 160 | * specifically to the array of item headers in a |
195 | of item headers in a node, width | 161 | * node, width is actually the item header size |
196 | is actually the item header size not | 162 | * not the item size. |
197 | the item size. */ | 163 | */ |
164 | int width, | ||
198 | int *pos /* Number of the searched for element. */ | 165 | int *pos /* Number of the searched for element. */ |
199 | ) | 166 | ) |
200 | { | 167 | { |
@@ -216,8 +183,10 @@ static inline int bin_search(const void *key, /* Key to search for. */ | |||
216 | return ITEM_FOUND; /* Key found in the array. */ | 183 | return ITEM_FOUND; /* Key found in the array. */ |
217 | } | 184 | } |
218 | 185 | ||
219 | /* bin_search did not find given key, it returns position of key, | 186 | /* |
220 | that is minimal and greater than the given one. */ | 187 | * bin_search did not find given key, it returns position of key, |
188 | * that is minimal and greater than the given one. | ||
189 | */ | ||
221 | *pos = lbound; | 190 | *pos = lbound; |
222 | return ITEM_NOT_FOUND; | 191 | return ITEM_NOT_FOUND; |
223 | } | 192 | } |
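The width parameter is a byte stride, which is what lets the same loop walk an array of item headers while conceptually searching items. A self-contained demonstration with the same contract, where *pos ends up at the match or at the first larger element:

        #include <stdio.h>
        #include <string.h>

        #define ITEM_FOUND      1
        #define ITEM_NOT_FOUND  0

        static int demo_bin_search(int key, const void *base, int num,
                                   int width, int *pos)
        {
                int lbound = 0, rbound = num - 1, j, val;

                while (lbound <= rbound) {
                        j = (lbound + rbound) / 2;
                        /* width is the record stride; the key sits at offset 0 */
                        memcpy(&val, (const char *)base + j * width, sizeof(val));
                        if (val < key)
                                lbound = j + 1;
                        else if (val > key)
                                rbound = j - 1;
                        else {
                                *pos = j;
                                return ITEM_FOUND;
                        }
                }
                *pos = lbound;  /* first element greater than key */
                return ITEM_NOT_FOUND;
        }

        int main(void)
        {
                /* key in the first field, stride = whole record, as with item heads */
                struct rec { int key; int payload; } items[] = {
                        { 10, 0 }, { 20, 0 }, { 30, 0 }, { 40, 0 }
                };
                int pos;
                int ret = demo_bin_search(25, items, 4, sizeof(items[0]), &pos);

                printf("ret=%d pos=%d\n", ret, pos);    /* ret=0 pos=2 */
                return 0;
        }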
@@ -234,10 +203,14 @@ static const struct reiserfs_key MAX_KEY = { | |||
234 | cpu_to_le32(0xffffffff)},} | 203 | cpu_to_le32(0xffffffff)},} |
235 | }; | 204 | }; |
236 | 205 | ||
237 | /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom | 206 | /* |
238 | of the path, and going upwards. We must check the path's validity at each step. If the key is not in | 207 | * Get delimiting key of the buffer by looking for it in the buffers in the |
239 | the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this | 208 | * path, starting from the bottom of the path, and going upwards. We must |
240 | case we return a special key, either MIN_KEY or MAX_KEY. */ | 209 | * check the path's validity at each step. If the key is not in the path, |
210 | * there is no delimiting key in the tree (buffer is first or last buffer | ||
211 | * in tree), and in this case we return a special key, either MIN_KEY or | ||
212 | * MAX_KEY. | ||
213 | */ | ||
241 | static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, | 214 | static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, |
242 | const struct super_block *sb) | 215 | const struct super_block *sb) |
243 | { | 216 | { |
@@ -270,9 +243,12 @@ static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_pat | |||
270 | PATH_OFFSET_PBUFFER(chk_path, | 243 | PATH_OFFSET_PBUFFER(chk_path, |
271 | path_offset + 1)->b_blocknr) | 244 | path_offset + 1)->b_blocknr) |
272 | return &MAX_KEY; | 245 | return &MAX_KEY; |
273 | /* Return delimiting key if position in the parent is not equal to zero. */ | 246 | /* |
247 | * Return delimiting key if position in the parent | ||
248 | * is not equal to zero. | ||
249 | */ | ||
274 | if (position) | 250 | if (position) |
275 | return B_N_PDELIM_KEY(parent, position - 1); | 251 | return internal_key(parent, position - 1); |
276 | } | 252 | } |
277 | /* Return MIN_KEY if we are in the root of the buffer tree. */ | 253 | /* Return MIN_KEY if we are in the root of the buffer tree. */ |
278 | if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> | 254 | if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> |
@@ -308,15 +284,23 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, | |||
308 | path_offset)) > | 284 | path_offset)) > |
309 | B_NR_ITEMS(parent)) | 285 | B_NR_ITEMS(parent)) |
310 | return &MIN_KEY; | 286 | return &MIN_KEY; |
311 | /* Check whether parent at the path really points to the child. */ | 287 | /* |
288 | * Check whether parent at the path really points | ||
289 | * to the child. | ||
290 | */ | ||
312 | if (B_N_CHILD_NUM(parent, position) != | 291 | if (B_N_CHILD_NUM(parent, position) != |
313 | PATH_OFFSET_PBUFFER(chk_path, | 292 | PATH_OFFSET_PBUFFER(chk_path, |
314 | path_offset + 1)->b_blocknr) | 293 | path_offset + 1)->b_blocknr) |
315 | return &MIN_KEY; | 294 | return &MIN_KEY; |
316 | /* Return delimiting key if position in the parent is not the last one. */ | 295 | |
296 | /* | ||
297 | * Return delimiting key if position in the parent | ||
298 | * is not the last one. | ||
299 | */ | ||
317 | if (position != B_NR_ITEMS(parent)) | 300 | if (position != B_NR_ITEMS(parent)) |
318 | return B_N_PDELIM_KEY(parent, position); | 301 | return internal_key(parent, position); |
319 | } | 302 | } |
303 | |||
320 | /* Return MAX_KEY if we are in the root of the buffer tree. */ | 304 | /* Return MAX_KEY if we are in the root of the buffer tree. */ |
321 | if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> | 305 | if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> |
322 | b_blocknr == SB_ROOT_BLOCK(sb)) | 306 | b_blocknr == SB_ROOT_BLOCK(sb)) |
@@ -324,13 +308,20 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, | |||
324 | return &MIN_KEY; | 308 | return &MIN_KEY; |
325 | } | 309 | } |
326 | 310 | ||
327 | /* Check whether a key is contained in the tree rooted from a buffer at a path. */ | 311 | /* |
328 | /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in | 312 | * Check whether a key is contained in the tree rooted from a buffer at a path. |
329 | the path. These delimiting keys are stored at least one level above that buffer in the tree. If the | 313 | * This works by looking at the left and right delimiting keys for the buffer |
330 | buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in | 314 | * in the last path_element in the path. These delimiting keys are stored |
331 | this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ | 315 | * at least one level above that buffer in the tree. If the buffer is the |
332 | static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */ | 316 | * first or last node in the tree order then one of the delimiting keys may |
333 | const struct cpu_key *key, /* Key which should be checked. */ | 317 | * be absent, and in this case get_lkey and get_rkey return a special key |
318 | * which is MIN_KEY or MAX_KEY. | ||
319 | */ | ||
320 | static inline int key_in_buffer( | ||
321 | /* Path which should be checked. */ | ||
322 | struct treepath *chk_path, | ||
323 | /* Key which should be checked. */ | ||
324 | const struct cpu_key *key, | ||
334 | struct super_block *sb | 325 | struct super_block *sb |
335 | ) | 326 | ) |
336 | { | 327 | { |
@@ -359,9 +350,11 @@ int reiserfs_check_path(struct treepath *p) | |||
359 | return 0; | 350 | return 0; |
360 | } | 351 | } |
361 | 352 | ||
362 | /* Drop the reference to each buffer in a path and restore | 353 | /* |
354 | * Drop the reference to each buffer in a path and restore | ||
363 | * dirty bits clean when preparing the buffer for the log. | 355 | * dirty bits clean when preparing the buffer for the log. |
364 | * This version should only be called from fix_nodes() */ | 356 | * This version should only be called from fix_nodes() |
357 | */ | ||
365 | void pathrelse_and_restore(struct super_block *sb, | 358 | void pathrelse_and_restore(struct super_block *sb, |
366 | struct treepath *search_path) | 359 | struct treepath *search_path) |
367 | { | 360 | { |
@@ -418,14 +411,17 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) | |||
418 | } | 411 | } |
419 | ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; | 412 | ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; |
420 | used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); | 413 | used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); |
414 | |||
415 | /* free space does not match the calculated amount of used space */ | ||
421 | if (used_space != blocksize - blkh_free_space(blkh)) { | 416 | if (used_space != blocksize - blkh_free_space(blkh)) { |
422 | /* free space does not match to calculated amount of use space */ | ||
423 | reiserfs_warning(NULL, "reiserfs-5082", | 417 | reiserfs_warning(NULL, "reiserfs-5082", |
424 | "free space seems wrong: %z", bh); | 418 | "free space seems wrong: %z", bh); |
425 | return 0; | 419 | return 0; |
426 | } | 420 | } |
427 | // FIXME: it is_leaf will hit performance too much - we may have | 421 | /* |
428 | // return 1 here | 422 | * FIXME: this check in is_leaf will hit performance too much - we |
423 | * may have to just return 1 here | ||
424 | */ | ||
429 | 425 | ||
430 | /* check tables of item heads */ | 426 | /* check tables of item heads */ |
431 | ih = (struct item_head *)(buf + BLKH_SIZE); | 427 | ih = (struct item_head *)(buf + BLKH_SIZE); |
@@ -460,7 +456,7 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) | |||
460 | prev_location = ih_location(ih); | 456 | prev_location = ih_location(ih); |
461 | } | 457 | } |
462 | 458 | ||
463 | // one may imagine much more checks | 459 | /* one may imagine many more checks */ |
464 | return 1; | 460 | return 1; |
465 | } | 461 | } |
466 | 462 | ||
@@ -481,8 +477,8 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh) | |||
481 | } | 477 | } |
482 | 478 | ||
483 | nr = blkh_nr_item(blkh); | 479 | nr = blkh_nr_item(blkh); |
480 | /* for internal which is not root we might check min number of keys */ | ||
484 | if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { | 481 | if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { |
485 | /* for internal which is not root we might check min number of keys */ | ||
486 | reiserfs_warning(NULL, "reiserfs-5088", | 482 | reiserfs_warning(NULL, "reiserfs-5088", |
487 | "number of key seems wrong: %z", bh); | 483 | "number of key seems wrong: %z", bh); |
488 | return 0; | 484 | return 0; |
@@ -494,12 +490,15 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh) | |||
494 | "free space seems wrong: %z", bh); | 490 | "free space seems wrong: %z", bh); |
495 | return 0; | 491 | return 0; |
496 | } | 492 | } |
497 | // one may imagine much more checks | 493 | |
494 | /* one may imagine many more checks */ | ||
498 | return 1; | 495 | return 1; |
499 | } | 496 | } |
500 | 497 | ||
501 | // make sure that bh contains formatted node of reiserfs tree of | 498 | /* |
502 | // 'level'-th level | 499 | * make sure that bh contains a formatted node of the reiserfs tree of |
500 | * 'level'-th level | ||
501 | */ | ||
503 | static int is_tree_node(struct buffer_head *bh, int level) | 502 | static int is_tree_node(struct buffer_head *bh, int level) |
504 | { | 503 | { |
505 | if (B_LEVEL(bh) != level) { | 504 | if (B_LEVEL(bh) != level) { |
@@ -546,7 +545,8 @@ static int search_by_key_reada(struct super_block *s, | |||
546 | for (j = 0; j < i; j++) { | 545 | for (j = 0; j < i; j++) { |
547 | /* | 546 | /* |
548 | * note, this needs attention if we are getting rid of the BKL | 547 | * note, this needs attention if we are getting rid of the BKL |
549 | * you have to make sure the prepared bit isn't set on this buffer | 548 | * you have to make sure the prepared bit isn't set on this |
549 | * buffer | ||
550 | */ | 550 | */ |
551 | if (!buffer_uptodate(bh[j])) { | 551 | if (!buffer_uptodate(bh[j])) { |
552 | if (depth == -1) | 552 | if (depth == -1) |
@@ -558,39 +558,34 @@ static int search_by_key_reada(struct super_block *s, | |||
558 | return depth; | 558 | return depth; |
559 | } | 559 | } |
560 | 560 | ||
561 | /************************************************************************** | 561 | /* |
562 | * Algorithm SearchByKey * | 562 | * This function fills up the path from the root to the leaf as it |
563 | * look for item in the Disk S+Tree by its key * | 563 | * descends the tree looking for the key. It uses reiserfs_bread to |
564 | * Input: sb - super block * | 564 | * try to find buffers in the cache given their block number. If it |
565 | * key - pointer to the key to search * | 565 | * does not find them in the cache it reads them from disk. For each |
566 | * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * | 566 | * node search_by_key finds using reiserfs_bread it then uses |
567 | * search_path - path from the root to the needed leaf * | 567 | * bin_search to look through that node. bin_search will find the |
568 | **************************************************************************/ | 568 | * position of the block_number of the next node if it is looking |
569 | 569 | * through an internal node. If it is looking through a leaf node | |
570 | /* This function fills up the path from the root to the leaf as it | 570 | * bin_search will find the position of the item which has key either |
571 | descends the tree looking for the key. It uses reiserfs_bread to | 571 | * equal to the given key, or which is the maximal key less than the given |
572 | try to find buffers in the cache given their block number. If it | 572 | * key. search_by_key returns a path that must be checked for the |
573 | does not find them in the cache it reads them from disk. For each | 573 | * correctness of the top of the path but need not be checked for the |
574 | node search_by_key finds using reiserfs_bread it then uses | 574 | * correctness of the bottom of the path |
575 | bin_search to look through that node. bin_search will find the | 575 | */ |
576 | position of the block_number of the next node if it is looking | 576 | /* |
577 | through an internal node. If it is looking through a leaf node | 577 | * search_by_key - search for key (and item) in stree |
578 | bin_search will find the position of the item which has key either | 578 | * @sb: superblock |
579 | equal to given key, or which is the maximal key less than the given | 579 | * @key: pointer to key to search for |
580 | key. search_by_key returns a path that must be checked for the | 580 | * @search_path: Allocated and initialized struct treepath; Returned filled |
581 | correctness of the top of the path but need not be checked for the | 581 | * on success. |
582 | correctness of the bottom of the path */ | 582 | * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to |
583 | /* The function is NOT SCHEDULE-SAFE! */ | 583 | * stop at leaf level. |
584 | int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */ | 584 | * |
585 | struct treepath *search_path,/* This structure was | 585 | * The function is NOT SCHEDULE-SAFE! |
586 | allocated and initialized | 586 | */ |
587 | by the calling | 587 | int search_by_key(struct super_block *sb, const struct cpu_key *key, |
588 | function. It is filled up | 588 | struct treepath *search_path, int stop_level) |
589 | by this function. */ | ||
590 | int stop_level /* How far down the tree to search. To | ||
591 | stop at leaf level - set to | ||
592 | DISK_LEAF_NODE_LEVEL */ | ||
593 | ) | ||
594 | { | 589 | { |
595 | b_blocknr_t block_number; | 590 | b_blocknr_t block_number; |
596 | int expected_level; | 591 | int expected_level; |
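A typical caller, per the contract spelled out above: the path is reinitialized by the search, must be released by the caller, and IO_ERROR comes back with the path already dropped. A hedged sketch of such a caller (INITIALIZE_PATH, pathrelse and the return codes are from this filesystem's header; the wrapper itself is invented):

        /* hypothetical lookup helper built on search_by_key() */
        static int lookup_item(struct super_block *sb, const struct cpu_key *key)
        {
                INITIALIZE_PATH(path);  /* on-stack, initially empty path */
                int ret;

                ret = search_by_key(sb, key, &path, DISK_LEAF_NODE_LEVEL);
                if (ret == IO_ERROR)
                        return -EIO;    /* search already released the path */

                /* ... examine the leaf via the last path element here ... */

                pathrelse(&path);       /* drop the buffer references we hold */
                return ret == ITEM_FOUND ? 0 : -ENOENT;
        }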
@@ -609,17 +604,22 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
609 | 604 | ||
610 | PROC_INFO_INC(sb, search_by_key); | 605 | PROC_INFO_INC(sb, search_by_key); |
611 | 606 | ||
612 | /* As we add each node to a path we increase its count. This means that | 607 | /* |
613 | we must be careful to release all nodes in a path before we either | 608 | * As we add each node to a path we increase its count. This means |
614 | discard the path struct or re-use the path struct, as we do here. */ | 609 | * that we must be careful to release all nodes in a path before we |
610 | * either discard the path struct or re-use the path struct, as we | ||
611 | * do here. | ||
612 | */ | ||
615 | 613 | ||
616 | pathrelse(search_path); | 614 | pathrelse(search_path); |
617 | 615 | ||
618 | right_neighbor_of_leaf_node = 0; | 616 | right_neighbor_of_leaf_node = 0; |
619 | 617 | ||
620 | /* With each iteration of this loop we search through the items in the | 618 | /* |
621 | current node, and calculate the next current node(next path element) | 619 | * With each iteration of this loop we search through the items in the |
622 | for the next iteration of this loop.. | 620 | * current node, and calculate the next current node (next path element) |
623 | block_number = SB_ROOT_BLOCK(sb); | 621 | * for the next iteration of this loop. |
622 | */ | ||
623 | block_number = SB_ROOT_BLOCK(sb); | 623 | block_number = SB_ROOT_BLOCK(sb); |
624 | expected_level = -1; | 624 | expected_level = -1; |
625 | while (1) { | 625 | while (1) { |
@@ -639,8 +639,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
639 | ++search_path->path_length); | 639 | ++search_path->path_length); |
640 | fs_gen = get_generation(sb); | 640 | fs_gen = get_generation(sb); |
641 | 641 | ||
642 | /* Read the next tree node, and set the last element in the path to | 642 | /* |
643 | have a pointer to it. */ | 643 | * Read the next tree node, and set the last element |
644 | * in the path to have a pointer to it. | ||
645 | */ | ||
644 | if ((bh = last_element->pe_buffer = | 646 | if ((bh = last_element->pe_buffer = |
645 | sb_getblk(sb, block_number))) { | 647 | sb_getblk(sb, block_number))) { |
646 | 648 | ||
@@ -666,7 +668,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
666 | if (!buffer_uptodate(bh)) | 668 | if (!buffer_uptodate(bh)) |
667 | goto io_error; | 669 | goto io_error; |
668 | } else { | 670 | } else { |
669 | io_error: | 671 | io_error: |
670 | search_path->path_length--; | 672 | search_path->path_length--; |
671 | pathrelse(search_path); | 673 | pathrelse(search_path); |
672 | return IO_ERROR; | 674 | return IO_ERROR; |
@@ -676,9 +678,12 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
676 | expected_level = SB_TREE_HEIGHT(sb); | 678 | expected_level = SB_TREE_HEIGHT(sb); |
677 | expected_level--; | 679 | expected_level--; |
678 | 680 | ||
679 | /* It is possible that schedule occurred. We must check whether the key | 681 | /* |
680 | to search is still in the tree rooted from the current buffer. If | 682 | * It is possible that schedule occurred. We must check |
681 | not then repeat search from the root. */ | 683 | * whether the key to search is still in the tree rooted |
684 | * from the current buffer. If not then repeat search | ||
685 | * from the root. | ||
686 | */ | ||
682 | if (fs_changed(fs_gen, sb) && | 687 | if (fs_changed(fs_gen, sb) && |
683 | (!B_IS_IN_TREE(bh) || | 688 | (!B_IS_IN_TREE(bh) || |
684 | B_LEVEL(bh) != expected_level || | 689 | B_LEVEL(bh) != expected_level || |
@@ -689,8 +694,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
689 | sbk_restarted[expected_level - 1]); | 694 | sbk_restarted[expected_level - 1]); |
690 | pathrelse(search_path); | 695 | pathrelse(search_path); |
691 | 696 | ||
692 | /* Get the root block number so that we can repeat the search | 697 | /* |
693 | starting from the root. */ | 698 | * Get the root block number so that we can |
699 | * repeat the search starting from the root. | ||
700 | */ | ||
694 | block_number = SB_ROOT_BLOCK(sb); | 701 | block_number = SB_ROOT_BLOCK(sb); |
695 | expected_level = -1; | 702 | expected_level = -1; |
696 | right_neighbor_of_leaf_node = 0; | 703 | right_neighbor_of_leaf_node = 0; |
@@ -699,9 +706,11 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
699 | continue; | 706 | continue; |
700 | } | 707 | } |
701 | 708 | ||
702 | /* only check that the key is in the buffer if key is not | 709 | /* |
703 | equal to the MAX_KEY. Latter case is only possible in | 710 | * only check that the key is in the buffer if key is not |
704 | "finish_unfinished()" processing during mount. */ | 711 | * equal to the MAX_KEY. Latter case is only possible in |
712 | * "finish_unfinished()" processing during mount. | ||
713 | */ | ||
705 | RFALSE(comp_keys(&MAX_KEY, key) && | 714 | RFALSE(comp_keys(&MAX_KEY, key) && |
706 | !key_in_buffer(search_path, key, sb), | 715 | !key_in_buffer(search_path, key, sb), |
707 | "PAP-5130: key is not in the buffer"); | 716 | "PAP-5130: key is not in the buffer"); |
@@ -713,8 +722,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
713 | } | 722 | } |
714 | #endif | 723 | #endif |
715 | 724 | ||
716 | // make sure, that the node contents look like a node of | 725 | /* |
717 | // certain level | 726 | * make sure, that the node contents look like a node of |
727 | * certain level | ||
728 | */ | ||
718 | if (!is_tree_node(bh, expected_level)) { | 729 | if (!is_tree_node(bh, expected_level)) { |
719 | reiserfs_error(sb, "vs-5150", | 730 | reiserfs_error(sb, "vs-5150", |
720 | "invalid format found in block %ld. " | 731 | "invalid format found in block %ld. " |
@@ -732,32 +743,42 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
732 | "vs-5152: tree level (%d) is less than stop level (%d)", | 743 | "vs-5152: tree level (%d) is less than stop level (%d)", |
733 | node_level, stop_level); | 744 | node_level, stop_level); |
734 | 745 | ||
735 | retval = bin_search(key, B_N_PITEM_HEAD(bh, 0), | 746 | retval = bin_search(key, item_head(bh, 0), |
736 | B_NR_ITEMS(bh), | 747 | B_NR_ITEMS(bh), |
737 | (node_level == | 748 | (node_level == |
738 | DISK_LEAF_NODE_LEVEL) ? IH_SIZE : | 749 | DISK_LEAF_NODE_LEVEL) ? IH_SIZE : |
739 | KEY_SIZE, | 750 | KEY_SIZE, |
740 | &(last_element->pe_position)); | 751 | &last_element->pe_position); |
741 | if (node_level == stop_level) { | 752 | if (node_level == stop_level) { |
742 | return retval; | 753 | return retval; |
743 | } | 754 | } |
744 | 755 | ||
745 | /* we are not in the stop level */ | 756 | /* we are not in the stop level */ |
757 | /* | ||
758 | * item has been found, so we choose the pointer which | ||
759 | * is to the right of the found one | ||
760 | */ | ||
746 | if (retval == ITEM_FOUND) | 761 | if (retval == ITEM_FOUND) |
747 | /* item has been found, so we choose the pointer which is to the right of the found one */ | ||
748 | last_element->pe_position++; | 762 | last_element->pe_position++; |
749 | 763 | ||
750 | /* if item was not found we choose the position which is to | 764 | /* |
751 | the left of the found item. This requires no code, | 765 | * if item was not found we choose the position which is to |
752 | bin_search did it already. */ | 766 | * the left of the found item. This requires no code, |
767 | * bin_search did it already. | ||
768 | */ | ||
753 | 769 | ||
754 | /* So we have chosen a position in the current node which is | 770 | /* |
755 | an internal node. Now we calculate child block number by | 771 | * So we have chosen a position in the current node which is |
756 | position in the node. */ | 772 | * an internal node. Now we calculate child block number by |
773 | * position in the node. | ||
774 | */ | ||
757 | block_number = | 775 | block_number = |
758 | B_N_CHILD_NUM(bh, last_element->pe_position); | 776 | B_N_CHILD_NUM(bh, last_element->pe_position); |
759 | 777 | ||
760 | /* if we are going to read leaf nodes, try for read ahead as well */ | 778 | /* |
779 | * if we are going to read leaf nodes, try for read | ||
780 | * ahead as well | ||
781 | */ | ||
761 | if ((search_path->reada & PATH_READA) && | 782 | if ((search_path->reada & PATH_READA) && |
762 | node_level == DISK_LEAF_NODE_LEVEL + 1) { | 783 | node_level == DISK_LEAF_NODE_LEVEL + 1) { |
763 | int pos = last_element->pe_position; | 784 | int pos = last_element->pe_position; |
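
The comments reflowed in this hunk spell out search_by_key()'s descent rule: bin_search() runs over item headers in a leaf and over delimiting keys in an internal node, an exact match in an internal node selects the child pointer to the right of the matched key, and a miss keeps the position bin_search already chose. A minimal userspace sketch of that rule, with toy arrays and invented names (this is not the kernel code):

#include <stdio.h>

#define ITEM_FOUND     1
#define ITEM_NOT_FOUND 0

/* Toy model: keys[] are the n delimiting keys of an internal node;
 * child[] holds the n + 1 child "block numbers". */
static int toy_bin_search(int want, const int *keys, int n, int *pos)
{
	int lo = 0, hi = n;

	while (lo < hi) {
		int mid = (lo + hi) / 2;

		if (keys[mid] < want)
			lo = mid + 1;
		else
			hi = mid;
	}
	*pos = lo;
	if (lo < n && keys[lo] == want)
		return ITEM_FOUND;
	return ITEM_NOT_FOUND;	/* *pos already points left of the gap */
}

int main(void)
{
	int keys[] = { 10, 20, 30 };
	int child[] = { 100, 200, 300, 400 };
	int pos;

	/* exact match: step right of the matched key, as the comment says */
	if (toy_bin_search(20, keys, 3, &pos) == ITEM_FOUND)
		pos++;
	printf("descend to child %d\n", child[pos]);	/* child 300 */

	/* miss: bin_search already left pos on the correct child */
	toy_bin_search(15, keys, 3, &pos);
	printf("descend to child %d\n", child[pos]);	/* child 200 */
	return 0;
}
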
@@ -779,7 +800,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
779 | /* | 800 | /* |
780 | * check to make sure we're in the same object | 801 | * check to make sure we're in the same object |
781 | */ | 802 | */ |
782 | le_key = B_N_PDELIM_KEY(bh, pos); | 803 | le_key = internal_key(bh, pos); |
783 | if (le32_to_cpu(le_key->k_objectid) != | 804 | if (le32_to_cpu(le_key->k_objectid) != |
784 | key->on_disk_key.k_objectid) { | 805 | key->on_disk_key.k_objectid) { |
785 | break; | 806 | break; |
@@ -789,26 +810,28 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
789 | } | 810 | } |
790 | } | 811 | } |
791 | 812 | ||
792 | /* Form the path to an item and position in this item which contains | 813 | /* |
793 | file byte defined by key. If there is no such item | 814 | * Form the path to an item and position in this item which contains |
794 | corresponding to the key, we point the path to the item with | 815 | * file byte defined by key. If there is no such item |
795 | maximal key less than key, and *pos_in_item is set to one | 816 | * corresponding to the key, we point the path to the item with |
796 | past the last entry/byte in the item. If searching for entry in a | 817 | * maximal key less than key, and *pos_in_item is set to one |
797 | directory item, and it is not found, *pos_in_item is set to one | 818 | * past the last entry/byte in the item. If searching for entry in a |
798 | entry more than the entry with maximal key which is less than the | 819 | * directory item, and it is not found, *pos_in_item is set to one |
799 | sought key. | 820 | * entry more than the entry with maximal key which is less than the |
800 | 821 | * sought key. | |
801 | Note that if there is no entry in this same node which is one more, | 822 | * |
802 | then we point to an imaginary entry. for direct items, the | 823 | * Note that if there is no entry in this same node which is one more, |
803 | position is in units of bytes, for indirect items the position is | 824 | * then we point to an imaginary entry. for direct items, the |
804 | in units of blocknr entries, for directory items the position is in | 825 | * position is in units of bytes, for indirect items the position is |
805 | units of directory entries. */ | 826 | * in units of blocknr entries, for directory items the position is in |
806 | 827 | * units of directory entries. | |
828 | */ | ||
807 | /* The function is NOT SCHEDULE-SAFE! */ | 829 | /* The function is NOT SCHEDULE-SAFE! */ |
808 | int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ | 830 | int search_for_position_by_key(struct super_block *sb, |
809 | const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ | 831 | /* Key to search (cpu variable) */ |
810 | struct treepath *search_path /* Filled up by this function. */ | 832 | const struct cpu_key *p_cpu_key, |
811 | ) | 833 | /* Filled up by this function. */ |
834 | struct treepath *search_path) | ||
812 | { | 835 | { |
813 | struct item_head *p_le_ih; /* pointer to on-disk structure */ | 836 | struct item_head *p_le_ih; /* pointer to on-disk structure */ |
814 | int blk_size; | 837 | int blk_size; |
@@ -830,7 +853,7 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b | |||
830 | if (retval == ITEM_FOUND) { | 853 | if (retval == ITEM_FOUND) { |
831 | 854 | ||
832 | RFALSE(!ih_item_len | 855 | RFALSE(!ih_item_len |
833 | (B_N_PITEM_HEAD | 856 | (item_head |
834 | (PATH_PLAST_BUFFER(search_path), | 857 | (PATH_PLAST_BUFFER(search_path), |
835 | PATH_LAST_POSITION(search_path))), | 858 | PATH_LAST_POSITION(search_path))), |
836 | "PAP-5165: item length equals zero"); | 859 | "PAP-5165: item length equals zero"); |
@@ -844,14 +867,14 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b | |||
844 | 867 | ||
845 | /* Item is not found. Set path to the previous item. */ | 868 | /* Item is not found. Set path to the previous item. */ |
846 | p_le_ih = | 869 | p_le_ih = |
847 | B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path), | 870 | item_head(PATH_PLAST_BUFFER(search_path), |
848 | --PATH_LAST_POSITION(search_path)); | 871 | --PATH_LAST_POSITION(search_path)); |
849 | blk_size = sb->s_blocksize; | 872 | blk_size = sb->s_blocksize; |
850 | 873 | ||
851 | if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { | 874 | if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key)) |
852 | return FILE_NOT_FOUND; | 875 | return FILE_NOT_FOUND; |
853 | } | 876 | |
854 | // FIXME: quite ugly this far | 877 | /* FIXME: quite ugly this far */ |
855 | 878 | ||
856 | item_offset = le_ih_k_offset(p_le_ih); | 879 | item_offset = le_ih_k_offset(p_le_ih); |
857 | offset = cpu_key_k_offset(p_cpu_key); | 880 | offset = cpu_key_k_offset(p_cpu_key); |
@@ -866,8 +889,10 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b | |||
866 | return POSITION_FOUND; | 889 | return POSITION_FOUND; |
867 | } | 890 | } |
868 | 891 | ||
869 | /* Needed byte is not contained in the item pointed to by the | 892 | /* |
870 | path. Set pos_in_item out of the item. */ | 893 | * Needed byte is not contained in the item pointed to by the |
894 | * path. Set pos_in_item out of the item. | ||
895 | */ | ||
871 | if (is_indirect_le_ih(p_le_ih)) | 896 | if (is_indirect_le_ih(p_le_ih)) |
872 | pos_in_item(search_path) = | 897 | pos_in_item(search_path) = |
873 | ih_item_len(p_le_ih) / UNFM_P_SIZE; | 898 | ih_item_len(p_le_ih) / UNFM_P_SIZE; |
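
Per the reworked comments above, search_for_position_by_key() steps back to the item with the largest key below the sought key and then tests whether the wanted byte falls inside it; pos_in_item is counted in bytes for direct items, in unformatted-pointer entries for indirect items, and in directory entries for directory items. A toy containment check under those assumptions (all values and names invented):

#include <stdio.h>

/* Toy model of the offset test done after stepping back to the
 * previous item.  bytes plays the role of op_bytes_number(). */
struct toy_item {
	unsigned long long offset;	/* key offset of the item's first byte */
	unsigned long long bytes;	/* how many bytes the item covers */
};

static int toy_position_found(const struct toy_item *ih,
			      unsigned long long want,
			      unsigned long long *pos_in_item)
{
	if (want < ih->offset || want >= ih->offset + ih->bytes)
		return 0;	/* byte not in this item */
	/* for a direct item the unit is bytes; an indirect item would
	 * divide by the unformatted-pointer size instead */
	*pos_in_item = want - ih->offset;
	return 1;		/* POSITION_FOUND */
}

int main(void)
{
	struct toy_item ih = { .offset = 4097, .bytes = 4096 };
	unsigned long long pos;

	if (toy_position_found(&ih, 5000, &pos))
		printf("pos_in_item = %llu\n", pos);	/* 903 */
	return 0;
}
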
@@ -892,19 +917,17 @@ int comp_items(const struct item_head *stored_ih, const struct treepath *path) | |||
892 | return 1; | 917 | return 1; |
893 | 918 | ||
894 | /* we need only to know, whether it is the same item */ | 919 | /* we need only to know, whether it is the same item */ |
895 | ih = get_ih(path); | 920 | ih = tp_item_head(path); |
896 | return memcmp(stored_ih, ih, IH_SIZE); | 921 | return memcmp(stored_ih, ih, IH_SIZE); |
897 | } | 922 | } |
898 | 923 | ||
899 | /* unformatted nodes are not logged anymore, ever. This is safe | 924 | /* unformatted nodes are not logged anymore, ever. This is safe now */ |
900 | ** now | ||
901 | */ | ||
902 | #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) | 925 | #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) |
903 | 926 | ||
904 | // block can not be forgotten as it is in I/O or held by someone | 927 | /* block can not be forgotten as it is in I/O or held by someone */ |
905 | #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) | 928 | #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) |
906 | 929 | ||
907 | // prepare for delete or cut of direct item | 930 | /* prepare for delete or cut of direct item */ |
908 | static inline int prepare_for_direct_item(struct treepath *path, | 931 | static inline int prepare_for_direct_item(struct treepath *path, |
909 | struct item_head *le_ih, | 932 | struct item_head *le_ih, |
910 | struct inode *inode, | 933 | struct inode *inode, |
@@ -917,9 +940,8 @@ static inline int prepare_for_direct_item(struct treepath *path, | |||
917 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 940 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
918 | return M_DELETE; | 941 | return M_DELETE; |
919 | } | 942 | } |
920 | // new file gets truncated | 943 | /* new file gets truncated */ |
921 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { | 944 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { |
922 | // | ||
923 | round_len = ROUND_UP(new_file_length); | 945 | round_len = ROUND_UP(new_file_length); |
924 | /* this was new_file_length < le_ih ... */ | 946 | /* this was new_file_length < le_ih ... */ |
925 | if (round_len < le_ih_k_offset(le_ih)) { | 947 | if (round_len < le_ih_k_offset(le_ih)) { |
@@ -933,12 +955,13 @@ static inline int prepare_for_direct_item(struct treepath *path, | |||
933 | return M_CUT; /* Cut from this item. */ | 955 | return M_CUT; /* Cut from this item. */ |
934 | } | 956 | } |
935 | 957 | ||
936 | // old file: items may have any length | 958 | /* old file: items may have any length */ |
937 | 959 | ||
938 | if (new_file_length < le_ih_k_offset(le_ih)) { | 960 | if (new_file_length < le_ih_k_offset(le_ih)) { |
939 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 961 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
940 | return M_DELETE; /* Delete this item. */ | 962 | return M_DELETE; /* Delete this item. */ |
941 | } | 963 | } |
964 | |||
942 | /* Calculate first position and size for cutting from item. */ | 965 | /* Calculate first position and size for cutting from item. */ |
943 | *cut_size = -(ih_item_len(le_ih) - | 966 | *cut_size = -(ih_item_len(le_ih) - |
944 | (pos_in_item(path) = | 967 | (pos_in_item(path) = |
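
The decision encoded above for a direct item under truncate: for KEY_FORMAT_3_6 files the new length is first rounded up, and the item is deleted outright only when it starts past that rounded length; otherwise it is cut. A hedged sketch of the same comparison, assuming the 8-byte rounding reiserfs's ROUND_UP applies (ROUND_UP8 is an invented stand-in; offsets are 1-based as in reiserfs keys):

#include <stdio.h>

#define M_DELETE 'D'
#define M_CUT    'C'

#define ROUND_UP8(x) (((x) + 7ULL) & ~7ULL)

static char toy_mode(unsigned long long item_offset,
		     unsigned long long new_len, int fmt_3_6)
{
	unsigned long long len = fmt_3_6 ? ROUND_UP8(new_len) : new_len;

	/* whole item lies past the new EOF: drop it entirely */
	if (len < item_offset)
		return M_DELETE;
	return M_CUT;	/* otherwise only its tail is cut */
}

int main(void)
{
	printf("%c\n", toy_mode(4097, 4096, 1));	/* D */
	printf("%c\n", toy_mode(4097, 4100, 1));	/* C: rounds up to 4104 */
	return 0;
}
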
@@ -957,12 +980,15 @@ static inline int prepare_for_direntry_item(struct treepath *path, | |||
957 | RFALSE(ih_entry_count(le_ih) != 2, | 980 | RFALSE(ih_entry_count(le_ih) != 2, |
958 | "PAP-5220: incorrect empty directory item (%h)", le_ih); | 981 | "PAP-5220: incorrect empty directory item (%h)", le_ih); |
959 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 982 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
960 | return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ | 983 | /* Delete the directory item containing "." and ".." entry. */ |
984 | return M_DELETE; | ||
961 | } | 985 | } |
962 | 986 | ||
963 | if (ih_entry_count(le_ih) == 1) { | 987 | if (ih_entry_count(le_ih) == 1) { |
964 | /* Delete the directory item such as there is one record only | 988 | /* |
965 | in this item */ | 989 | * Delete the directory item, as there is only one record
990 | * in this item
991 | */ | ||
966 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 992 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
967 | return M_DELETE; | 993 | return M_DELETE; |
968 | } | 994 | } |
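
The two branches above reduce to: a directory item down to its last one or two entries (the empty-dir case holding just "." and "..") is deleted whole, and anything larger is cut by one entry. A simplified toy of that decision; it omits the new_file_length sentinel check the kernel also makes:

#include <stdio.h>

static char toy_direntry_mode(int entry_count)
{
	if (entry_count <= 2)
		return 'D';	/* M_DELETE: drop the whole item */
	return 'C';		/* M_CUT: remove one entry */
}

int main(void)
{
	printf("%c %c %c\n", toy_direntry_mode(2),
	       toy_direntry_mode(1), toy_direntry_mode(5));	/* D D C */
	return 0;
}
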
@@ -976,18 +1002,34 @@ static inline int prepare_for_direntry_item(struct treepath *path, | |||
976 | 1002 | ||
977 | #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) | 1003 | #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) |
978 | 1004 | ||
979 | /* If the path points to a directory or direct item, calculate mode and the size cut, for balance. | 1005 | /* |
980 | If the path points to an indirect item, remove some number of its unformatted nodes. | 1006 | * If the path points to a directory or direct item, calculate mode |
981 | In case of file truncate calculate whether this item must be deleted/truncated or last | 1007 | * and the size cut, for balance. |
982 | unformatted node of this item will be converted to a direct item. | 1008 | * If the path points to an indirect item, remove some number of its |
983 | This function returns a determination of what balance mode the calling function should employ. */ | 1009 | * unformatted nodes. |
984 | static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed | 1010 | * In case of file truncate calculate whether this item must be |
985 | from end of the file. */ | 1011 | * deleted/truncated or last unformatted node of this item will be |
986 | int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */ | 1012 | * converted to a direct item. |
1013 | * This function returns a determination of what balance mode the | ||
1014 | * calling function should employ. | ||
1015 | */ | ||
1016 | static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, | ||
1017 | struct inode *inode, | ||
1018 | struct treepath *path, | ||
1019 | const struct cpu_key *item_key, | ||
1020 | /* | ||
1021 | * Number of unformatted nodes | ||
1022 | * which were removed from end | ||
1023 | * of the file. | ||
1024 | */ | ||
1025 | int *removed, | ||
1026 | int *cut_size, | ||
1027 | /* MAX_KEY_OFFSET in case of delete. */ | ||
1028 | unsigned long long new_file_length | ||
987 | ) | 1029 | ) |
988 | { | 1030 | { |
989 | struct super_block *sb = inode->i_sb; | 1031 | struct super_block *sb = inode->i_sb; |
990 | struct item_head *p_le_ih = PATH_PITEM_HEAD(path); | 1032 | struct item_head *p_le_ih = tp_item_head(path); |
991 | struct buffer_head *bh = PATH_PLAST_BUFFER(path); | 1033 | struct buffer_head *bh = PATH_PLAST_BUFFER(path); |
992 | 1034 | ||
993 | BUG_ON(!th->t_trans_id); | 1035 | BUG_ON(!th->t_trans_id); |
@@ -1023,8 +1065,10 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1023 | int pos = 0; | 1065 | int pos = 0; |
1024 | 1066 | ||
1025 | if ( new_file_length == max_reiserfs_offset (inode) ) { | 1067 | if ( new_file_length == max_reiserfs_offset (inode) ) { |
1026 | /* prepare_for_delete_or_cut() is called by | 1068 | /* |
1027 | * reiserfs_delete_item() */ | 1069 | * prepare_for_delete_or_cut() is called by |
1070 | * reiserfs_delete_item() | ||
1071 | */ | ||
1028 | new_file_length = 0; | 1072 | new_file_length = 0; |
1029 | delete = 1; | 1073 | delete = 1; |
1030 | } | 1074 | } |
@@ -1033,27 +1077,30 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1033 | need_re_search = 0; | 1077 | need_re_search = 0; |
1034 | *cut_size = 0; | 1078 | *cut_size = 0; |
1035 | bh = PATH_PLAST_BUFFER(path); | 1079 | bh = PATH_PLAST_BUFFER(path); |
1036 | copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); | 1080 | copy_item_head(&s_ih, tp_item_head(path)); |
1037 | pos = I_UNFM_NUM(&s_ih); | 1081 | pos = I_UNFM_NUM(&s_ih); |
1038 | 1082 | ||
1039 | while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { | 1083 | while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { |
1040 | __le32 *unfm; | 1084 | __le32 *unfm; |
1041 | __u32 block; | 1085 | __u32 block; |
1042 | 1086 | ||
1043 | /* Each unformatted block deletion may involve one additional | 1087 | /* |
1044 | * bitmap block into the transaction, thereby the initial | 1088 | * Each unformatted block deletion may involve |
1045 | * journal space reservation might not be enough. */ | 1089 | * one additional bitmap block into the transaction, |
1090 | * thereby the initial journal space reservation | ||
1091 | * might not be enough. | ||
1092 | */ | ||
1046 | if (!delete && (*cut_size) != 0 && | 1093 | if (!delete && (*cut_size) != 0 && |
1047 | reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) | 1094 | reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) |
1048 | break; | 1095 | break; |
1049 | 1096 | ||
1050 | unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1; | 1097 | unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1; |
1051 | block = get_block_num(unfm, 0); | 1098 | block = get_block_num(unfm, 0); |
1052 | 1099 | ||
1053 | if (block != 0) { | 1100 | if (block != 0) { |
1054 | reiserfs_prepare_for_journal(sb, bh, 1); | 1101 | reiserfs_prepare_for_journal(sb, bh, 1); |
1055 | put_block_num(unfm, 0, 0); | 1102 | put_block_num(unfm, 0, 0); |
1056 | journal_mark_dirty(th, sb, bh); | 1103 | journal_mark_dirty(th, bh); |
1057 | reiserfs_free_block(th, inode, block, 1); | 1104 | reiserfs_free_block(th, inode, block, 1); |
1058 | } | 1105 | } |
1059 | 1106 | ||
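
The comment explains why this loop may stop before reaching new_file_length: every freed unformatted block can drag one more bitmap block into the transaction, so once the remaining reservation falls under JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD the loop breaks and the caller re-searches. A userspace sketch of that shape, with a counter standing in for journal credits (all names and numbers invented):

#include <stdio.h>

#define RESERVE_FLOOR 5	/* stand-in for JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD */

/* Toy: free block pointers from the tail of an "indirect item" until
 * either the target position is reached or credits run low. */
static int toy_trim(unsigned int *blocks, int *pos, int keep, int *credits)
{
	int cut = 0;

	while (*pos > keep) {
		if (cut && *credits < RESERVE_FLOOR)
			return 1;	/* stop; caller re-searches later */
		if (blocks[*pos - 1]) {
			blocks[*pos - 1] = 0;	/* "free" the block */
			(*credits)--;		/* may cost a bitmap block */
		}
		(*pos)--;
		cut++;
	}
	return 0;		/* trimmed everything requested */
}

int main(void)
{
	unsigned int blk[8] = { 11, 12, 13, 14, 15, 16, 17, 18 };
	int pos = 8, credits = 7;
	int again = toy_trim(blk, &pos, 2, &credits);

	printf("stopped early: %d, pos now %d\n", again, pos);
	return 0;
}
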
@@ -1074,17 +1121,21 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1074 | break; | 1121 | break; |
1075 | } | 1122 | } |
1076 | } | 1123 | } |
1077 | /* a trick. If the buffer has been logged, this will do nothing. If | 1124 | /* |
1078 | ** we've broken the loop without logging it, it will restore the | 1125 | * a trick. If the buffer has been logged, this will |
1079 | ** buffer */ | 1126 | * do nothing. If we've broken the loop without logging |
1127 | * it, it will restore the buffer | ||
1128 | */ | ||
1080 | reiserfs_restore_prepared_buffer(sb, bh); | 1129 | reiserfs_restore_prepared_buffer(sb, bh); |
1081 | } while (need_re_search && | 1130 | } while (need_re_search && |
1082 | search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); | 1131 | search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); |
1083 | pos_in_item(path) = pos * UNFM_P_SIZE; | 1132 | pos_in_item(path) = pos * UNFM_P_SIZE; |
1084 | 1133 | ||
1085 | if (*cut_size == 0) { | 1134 | if (*cut_size == 0) { |
1086 | /* Nothing were cut. maybe convert last unformatted node to the | 1135 | /* |
1087 | * direct item? */ | 1136 | * Nothing was cut. maybe convert last unformatted node to the |
1137 | * direct item? | ||
1138 | */ | ||
1088 | result = M_CONVERT; | 1139 | result = M_CONVERT; |
1089 | } | 1140 | } |
1090 | return result; | 1141 | return result; |
@@ -1095,7 +1146,7 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1095 | static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) | 1146 | static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) |
1096 | { | 1147 | { |
1097 | int del_size; | 1148 | int del_size; |
1098 | struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path); | 1149 | struct item_head *p_le_ih = tp_item_head(tb->tb_path); |
1099 | 1150 | ||
1100 | if (is_statdata_le_ih(p_le_ih)) | 1151 | if (is_statdata_le_ih(p_le_ih)) |
1101 | return 0; | 1152 | return 0; |
@@ -1104,9 +1155,11 @@ static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) | |||
1104 | (mode == | 1155 | (mode == |
1105 | M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; | 1156 | M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; |
1106 | if (is_direntry_le_ih(p_le_ih)) { | 1157 | if (is_direntry_le_ih(p_le_ih)) { |
1107 | /* return EMPTY_DIR_SIZE; We delete emty directoris only. | 1158 | /* |
1108 | * we can't use EMPTY_DIR_SIZE, as old format dirs have a different | 1159 | * return EMPTY_DIR_SIZE; We delete empty directories only.
1109 | * empty size. ick. FIXME, is this right? */ | 1160 | * we can't use EMPTY_DIR_SIZE, as old format dirs have a |
1161 | * different empty size. ick. FIXME, is this right? | ||
1162 | */ | ||
1110 | return del_size; | 1163 | return del_size; |
1111 | } | 1164 | } |
1112 | 1165 | ||
@@ -1169,7 +1222,8 @@ char head2type(struct item_head *ih) | |||
1169 | } | 1222 | } |
1170 | #endif | 1223 | #endif |
1171 | 1224 | ||
1172 | /* Delete object item. | 1225 | /* |
1226 | * Delete object item. | ||
1173 | * th - active transaction handle | 1227 | * th - active transaction handle |
1174 | * path - path to the deleted item | 1228 | * path - path to the deleted item |
1175 | * item_key - key to search for the deleted item | 1229 | * item_key - key to search for the deleted item |
@@ -1212,7 +1266,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1212 | 1266 | ||
1213 | RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); | 1267 | RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); |
1214 | 1268 | ||
1215 | copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); | 1269 | copy_item_head(&s_ih, tp_item_head(path)); |
1216 | s_del_balance.insert_size[0] = del_size; | 1270 | s_del_balance.insert_size[0] = del_size; |
1217 | 1271 | ||
1218 | ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); | 1272 | ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); |
@@ -1221,7 +1275,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1221 | 1275 | ||
1222 | PROC_INFO_INC(sb, delete_item_restarted); | 1276 | PROC_INFO_INC(sb, delete_item_restarted); |
1223 | 1277 | ||
1224 | // file system changed, repeat search | 1278 | /* file system changed, repeat search */ |
1225 | ret_value = | 1279 | ret_value = |
1226 | search_for_position_by_key(sb, item_key, path); | 1280 | search_for_position_by_key(sb, item_key, path); |
1227 | if (ret_value == IO_ERROR) | 1281 | if (ret_value == IO_ERROR) |
@@ -1238,16 +1292,18 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1238 | unfix_nodes(&s_del_balance); | 1292 | unfix_nodes(&s_del_balance); |
1239 | return 0; | 1293 | return 0; |
1240 | } | 1294 | } |
1241 | // reiserfs_delete_item returns item length when success | 1295 | |
1296 | /* reiserfs_delete_item returns item length when success */ | ||
1242 | ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); | 1297 | ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); |
1243 | q_ih = get_ih(path); | 1298 | q_ih = tp_item_head(path); |
1244 | quota_cut_bytes = ih_item_len(q_ih); | 1299 | quota_cut_bytes = ih_item_len(q_ih); |
1245 | 1300 | ||
1246 | /* hack so the quota code doesn't have to guess if the file | 1301 | /* |
1247 | ** has a tail. On tail insert, we allocate quota for 1 unformatted node. | 1302 | * hack so the quota code doesn't have to guess if the file has a |
1248 | ** We test the offset because the tail might have been | 1303 | * tail. On tail insert, we allocate quota for 1 unformatted node. |
1249 | ** split into multiple items, and we only want to decrement for | 1304 | * We test the offset because the tail might have been |
1250 | ** the unfm node once | 1305 | * split into multiple items, and we only want to decrement for |
1306 | * the unfm node once | ||
1251 | */ | 1307 | */ |
1252 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { | 1308 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { |
1253 | if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { | 1309 | if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { |
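
The quota hack above relies on reiserfs key offsets being 1-based: the direct item that starts a file's tail begins at offset n * blocksize + 1, so testing offset & (blocksize - 1) == 1 picks out exactly one piece of a possibly split tail to charge or release a whole unformatted node of quota. The same test outside the kernel, with toy values:

#include <stdio.h>

static int toy_starts_tail(unsigned long long key_offset,
			   unsigned int blocksize)
{
	return (key_offset & (unsigned long long)(blocksize - 1)) == 1;
}

int main(void)
{
	/* first piece of a tail in the third 4k block: charge quota
	 * for a whole unformatted node, once */
	printf("%d\n", toy_starts_tail(8193, 4096));	/* 1 */
	/* a later split piece of the same tail: no extra charge */
	printf("%d\n", toy_starts_tail(8200, 4096));	/* 0 */
	return 0;
}
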
@@ -1261,33 +1317,38 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1261 | int off; | 1317 | int off; |
1262 | char *data; | 1318 | char *data; |
1263 | 1319 | ||
1264 | /* We are in direct2indirect conversion, so move tail contents | 1320 | /* |
1265 | to the unformatted node */ | 1321 | * We are in direct2indirect conversion, so move tail contents |
1266 | /* note, we do the copy before preparing the buffer because we | 1322 | * to the unformatted node |
1267 | ** don't care about the contents of the unformatted node yet. | 1323 | */ |
1268 | ** the only thing we really care about is the direct item's data | 1324 | /* |
1269 | ** is in the unformatted node. | 1325 | * note, we do the copy before preparing the buffer because we |
1270 | ** | 1326 | * don't care about the contents of the unformatted node yet. |
1271 | ** Otherwise, we would have to call reiserfs_prepare_for_journal on | 1327 | * the only thing we really care about is the direct item's |
1272 | ** the unformatted node, which might schedule, meaning we'd have to | 1328 | * data is in the unformatted node. |
1273 | ** loop all the way back up to the start of the while loop. | 1329 | * |
1274 | ** | 1330 | * Otherwise, we would have to call |
1275 | ** The unformatted node must be dirtied later on. We can't be | 1331 | * reiserfs_prepare_for_journal on the unformatted node, |
1276 | ** sure here if the entire tail has been deleted yet. | 1332 | * which might schedule, meaning we'd have to loop all the |
1277 | ** | 1333 | * way back up to the start of the while loop. |
1278 | ** un_bh is from the page cache (all unformatted nodes are | 1334 | * |
1279 | ** from the page cache) and might be a highmem page. So, we | 1335 | * The unformatted node must be dirtied later on. We can't be |
1280 | ** can't use un_bh->b_data. | 1336 | * sure here if the entire tail has been deleted yet. |
1281 | ** -clm | 1337 | * |
1338 | * un_bh is from the page cache (all unformatted nodes are | ||
1339 | * from the page cache) and might be a highmem page. So, we | ||
1340 | * can't use un_bh->b_data. | ||
1341 | * -clm | ||
1282 | */ | 1342 | */ |
1283 | 1343 | ||
1284 | data = kmap_atomic(un_bh->b_page); | 1344 | data = kmap_atomic(un_bh->b_page); |
1285 | off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); | 1345 | off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); |
1286 | memcpy(data + off, | 1346 | memcpy(data + off, |
1287 | B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), | 1347 | ih_item_body(PATH_PLAST_BUFFER(path), &s_ih), |
1288 | ret_value); | 1348 | ret_value); |
1289 | kunmap_atomic(data); | 1349 | kunmap_atomic(data); |
1290 | } | 1350 | } |
1351 | |||
1291 | /* Perform balancing after all resources have been collected at once. */ | 1352 | /* Perform balancing after all resources have been collected at once. */ |
1292 | do_balance(&s_del_balance, NULL, NULL, M_DELETE); | 1353 | do_balance(&s_del_balance, NULL, NULL, M_DELETE); |
1293 | 1354 | ||
@@ -1304,20 +1365,21 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1304 | return ret_value; | 1365 | return ret_value; |
1305 | } | 1366 | } |
1306 | 1367 | ||
1307 | /* Summary Of Mechanisms For Handling Collisions Between Processes: | 1368 | /* |
1308 | 1369 | * Summary Of Mechanisms For Handling Collisions Between Processes: | |
1309 | deletion of the body of the object is performed by iput(), with the | 1370 | * |
1310 | result that if multiple processes are operating on a file, the | 1371 | * deletion of the body of the object is performed by iput(), with the |
1311 | deletion of the body of the file is deferred until the last process | 1372 | * result that if multiple processes are operating on a file, the |
1312 | that has an open inode performs its iput(). | 1373 | * deletion of the body of the file is deferred until the last process |
1313 | 1374 | * that has an open inode performs its iput(). | |
1314 | writes and truncates are protected from collisions by use of | 1375 | * |
1315 | semaphores. | 1376 | * writes and truncates are protected from collisions by use of |
1316 | 1377 | * semaphores. | |
1317 | creates, linking, and mknod are protected from collisions with other | 1378 | * |
1318 | processes by making the reiserfs_add_entry() the last step in the | 1379 | * creates, linking, and mknod are protected from collisions with other |
1319 | creation, and then rolling back all changes if there was a collision. | 1380 | * processes by making the reiserfs_add_entry() the last step in the |
1320 | - Hans | 1381 | * creation, and then rolling back all changes if there was a collision. |
1382 | * - Hans | ||
1321 | */ | 1383 | */ |
1322 | 1384 | ||
1323 | /* this deletes item which never gets split */ | 1385 | /* this deletes item which never gets split */ |
@@ -1347,7 +1409,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1347 | } | 1409 | } |
1348 | if (retval != ITEM_FOUND) { | 1410 | if (retval != ITEM_FOUND) { |
1349 | pathrelse(&path); | 1411 | pathrelse(&path); |
1350 | // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir | 1412 | /* |
1413 | * No need for a warning, if there is just no free | ||
1414 | * space to insert '..' item into the | ||
1415 | * newly-created subdir | ||
1416 | */ | ||
1351 | if (! | 1417 | if (! |
1352 | ((unsigned long long) | 1418 | ((unsigned long long) |
1353 | GET_HASH_VALUE(le_key_k_offset | 1419 | GET_HASH_VALUE(le_key_k_offset |
@@ -1362,11 +1428,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1362 | } | 1428 | } |
1363 | if (!tb_init) { | 1429 | if (!tb_init) { |
1364 | tb_init = 1; | 1430 | tb_init = 1; |
1365 | item_len = ih_item_len(PATH_PITEM_HEAD(&path)); | 1431 | item_len = ih_item_len(tp_item_head(&path)); |
1366 | init_tb_struct(th, &tb, th->t_super, &path, | 1432 | init_tb_struct(th, &tb, th->t_super, &path, |
1367 | -(IH_SIZE + item_len)); | 1433 | -(IH_SIZE + item_len)); |
1368 | } | 1434 | } |
1369 | quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)); | 1435 | quota_cut_bytes = ih_item_len(tp_item_head(&path)); |
1370 | 1436 | ||
1371 | retval = fix_nodes(M_DELETE, &tb, NULL, NULL); | 1437 | retval = fix_nodes(M_DELETE, &tb, NULL, NULL); |
1372 | if (retval == REPEAT_SEARCH) { | 1438 | if (retval == REPEAT_SEARCH) { |
@@ -1376,7 +1442,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1376 | 1442 | ||
1377 | if (retval == CARRY_ON) { | 1443 | if (retval == CARRY_ON) { |
1378 | do_balance(&tb, NULL, NULL, M_DELETE); | 1444 | do_balance(&tb, NULL, NULL, M_DELETE); |
1379 | if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ | 1445 | /* |
1446 | * Should we count quota for item? (we don't | ||
1447 | * count quotas for save-links) | ||
1448 | */ | ||
1449 | if (inode) { | ||
1380 | int depth; | 1450 | int depth; |
1381 | #ifdef REISERQUOTA_DEBUG | 1451 | #ifdef REISERQUOTA_DEBUG |
1382 | reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, | 1452 | reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, |
@@ -1391,7 +1461,8 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1391 | } | 1461 | } |
1392 | break; | 1462 | break; |
1393 | } | 1463 | } |
1394 | // IO_ERROR, NO_DISK_SPACE, etc | 1464 | |
1465 | /* IO_ERROR, NO_DISK_SPACE, etc */ | ||
1395 | reiserfs_warning(th->t_super, "vs-5360", | 1466 | reiserfs_warning(th->t_super, "vs-5360", |
1396 | "could not delete %K due to fix_nodes failure", | 1467 | "could not delete %K due to fix_nodes failure", |
1397 | &cpu_key); | 1468 | &cpu_key); |
@@ -1447,11 +1518,13 @@ static void unmap_buffers(struct page *page, loff_t pos) | |||
1447 | do { | 1518 | do { |
1448 | next = bh->b_this_page; | 1519 | next = bh->b_this_page; |
1449 | 1520 | ||
1450 | /* we want to unmap the buffers that contain the tail, and | 1521 | /* |
1451 | ** all the buffers after it (since the tail must be at the | 1522 | * we want to unmap the buffers that contain |
1452 | ** end of the file). We don't want to unmap file data | 1523 | * the tail, and all the buffers after it |
1453 | ** before the tail, since it might be dirty and waiting to | 1524 | * (since the tail must be at the end of the |
1454 | ** reach disk | 1525 | * file). We don't want to unmap file data |
1526 | * before the tail, since it might be dirty | ||
1527 | * and waiting to reach disk | ||
1455 | */ | 1528 | */ |
1456 | cur_index += bh->b_size; | 1529 | cur_index += bh->b_size; |
1457 | if (cur_index > tail_index) { | 1530 | if (cur_index > tail_index) { |
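
unmap_buffers(), as the reformatted comment says, walks the page's buffers and unmaps only those that contain the tail or sit after it, leaving possibly dirty file data before the tail untouched. A toy walk with the same cumulative-index test (buffer and tail sizes invented):

#include <stdio.h>

int main(void)
{
	unsigned int bufsize = 1024, nbuf = 4;
	unsigned int tail_pos = 1500;	/* position of the tail in the page */
	unsigned int cur = 0;

	for (unsigned int i = 0; i < nbuf; i++) {
		cur += bufsize;
		if (cur > tail_pos)
			printf("buffer %u: unmap\n", i);
		else
			printf("buffer %u: keep (may be dirty)\n", i);
	}
	return 0;
}
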
@@ -1476,9 +1549,10 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, | |||
1476 | BUG_ON(!th->t_trans_id); | 1549 | BUG_ON(!th->t_trans_id); |
1477 | BUG_ON(new_file_size != inode->i_size); | 1550 | BUG_ON(new_file_size != inode->i_size); |
1478 | 1551 | ||
1479 | /* the page being sent in could be NULL if there was an i/o error | 1552 | /* |
1480 | ** reading in the last block. The user will hit problems trying to | 1553 | * the page being sent in could be NULL if there was an i/o error |
1481 | ** read the file, but for now we just skip the indirect2direct | 1554 | * reading in the last block. The user will hit problems trying to |
1555 | * read the file, but for now we just skip the indirect2direct | ||
1482 | */ | 1556 | */ |
1483 | if (atomic_read(&inode->i_count) > 1 || | 1557 | if (atomic_read(&inode->i_count) > 1 || |
1484 | !tail_has_to_be_packed(inode) || | 1558 | !tail_has_to_be_packed(inode) || |
@@ -1490,17 +1564,18 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, | |||
1490 | pathrelse(path); | 1564 | pathrelse(path); |
1491 | return cut_bytes; | 1565 | return cut_bytes; |
1492 | } | 1566 | } |
1567 | |||
1493 | /* Perform the conversion to a direct_item. */ | 1568 | /* Perform the conversion to a direct_item. */ |
1494 | /* return indirect_to_direct(inode, path, item_key, | ||
1495 | new_file_size, mode); */ | ||
1496 | return indirect2direct(th, inode, page, path, item_key, | 1569 | return indirect2direct(th, inode, page, path, item_key, |
1497 | new_file_size, mode); | 1570 | new_file_size, mode); |
1498 | } | 1571 | } |
1499 | 1572 | ||
1500 | /* we did indirect_to_direct conversion. And we have inserted direct | 1573 | /* |
1501 | item successesfully, but there were no disk space to cut unfm | 1574 | * we did indirect_to_direct conversion. And we have inserted direct |
1502 | pointer being converted. Therefore we have to delete inserted | 1575 | * item successfully, but there was no disk space to cut unfm
1503 | direct item(s) */ | 1576 | * pointer being converted. Therefore we have to delete inserted |
1577 | * direct item(s) | ||
1578 | */ | ||
1504 | static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | 1579 | static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, |
1505 | struct inode *inode, struct treepath *path) | 1580 | struct inode *inode, struct treepath *path) |
1506 | { | 1581 | { |
@@ -1509,7 +1584,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | |||
1509 | int removed; | 1584 | int removed; |
1510 | BUG_ON(!th->t_trans_id); | 1585 | BUG_ON(!th->t_trans_id); |
1511 | 1586 | ||
1512 | make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!! | 1587 | make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); |
1513 | tail_key.key_length = 4; | 1588 | tail_key.key_length = 4; |
1514 | 1589 | ||
1515 | tail_len = | 1590 | tail_len = |
@@ -1521,7 +1596,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | |||
1521 | reiserfs_panic(inode->i_sb, "vs-5615", | 1596 | reiserfs_panic(inode->i_sb, "vs-5615", |
1522 | "found invalid item"); | 1597 | "found invalid item"); |
1523 | RFALSE(path->pos_in_item != | 1598 | RFALSE(path->pos_in_item != |
1524 | ih_item_len(PATH_PITEM_HEAD(path)) - 1, | 1599 | ih_item_len(tp_item_head(path)) - 1, |
1525 | "vs-5616: appended bytes found"); | 1600 | "vs-5616: appended bytes found"); |
1526 | PATH_LAST_POSITION(path)--; | 1601 | PATH_LAST_POSITION(path)--; |
1527 | 1602 | ||
@@ -1539,7 +1614,6 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | |||
1539 | reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " | 1614 | reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " |
1540 | "conversion has been rolled back due to " | 1615 | "conversion has been rolled back due to " |
1541 | "lack of disk space"); | 1616 | "lack of disk space"); |
1542 | //mark_file_without_tail (inode); | ||
1543 | mark_inode_dirty(inode); | 1617 | mark_inode_dirty(inode); |
1544 | } | 1618 | } |
1545 | 1619 | ||
@@ -1551,15 +1625,18 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1551 | struct page *page, loff_t new_file_size) | 1625 | struct page *page, loff_t new_file_size) |
1552 | { | 1626 | { |
1553 | struct super_block *sb = inode->i_sb; | 1627 | struct super_block *sb = inode->i_sb; |
1554 | /* Every function which is going to call do_balance must first | 1628 | /* |
1555 | create a tree_balance structure. Then it must fill up this | 1629 | * Every function which is going to call do_balance must first |
1556 | structure by using the init_tb_struct and fix_nodes functions. | 1630 | * create a tree_balance structure. Then it must fill up this |
1557 | After that we can make tree balancing. */ | 1631 | * structure by using the init_tb_struct and fix_nodes functions. |
1632 | * After that we can make tree balancing. | ||
1633 | */ | ||
1558 | struct tree_balance s_cut_balance; | 1634 | struct tree_balance s_cut_balance; |
1559 | struct item_head *p_le_ih; | 1635 | struct item_head *p_le_ih; |
1560 | int cut_size = 0, /* Amount to be cut. */ | 1636 | int cut_size = 0; /* Amount to be cut. */ |
1561 | ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */ | 1637 | int ret_value = CARRY_ON; |
1562 | is_inode_locked = 0; | 1638 | int removed = 0; /* Number of the removed unformatted nodes. */ |
1639 | int is_inode_locked = 0; | ||
1563 | char mode; /* Mode of the balance. */ | 1640 | char mode; /* Mode of the balance. */ |
1564 | int retval2 = -1; | 1641 | int retval2 = -1; |
1565 | int quota_cut_bytes; | 1642 | int quota_cut_bytes; |
@@ -1571,21 +1648,27 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1571 | init_tb_struct(th, &s_cut_balance, inode->i_sb, path, | 1648 | init_tb_struct(th, &s_cut_balance, inode->i_sb, path, |
1572 | cut_size); | 1649 | cut_size); |
1573 | 1650 | ||
1574 | /* Repeat this loop until we either cut the item without needing | 1651 | /* |
1575 | to balance, or we fix_nodes without schedule occurring */ | 1652 | * Repeat this loop until we either cut the item without needing |
1653 | * to balance, or we fix_nodes without schedule occurring | ||
1654 | */ | ||
1576 | while (1) { | 1655 | while (1) { |
1577 | /* Determine the balance mode, position of the first byte to | 1656 | /* |
1578 | be cut, and size to be cut. In case of the indirect item | 1657 | * Determine the balance mode, position of the first byte to |
1579 | free unformatted nodes which are pointed to by the cut | 1658 | * be cut, and size to be cut. In case of the indirect item |
1580 | pointers. */ | 1659 | * free unformatted nodes which are pointed to by the cut |
1660 | * pointers. | ||
1661 | */ | ||
1581 | 1662 | ||
1582 | mode = | 1663 | mode = |
1583 | prepare_for_delete_or_cut(th, inode, path, | 1664 | prepare_for_delete_or_cut(th, inode, path, |
1584 | item_key, &removed, | 1665 | item_key, &removed, |
1585 | &cut_size, new_file_size); | 1666 | &cut_size, new_file_size); |
1586 | if (mode == M_CONVERT) { | 1667 | if (mode == M_CONVERT) { |
1587 | /* convert last unformatted node to direct item or leave | 1668 | /* |
1588 | tail in the unformatted node */ | 1669 | * convert last unformatted node to direct item or |
1670 | * leave tail in the unformatted node | ||
1671 | */ | ||
1589 | RFALSE(ret_value != CARRY_ON, | 1672 | RFALSE(ret_value != CARRY_ON, |
1590 | "PAP-5570: can not convert twice"); | 1673 | "PAP-5570: can not convert twice"); |
1591 | 1674 | ||
@@ -1599,15 +1682,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1599 | 1682 | ||
1600 | is_inode_locked = 1; | 1683 | is_inode_locked = 1; |
1601 | 1684 | ||
1602 | /* removing of last unformatted node will change value we | 1685 | /* |
1603 | have to return to truncate. Save it */ | 1686 | * removing of last unformatted node will |
1687 | * change value we have to return to truncate. | ||
1688 | * Save it | ||
1689 | */ | ||
1604 | retval2 = ret_value; | 1690 | retval2 = ret_value; |
1605 | /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */ | ||
1606 | 1691 | ||
1607 | /* So, we have performed the first part of the conversion: | 1692 | /* |
1608 | inserting the new direct item. Now we are removing the | 1693 | * So, we have performed the first part of the |
1609 | last unformatted node pointer. Set key to search for | 1694 | * conversion: |
1610 | it. */ | 1695 | * inserting the new direct item. Now we are |
1696 | * removing the last unformatted node pointer. | ||
1697 | * Set key to search for it. | ||
1698 | */ | ||
1611 | set_cpu_key_k_type(item_key, TYPE_INDIRECT); | 1699 | set_cpu_key_k_type(item_key, TYPE_INDIRECT); |
1612 | item_key->key_length = 4; | 1700 | item_key->key_length = 4; |
1613 | new_file_size -= | 1701 | new_file_size -= |
@@ -1650,11 +1738,13 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1650 | return (ret_value == IO_ERROR) ? -EIO : -ENOENT; | 1738 | return (ret_value == IO_ERROR) ? -EIO : -ENOENT; |
1651 | } /* while */ | 1739 | } /* while */ |
1652 | 1740 | ||
1653 | // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) | 1741 | /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */ |
1654 | if (ret_value != CARRY_ON) { | 1742 | if (ret_value != CARRY_ON) { |
1655 | if (is_inode_locked) { | 1743 | if (is_inode_locked) { |
1656 | // FIXME: this seems to be not needed: we are always able | 1744 | /* |
1657 | // to cut item | 1745 | * FIXME: this seems to be not needed: we are always |
1746 | * able to cut item | ||
1747 | */ | ||
1658 | indirect_to_direct_roll_back(th, inode, path); | 1748 | indirect_to_direct_roll_back(th, inode, path); |
1659 | } | 1749 | } |
1660 | if (ret_value == NO_DISK_SPACE) | 1750 | if (ret_value == NO_DISK_SPACE) |
@@ -1671,22 +1761,23 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1671 | /* Calculate number of bytes that need to be cut from the item. */ | 1761 | /* Calculate number of bytes that need to be cut from the item. */ |
1672 | quota_cut_bytes = | 1762 | quota_cut_bytes = |
1673 | (mode == | 1763 | (mode == |
1674 | M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance. | 1764 | M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance. |
1675 | insert_size[0]; | 1765 | insert_size[0]; |
1676 | if (retval2 == -1) | 1766 | if (retval2 == -1) |
1677 | ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); | 1767 | ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); |
1678 | else | 1768 | else |
1679 | ret_value = retval2; | 1769 | ret_value = retval2; |
1680 | 1770 | ||
1681 | /* For direct items, we only change the quota when deleting the last | 1771 | /* |
1682 | ** item. | 1772 | * For direct items, we only change the quota when deleting the last |
1773 | * item. | ||
1683 | */ | 1774 | */ |
1684 | p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); | 1775 | p_le_ih = tp_item_head(s_cut_balance.tb_path); |
1685 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { | 1776 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { |
1686 | if (mode == M_DELETE && | 1777 | if (mode == M_DELETE && |
1687 | (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == | 1778 | (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == |
1688 | 1) { | 1779 | 1) { |
1689 | // FIXME: this is to keep 3.5 happy | 1780 | /* FIXME: this is to keep 3.5 happy */ |
1690 | REISERFS_I(inode)->i_first_direct_byte = U32_MAX; | 1781 | REISERFS_I(inode)->i_first_direct_byte = U32_MAX; |
1691 | quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; | 1782 | quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; |
1692 | } else { | 1783 | } else { |
@@ -1696,10 +1787,12 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1696 | #ifdef CONFIG_REISERFS_CHECK | 1787 | #ifdef CONFIG_REISERFS_CHECK |
1697 | if (is_inode_locked) { | 1788 | if (is_inode_locked) { |
1698 | struct item_head *le_ih = | 1789 | struct item_head *le_ih = |
1699 | PATH_PITEM_HEAD(s_cut_balance.tb_path); | 1790 | tp_item_head(s_cut_balance.tb_path); |
1700 | /* we are going to complete indirect2direct conversion. Make | 1791 | /* |
1701 | sure, that we exactly remove last unformatted node pointer | 1792 | * we are going to complete indirect2direct conversion. Make |
1702 | of the item */ | 1793 | * sure, that we exactly remove last unformatted node pointer |
1794 | * of the item | ||
1795 | */ | ||
1703 | if (!is_indirect_le_ih(le_ih)) | 1796 | if (!is_indirect_le_ih(le_ih)) |
1704 | reiserfs_panic(sb, "vs-5652", | 1797 | reiserfs_panic(sb, "vs-5652", |
1705 | "item must be indirect %h", le_ih); | 1798 | "item must be indirect %h", le_ih); |
@@ -1717,17 +1810,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1717 | "(CUT, insert_size==%d)", | 1810 | "(CUT, insert_size==%d)", |
1718 | le_ih, s_cut_balance.insert_size[0]); | 1811 | le_ih, s_cut_balance.insert_size[0]); |
1719 | } | 1812 | } |
1720 | /* it would be useful to make sure, that right neighboring | 1813 | /* |
1721 | item is direct item of this file */ | 1814 | * it would be useful to make sure, that right neighboring |
1815 | * item is direct item of this file | ||
1816 | */ | ||
1722 | } | 1817 | } |
1723 | #endif | 1818 | #endif |
1724 | 1819 | ||
1725 | do_balance(&s_cut_balance, NULL, NULL, mode); | 1820 | do_balance(&s_cut_balance, NULL, NULL, mode); |
1726 | if (is_inode_locked) { | 1821 | if (is_inode_locked) { |
1727 | /* we've done an indirect->direct conversion. when the data block | 1822 | /* |
1728 | ** was freed, it was removed from the list of blocks that must | 1823 | * we've done an indirect->direct conversion. when the |
1729 | ** be flushed before the transaction commits, make sure to | 1824 | * data block was freed, it was removed from the list of |
1730 | ** unmap and invalidate it | 1825 | * blocks that must be flushed before the transaction |
1826 | * commits, make sure to unmap and invalidate it | ||
1731 | */ | 1827 | */ |
1732 | unmap_buffers(page, tail_pos); | 1828 | unmap_buffers(page, tail_pos); |
1733 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 1829 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
@@ -1758,20 +1854,25 @@ static void truncate_directory(struct reiserfs_transaction_handle *th, | |||
1758 | set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); | 1854 | set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); |
1759 | } | 1855 | } |
1760 | 1856 | ||
1761 | /* Truncate file to the new size. Note, this must be called with a transaction | 1857 | /* |
1762 | already started */ | 1858 | * Truncate file to the new size. Note, this must be called with a |
1859 | * transaction already started | ||
1860 | */ | ||
1763 | int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | 1861 | int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, |
1764 | struct inode *inode, /* ->i_size contains new size */ | 1862 | struct inode *inode, /* ->i_size contains new size */ |
1765 | struct page *page, /* up to date for last block */ | 1863 | struct page *page, /* up to date for last block */ |
1766 | int update_timestamps /* when it is called by | 1864 | /* |
1767 | file_release to convert | 1865 | * when it is called by file_release to convert |
1768 | the tail - no timestamps | 1866 | * the tail - no timestamps should be updated |
1769 | should be updated */ | 1867 | */ |
1868 | int update_timestamps | ||
1770 | ) | 1869 | ) |
1771 | { | 1870 | { |
1772 | INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ | 1871 | INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ |
1773 | struct item_head *p_le_ih; /* Pointer to an item header. */ | 1872 | struct item_head *p_le_ih; /* Pointer to an item header. */ |
1774 | struct cpu_key s_item_key; /* Key to search for a previous file item. */ | 1873 | |
1874 | /* Key to search for a previous file item. */ | ||
1875 | struct cpu_key s_item_key; | ||
1775 | loff_t file_size, /* Old file size. */ | 1876 | loff_t file_size, /* Old file size. */ |
1776 | new_file_size; /* New file size. */ | 1877 | new_file_size; /* New file size. */ |
1777 | int deleted; /* Number of deleted or truncated bytes. */ | 1878 | int deleted; /* Number of deleted or truncated bytes. */ |
@@ -1784,8 +1885,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1784 | || S_ISLNK(inode->i_mode))) | 1885 | || S_ISLNK(inode->i_mode))) |
1785 | return 0; | 1886 | return 0; |
1786 | 1887 | ||
1888 | /* deletion of directory - no need to update timestamps */ | ||
1787 | if (S_ISDIR(inode->i_mode)) { | 1889 | if (S_ISDIR(inode->i_mode)) { |
1788 | // deletion of directory - no need to update timestamps | ||
1789 | truncate_directory(th, inode); | 1890 | truncate_directory(th, inode); |
1790 | return 0; | 1891 | return 0; |
1791 | } | 1892 | } |
@@ -1793,7 +1894,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1793 | /* Get new file size. */ | 1894 | /* Get new file size. */ |
1794 | new_file_size = inode->i_size; | 1895 | new_file_size = inode->i_size; |
1795 | 1896 | ||
1796 | // FIXME: note, that key type is unimportant here | 1897 | /* FIXME: note, that key type is unimportant here */ |
1797 | make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), | 1898 | make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), |
1798 | TYPE_DIRECT, 3); | 1899 | TYPE_DIRECT, 3); |
1799 | 1900 | ||
@@ -1819,7 +1920,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1819 | s_search_path.pos_in_item--; | 1920 | s_search_path.pos_in_item--; |
1820 | 1921 | ||
1821 | /* Get real file size (total length of all file items) */ | 1922 | /* Get real file size (total length of all file items) */ |
1822 | p_le_ih = PATH_PITEM_HEAD(&s_search_path); | 1923 | p_le_ih = tp_item_head(&s_search_path); |
1823 | if (is_statdata_le_ih(p_le_ih)) | 1924 | if (is_statdata_le_ih(p_le_ih)) |
1824 | file_size = 0; | 1925 | file_size = 0; |
1825 | else { | 1926 | else { |
@@ -1827,9 +1928,11 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1827 | int bytes = | 1928 | int bytes = |
1828 | op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); | 1929 | op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); |
1829 | 1930 | ||
1830 | /* this may mismatch with real file size: if last direct item | 1931 | /* |
1831 | had no padding zeros and last unformatted node had no free | 1932 | * this may mismatch with real file size: if last direct item |
1832 | space, this file would have this file size */ | 1933 | * had no padding zeros and last unformatted node had no free |
1934 | * space, this file would have this file size | ||
1935 | */ | ||
1833 | file_size = offset + bytes - 1; | 1936 | file_size = offset + bytes - 1; |
1834 | } | 1937 | } |
1835 | /* | 1938 | /* |
@@ -1867,18 +1970,20 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1867 | 1970 | ||
1868 | set_cpu_key_k_offset(&s_item_key, file_size); | 1971 | set_cpu_key_k_offset(&s_item_key, file_size); |
1869 | 1972 | ||
1870 | /* While there are bytes to truncate and previous file item is presented in the tree. */ | 1973 | /* |
1974 | * While there are bytes to truncate and previous | ||
1975 | * file item is present in the tree.
1976 | */ | ||
1871 | 1977 | ||
1872 | /* | 1978 | /* |
1873 | ** This loop could take a really long time, and could log | 1979 | * This loop could take a really long time, and could log |
1874 | ** many more blocks than a transaction can hold. So, we do a polite | 1980 | * many more blocks than a transaction can hold. So, we do |
1875 | ** journal end here, and if the transaction needs ending, we make | 1981 | * a polite journal end here, and if the transaction needs |
1876 | ** sure the file is consistent before ending the current trans | 1982 | * ending, we make sure the file is consistent before ending |
1877 | ** and starting a new one | 1983 | * the current trans and starting a new one |
1878 | */ | 1984 | */ |
1879 | if (journal_transaction_should_end(th, 0) || | 1985 | if (journal_transaction_should_end(th, 0) || |
1880 | reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { | 1986 | reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { |
1881 | int orig_len_alloc = th->t_blocks_allocated; | ||
1882 | pathrelse(&s_search_path); | 1987 | pathrelse(&s_search_path); |
1883 | 1988 | ||
1884 | if (update_timestamps) { | 1989 | if (update_timestamps) { |
@@ -1887,7 +1992,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1887 | } | 1992 | } |
1888 | reiserfs_update_sd(th, inode); | 1993 | reiserfs_update_sd(th, inode); |
1889 | 1994 | ||
1890 | err = journal_end(th, inode->i_sb, orig_len_alloc); | 1995 | err = journal_end(th); |
1891 | if (err) | 1996 | if (err) |
1892 | goto out; | 1997 | goto out; |
1893 | err = journal_begin(th, inode->i_sb, | 1998 | err = journal_begin(th, inode->i_sb, |
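
This is the "polite journal end" the comment describes: the truncate loop can log more blocks than one transaction holds, so when credits run low it brings the file to a consistent state, ends the transaction, and begins a new one. A toy model of that restart pattern (names and numbers invented; the real code batches work between journal_end() and journal_begin()):

#include <stdio.h>

struct toy_txn { int credits; };

static void toy_restart(struct toy_txn *t)
{
	/* journal_end() + journal_begin() analogue */
	printf("commit, restart transaction\n");
	t->credits = 10;
}

int main(void)
{
	struct toy_txn t = { .credits = 10 };

	for (int step = 0; step < 12; step++) {
		if (t.credits <= 2)	/* free-space floor, as above */
			toy_restart(&t);
		t.credits--;		/* one balanced tree operation */
	}
	printf("done, %d credits left\n", t.credits);
	return 0;
}
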
@@ -1904,25 +2009,25 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1904 | "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", | 2009 | "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", |
1905 | new_file_size, file_size, s_item_key.on_disk_key.k_objectid); | 2010 | new_file_size, file_size, s_item_key.on_disk_key.k_objectid); |
1906 | 2011 | ||
1907 | update_and_out: | 2012 | update_and_out: |
1908 | if (update_timestamps) { | 2013 | if (update_timestamps) { |
1909 | // this is truncate, not file closing | 2014 | /* this is truncate, not file closing */ |
1910 | inode->i_mtime = CURRENT_TIME_SEC; | 2015 | inode->i_mtime = CURRENT_TIME_SEC; |
1911 | inode->i_ctime = CURRENT_TIME_SEC; | 2016 | inode->i_ctime = CURRENT_TIME_SEC; |
1912 | } | 2017 | } |
1913 | reiserfs_update_sd(th, inode); | 2018 | reiserfs_update_sd(th, inode); |
1914 | 2019 | ||
1915 | out: | 2020 | out: |
1916 | pathrelse(&s_search_path); | 2021 | pathrelse(&s_search_path); |
1917 | return err; | 2022 | return err; |
1918 | } | 2023 | } |
1919 | 2024 | ||
1920 | #ifdef CONFIG_REISERFS_CHECK | 2025 | #ifdef CONFIG_REISERFS_CHECK |
1921 | // this makes sure, that we __append__, not overwrite or add holes | 2026 | /* this makes sure, that we __append__, not overwrite or add holes */ |
1922 | static void check_research_for_paste(struct treepath *path, | 2027 | static void check_research_for_paste(struct treepath *path, |
1923 | const struct cpu_key *key) | 2028 | const struct cpu_key *key) |
1924 | { | 2029 | { |
1925 | struct item_head *found_ih = get_ih(path); | 2030 | struct item_head *found_ih = tp_item_head(path); |
1926 | 2031 | ||
1927 | if (is_direct_le_ih(found_ih)) { | 2032 | if (is_direct_le_ih(found_ih)) { |
1928 | if (le_ih_k_offset(found_ih) + | 2033 | if (le_ih_k_offset(found_ih) + |
@@ -1952,13 +2057,22 @@ static void check_research_for_paste(struct treepath *path, | |||
1952 | } | 2057 | } |
1953 | #endif /* config reiserfs check */ | 2058 | #endif /* config reiserfs check */ |
1954 | 2059 | ||
1955 | /* Paste bytes to the existing item. Returns bytes number pasted into the item. */ | 2060 | /* |
1956 | int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */ | 2061 | * Paste bytes to the existing item. |
1957 | const struct cpu_key *key, /* Key to search for the needed item. */ | 2062 | * Returns bytes number pasted into the item. |
1958 | struct inode *inode, /* Inode item belongs to */ | 2063 | */ |
1959 | const char *body, /* Pointer to the bytes to paste. */ | 2064 | int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, |
2065 | /* Path to the pasted item. */ | ||
2066 | struct treepath *search_path, | ||
2067 | /* Key to search for the needed item. */ | ||
2068 | const struct cpu_key *key, | ||
2069 | /* Inode item belongs to */ | ||
2070 | struct inode *inode, | ||
2071 | /* Pointer to the bytes to paste. */ | ||
2072 | const char *body, | ||
2073 | /* Size of pasted bytes. */ | ||
1960 | int pasted_size) | 2074 | int pasted_size) |
1961 | { /* Size of pasted bytes. */ | 2075 | { |
1962 | struct super_block *sb = inode->i_sb; | 2076 | struct super_block *sb = inode->i_sb; |
1963 | struct tree_balance s_paste_balance; | 2077 | struct tree_balance s_paste_balance; |
1964 | int retval; | 2078 | int retval; |
@@ -1973,7 +2087,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
1973 | reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, | 2087 | reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, |
1974 | "reiserquota paste_into_item(): allocating %u id=%u type=%c", | 2088 | "reiserquota paste_into_item(): allocating %u id=%u type=%c", |
1975 | pasted_size, inode->i_uid, | 2089 | pasted_size, inode->i_uid, |
1976 | key2type(&(key->on_disk_key))); | 2090 | key2type(&key->on_disk_key)); |
1977 | #endif | 2091 | #endif |
1978 | 2092 | ||
1979 | depth = reiserfs_write_unlock_nested(sb); | 2093 | depth = reiserfs_write_unlock_nested(sb); |
@@ -1997,7 +2111,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
1997 | while ((retval = | 2111 | while ((retval = |
1998 | fix_nodes(M_PASTE, &s_paste_balance, NULL, | 2112 | fix_nodes(M_PASTE, &s_paste_balance, NULL, |
1999 | body)) == REPEAT_SEARCH) { | 2113 | body)) == REPEAT_SEARCH) { |
2000 | search_again: | 2114 | search_again: |
2001 | /* file system changed while we were in the fix_nodes */ | 2115 | /* file system changed while we were in the fix_nodes */ |
2002 | PROC_INFO_INC(th->t_super, paste_into_item_restarted); | 2116 | PROC_INFO_INC(th->t_super, paste_into_item_restarted); |
2003 | retval = | 2117 | retval = |
@@ -2019,21 +2133,23 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
2019 | #endif | 2133 | #endif |
2020 | } | 2134 | } |
2021 | 2135 | ||
2022 | /* Perform balancing after all resources are collected by fix_nodes, and | 2136 | /* |
2023 | accessing them will not risk triggering schedule. */ | 2137 | * Perform balancing after all resources are collected by fix_nodes, |
2138 | * and accessing them will not risk triggering schedule. | ||
2139 | */ | ||
2024 | if (retval == CARRY_ON) { | 2140 | if (retval == CARRY_ON) { |
2025 | do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); | 2141 | do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); |
2026 | return 0; | 2142 | return 0; |
2027 | } | 2143 | } |
2028 | retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; | 2144 | retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; |
2029 | error_out: | 2145 | error_out: |
2030 | /* this also releases the path */ | 2146 | /* this also releases the path */ |
2031 | unfix_nodes(&s_paste_balance); | 2147 | unfix_nodes(&s_paste_balance); |
2032 | #ifdef REISERQUOTA_DEBUG | 2148 | #ifdef REISERQUOTA_DEBUG |
2033 | reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, | 2149 | reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, |
2034 | "reiserquota paste_into_item(): freeing %u id=%u type=%c", | 2150 | "reiserquota paste_into_item(): freeing %u id=%u type=%c", |
2035 | pasted_size, inode->i_uid, | 2151 | pasted_size, inode->i_uid, |
2036 | key2type(&(key->on_disk_key))); | 2152 | key2type(&key->on_disk_key)); |
2037 | #endif | 2153 | #endif |
2038 | depth = reiserfs_write_unlock_nested(sb); | 2154 | depth = reiserfs_write_unlock_nested(sb); |
2039 | dquot_free_space_nodirty(inode, pasted_size); | 2155 | dquot_free_space_nodirty(inode, pasted_size); |
@@ -2041,7 +2157,8 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
2041 | return retval; | 2157 | return retval; |
2042 | } | 2158 | } |
2043 | 2159 | ||
2044 | /* Insert new item into the buffer at the path. | 2160 | /* |
2161 | * Insert new item into the buffer at the path. | ||
2045 | * th - active transaction handle | 2162 | * th - active transaction handle |
2046 | * path - path to the inserted item | 2163 | * path - path to the inserted item |
2047 | * ih - pointer to the item header to insert | 2164 | * ih - pointer to the item header to insert |
@@ -2064,8 +2181,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2064 | fs_gen = get_generation(inode->i_sb); | 2181 | fs_gen = get_generation(inode->i_sb); |
2065 | quota_bytes = ih_item_len(ih); | 2182 | quota_bytes = ih_item_len(ih); |
2066 | 2183 | ||
2067 | /* hack so the quota code doesn't have to guess if the file has | 2184 | /* |
2068 | ** a tail, links are always tails, so there's no guessing needed | 2185 | * hack so the quota code doesn't have to guess |
2186 | * if the file has a tail; links are always tails, | ||
2187 | * so there's no guessing needed | ||
2069 | */ | 2188 | */ |
2070 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) | 2189 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) |
2071 | quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; | 2190 | quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; |
@@ -2074,8 +2193,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2074 | "reiserquota insert_item(): allocating %u id=%u type=%c", | 2193 | "reiserquota insert_item(): allocating %u id=%u type=%c", |
2075 | quota_bytes, inode->i_uid, head2type(ih)); | 2194 | quota_bytes, inode->i_uid, head2type(ih)); |
2076 | #endif | 2195 | #endif |
2077 | /* We can't dirty inode here. It would be immediately written but | 2196 | /* |
2078 | * appropriate stat item isn't inserted yet... */ | 2197 | * We can't dirty inode here. It would be immediately |
2198 | * written but appropriate stat item isn't inserted yet... | ||
2199 | */ | ||
2079 | depth = reiserfs_write_unlock_nested(inode->i_sb); | 2200 | depth = reiserfs_write_unlock_nested(inode->i_sb); |
2080 | retval = dquot_alloc_space_nodirty(inode, quota_bytes); | 2201 | retval = dquot_alloc_space_nodirty(inode, quota_bytes); |
2081 | reiserfs_write_lock_nested(inode->i_sb, depth); | 2202 | reiserfs_write_lock_nested(inode->i_sb, depth); |
@@ -2089,7 +2210,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2089 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 2210 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
2090 | s_ins_balance.key = key->on_disk_key; | 2211 | s_ins_balance.key = key->on_disk_key; |
2091 | #endif | 2212 | #endif |
2092 | /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ | 2213 | /* |
2214 | * DQUOT_* can schedule, must check to be sure calling | ||
2215 | * fix_nodes is safe | ||
2216 | */ | ||
2093 | if (inode && fs_changed(fs_gen, inode->i_sb)) { | 2217 | if (inode && fs_changed(fs_gen, inode->i_sb)) { |
2094 | goto search_again; | 2218 | goto search_again; |
2095 | } | 2219 | } |
@@ -2097,7 +2221,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2097 | while ((retval = | 2221 | while ((retval = |
2098 | fix_nodes(M_INSERT, &s_ins_balance, ih, | 2222 | fix_nodes(M_INSERT, &s_ins_balance, ih, |
2099 | body)) == REPEAT_SEARCH) { | 2223 | body)) == REPEAT_SEARCH) { |
2100 | search_again: | 2224 | search_again: |
2101 | /* file system changed while we were in the fix_nodes */ | 2225 | /* file system changed while we were in the fix_nodes */ |
2102 | PROC_INFO_INC(th->t_super, insert_item_restarted); | 2226 | PROC_INFO_INC(th->t_super, insert_item_restarted); |
2103 | retval = search_item(th->t_super, key, path); | 2227 | retval = search_item(th->t_super, key, path); |
@@ -2121,7 +2245,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2121 | } | 2245 | } |
2122 | 2246 | ||
2123 | retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; | 2247 | retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; |
2124 | error_out: | 2248 | error_out: |
2125 | /* also releases the path */ | 2249 | /* also releases the path */ |
2126 | unfix_nodes(&s_ins_balance); | 2250 | unfix_nodes(&s_ins_balance); |
2127 | #ifdef REISERQUOTA_DEBUG | 2251 | #ifdef REISERQUOTA_DEBUG |
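Both reiserfs_paste_into_item() and reiserfs_insert_item() above share the same retry idiom: fix_nodes() collects every buffer that balancing may touch, and because it can schedule, the tree may have changed underneath it, in which case the search is redone until fix_nodes() returns CARRY_ON and do_balance() can run without blocking. A condensed sketch of that loop ("mode" and "balance" are stand-in names; the quota bookkeeping of the real functions is omitted):

    while ((retval = fix_nodes(mode, &balance, ih, body)) == REPEAT_SEARCH) {
            /* the tree changed while fix_nodes() may have slept */
            retval = search_item(th->t_super, key, path);
            if (retval == IO_ERROR) {
                    retval = -EIO;
                    goto error_out;
            }
            /* re-validate the found position, then retry fix_nodes() */
    }
    if (retval == CARRY_ON) {
            /* every resource is pinned; balancing can no longer schedule */
            do_balance(&balance, ih, body, mode);
            return 0;
    }
    retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
    error_out:
    /* unfix_nodes() also releases the path */
    unfix_nodes(&balance);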
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 9fb20426005e..a392cef6acc6 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -74,7 +74,7 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) | |||
74 | dquot_writeback_dquots(s, -1); | 74 | dquot_writeback_dquots(s, -1); |
75 | reiserfs_write_lock(s); | 75 | reiserfs_write_lock(s); |
76 | if (!journal_begin(&th, s, 1)) | 76 | if (!journal_begin(&th, s, 1)) |
77 | if (!journal_end_sync(&th, s, 1)) | 77 | if (!journal_end_sync(&th)) |
78 | reiserfs_flush_old_commits(s); | 78 | reiserfs_flush_old_commits(s); |
79 | reiserfs_write_unlock(s); | 79 | reiserfs_write_unlock(s); |
80 | return 0; | 80 | return 0; |
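The reiserfs_sync_fs() hunk above is the first instance of a conversion repeated throughout this file: journal_end(), journal_end_sync() and journal_mark_dirty() drop their super_block and block-count parameters, because the transaction handle already records both. Roughly:

    /* before: callers repeat state the handle already carries */
    journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
    err = journal_end(&th, s, 1);

    /* after: the handle knows its super block and its reservation */
    journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
    err = journal_end(&th);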
@@ -136,9 +136,9 @@ static int reiserfs_freeze(struct super_block *s) | |||
136 | } else { | 136 | } else { |
137 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), | 137 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), |
138 | 1); | 138 | 1); |
139 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 139 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); |
140 | reiserfs_block_writes(&th); | 140 | reiserfs_block_writes(&th); |
141 | journal_end_sync(&th, s, 1); | 141 | journal_end_sync(&th); |
142 | } | 142 | } |
143 | } | 143 | } |
144 | reiserfs_write_unlock(s); | 144 | reiserfs_write_unlock(s); |
@@ -153,13 +153,15 @@ static int reiserfs_unfreeze(struct super_block *s) | |||
153 | 153 | ||
154 | extern const struct in_core_key MAX_IN_CORE_KEY; | 154 | extern const struct in_core_key MAX_IN_CORE_KEY; |
155 | 155 | ||
156 | /* this is used to delete "save link" when there are no items of a | 156 | /* |
157 | file it points to. It can either happen if unlink is completed but | 157 | * this is used to delete "save link" when there are no items of a |
158 | "save unlink" removal, or if file has both unlink and truncate | 158 | * file it points to. It can either happen if unlink is completed but |
159 | pending and as unlink completes first (because key of "save link" | 159 | * "save unlink" removal, or if file has both unlink and truncate |
160 | protecting unlink is bigger that a key lf "save link" which | 160 | * pending and as unlink completes first (because key of "save link" |
161 | protects truncate), so there left no items to make truncate | 161 | * protecting unlink is bigger that a key lf "save link" which |
162 | completion on */ | 162 | * protects truncate), so there left no items to make truncate |
163 | * completion on | ||
164 | */ | ||
163 | static int remove_save_link_only(struct super_block *s, | 165 | static int remove_save_link_only(struct super_block *s, |
164 | struct reiserfs_key *key, int oid_free) | 166 | struct reiserfs_key *key, int oid_free) |
165 | { | 167 | { |
@@ -176,7 +178,7 @@ static int remove_save_link_only(struct super_block *s, | |||
176 | /* removals are protected by direct items */ | 178 | /* removals are protected by direct items */ |
177 | reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); | 179 | reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); |
178 | 180 | ||
179 | return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT); | 181 | return journal_end(&th); |
180 | } | 182 | } |
181 | 183 | ||
182 | #ifdef CONFIG_QUOTA | 184 | #ifdef CONFIG_QUOTA |
@@ -258,7 +260,7 @@ static int finish_unfinished(struct super_block *s) | |||
258 | break; | 260 | break; |
259 | } | 261 | } |
260 | item_pos--; | 262 | item_pos--; |
261 | ih = B_N_PITEM_HEAD(bh, item_pos); | 263 | ih = item_head(bh, item_pos); |
262 | 264 | ||
263 | if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) | 265 | if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) |
264 | /* there are no "save" links anymore */ | 266 | /* there are no "save" links anymore */ |
@@ -271,7 +273,7 @@ static int finish_unfinished(struct super_block *s) | |||
271 | truncate = 0; | 273 | truncate = 0; |
272 | 274 | ||
273 | /* reiserfs_iget needs k_dirid and k_objectid only */ | 275 | /* reiserfs_iget needs k_dirid and k_objectid only */ |
274 | item = B_I_PITEM(bh, ih); | 276 | item = ih_item_body(bh, ih); |
275 | obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); | 277 | obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); |
276 | obj_key.on_disk_key.k_objectid = | 278 | obj_key.on_disk_key.k_objectid = |
277 | le32_to_cpu(ih->ih_key.k_objectid); | 279 | le32_to_cpu(ih->ih_key.k_objectid); |
@@ -282,8 +284,10 @@ static int finish_unfinished(struct super_block *s) | |||
282 | 284 | ||
283 | inode = reiserfs_iget(s, &obj_key); | 285 | inode = reiserfs_iget(s, &obj_key); |
284 | if (!inode) { | 286 | if (!inode) { |
285 | /* the unlink almost completed, it just did not manage to remove | 287 | /* |
286 | "save" link and release objectid */ | 288 | * the unlink almost completed, it just did not |
289 | * manage to remove "save" link and release objectid | ||
290 | */ | ||
287 | reiserfs_warning(s, "vs-2180", "iget failed for %K", | 291 | reiserfs_warning(s, "vs-2180", "iget failed for %K", |
288 | &obj_key); | 292 | &obj_key); |
289 | retval = remove_save_link_only(s, &save_link_key, 1); | 293 | retval = remove_save_link_only(s, &save_link_key, 1); |
@@ -303,10 +307,13 @@ static int finish_unfinished(struct super_block *s) | |||
303 | reiserfs_write_lock_nested(inode->i_sb, depth); | 307 | reiserfs_write_lock_nested(inode->i_sb, depth); |
304 | 308 | ||
305 | if (truncate && S_ISDIR(inode->i_mode)) { | 309 | if (truncate && S_ISDIR(inode->i_mode)) { |
306 | /* We got a truncate request for a dir which is impossible. | 310 | /* |
307 | The only imaginable way is to execute unfinished truncate request | 311 | * We got a truncate request for a dir which |
308 | then boot into old kernel, remove the file and create dir with | 312 | * is impossible. The only imaginable way is to |
309 | the same key. */ | 313 | * execute unfinished truncate request then boot |
314 | * into old kernel, remove the file and create dir | ||
315 | * with the same key. | ||
316 | */ | ||
310 | reiserfs_warning(s, "green-2101", | 317 | reiserfs_warning(s, "green-2101", |
311 | "impossible truncate on a " | 318 | "impossible truncate on a " |
312 | "directory %k. Please report", | 319 | "directory %k. Please report", |
@@ -320,14 +327,16 @@ static int finish_unfinished(struct super_block *s) | |||
320 | if (truncate) { | 327 | if (truncate) { |
321 | REISERFS_I(inode)->i_flags |= | 328 | REISERFS_I(inode)->i_flags |= |
322 | i_link_saved_truncate_mask; | 329 | i_link_saved_truncate_mask; |
323 | /* not completed truncate found. New size was committed together | 330 | /* |
324 | with "save" link */ | 331 | * not completed truncate found. New size was |
332 | * committed together with "save" link | ||
333 | */ | ||
325 | reiserfs_info(s, "Truncating %k to %Ld ..", | 334 | reiserfs_info(s, "Truncating %k to %Ld ..", |
326 | INODE_PKEY(inode), inode->i_size); | 335 | INODE_PKEY(inode), inode->i_size); |
327 | reiserfs_truncate_file(inode, | 336 | |
328 | 0 | 337 | /* don't update modification time */ |
329 | /*don't update modification time */ | 338 | reiserfs_truncate_file(inode, 0); |
330 | ); | 339 | |
331 | retval = remove_save_link(inode, truncate); | 340 | retval = remove_save_link(inode, truncate); |
332 | } else { | 341 | } else { |
333 | REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; | 342 | REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; |
@@ -373,10 +382,12 @@ static int finish_unfinished(struct super_block *s) | |||
373 | return retval; | 382 | return retval; |
374 | } | 383 | } |
375 | 384 | ||
376 | /* to protect file being unlinked from getting lost we "safe" link files | 385 | /* |
377 | being unlinked. This link will be deleted in the same transaction with last | 386 | * to protect a file being unlinked from getting lost we "save" link files |
378 | item of file. mounting the filesystem we scan all these links and remove | 387 | * being unlinked. This link will be deleted in the same transaction as the |
379 | files which almost got lost | 388 | * last item of the file. When mounting the filesystem we scan all these |
389 | * links and remove files which almost got lost | ||
390 | */ | ||
380 | void add_save_link(struct reiserfs_transaction_handle *th, | 391 | void add_save_link(struct reiserfs_transaction_handle *th, |
381 | struct inode *inode, int truncate) | 392 | struct inode *inode, int truncate) |
382 | { | 393 | { |
@@ -495,7 +506,7 @@ int remove_save_link(struct inode *inode, int truncate) | |||
495 | } else | 506 | } else |
496 | REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; | 507 | REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; |
497 | 508 | ||
498 | return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); | 509 | return journal_end(&th); |
499 | } | 510 | } |
500 | 511 | ||
501 | static void reiserfs_kill_sb(struct super_block *s) | 512 | static void reiserfs_kill_sb(struct super_block *s) |
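The save links removed above are created in pairs with the operations they protect: a "save link" item is inserted in the transaction that starts an unlink or truncate and deleted in the transaction that finishes it, so a crash in between leaves a marker for finish_unfinished() to act on at the next mount. Schematically, using the two helpers from this file (error handling elided):

    /* when the unlink or truncate starts, inside an open transaction */
    add_save_link(&th, inode, truncate);    /* truncate: 1 = truncate, 0 = unlink */

    /* ... item deletion or truncation, possibly spanning transactions ... */

    /* once the operation completes, drop the marker again */
    err = remove_save_link(inode, truncate);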
@@ -530,19 +541,23 @@ static void reiserfs_put_super(struct super_block *s) | |||
530 | 541 | ||
531 | reiserfs_write_lock(s); | 542 | reiserfs_write_lock(s); |
532 | 543 | ||
533 | /* change file system state to current state if it was mounted with read-write permissions */ | 544 | /* |
545 | * change file system state to current state if it was mounted | ||
546 | * with read-write permissions | ||
547 | */ | ||
534 | if (!(s->s_flags & MS_RDONLY)) { | 548 | if (!(s->s_flags & MS_RDONLY)) { |
535 | if (!journal_begin(&th, s, 10)) { | 549 | if (!journal_begin(&th, s, 10)) { |
536 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), | 550 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), |
537 | 1); | 551 | 1); |
538 | set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), | 552 | set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), |
539 | REISERFS_SB(s)->s_mount_state); | 553 | REISERFS_SB(s)->s_mount_state); |
540 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 554 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); |
541 | } | 555 | } |
542 | } | 556 | } |
543 | 557 | ||
544 | /* note, journal_release checks for readonly mount, and can decide not | 558 | /* |
545 | ** to do a journal_end | 559 | * note, journal_release checks for readonly mount, and can |
560 | * decide not to do a journal_end | ||
546 | */ | 561 | */ |
547 | journal_release(&th, s); | 562 | journal_release(&th, s); |
548 | 563 | ||
@@ -559,6 +574,7 @@ static void reiserfs_put_super(struct super_block *s) | |||
559 | 574 | ||
560 | reiserfs_write_unlock(s); | 575 | reiserfs_write_unlock(s); |
561 | mutex_destroy(&REISERFS_SB(s)->lock); | 576 | mutex_destroy(&REISERFS_SB(s)->lock); |
577 | destroy_workqueue(REISERFS_SB(s)->commit_wq); | ||
562 | kfree(s->s_fs_info); | 578 | kfree(s->s_fs_info); |
563 | s->s_fs_info = NULL; | 579 | s->s_fs_info = NULL; |
564 | } | 580 | } |
@@ -634,15 +650,16 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags) | |||
634 | } | 650 | } |
635 | reiserfs_write_lock(inode->i_sb); | 651 | reiserfs_write_lock(inode->i_sb); |
636 | 652 | ||
637 | /* this is really only used for atime updates, so they don't have | 653 | /* |
638 | ** to be included in O_SYNC or fsync | 654 | * this is really only used for atime updates, so they don't have |
655 | * to be included in O_SYNC or fsync | ||
639 | */ | 656 | */ |
640 | err = journal_begin(&th, inode->i_sb, 1); | 657 | err = journal_begin(&th, inode->i_sb, 1); |
641 | if (err) | 658 | if (err) |
642 | goto out; | 659 | goto out; |
643 | 660 | ||
644 | reiserfs_update_sd(&th, inode); | 661 | reiserfs_update_sd(&th, inode); |
645 | journal_end(&th, inode->i_sb, 1); | 662 | journal_end(&th); |
646 | 663 | ||
647 | out: | 664 | out: |
648 | reiserfs_write_unlock(inode->i_sb); | 665 | reiserfs_write_unlock(inode->i_sb); |
@@ -788,31 +805,53 @@ static const struct export_operations reiserfs_export_ops = { | |||
788 | .get_parent = reiserfs_get_parent, | 805 | .get_parent = reiserfs_get_parent, |
789 | }; | 806 | }; |
790 | 807 | ||
791 | /* this struct is used in reiserfs_getopt () for containing the value for those | 808 | /* |
792 | mount options that have values rather than being toggles. */ | 809 | * this struct is used in reiserfs_getopt () for containing the value for |
810 | * those mount options that have values rather than being toggles. | ||
811 | */ | ||
793 | typedef struct { | 812 | typedef struct { |
794 | char *value; | 813 | char *value; |
795 | int setmask; /* bitmask which is to set on mount_options bitmask when this | 814 | /* |
796 | value is found, 0 is no bits are to be changed. */ | 815 | * bitmask to set on the mount_options bitmask |
797 | int clrmask; /* bitmask which is to clear on mount_options bitmask when this | 816 | * when this value is found; 0 if no bits are to be changed. |
798 | value is found, 0 is no bits are to be changed. This is | 817 | */ |
799 | applied BEFORE setmask */ | 818 | int setmask; |
819 | /* | ||
820 | * bitmask to clear on the mount_options bitmask | ||
821 | * when this value is found; 0 if no bits are to be changed. | ||
822 | * This is applied BEFORE setmask | ||
823 | */ | ||
824 | int clrmask; | ||
800 | } arg_desc_t; | 825 | } arg_desc_t; |
801 | 826 | ||
802 | /* Set this bit in arg_required to allow empty arguments */ | 827 | /* Set this bit in arg_required to allow empty arguments */ |
803 | #define REISERFS_OPT_ALLOWEMPTY 31 | 828 | #define REISERFS_OPT_ALLOWEMPTY 31 |
804 | 829 | ||
805 | /* this struct is used in reiserfs_getopt() for describing the set of reiserfs | 830 | /* |
806 | mount options */ | 831 | * this struct is used in reiserfs_getopt() for describing the |
832 | * set of reiserfs mount options | ||
833 | */ | ||
807 | typedef struct { | 834 | typedef struct { |
808 | char *option_name; | 835 | char *option_name; |
809 | int arg_required; /* 0 if argument is not required, not 0 otherwise */ | 836 | |
810 | const arg_desc_t *values; /* list of values accepted by an option */ | 837 | /* 0 if argument is not required, not 0 otherwise */ |
811 | int setmask; /* bitmask which is to set on mount_options bitmask when this | 838 | int arg_required; |
812 | value is found, 0 is no bits are to be changed. */ | 839 | |
813 | int clrmask; /* bitmask which is to clear on mount_options bitmask when this | 840 | /* list of values accepted by an option */ |
814 | value is found, 0 is no bits are to be changed. This is | 841 | const arg_desc_t *values; |
815 | applied BEFORE setmask */ | 842 | |
843 | /* | ||
844 | * bitmask to set on the mount_options bitmask | ||
845 | * when this value is found; 0 if no bits are to be changed. | ||
846 | */ | ||
847 | int setmask; | ||
848 | |||
849 | /* | ||
850 | * bitmask to clear on the mount_options bitmask | ||
851 | * when this value is found; 0 if no bits are to be changed. | ||
852 | * This is applied BEFORE setmask | ||
853 | */ | ||
854 | int clrmask; | ||
816 | } opt_desc_t; | 855 | } opt_desc_t; |
817 | 856 | ||
818 | /* possible values for -o data= */ | 857 | /* possible values for -o data= */ |
@@ -833,8 +872,10 @@ static const arg_desc_t barrier_mode[] = { | |||
833 | {.value = NULL} | 872 | {.value = NULL} |
834 | }; | 873 | }; |
835 | 874 | ||
836 | /* possible values for "-o block-allocator=" and bits which are to be set in | 875 | /* |
837 | s_mount_opt of reiserfs specific part of in-core super block */ | 876 | * possible values for "-o block-allocator=" and bits which are to be set in |
877 | * s_mount_opt of reiserfs specific part of in-core super block | ||
878 | */ | ||
838 | static const arg_desc_t balloc[] = { | 879 | static const arg_desc_t balloc[] = { |
839 | {"noborder", 1 << REISERFS_NO_BORDER, 0}, | 880 | {"noborder", 1 << REISERFS_NO_BORDER, 0}, |
840 | {"border", 0, 1 << REISERFS_NO_BORDER}, | 881 | {"border", 0, 1 << REISERFS_NO_BORDER}, |
@@ -864,21 +905,25 @@ static const arg_desc_t error_actions[] = { | |||
864 | {NULL, 0, 0}, | 905 | {NULL, 0, 0}, |
865 | }; | 906 | }; |
866 | 907 | ||
867 | /* proceed only one option from a list *cur - string containing of mount options | 908 | /* |
868 | opts - array of options which are accepted | 909 | * process only one option from the list; *cur - string containing the |
869 | opt_arg - if option is found and requires an argument and if it is specifed | 910 | * mount options |
870 | in the input - pointer to the argument is stored here | 911 | * opts - array of options which are accepted |
871 | bit_flags - if option requires to set a certain bit - it is set here | 912 | * opt_arg - if an option is found and requires an argument and it is |
872 | return -1 if unknown option is found, opt->arg_required otherwise */ | 913 | * specified in the input - a pointer to the argument is stored here |
914 | * bit_flags - if the option requires setting a certain bit - it is set here | ||
915 | * return -1 if an unknown option is found, opt->arg_required otherwise | ||
916 | */ | ||
873 | static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | 917 | static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, |
874 | char **opt_arg, unsigned long *bit_flags) | 918 | char **opt_arg, unsigned long *bit_flags) |
875 | { | 919 | { |
876 | char *p; | 920 | char *p; |
877 | /* foo=bar, | 921 | /* |
878 | ^ ^ ^ | 922 | * foo=bar, |
879 | | | +-- option_end | 923 | * ^ ^ ^ |
880 | | +-- arg_start | 924 | * | | +-- option_end |
881 | +-- option_start | 925 | * | +-- arg_start |
926 | * +-- option_start | ||
882 | */ | 927 | */ |
883 | const opt_desc_t *opt; | 928 | const opt_desc_t *opt; |
884 | const arg_desc_t *arg; | 929 | const arg_desc_t *arg; |
@@ -893,9 +938,12 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | |||
893 | } | 938 | } |
894 | 939 | ||
895 | if (!strncmp(p, "alloc=", 6)) { | 940 | if (!strncmp(p, "alloc=", 6)) { |
896 | /* Ugly special case, probably we should redo options parser so that | 941 | /* |
897 | it can understand several arguments for some options, also so that | 942 | * Ugly special case, probably we should redo options |
898 | it can fill several bitfields with option values. */ | 943 | * parser so that it can understand several arguments for |
944 | * some options, also so that it can fill several bitfields | ||
945 | * with option values. | ||
946 | */ | ||
899 | if (reiserfs_parse_alloc_options(s, p + 6)) { | 947 | if (reiserfs_parse_alloc_options(s, p + 6)) { |
900 | return -1; | 948 | return -1; |
901 | } else { | 949 | } else { |
@@ -958,7 +1006,10 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | |||
958 | return -1; | 1006 | return -1; |
959 | } | 1007 | } |
960 | 1008 | ||
961 | /* move to the argument, or to next option if argument is not required */ | 1009 | /* |
1010 | * move to the argument, or to next option if argument is not | ||
1011 | * required | ||
1012 | */ | ||
962 | p++; | 1013 | p++; |
963 | 1014 | ||
964 | if (opt->arg_required | 1015 | if (opt->arg_required |
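A worked example of the option_start/arg_start/option_end diagram above: each reiserfs_getopt() call consumes one comma-separated token from *cur and advances the cursor. A sketch of the caller loop (names as in reiserfs_parse_options() below; the real loop also dispatches on the returned character):

    char *pos = options;    /* e.g. "hash=r5,notail" */
    char *arg = NULL;
    unsigned long bits = 0;
    int c;

    while (pos) {
            c = reiserfs_getopt(s, &pos, opts, &arg, &bits);
            if (c == -1)
                    return 0;       /* unknown option: reject the whole string */
            /* for "hash=r5", arg points at "r5"; for "notail", bits changed */
    }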
@@ -995,12 +1046,20 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | |||
995 | } | 1046 | } |
996 | 1047 | ||
997 | /* returns 0 if something is wrong in option string, 1 - otherwise */ | 1048 | /* returns 0 if something is wrong in option string, 1 - otherwise */ |
998 | static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */ | 1049 | static int reiserfs_parse_options(struct super_block *s, |
1050 | |||
1051 | /* string given via mount's -o */ | ||
1052 | char *options, | ||
1053 | |||
1054 | /* | ||
1055 | * after the parsing phase, contains the | ||
1056 | * collection of bitflags defining what | ||
1057 | * mount options were selected. | ||
1058 | */ | ||
999 | unsigned long *mount_options, | 1059 | unsigned long *mount_options, |
1000 | /* after the parsing phase, contains the | 1060 | |
1001 | collection of bitflags defining what | 1061 | /* strtol-ed from NNN of resize=NNN */ |
1002 | mount options were selected. */ | 1062 | unsigned long *blocks, |
1003 | unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ | ||
1004 | char **jdev_name, | 1063 | char **jdev_name, |
1005 | unsigned int *commit_max_age, | 1064 | unsigned int *commit_max_age, |
1006 | char **qf_names, | 1065 | char **qf_names, |
@@ -1010,7 +1069,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
1010 | char *arg = NULL; | 1069 | char *arg = NULL; |
1011 | char *pos; | 1070 | char *pos; |
1012 | opt_desc_t opts[] = { | 1071 | opt_desc_t opts[] = { |
1013 | /* Compatibility stuff, so that -o notail for old setups still work */ | 1072 | /* |
1073 | * Compatibility stuff, so that -o notail for old | ||
1074 | * setups still work | ||
1075 | */ | ||
1014 | {"tails",.arg_required = 't',.values = tails}, | 1076 | {"tails",.arg_required = 't',.values = tails}, |
1015 | {"notail",.clrmask = | 1077 | {"notail",.clrmask = |
1016 | (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, | 1078 | (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, |
@@ -1055,8 +1117,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
1055 | 1117 | ||
1056 | *blocks = 0; | 1118 | *blocks = 0; |
1057 | if (!options || !*options) | 1119 | if (!options || !*options) |
1058 | /* use default configuration: create tails, journaling on, no | 1120 | /* |
1059 | conversion to newest format */ | 1121 | * use default configuration: create tails, journaling on, no |
1122 | * conversion to newest format | ||
1123 | */ | ||
1060 | return 1; | 1124 | return 1; |
1061 | 1125 | ||
1062 | for (pos = options; pos;) { | 1126 | for (pos = options; pos;) { |
@@ -1109,7 +1173,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
1109 | 1173 | ||
1110 | if (c == 'j') { | 1174 | if (c == 'j') { |
1111 | if (arg && *arg && jdev_name) { | 1175 | if (arg && *arg && jdev_name) { |
1112 | if (*jdev_name) { //Hm, already assigned? | 1176 | /* Hm, already assigned? */ |
1177 | if (*jdev_name) { | ||
1113 | reiserfs_warning(s, "super-6510", | 1178 | reiserfs_warning(s, "super-6510", |
1114 | "journal device was " | 1179 | "journal device was " |
1115 | "already specified to " | 1180 | "already specified to " |
@@ -1362,8 +1427,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1362 | safe_mask |= 1 << REISERFS_USRQUOTA; | 1427 | safe_mask |= 1 << REISERFS_USRQUOTA; |
1363 | safe_mask |= 1 << REISERFS_GRPQUOTA; | 1428 | safe_mask |= 1 << REISERFS_GRPQUOTA; |
1364 | 1429 | ||
1365 | /* Update the bitmask, taking care to keep | 1430 | /* |
1366 | * the bits we're not allowed to change here */ | 1431 | * Update the bitmask, taking care to keep |
1432 | * the bits we're not allowed to change here | ||
1433 | */ | ||
1367 | REISERFS_SB(s)->s_mount_opt = | 1434 | REISERFS_SB(s)->s_mount_opt = |
1368 | (REISERFS_SB(s)-> | 1435 | (REISERFS_SB(s)-> |
1369 | s_mount_opt & ~safe_mask) | (mount_options & safe_mask); | 1436 | s_mount_opt & ~safe_mask) | (mount_options & safe_mask); |
@@ -1410,7 +1477,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1410 | /* Mounting a rw partition read-only. */ | 1477 | /* Mounting a rw partition read-only. */ |
1411 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); | 1478 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); |
1412 | set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); | 1479 | set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); |
1413 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 1480 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); |
1414 | } else { | 1481 | } else { |
1415 | /* remount read-write */ | 1482 | /* remount read-write */ |
1416 | if (!(s->s_flags & MS_RDONLY)) { | 1483 | if (!(s->s_flags & MS_RDONLY)) { |
@@ -1427,7 +1494,9 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1427 | handle_data_mode(s, mount_options); | 1494 | handle_data_mode(s, mount_options); |
1428 | handle_barrier_mode(s, mount_options); | 1495 | handle_barrier_mode(s, mount_options); |
1429 | REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); | 1496 | REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); |
1430 | s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ | 1497 | |
1498 | /* now it is safe to call journal_begin */ | ||
1499 | s->s_flags &= ~MS_RDONLY; | ||
1431 | err = journal_begin(&th, s, 10); | 1500 | err = journal_begin(&th, s, 10); |
1432 | if (err) | 1501 | if (err) |
1433 | goto out_err_unlock; | 1502 | goto out_err_unlock; |
@@ -1440,12 +1509,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1440 | if (!old_format_only(s)) | 1509 | if (!old_format_only(s)) |
1441 | set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); | 1510 | set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); |
1442 | /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ | 1511 | /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ |
1443 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 1512 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); |
1444 | REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; | 1513 | REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; |
1445 | } | 1514 | } |
1446 | /* this will force a full flush of all journal lists */ | 1515 | /* this will force a full flush of all journal lists */ |
1447 | SB_JOURNAL(s)->j_must_wait = 1; | 1516 | SB_JOURNAL(s)->j_must_wait = 1; |
1448 | err = journal_end(&th, s, 10); | 1517 | err = journal_end(&th); |
1449 | if (err) | 1518 | if (err) |
1450 | goto out_err_unlock; | 1519 | goto out_err_unlock; |
1451 | 1520 | ||
@@ -1489,9 +1558,9 @@ static int read_super_block(struct super_block *s, int offset) | |||
1489 | brelse(bh); | 1558 | brelse(bh); |
1490 | return 1; | 1559 | return 1; |
1491 | } | 1560 | } |
1492 | // | 1561 | /* |
1493 | // ok, reiserfs signature (old or new) found in at the given offset | 1562 | * ok, reiserfs signature (old or new) found at the given offset |
1494 | // | 1563 | */ |
1495 | fs_blocksize = sb_blocksize(rs); | 1564 | fs_blocksize = sb_blocksize(rs); |
1496 | brelse(bh); | 1565 | brelse(bh); |
1497 | sb_set_blocksize(s, fs_blocksize); | 1566 | sb_set_blocksize(s, fs_blocksize); |
@@ -1529,9 +1598,11 @@ static int read_super_block(struct super_block *s, int offset) | |||
1529 | SB_BUFFER_WITH_SB(s) = bh; | 1598 | SB_BUFFER_WITH_SB(s) = bh; |
1530 | SB_DISK_SUPER_BLOCK(s) = rs; | 1599 | SB_DISK_SUPER_BLOCK(s) = rs; |
1531 | 1600 | ||
1601 | /* | ||
1602 | * the magic is that of a non-standard journal filesystem; look at | ||
1603 | * s_version to find which format is in use | ||
1604 | */ | ||
1532 | if (is_reiserfs_jr(rs)) { | 1605 | if (is_reiserfs_jr(rs)) { |
1533 | /* magic is of non-standard journal filesystem, look at s_version to | ||
1534 | find which format is in use */ | ||
1535 | if (sb_version(rs) == REISERFS_VERSION_2) | 1606 | if (sb_version(rs) == REISERFS_VERSION_2) |
1536 | reiserfs_info(s, "found reiserfs format \"3.6\"" | 1607 | reiserfs_info(s, "found reiserfs format \"3.6\"" |
1537 | " with non-standard journal\n"); | 1608 | " with non-standard journal\n"); |
@@ -1545,8 +1616,10 @@ static int read_super_block(struct super_block *s, int offset) | |||
1545 | return 1; | 1616 | return 1; |
1546 | } | 1617 | } |
1547 | } else | 1618 | } else |
1548 | /* s_version of standard format may contain incorrect information, | 1619 | /* |
1549 | so we just look at the magic string */ | 1620 | * s_version of standard format may contain incorrect |
1621 | * information, so we just look at the magic string | ||
1622 | */ | ||
1550 | reiserfs_info(s, | 1623 | reiserfs_info(s, |
1551 | "found reiserfs format \"%s\" with standard journal\n", | 1624 | "found reiserfs format \"%s\" with standard journal\n", |
1552 | is_reiserfs_3_5(rs) ? "3.5" : "3.6"); | 1625 | is_reiserfs_3_5(rs) ? "3.5" : "3.6"); |
@@ -1558,8 +1631,9 @@ static int read_super_block(struct super_block *s, int offset) | |||
1558 | s->dq_op = &reiserfs_quota_operations; | 1631 | s->dq_op = &reiserfs_quota_operations; |
1559 | #endif | 1632 | #endif |
1560 | 1633 | ||
1561 | /* new format is limited by the 32 bit wide i_blocks field, want to | 1634 | /* |
1562 | ** be one full block below that. | 1635 | * new format is limited by the 32 bit wide i_blocks field, want to |
1562 | ** be one full block below that. | 1635 | * new format is limited by the 32 bit wide i_blocks field; we want to |
1563 | */ | 1637 | */ |
1564 | s->s_maxbytes = (512LL << 32) - s->s_blocksize; | 1638 | s->s_maxbytes = (512LL << 32) - s->s_blocksize; |
1565 | return 0; | 1639 | return 0; |
@@ -1568,7 +1642,7 @@ static int read_super_block(struct super_block *s, int offset) | |||
1568 | /* after journal replay, reread all bitmap and super blocks */ | 1642 | /* after journal replay, reread all bitmap and super blocks */ |
1569 | static int reread_meta_blocks(struct super_block *s) | 1643 | static int reread_meta_blocks(struct super_block *s) |
1570 | { | 1644 | { |
1571 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); | 1645 | ll_rw_block(READ, 1, &SB_BUFFER_WITH_SB(s)); |
1572 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); | 1646 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); |
1573 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { | 1647 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { |
1574 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); | 1648 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); |
@@ -1578,14 +1652,15 @@ static int reread_meta_blocks(struct super_block *s) | |||
1578 | return 0; | 1652 | return 0; |
1579 | } | 1653 | } |
1580 | 1654 | ||
1581 | ///////////////////////////////////////////////////// | 1655 | /* hash detection stuff */ |
1582 | // hash detection stuff | ||
1583 | 1656 | ||
1584 | // if root directory is empty - we set default - Yura's - hash and | 1657 | /* |
1585 | // warn about it | 1658 | * if the root directory is empty - we set the default - Yura's - hash and |
1586 | // FIXME: we look for only one name in a directory. If tea and yura | 1659 | * warn about it |
1587 | // bith have the same value - we ask user to send report to the | 1660 | * FIXME: we look for only one name in a directory. If tea and yura |
1588 | // mailing list | 1661 | * both have the same value - we ask the user to send a report to the |
1662 | * mailing list | ||
1663 | */ | ||
1589 | static __u32 find_hash_out(struct super_block *s) | 1664 | static __u32 find_hash_out(struct super_block *s) |
1590 | { | 1665 | { |
1591 | int retval; | 1666 | int retval; |
@@ -1593,92 +1668,83 @@ static __u32 find_hash_out(struct super_block *s) | |||
1593 | struct cpu_key key; | 1668 | struct cpu_key key; |
1594 | INITIALIZE_PATH(path); | 1669 | INITIALIZE_PATH(path); |
1595 | struct reiserfs_dir_entry de; | 1670 | struct reiserfs_dir_entry de; |
1671 | struct reiserfs_de_head *deh; | ||
1596 | __u32 hash = DEFAULT_HASH; | 1672 | __u32 hash = DEFAULT_HASH; |
1673 | __u32 deh_hashval, teahash, r5hash, yurahash; | ||
1597 | 1674 | ||
1598 | inode = s->s_root->d_inode; | 1675 | inode = s->s_root->d_inode; |
1599 | 1676 | ||
1600 | do { // Some serious "goto"-hater was there ;) | 1677 | make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); |
1601 | u32 teahash, r5hash, yurahash; | 1678 | retval = search_by_entry_key(s, &key, &path, &de); |
1679 | if (retval == IO_ERROR) { | ||
1680 | pathrelse(&path); | ||
1681 | return UNSET_HASH; | ||
1682 | } | ||
1683 | if (retval == NAME_NOT_FOUND) | ||
1684 | de.de_entry_num--; | ||
1685 | |||
1686 | set_de_name_and_namelen(&de); | ||
1687 | deh = de.de_deh + de.de_entry_num; | ||
1602 | 1688 | ||
1603 | make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); | 1689 | if (deh_offset(deh) == DOT_DOT_OFFSET) { |
1604 | retval = search_by_entry_key(s, &key, &path, &de); | 1690 | /* allow override in this case */ |
1605 | if (retval == IO_ERROR) { | 1691 | if (reiserfs_rupasov_hash(s)) |
1606 | pathrelse(&path); | ||
1607 | return UNSET_HASH; | ||
1608 | } | ||
1609 | if (retval == NAME_NOT_FOUND) | ||
1610 | de.de_entry_num--; | ||
1611 | set_de_name_and_namelen(&de); | ||
1612 | if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) { | ||
1613 | /* allow override in this case */ | ||
1614 | if (reiserfs_rupasov_hash(s)) { | ||
1615 | hash = YURA_HASH; | ||
1616 | } | ||
1617 | reiserfs_info(s, "FS seems to be empty, autodetect " | ||
1618 | "is using the default hash\n"); | ||
1619 | break; | ||
1620 | } | ||
1621 | r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); | ||
1622 | teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)); | ||
1623 | yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); | ||
1624 | if (((teahash == r5hash) | ||
1625 | && | ||
1626 | (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) | ||
1627 | == r5hash)) || ((teahash == yurahash) | ||
1628 | && (yurahash == | ||
1629 | GET_HASH_VALUE(deh_offset | ||
1630 | (& | ||
1631 | (de. | ||
1632 | de_deh[de. | ||
1633 | de_entry_num]))))) | ||
1634 | || ((r5hash == yurahash) | ||
1635 | && (yurahash == | ||
1636 | GET_HASH_VALUE(deh_offset | ||
1637 | (&(de.de_deh[de.de_entry_num])))))) { | ||
1638 | reiserfs_warning(s, "reiserfs-2506", "Unable to " | ||
1639 | "automatically detect hash function. " | ||
1640 | "Please mount with -o " | ||
1641 | "hash={tea,rupasov,r5}"); | ||
1642 | hash = UNSET_HASH; | ||
1643 | break; | ||
1644 | } | ||
1645 | if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) == | ||
1646 | yurahash) | ||
1647 | hash = YURA_HASH; | 1692 | hash = YURA_HASH; |
1648 | else if (GET_HASH_VALUE | 1693 | reiserfs_info(s, "FS seems to be empty, autodetect is using the default hash\n"); |
1649 | (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash) | 1694 | goto out; |
1650 | hash = TEA_HASH; | 1695 | } |
1651 | else if (GET_HASH_VALUE | 1696 | |
1652 | (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) | 1697 | deh_hashval = GET_HASH_VALUE(deh_offset(deh)); |
1653 | hash = R5_HASH; | 1698 | r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); |
1654 | else { | 1699 | teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)); |
1655 | reiserfs_warning(s, "reiserfs-2506", | 1700 | yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); |
1656 | "Unrecognised hash function"); | 1701 | |
1657 | hash = UNSET_HASH; | 1702 | if ((teahash == r5hash && deh_hashval == r5hash) || |
1658 | } | 1703 | (teahash == yurahash && deh_hashval == yurahash) || |
1659 | } while (0); | 1704 | (r5hash == yurahash && deh_hashval == yurahash)) { |
1705 | reiserfs_warning(s, "reiserfs-2506", | ||
1706 | "Unable to automatically detect hash " | ||
1707 | "function. Please mount with -o " | ||
1708 | "hash={tea,rupasov,r5}"); | ||
1709 | hash = UNSET_HASH; | ||
1710 | goto out; | ||
1711 | } | ||
1660 | 1712 | ||
1713 | if (deh_hashval == yurahash) | ||
1714 | hash = YURA_HASH; | ||
1715 | else if (deh_hashval == teahash) | ||
1716 | hash = TEA_HASH; | ||
1717 | else if (deh_hashval == r5hash) | ||
1718 | hash = R5_HASH; | ||
1719 | else { | ||
1720 | reiserfs_warning(s, "reiserfs-2506", | ||
1721 | "Unrecognised hash function"); | ||
1722 | hash = UNSET_HASH; | ||
1723 | } | ||
1724 | out: | ||
1661 | pathrelse(&path); | 1725 | pathrelse(&path); |
1662 | return hash; | 1726 | return hash; |
1663 | } | 1727 | } |
1664 | 1728 | ||
1665 | // finds out which hash names are sorted with | 1729 | /* finds out which hash names are sorted with */ |
1666 | static int what_hash(struct super_block *s) | 1730 | static int what_hash(struct super_block *s) |
1667 | { | 1731 | { |
1668 | __u32 code; | 1732 | __u32 code; |
1669 | 1733 | ||
1670 | code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); | 1734 | code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); |
1671 | 1735 | ||
1672 | /* reiserfs_hash_detect() == true if any of the hash mount options | 1736 | /* |
1673 | ** were used. We must check them to make sure the user isn't | 1737 | * reiserfs_hash_detect() == true if any of the hash mount options |
1674 | ** using a bad hash value | 1738 | * were used. We must check them to make sure the user isn't |
1739 | * using a bad hash value | ||
1675 | */ | 1740 | */ |
1676 | if (code == UNSET_HASH || reiserfs_hash_detect(s)) | 1741 | if (code == UNSET_HASH || reiserfs_hash_detect(s)) |
1677 | code = find_hash_out(s); | 1742 | code = find_hash_out(s); |
1678 | 1743 | ||
1679 | if (code != UNSET_HASH && reiserfs_hash_detect(s)) { | 1744 | if (code != UNSET_HASH && reiserfs_hash_detect(s)) { |
1680 | /* detection has found the hash, and we must check against the | 1745 | /* |
1681 | ** mount options | 1746 | * detection has found the hash, and we must check against the |
1747 | * mount options | ||
1682 | */ | 1748 | */ |
1683 | if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { | 1749 | if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { |
1684 | reiserfs_warning(s, "reiserfs-2507", | 1750 | reiserfs_warning(s, "reiserfs-2507", |
@@ -1700,7 +1766,10 @@ static int what_hash(struct super_block *s) | |||
1700 | code = UNSET_HASH; | 1766 | code = UNSET_HASH; |
1701 | } | 1767 | } |
1702 | } else { | 1768 | } else { |
1703 | /* find_hash_out was not called or could not determine the hash */ | 1769 | /* |
1770 | * find_hash_out was not called or | ||
1771 | * could not determine the hash | ||
1772 | */ | ||
1704 | if (reiserfs_rupasov_hash(s)) { | 1773 | if (reiserfs_rupasov_hash(s)) { |
1705 | code = YURA_HASH; | 1774 | code = YURA_HASH; |
1706 | } else if (reiserfs_tea_hash(s)) { | 1775 | } else if (reiserfs_tea_hash(s)) { |
@@ -1710,8 +1779,9 @@ static int what_hash(struct super_block *s) | |||
1710 | } | 1779 | } |
1711 | } | 1780 | } |
1712 | 1781 | ||
1713 | /* if we are mounted RW, and we have a new valid hash code, update | 1782 | /* |
1714 | ** the super | 1783 | * if we are mounted RW, and we have a new valid hash code, update |
1784 | * the super | ||
1715 | */ | 1785 | */ |
1716 | if (code != UNSET_HASH && | 1786 | if (code != UNSET_HASH && |
1717 | !(s->s_flags & MS_RDONLY) && | 1787 | !(s->s_flags & MS_RDONLY) && |
@@ -1721,7 +1791,7 @@ static int what_hash(struct super_block *s) | |||
1721 | return code; | 1791 | return code; |
1722 | } | 1792 | } |
1723 | 1793 | ||
1724 | // return pointer to appropriate function | 1794 | /* return pointer to appropriate function */ |
1725 | static hashf_t hash_function(struct super_block *s) | 1795 | static hashf_t hash_function(struct super_block *s) |
1726 | { | 1796 | { |
1727 | switch (what_hash(s)) { | 1797 | switch (what_hash(s)) { |
@@ -1738,7 +1808,7 @@ static hashf_t hash_function(struct super_block *s) | |||
1738 | return NULL; | 1808 | return NULL; |
1739 | } | 1809 | } |
1740 | 1810 | ||
1741 | // this is used to set up correct value for old partitions | 1811 | /* this is used to set up correct value for old partitions */ |
1742 | static int function2code(hashf_t func) | 1812 | static int function2code(hashf_t func) |
1743 | { | 1813 | { |
1744 | if (func == keyed_hash) | 1814 | if (func == keyed_hash) |
@@ -1748,7 +1818,7 @@ static int function2code(hashf_t func) | |||
1748 | if (func == r5_hash) | 1818 | if (func == r5_hash) |
1749 | return R5_HASH; | 1819 | return R5_HASH; |
1750 | 1820 | ||
1751 | BUG(); // should never happen | 1821 | BUG(); /* should never happen */ |
1752 | 1822 | ||
1753 | return 0; | 1823 | return 0; |
1754 | } | 1824 | } |
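The three helpers above form a round trip: what_hash() decides on a hash code, hash_function() turns it into the function pointer used for the lifetime of the mount, and function2code() maps the pointer back when the code is written into the super block. A condensed sketch of the forward mapping (the case bodies are implied by function2code() rather than quoted from this hunk):

    static hashf_t hash_function(struct super_block *s)
    {
            switch (what_hash(s)) {
            case TEA_HASH:
                    return keyed_hash;
            case YURA_HASH:
                    return yura_hash;
            case R5_HASH:
                    return r5_hash;
            }
            return NULL;    /* UNSET_HASH: the mount will be refused */
    }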
@@ -1783,8 +1853,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1783 | sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); | 1853 | sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); |
1784 | sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); | 1854 | sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); |
1785 | sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); | 1855 | sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); |
1786 | /* no preallocation minimum, be smart in | 1856 | /* no preallocation minimum, be smart in reiserfs_file_write instead */ |
1787 | reiserfs_file_write instead */ | ||
1788 | sbi->s_alloc_options.preallocmin = 0; | 1857 | sbi->s_alloc_options.preallocmin = 0; |
1789 | /* Preallocate by 16 blocks (17-1) at once */ | 1858 | /* Preallocate by 16 blocks (17-1) at once */ |
1790 | sbi->s_alloc_options.preallocsize = 17; | 1859 | sbi->s_alloc_options.preallocsize = 17; |
@@ -1796,9 +1865,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1796 | mutex_init(&sbi->lock); | 1865 | mutex_init(&sbi->lock); |
1797 | sbi->lock_depth = -1; | 1866 | sbi->lock_depth = -1; |
1798 | 1867 | ||
1868 | sbi->commit_wq = alloc_workqueue("reiserfs/%s", WQ_MEM_RECLAIM, 0, | ||
1869 | s->s_id); | ||
1870 | if (!sbi->commit_wq) { | ||
1871 | SWARN(silent, s, "", "Cannot allocate commit workqueue"); | ||
1872 | errval = -ENOMEM; | ||
1873 | goto error_unlocked; | ||
1874 | } | ||
1875 | |||
1799 | jdev_name = NULL; | 1876 | jdev_name = NULL; |
1800 | if (reiserfs_parse_options | 1877 | if (reiserfs_parse_options |
1801 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, | 1878 | (s, (char *)data, &sbi->s_mount_opt, &blocks, &jdev_name, |
1802 | &commit_max_age, qf_names, &qfmt) == 0) { | 1879 | &commit_max_age, qf_names, &qfmt) == 0) { |
1803 | goto error_unlocked; | 1880 | goto error_unlocked; |
1804 | } | 1881 | } |
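The commit_wq allocated above pairs with the destroy_workqueue() call added to reiserfs_put_super() earlier in this file; the lifecycle reduces to the following sketch (error reporting condensed):

    /* mount: one WQ_MEM_RECLAIM workqueue per super block */
    sbi->commit_wq = alloc_workqueue("reiserfs/%s", WQ_MEM_RECLAIM, 0, s->s_id);
    if (!sbi->commit_wq)
            goto error_unlocked;    /* the real code SWARNs and sets -ENOMEM first */

    /* unmount: destroy_workqueue() flushes pending work, then frees it */
    destroy_workqueue(REISERFS_SB(s)->commit_wq);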
@@ -1819,10 +1896,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1819 | goto error_unlocked; | 1896 | goto error_unlocked; |
1820 | } | 1897 | } |
1821 | 1898 | ||
1822 | /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ | 1899 | /* |
1900 | * try old format (undistributed bitmap, super block in the 8th 1k | ||
1901 | * block of a device) | ||
1902 | */ | ||
1823 | if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) | 1903 | if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) |
1824 | old_format = 1; | 1904 | old_format = 1; |
1825 | /* try new format (64-th 1k block), which can contain reiserfs super block */ | 1905 | |
1906 | /* | ||
1907 | * try new format (64th 1k block), which can contain the reiserfs | ||
1908 | * super block | ||
1909 | */ | ||
1826 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { | 1910 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { |
1827 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", | 1911 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", |
1828 | s->s_id); | 1912 | s->s_id); |
@@ -1830,9 +1914,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1830 | } | 1914 | } |
1831 | 1915 | ||
1832 | rs = SB_DISK_SUPER_BLOCK(s); | 1916 | rs = SB_DISK_SUPER_BLOCK(s); |
1833 | /* Let's do basic sanity check to verify that underlying device is not | 1917 | /* |
1834 | smaller than the filesystem. If the check fails then abort and scream, | 1918 | * Let's do a basic sanity check to verify that the underlying device |
1835 | because bad stuff will happen otherwise. */ | 1919 | * is not smaller than the filesystem. If the check fails then abort and |
1920 | * scream, because bad stuff will happen otherwise. | ||
1921 | */ | ||
1836 | if (s->s_bdev && s->s_bdev->bd_inode | 1922 | if (s->s_bdev && s->s_bdev->bd_inode |
1837 | && i_size_read(s->s_bdev->bd_inode) < | 1923 | && i_size_read(s->s_bdev->bd_inode) < |
1838 | sb_block_count(rs) * sb_blocksize(rs)) { | 1924 | sb_block_count(rs) * sb_blocksize(rs)) { |
@@ -1876,15 +1962,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1876 | printk("reiserfs: using flush barriers\n"); | 1962 | printk("reiserfs: using flush barriers\n"); |
1877 | } | 1963 | } |
1878 | 1964 | ||
1879 | // set_device_ro(s->s_dev, 1) ; | ||
1880 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { | 1965 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { |
1881 | SWARN(silent, s, "sh-2022", | 1966 | SWARN(silent, s, "sh-2022", |
1882 | "unable to initialize journal space"); | 1967 | "unable to initialize journal space"); |
1883 | goto error_unlocked; | 1968 | goto error_unlocked; |
1884 | } else { | 1969 | } else { |
1885 | jinit_done = 1; /* once this is set, journal_release must be called | 1970 | /* |
1886 | ** if we error out of the mount | 1971 | * once this is set, journal_release must be called |
1887 | */ | 1972 | * if we error out of the mount |
1973 | */ | ||
1974 | jinit_done = 1; | ||
1888 | } | 1975 | } |
1889 | 1976 | ||
1890 | if (reread_meta_blocks(s)) { | 1977 | if (reread_meta_blocks(s)) { |
@@ -1905,7 +1992,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1905 | args.dirid = REISERFS_ROOT_PARENT_OBJECTID; | 1992 | args.dirid = REISERFS_ROOT_PARENT_OBJECTID; |
1906 | root_inode = | 1993 | root_inode = |
1907 | iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, | 1994 | iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, |
1908 | reiserfs_init_locked_inode, (void *)(&args)); | 1995 | reiserfs_init_locked_inode, (void *)&args); |
1909 | if (!root_inode) { | 1996 | if (!root_inode) { |
1910 | SWARN(silent, s, "jmacd-10", "get root inode failed"); | 1997 | SWARN(silent, s, "jmacd-10", "get root inode failed"); |
1911 | goto error_unlocked; | 1998 | goto error_unlocked; |
@@ -1929,7 +2016,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1929 | s->s_root = d_make_root(root_inode); | 2016 | s->s_root = d_make_root(root_inode); |
1930 | if (!s->s_root) | 2017 | if (!s->s_root) |
1931 | goto error; | 2018 | goto error; |
1932 | // define and initialize hash function | 2019 | /* define and initialize hash function */ |
1933 | sbi->s_hash_function = hash_function(s); | 2020 | sbi->s_hash_function = hash_function(s); |
1934 | if (sbi->s_hash_function == NULL) { | 2021 | if (sbi->s_hash_function == NULL) { |
1935 | dput(s->s_root); | 2022 | dput(s->s_root); |
@@ -1939,11 +2026,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1939 | 2026 | ||
1940 | if (is_reiserfs_3_5(rs) | 2027 | if (is_reiserfs_3_5(rs) |
1941 | || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) | 2028 | || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) |
1942 | set_bit(REISERFS_3_5, &(sbi->s_properties)); | 2029 | set_bit(REISERFS_3_5, &sbi->s_properties); |
1943 | else if (old_format) | 2030 | else if (old_format) |
1944 | set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties)); | 2031 | set_bit(REISERFS_OLD_FORMAT, &sbi->s_properties); |
1945 | else | 2032 | else |
1946 | set_bit(REISERFS_3_6, &(sbi->s_properties)); | 2033 | set_bit(REISERFS_3_6, &sbi->s_properties); |
1947 | 2034 | ||
1948 | if (!(s->s_flags & MS_RDONLY)) { | 2035 | if (!(s->s_flags & MS_RDONLY)) { |
1949 | 2036 | ||
@@ -1958,10 +2045,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1958 | set_sb_umount_state(rs, REISERFS_ERROR_FS); | 2045 | set_sb_umount_state(rs, REISERFS_ERROR_FS); |
1959 | set_sb_fs_state(rs, 0); | 2046 | set_sb_fs_state(rs, 0); |
1960 | 2047 | ||
1961 | /* Clear out s_bmap_nr if it would wrap. We can handle this | 2048 | /* |
2049 | * Clear out s_bmap_nr if it would wrap. We can handle this | ||
1962 | * case, but older revisions can't. This will cause the | 2050 | * case, but older revisions can't. This will cause the |
1963 | * file system to fail mount on those older implementations, | 2051 | * file system to fail mount on those older implementations, |
1964 | * avoiding corruption. -jeffm */ | 2052 | * avoiding corruption. -jeffm |
2053 | */ | ||
1965 | if (bmap_would_wrap(reiserfs_bmap_count(s)) && | 2054 | if (bmap_would_wrap(reiserfs_bmap_count(s)) && |
1966 | sb_bmap_nr(rs) != 0) { | 2055 | sb_bmap_nr(rs) != 0) { |
1967 | reiserfs_warning(s, "super-2030", "This file system " | 2056 | reiserfs_warning(s, "super-2030", "This file system " |
@@ -1974,8 +2063,10 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1974 | } | 2063 | } |
1975 | 2064 | ||
1976 | if (old_format_only(s)) { | 2065 | if (old_format_only(s)) { |
1977 | /* filesystem of format 3.5 either with standard or non-standard | 2066 | /* |
1978 | journal */ | 2067 | * filesystem of format 3.5 either with standard |
2068 | * or non-standard journal | ||
2069 | */ | ||
1979 | if (convert_reiserfs(s)) { | 2070 | if (convert_reiserfs(s)) { |
1980 | /* and -o conv is given */ | 2071 | /* and -o conv is given */ |
1981 | if (!silent) | 2072 | if (!silent) |
@@ -1983,8 +2074,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1983 | "converting 3.5 filesystem to the 3.6 format"); | 2074 | "converting 3.5 filesystem to the 3.6 format"); |
1984 | 2075 | ||
1985 | if (is_reiserfs_3_5(rs)) | 2076 | if (is_reiserfs_3_5(rs)) |
1986 | /* put magic string of 3.6 format. 2.2 will not be able to | 2077 | /* |
1987 | mount this filesystem anymore */ | 2078 | * put magic string of 3.6 format. |
2079 | * 2.2 will not be able to | ||
2080 | * mount this filesystem anymore | ||
2081 | */ | ||
1988 | memcpy(rs->s_v1.s_magic, | 2082 | memcpy(rs->s_v1.s_magic, |
1989 | reiserfs_3_6_magic_string, | 2083 | reiserfs_3_6_magic_string, |
1990 | sizeof | 2084 | sizeof |
@@ -1992,8 +2086,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1992 | 2086 | ||
1993 | set_sb_version(rs, REISERFS_VERSION_2); | 2087 | set_sb_version(rs, REISERFS_VERSION_2); |
1994 | reiserfs_convert_objectid_map_v1(s); | 2088 | reiserfs_convert_objectid_map_v1(s); |
1995 | set_bit(REISERFS_3_6, &(sbi->s_properties)); | 2089 | set_bit(REISERFS_3_6, &sbi->s_properties); |
1996 | clear_bit(REISERFS_3_5, &(sbi->s_properties)); | 2090 | clear_bit(REISERFS_3_5, &sbi->s_properties); |
1997 | } else if (!silent) { | 2091 | } else if (!silent) { |
1998 | reiserfs_info(s, "using 3.5.x disk format\n"); | 2092 | reiserfs_info(s, "using 3.5.x disk format\n"); |
1999 | } | 2093 | } |
@@ -2001,8 +2095,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
2001 | set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); | 2095 | set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); |
2002 | 2096 | ||
2003 | 2097 | ||
2004 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 2098 | journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s)); |
2005 | errval = journal_end(&th, s, 1); | 2099 | errval = journal_end(&th); |
2006 | if (errval) { | 2100 | if (errval) { |
2007 | dput(s->s_root); | 2101 | dput(s->s_root); |
2008 | s->s_root = NULL; | 2102 | s->s_root = NULL; |
@@ -2018,7 +2112,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
2018 | } | 2112 | } |
2019 | reiserfs_write_lock(s); | 2113 | reiserfs_write_lock(s); |
2020 | 2114 | ||
2021 | /* look for files which were to be removed in previous session */ | 2115 | /* |
2116 | * look for files which were to be removed in the previous session |
2117 | */ | ||
2022 | finish_unfinished(s); | 2118 | finish_unfinished(s); |
2023 | } else { | 2119 | } else { |
2024 | if (old_format_only(s) && !silent) { | 2120 | if (old_format_only(s) && !silent) { |
@@ -2034,7 +2130,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
2034 | } | 2130 | } |
2035 | reiserfs_write_lock(s); | 2131 | reiserfs_write_lock(s); |
2036 | } | 2132 | } |
2037 | // mark hash in super block: it could be unset. overwrite should be ok | 2133 | /* |
2134 | * mark hash in super block: it could be unset. overwrite should be ok | ||
2135 | */ | ||
2038 | set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); | 2136 | set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); |
2039 | 2137 | ||
2040 | handle_attrs(s); | 2138 | handle_attrs(s); |
@@ -2111,9 +2209,7 @@ static int reiserfs_write_dquot(struct dquot *dquot) | |||
2111 | depth = reiserfs_write_unlock_nested(dquot->dq_sb); | 2209 | depth = reiserfs_write_unlock_nested(dquot->dq_sb); |
2112 | ret = dquot_commit(dquot); | 2210 | ret = dquot_commit(dquot); |
2113 | reiserfs_write_lock_nested(dquot->dq_sb, depth); | 2211 | reiserfs_write_lock_nested(dquot->dq_sb, depth); |
2114 | err = | 2212 | err = journal_end(&th); |
2115 | journal_end(&th, dquot->dq_sb, | ||
2116 | REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); | ||
2117 | if (!ret && err) | 2213 | if (!ret && err) |
2118 | ret = err; | 2214 | ret = err; |
2119 | out: | 2215 | out: |
@@ -2136,9 +2232,7 @@ static int reiserfs_acquire_dquot(struct dquot *dquot) | |||
2136 | depth = reiserfs_write_unlock_nested(dquot->dq_sb); | 2232 | depth = reiserfs_write_unlock_nested(dquot->dq_sb); |
2137 | ret = dquot_acquire(dquot); | 2233 | ret = dquot_acquire(dquot); |
2138 | reiserfs_write_lock_nested(dquot->dq_sb, depth); | 2234 | reiserfs_write_lock_nested(dquot->dq_sb, depth); |
2139 | err = | 2235 | err = journal_end(&th); |
2140 | journal_end(&th, dquot->dq_sb, | ||
2141 | REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); | ||
2142 | if (!ret && err) | 2236 | if (!ret && err) |
2143 | ret = err; | 2237 | ret = err; |
2144 | out: | 2238 | out: |
@@ -2163,9 +2257,7 @@ static int reiserfs_release_dquot(struct dquot *dquot) | |||
2163 | } | 2257 | } |
2164 | ret = dquot_release(dquot); | 2258 | ret = dquot_release(dquot); |
2165 | reiserfs_write_lock(dquot->dq_sb); | 2259 | reiserfs_write_lock(dquot->dq_sb); |
2166 | err = | 2260 | err = journal_end(&th); |
2167 | journal_end(&th, dquot->dq_sb, | ||
2168 | REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); | ||
2169 | if (!ret && err) | 2261 | if (!ret && err) |
2170 | ret = err; | 2262 | ret = err; |
2171 | reiserfs_write_unlock(dquot->dq_sb); | 2263 | reiserfs_write_unlock(dquot->dq_sb); |
@@ -2198,7 +2290,7 @@ static int reiserfs_write_info(struct super_block *sb, int type) | |||
2198 | depth = reiserfs_write_unlock_nested(sb); | 2290 | depth = reiserfs_write_unlock_nested(sb); |
2199 | ret = dquot_commit_info(sb, type); | 2291 | ret = dquot_commit_info(sb, type); |
2200 | reiserfs_write_lock_nested(sb, depth); | 2292 | reiserfs_write_lock_nested(sb, depth); |
2201 | err = journal_end(&th, sb, 2); | 2293 | err = journal_end(&th); |
2202 | if (!ret && err) | 2294 | if (!ret && err) |
2203 | ret = err; | 2295 | ret = err; |
2204 | out: | 2296 | out: |
@@ -2238,7 +2330,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2238 | goto out; | 2330 | goto out; |
2239 | } | 2331 | } |
2240 | inode = path->dentry->d_inode; | 2332 | inode = path->dentry->d_inode; |
2241 | /* We must not pack tails for quota files on reiserfs for quota IO to work */ | 2333 | /* |
2334 | * We must not pack tails for quota files on reiserfs for quota | ||
2335 | * IO to work | ||
2336 | */ | ||
2242 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { | 2337 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { |
2243 | err = reiserfs_unpack(inode, NULL); | 2338 | err = reiserfs_unpack(inode, NULL); |
2244 | if (err) { | 2339 | if (err) { |
@@ -2268,7 +2363,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2268 | err = journal_begin(&th, sb, 1); | 2363 | err = journal_begin(&th, sb, 1); |
2269 | if (err) | 2364 | if (err) |
2270 | goto out; | 2365 | goto out; |
2271 | err = journal_end_sync(&th, sb, 1); | 2366 | err = journal_end_sync(&th); |
2272 | if (err) | 2367 | if (err) |
2273 | goto out; | 2368 | goto out; |
2274 | } | 2369 | } |
@@ -2279,10 +2374,12 @@ out: | |||
2279 | return err; | 2374 | return err; |
2280 | } | 2375 | } |
2281 | 2376 | ||
2282 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 2377 | /* |
2378 | * Read data from quotafile - avoid pagecache and such because we cannot afford | ||
2283 | * acquiring the locks... As quota files are never truncated and quota code | 2379 | * acquiring the locks... As quota files are never truncated and quota code |
2284 | * itself serializes the operations (and no one else should touch the files) | 2380 | * itself serializes the operations (and no one else should touch the files) |
2285 | * we don't have to be afraid of races */ | 2381 | * we don't have to be afraid of races |
2382 | */ | ||
2286 | static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, | 2383 | static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, |
2287 | size_t len, loff_t off) | 2384 | size_t len, loff_t off) |
2288 | { | 2385 | { |
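For orientation, the loop this hunk annotates reads the quota file block by block: reiserfs_get_block() maps each logical block into tmp_bh and sb_bread() pulls it in, bypassing the pagecache as the comment above explains. A condensed sketch (locals as in the surrounding function; simplified, not verbatim):

	while (toread > 0) {
		size_t tocopy = sb->s_blocksize - offset < toread ?
				sb->s_blocksize - offset : toread;

		tmp_bh.b_state = 0;
		reiserfs_write_lock(sb);
		err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
		reiserfs_write_unlock(sb);
		if (err)
			return err;
		if (!buffer_mapped(&tmp_bh)) {	/* a hole reads back as zeroes */
			memset(data, 0, tocopy);
		} else {
			bh = sb_bread(sb, tmp_bh.b_blocknr);
			if (!bh)
				return -EIO;
			memcpy(data, bh->b_data + offset, tocopy);
			brelse(bh);
		}
		offset = 0;
		toread -= tocopy;
		data += tocopy;
		blk++;
	}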
@@ -2303,7 +2400,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, | |||
2303 | sb->s_blocksize - offset < | 2400 | sb->s_blocksize - offset < |
2304 | toread ? sb->s_blocksize - offset : toread; | 2401 | toread ? sb->s_blocksize - offset : toread; |
2305 | tmp_bh.b_state = 0; | 2402 | tmp_bh.b_state = 0; |
2306 | /* Quota files are without tails so we can safely use this function */ | 2403 | /* |
2404 | * Quota files are without tails so we can safely | ||
2405 | * use this function | ||
2406 | */ | ||
2307 | reiserfs_write_lock(sb); | 2407 | reiserfs_write_lock(sb); |
2308 | err = reiserfs_get_block(inode, blk, &tmp_bh, 0); | 2408 | err = reiserfs_get_block(inode, blk, &tmp_bh, 0); |
2309 | reiserfs_write_unlock(sb); | 2409 | reiserfs_write_unlock(sb); |
@@ -2326,8 +2426,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, | |||
2326 | return len; | 2426 | return len; |
2327 | } | 2427 | } |
2328 | 2428 | ||
2329 | /* Write to quotafile (we know the transaction is already started and has | 2429 | /* |
2330 | * enough credits) */ | 2430 | * Write to quotafile (we know the transaction is already started and has |
2431 | * enough credits) | ||
2432 | */ | ||
2331 | static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | 2433 | static ssize_t reiserfs_quota_write(struct super_block *sb, int type, |
2332 | const char *data, size_t len, loff_t off) | 2434 | const char *data, size_t len, loff_t off) |
2333 | { | 2435 | { |
@@ -2368,7 +2470,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
2368 | unlock_buffer(bh); | 2470 | unlock_buffer(bh); |
2369 | reiserfs_write_lock(sb); | 2471 | reiserfs_write_lock(sb); |
2370 | reiserfs_prepare_for_journal(sb, bh, 1); | 2472 | reiserfs_prepare_for_journal(sb, bh, 1); |
2371 | journal_mark_dirty(current->journal_info, sb, bh); | 2473 | journal_mark_dirty(current->journal_info, bh); |
2372 | if (!journal_quota) | 2474 | if (!journal_quota) |
2373 | reiserfs_add_ordered_list(inode, bh); | 2475 | reiserfs_add_ordered_list(inode, bh); |
2374 | reiserfs_write_unlock(sb); | 2476 | reiserfs_write_unlock(sb); |
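The journal_mark_dirty() call just updated belongs to the per-block write step of reiserfs_quota_write(), which rides on the transaction the quota code already opened (hence current->journal_info rather than a local handle). Roughly, per block:

	lock_buffer(bh);
	memcpy(bh->b_data + offset, data, towrite);
	flush_dcache_page(bh->b_page);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
	reiserfs_write_lock(sb);
	reiserfs_prepare_for_journal(sb, bh, 1);
	journal_mark_dirty(current->journal_info, bh);	/* new two-arg form */
	if (!journal_quota)
		reiserfs_add_ordered_list(inode, bh);
	reiserfs_write_unlock(sb);
	brelse(bh);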
@@ -2402,18 +2504,18 @@ static int __init init_reiserfs_fs(void) | |||
2402 | { | 2504 | { |
2403 | int ret; | 2505 | int ret; |
2404 | 2506 | ||
2405 | if ((ret = init_inodecache())) { | 2507 | ret = init_inodecache(); |
2508 | if (ret) | ||
2406 | return ret; | 2509 | return ret; |
2407 | } | ||
2408 | 2510 | ||
2409 | reiserfs_proc_info_global_init(); | 2511 | reiserfs_proc_info_global_init(); |
2410 | 2512 | ||
2411 | ret = register_filesystem(&reiserfs_fs_type); | 2513 | ret = register_filesystem(&reiserfs_fs_type); |
2514 | if (ret) | ||
2515 | goto out; | ||
2412 | 2516 | ||
2413 | if (ret == 0) { | 2517 | return 0; |
2414 | return 0; | 2518 | out: |
2415 | } | ||
2416 | |||
2417 | reiserfs_proc_info_global_done(); | 2519 | reiserfs_proc_info_global_done(); |
2418 | destroy_inodecache(); | 2520 | destroy_inodecache(); |
2419 | 2521 | ||
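Taken together, the init_reiserfs_fs() hunks replace the assignment-inside-if and the ret == 0 test with the kernel's usual check-then-goto unwind. The resulting shape of the function, assembled from the hunks above:

	static int __init init_reiserfs_fs(void)
	{
		int ret;

		ret = init_inodecache();
		if (ret)
			return ret;		/* nothing to unwind yet */

		reiserfs_proc_info_global_init();

		ret = register_filesystem(&reiserfs_fs_type);
		if (ret)
			goto out;		/* undo in reverse order */

		return 0;
	out:
		reiserfs_proc_info_global_done();
		destroy_inodecache();
		return ret;
	}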
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index 5e2624d12f70..f41e19b4bb42 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details | 2 | * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright |
3 | * details | ||
3 | */ | 4 | */ |
4 | 5 | ||
5 | #include <linux/time.h> | 6 | #include <linux/time.h> |
@@ -7,29 +8,41 @@ | |||
7 | #include <linux/buffer_head.h> | 8 | #include <linux/buffer_head.h> |
8 | #include "reiserfs.h" | 9 | #include "reiserfs.h" |
9 | 10 | ||
10 | /* access to tail : when one is going to read tail it must make sure, that is not running. | 11 | /* |
11 | direct2indirect and indirect2direct can not run concurrently */ | 12 | * access to tail: when one is going to read the tail it must make sure that no |
13 | * conversion is running; direct2indirect and indirect2direct can not run concurrently |
14 | */ | ||
12 | 15 | ||
13 | /* Converts direct items to an unformatted node. Panics if file has no | 16 | /* |
14 | tail. -ENOSPC if no disk space for conversion */ | 17 | * Converts direct items to an unformatted node. Panics if file has no |
15 | /* path points to first direct item of the file regarless of how many of | 18 | * tail. Returns -ENOSPC if there is no disk space for the conversion |
16 | them are there */ | 19 | */ |
20 | /* | ||
21 | * path points to first direct item of the file regardless of how many of | ||
22 | * them are there | ||
23 | */ | ||
17 | int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | 24 | int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, |
18 | struct treepath *path, struct buffer_head *unbh, | 25 | struct treepath *path, struct buffer_head *unbh, |
19 | loff_t tail_offset) | 26 | loff_t tail_offset) |
20 | { | 27 | { |
21 | struct super_block *sb = inode->i_sb; | 28 | struct super_block *sb = inode->i_sb; |
22 | struct buffer_head *up_to_date_bh; | 29 | struct buffer_head *up_to_date_bh; |
23 | struct item_head *p_le_ih = PATH_PITEM_HEAD(path); | 30 | struct item_head *p_le_ih = tp_item_head(path); |
24 | unsigned long total_tail = 0; | 31 | unsigned long total_tail = 0; |
25 | struct cpu_key end_key; /* Key to search for the last byte of the | 32 | |
26 | converted item. */ | 33 | /* Key to search for the last byte of the converted item. */ |
27 | struct item_head ind_ih; /* new indirect item to be inserted or | 34 | struct cpu_key end_key; |
28 | key of unfm pointer to be pasted */ | 35 | |
29 | int blk_size, retval; /* returned value for reiserfs_insert_item and clones */ | 36 | /* |
30 | unp_t unfm_ptr; /* Handle on an unformatted node | 37 | * new indirect item to be inserted or key |
31 | that will be inserted in the | 38 | * of unfm pointer to be pasted |
32 | tree. */ | 39 | */ |
40 | struct item_head ind_ih; | ||
41 | int blk_size; | ||
42 | /* returned value for reiserfs_insert_item and clones */ | ||
43 | int retval; | ||
44 | /* Handle on an unformatted node that will be inserted in the tree. */ | ||
45 | unp_t unfm_ptr; | ||
33 | 46 | ||
34 | BUG_ON(!th->t_trans_id); | 47 | BUG_ON(!th->t_trans_id); |
35 | 48 | ||
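The conversion below hinges on two search structures set up from the same item head; condensed from this hunk and the next, using only helpers that appear there:

	/* the new indirect item: same key as the first direct item,
	 * but at tail_offset and of TYPE_INDIRECT */
	copy_item_head(&ind_ih, tp_item_head(path));
	set_le_ih_k_offset(&ind_ih, tail_offset);
	set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);

	/* search key that always finds the last direct item of the file */
	make_cpu_key(&end_key, inode, max_reiserfs_offset(inode),
		     TYPE_DIRECT, 4);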
@@ -37,8 +50,10 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
37 | 50 | ||
38 | blk_size = sb->s_blocksize; | 51 | blk_size = sb->s_blocksize; |
39 | 52 | ||
40 | /* and key to search for append or insert pointer to the new | 53 | /* |
41 | unformatted node. */ | 54 | * and key to search for append or insert pointer to the new |
55 | * unformatted node. | ||
56 | */ | ||
42 | copy_item_head(&ind_ih, p_le_ih); | 57 | copy_item_head(&ind_ih, p_le_ih); |
43 | set_le_ih_k_offset(&ind_ih, tail_offset); | 58 | set_le_ih_k_offset(&ind_ih, tail_offset); |
44 | set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); | 59 | set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); |
@@ -55,7 +70,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
55 | return -EIO; | 70 | return -EIO; |
56 | } | 71 | } |
57 | 72 | ||
58 | p_le_ih = PATH_PITEM_HEAD(path); | 73 | p_le_ih = tp_item_head(path); |
59 | 74 | ||
60 | unfm_ptr = cpu_to_le32(unbh->b_blocknr); | 75 | unfm_ptr = cpu_to_le32(unbh->b_blocknr); |
61 | 76 | ||
@@ -76,36 +91,43 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
76 | if (retval) { | 91 | if (retval) { |
77 | return retval; | 92 | return retval; |
78 | } | 93 | } |
79 | // note: from here there are two keys which have matching first | 94 | /* |
80 | // three key components. They only differ by the fourth one. | 95 | * note: from here there are two keys which have matching first |
96 | * three key components. They only differ by the fourth one. | ||
97 | */ | ||
81 | 98 | ||
82 | /* Set the key to search for the direct items of the file */ | 99 | /* Set the key to search for the direct items of the file */ |
83 | make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, | 100 | make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, |
84 | 4); | 101 | 4); |
85 | 102 | ||
86 | /* Move bytes from the direct items to the new unformatted node | 103 | /* |
87 | and delete them. */ | 104 | * Move bytes from the direct items to the new unformatted node |
105 | * and delete them. | ||
106 | */ | ||
88 | while (1) { | 107 | while (1) { |
89 | int tail_size; | 108 | int tail_size; |
90 | 109 | ||
91 | /* end_key.k_offset is set so, that we will always have found | 110 | /* |
92 | last item of the file */ | 111 | * end_key.k_offset is set up so that we will always find the |
112 | * last item of the file | ||
113 | */ | ||
93 | if (search_for_position_by_key(sb, &end_key, path) == | 114 | if (search_for_position_by_key(sb, &end_key, path) == |
94 | POSITION_FOUND) | 115 | POSITION_FOUND) |
95 | reiserfs_panic(sb, "PAP-14050", | 116 | reiserfs_panic(sb, "PAP-14050", |
96 | "direct item (%K) not found", &end_key); | 117 | "direct item (%K) not found", &end_key); |
97 | p_le_ih = PATH_PITEM_HEAD(path); | 118 | p_le_ih = tp_item_head(path); |
98 | RFALSE(!is_direct_le_ih(p_le_ih), | 119 | RFALSE(!is_direct_le_ih(p_le_ih), |
99 | "vs-14055: direct item expected(%K), found %h", | 120 | "vs-14055: direct item expected(%K), found %h", |
100 | &end_key, p_le_ih); | 121 | &end_key, p_le_ih); |
101 | tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) | 122 | tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) |
102 | + ih_item_len(p_le_ih) - 1; | 123 | + ih_item_len(p_le_ih) - 1; |
103 | 124 | ||
104 | /* we only send the unbh pointer if the buffer is not up to date. | 125 | /* |
105 | ** this avoids overwriting good data from writepage() with old data | 126 | * we only send the unbh pointer if the buffer is not |
106 | ** from the disk or buffer cache | 127 | * up to date. this avoids overwriting good data from |
107 | ** Special case: unbh->b_page will be NULL if we are coming through | 128 | * writepage() with old data from the disk or buffer cache |
108 | ** DIRECT_IO handler here. | 129 | * Special case: unbh->b_page will be NULL if we are coming |
130 | * through DIRECT_IO handler here. | ||
109 | */ | 131 | */ |
110 | if (!unbh->b_page || buffer_uptodate(unbh) | 132 | if (!unbh->b_page || buffer_uptodate(unbh) |
111 | || PageUptodate(unbh->b_page)) { | 133 | || PageUptodate(unbh->b_page)) { |
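A worked example of the tail_size computation in this hunk, assuming a 4096-byte block and reiserfs's 1-based byte offsets:

	/*
	 * a direct item with le_ih_k_offset() == 8193 (the first byte of the
	 * third 4096-byte block) and ih_item_len() == 100 gives
	 *
	 *	tail_size = (8193 & 4095) + 100 - 1 = 1 + 99 = 100
	 *
	 * i.e. the tail occupies bytes 1..100 of that block.
	 */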
@@ -117,13 +139,15 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
117 | up_to_date_bh); | 139 | up_to_date_bh); |
118 | 140 | ||
119 | total_tail += retval; | 141 | total_tail += retval; |
142 | |||
143 | /* done: file does not have direct items anymore */ | ||
120 | if (tail_size == retval) | 144 | if (tail_size == retval) |
121 | // done: file does not have direct items anymore | ||
122 | break; | 145 | break; |
123 | 146 | ||
124 | } | 147 | } |
125 | /* if we've copied bytes from disk into the page, we need to zero | 148 | /* |
126 | ** out the unused part of the block (it was not up to date before) | 149 | * if we've copied bytes from disk into the page, we need to zero |
150 | * out the unused part of the block (it was not up to date before) | ||
127 | */ | 151 | */ |
128 | if (up_to_date_bh) { | 152 | if (up_to_date_bh) { |
129 | unsigned pgoff = | 153 | unsigned pgoff = |
@@ -146,9 +170,11 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) | |||
146 | BUG(); | 170 | BUG(); |
147 | } | 171 | } |
148 | clear_buffer_dirty(bh); | 172 | clear_buffer_dirty(bh); |
149 | /* Remove the buffer from whatever list it belongs to. We are mostly | 173 | /* |
150 | interested in removing it from per-sb j_dirty_buffers list, to avoid | 174 | * Remove the buffer from whatever list it belongs to. We are mostly |
151 | BUG() on attempt to write not mapped buffer */ | 175 | * interested in removing it from per-sb j_dirty_buffers list, to avoid |
176 | * a BUG() on an attempt to write a buffer that is not mapped |
177 | */ | ||
152 | if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { | 178 | if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { |
153 | struct inode *inode = bh->b_page->mapping->host; | 179 | struct inode *inode = bh->b_page->mapping->host; |
154 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); | 180 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); |
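The list removal that comment promises happens a few lines further down, under the journal's dirty-buffers lock. A sketch of that path; the j_dirty_buffers_lock spinlock and the reiserfs_free_jh() helper are assumptions based on the journal code, not shown in this hunk:

	if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
		struct inode *inode = bh->b_page->mapping->host;
		struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);

		spin_lock(&j->j_dirty_buffers_lock);
		list_del_init(&bh->b_assoc_buffers);	/* off j_dirty_buffers */
		reiserfs_free_jh(bh);			/* drop any journal head */
		spin_unlock(&j->j_dirty_buffers_lock);
	}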
@@ -164,12 +190,14 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) | |||
164 | unlock_buffer(bh); | 190 | unlock_buffer(bh); |
165 | } | 191 | } |
166 | 192 | ||
167 | /* this first locks inode (neither reads nor sync are permitted), | 193 | /* |
168 | reads tail through page cache, insert direct item. When direct item | 194 | * this first locks the inode (neither reads nor sync are permitted), |
169 | inserted successfully inode is left locked. Return value is always | 195 | * reads the tail through the page cache and inserts a direct item. When the |
170 | what we expect from it (number of cut bytes). But when tail remains | 196 | * direct item is inserted successfully the inode is left locked. The return |
171 | in the unformatted node, we set mode to SKIP_BALANCING and unlock | 197 | * value is always the number of cut bytes. But when the tail remains in |
172 | inode */ | 198 | * the unformatted node, we set mode to SKIP_BALANCING and unlock the |
199 | * inode |
200 | */ | ||
173 | int indirect2direct(struct reiserfs_transaction_handle *th, | 201 | int indirect2direct(struct reiserfs_transaction_handle *th, |
174 | struct inode *inode, struct page *page, | 202 | struct inode *inode, struct page *page, |
175 | struct treepath *path, /* path to the indirect item. */ | 203 | struct treepath *path, /* path to the indirect item. */ |
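From the caller's side the contract just described looks roughly like this (a sketch; the trailing parameters and the char mode out-argument are inferred from the hunks below, not spelled out here):

	char mode;
	int cut_bytes = indirect2direct(th, inode, page, path, item_key,
					new_file_size, &mode);
	if (mode == M_SKIP_BALANCING) {
		/* tail stayed in the unformatted node; inode was unlocked */
	} else {
		/* M_CUT: the last unformatted node must now be removed */
	}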
@@ -194,7 +222,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
194 | *mode = M_SKIP_BALANCING; | 222 | *mode = M_SKIP_BALANCING; |
195 | 223 | ||
196 | /* store item head path points to. */ | 224 | /* store item head path points to. */ |
197 | copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); | 225 | copy_item_head(&s_ih, tp_item_head(path)); |
198 | 226 | ||
199 | tail_len = (n_new_file_size & (block_size - 1)); | 227 | tail_len = (n_new_file_size & (block_size - 1)); |
200 | if (get_inode_sd_version(inode) == STAT_DATA_V2) | 228 | if (get_inode_sd_version(inode) == STAT_DATA_V2) |
@@ -207,9 +235,11 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
207 | 1) * sb->s_blocksize; | 235 | 1) * sb->s_blocksize; |
208 | pos1 = pos; | 236 | pos1 = pos; |
209 | 237 | ||
210 | // we are protected by i_mutex. The tail can not disapper, not | 238 | /* |
211 | // append can be done either | 239 | * we are protected by i_mutex. The tail can not disappear, nor |
212 | // we are in truncate or packing tail in file_release | 240 | * can an append be done either; |
241 | * we are in truncate or packing the tail in file_release |
242 | */ | ||
213 | 243 | ||
214 | tail = (char *)kmap(page); /* this can schedule */ | 244 | tail = (char *)kmap(page); /* this can schedule */ |
215 | 245 | ||
@@ -220,7 +250,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
220 | reiserfs_panic(sb, "PAP-5520", | 250 | reiserfs_panic(sb, "PAP-5520", |
221 | "item to be converted %K does not exist", | 251 | "item to be converted %K does not exist", |
222 | item_key); | 252 | item_key); |
223 | copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); | 253 | copy_item_head(&s_ih, tp_item_head(path)); |
224 | #ifdef CONFIG_REISERFS_CHECK | 254 | #ifdef CONFIG_REISERFS_CHECK |
225 | pos = le_ih_k_offset(&s_ih) - 1 + | 255 | pos = le_ih_k_offset(&s_ih) - 1 + |
226 | (ih_item_len(&s_ih) / UNFM_P_SIZE - | 256 | (ih_item_len(&s_ih) / UNFM_P_SIZE - |
@@ -236,9 +266,10 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
236 | pos1 + 1, TYPE_DIRECT, round_tail_len, | 266 | pos1 + 1, TYPE_DIRECT, round_tail_len, |
237 | 0xffff /*ih_free_space */ ); | 267 | 0xffff /*ih_free_space */ ); |
238 | 268 | ||
239 | /* we want a pointer to the first byte of the tail in the page. | 269 | /* |
240 | ** the page was locked and this part of the page was up to date when | 270 | * we want a pointer to the first byte of the tail in the page. |
241 | ** indirect2direct was called, so we know the bytes are still valid | 271 | * the page was locked and this part of the page was up to date when |
272 | * indirect2direct was called, so we know the bytes are still valid | ||
242 | */ | 273 | */ |
243 | tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); | 274 | tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); |
244 | 275 | ||
@@ -250,12 +281,14 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
250 | /* Insert tail as new direct item in the tree */ | 281 | /* Insert tail as new direct item in the tree */ |
251 | if (reiserfs_insert_item(th, path, &key, &s_ih, inode, | 282 | if (reiserfs_insert_item(th, path, &key, &s_ih, inode, |
252 | tail ? tail : NULL) < 0) { | 283 | tail ? tail : NULL) < 0) { |
253 | /* No disk memory. So we can not convert last unformatted node | 284 | /* |
254 | to the direct item. In this case we used to adjust | 285 | * No disk space, so we can not convert the last unformatted node |
255 | indirect items's ih_free_space. Now ih_free_space is not | 286 | * to a direct item. In this case we used to adjust the indirect |
256 | used, it would be ideal to write zeros to corresponding | 287 | * item's ih_free_space. Now ih_free_space is not used, it would |
257 | unformatted node. For now i_size is considered as guard for | 288 | * be ideal to write zeros to the corresponding unformatted node. |
258 | going out of file size */ | 289 | * For now i_size is considered as the guard against going out |
290 | * past the file size |
291 | */ | ||
259 | kunmap(page); | 292 | kunmap(page); |
260 | return block_size - round_tail_len; | 293 | return block_size - round_tail_len; |
261 | } | 294 | } |
@@ -264,12 +297,16 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
264 | /* make sure to get the i_blocks changes from reiserfs_insert_item */ | 297 | /* make sure to get the i_blocks changes from reiserfs_insert_item */ |
265 | reiserfs_update_sd(th, inode); | 298 | reiserfs_update_sd(th, inode); |
266 | 299 | ||
267 | // note: we have now the same as in above direct2indirect | 300 | /* |
268 | // conversion: there are two keys which have matching first three | 301 | * note: we now have the same situation as in the direct2indirect |
269 | // key components. They only differ by the fouhth one. | 302 | * conversion above: there are two keys which have matching first |
303 | * three key components. They only differ by the fourth one. |
304 | */ | ||
270 | 305 | ||
271 | /* We have inserted new direct item and must remove last | 306 | /* |
272 | unformatted node. */ | 307 | * We have inserted new direct item and must remove last |
308 | * unformatted node. | ||
309 | */ | ||
273 | *mode = M_CUT; | 310 | *mode = M_CUT; |
274 | 311 | ||
275 | /* we store position of first direct item in the in-core inode */ | 312 | /* we store position of first direct item in the in-core inode */ |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5cdfbd638b5c..ca416d099e7d 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -56,9 +56,11 @@ | |||
56 | #define XAROOT_NAME "xattrs" | 56 | #define XAROOT_NAME "xattrs" |
57 | 57 | ||
58 | 58 | ||
59 | /* Helpers for inode ops. We do this so that we don't have all the VFS | 59 | /* |
60 | * Helpers for inode ops. We do this so that we don't have all the VFS | ||
60 | * overhead and also for proper i_mutex annotation. | 61 | * overhead and also for proper i_mutex annotation. |
61 | * dir->i_mutex must be held for all of them. */ | 62 | * dir->i_mutex must be held for all of them. |
63 | */ | ||
62 | #ifdef CONFIG_REISERFS_FS_XATTR | 64 | #ifdef CONFIG_REISERFS_FS_XATTR |
63 | static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) | 65 | static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) |
64 | { | 66 | { |
@@ -73,10 +75,12 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
73 | return dir->i_op->mkdir(dir, dentry, mode); | 75 | return dir->i_op->mkdir(dir, dentry, mode); |
74 | } | 76 | } |
75 | 77 | ||
76 | /* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr | 78 | /* |
79 | * We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr | ||
77 | * mutation ops aren't called during rename or splice, which are the | 80 | * mutation ops aren't called during rename or splice, which are the |
78 | * only other users of I_MUTEX_CHILD. It violates the ordering, but that's | 81 | * only other users of I_MUTEX_CHILD. It violates the ordering, but that's |
79 | * better than allocating another subclass just for this code. */ | 82 | * better than allocating another subclass just for this code. |
83 | */ | ||
80 | static int xattr_unlink(struct inode *dir, struct dentry *dentry) | 84 | static int xattr_unlink(struct inode *dir, struct dentry *dentry) |
81 | { | 85 | { |
82 | int error; | 86 | int error; |
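Concretely, each of these helpers takes the child's i_mutex via mutex_lock_nested() with the I_MUTEX_CHILD subclass; a sketch of xattr_unlink(), consistent with the fragment this hunk shows:

	static int xattr_unlink(struct inode *dir, struct dentry *dentry)
	{
		int error;

		BUG_ON(!mutex_is_locked(&dir->i_mutex));

		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
		error = dir->i_op->unlink(dir, dentry);
		mutex_unlock(&dentry->d_inode->i_mutex);

		if (!error)
			d_delete(dentry);
		return error;
	}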
@@ -166,9 +170,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) | |||
166 | return xadir; | 170 | return xadir; |
167 | } | 171 | } |
168 | 172 | ||
169 | /* The following are side effects of other operations that aren't explicitly | 173 | /* |
174 | * The following are side effects of other operations that aren't explicitly | ||
170 | * modifying extended attributes. This includes operations such as permissions | 175 | * modifying extended attributes. This includes operations such as permissions |
171 | * or ownership changes, object deletions, etc. */ | 176 | * or ownership changes, object deletions, etc. |
177 | */ | ||
172 | struct reiserfs_dentry_buf { | 178 | struct reiserfs_dentry_buf { |
173 | struct dir_context ctx; | 179 | struct dir_context ctx; |
174 | struct dentry *xadir; | 180 | struct dentry *xadir; |
@@ -267,11 +273,13 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
267 | cleanup_dentry_buf(&buf); | 273 | cleanup_dentry_buf(&buf); |
268 | 274 | ||
269 | if (!err) { | 275 | if (!err) { |
270 | /* We start a transaction here to avoid a ABBA situation | 276 | /* |
277 | * We start a transaction here to avoid an ABBA situation |
271 | * between the xattr root's i_mutex and the journal lock. | 278 | * between the xattr root's i_mutex and the journal lock. |
272 | * This doesn't incur much additional overhead since the | 279 | * This doesn't incur much additional overhead since the |
273 | * new transaction will just nest inside the | 280 | * new transaction will just nest inside the |
274 | * outer transaction. */ | 281 | * outer transaction. |
282 | */ | ||
275 | int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + | 283 | int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + |
276 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); | 284 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); |
277 | struct reiserfs_transaction_handle th; | 285 | struct reiserfs_transaction_handle th; |
@@ -284,7 +292,7 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
284 | I_MUTEX_XATTR); | 292 | I_MUTEX_XATTR); |
285 | err = action(dir, data); | 293 | err = action(dir, data); |
286 | reiserfs_write_lock(inode->i_sb); | 294 | reiserfs_write_lock(inode->i_sb); |
287 | jerror = journal_end(&th, inode->i_sb, blocks); | 295 | jerror = journal_end(&th); |
288 | reiserfs_write_unlock(inode->i_sb); | 296 | reiserfs_write_unlock(inode->i_sb); |
289 | mutex_unlock(&dir->d_parent->d_inode->i_mutex); | 297 | mutex_unlock(&dir->d_parent->d_inode->i_mutex); |
290 | err = jerror ?: err; | 298 | err = jerror ?: err; |
@@ -349,9 +357,11 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) | |||
349 | } | 357 | } |
350 | 358 | ||
351 | #ifdef CONFIG_REISERFS_FS_XATTR | 359 | #ifdef CONFIG_REISERFS_FS_XATTR |
352 | /* Returns a dentry corresponding to a specific extended attribute file | 360 | /* |
361 | * Returns a dentry corresponding to a specific extended attribute file | ||
353 | * for the inode. If flags allow, the file is created. Otherwise, a | 362 | * for the inode. If flags allow, the file is created. Otherwise, a |
354 | * valid or negative dentry, or an error is returned. */ | 363 | * valid or negative dentry, or an error is returned. |
364 | */ | ||
355 | static struct dentry *xattr_lookup(struct inode *inode, const char *name, | 365 | static struct dentry *xattr_lookup(struct inode *inode, const char *name, |
356 | int flags) | 366 | int flags) |
357 | { | 367 | { |
@@ -400,8 +410,10 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n) | |||
400 | { | 410 | { |
401 | struct address_space *mapping = dir->i_mapping; | 411 | struct address_space *mapping = dir->i_mapping; |
402 | struct page *page; | 412 | struct page *page; |
403 | /* We can deadlock if we try to free dentries, | 413 | /* |
404 | and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ | 414 | * We can deadlock if we try to free dentries, |
415 | * and an unlink/rmdir has just occurred - GFP_NOFS avoids this | ||
416 | */ | ||
405 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 417 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
406 | page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); | 418 | page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); |
407 | if (!IS_ERR(page)) { | 419 | if (!IS_ERR(page)) { |
@@ -411,7 +423,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n) | |||
411 | } | 423 | } |
412 | return page; | 424 | return page; |
413 | 425 | ||
414 | fail: | 426 | fail: |
415 | reiserfs_put_page(page); | 427 | reiserfs_put_page(page); |
416 | return ERR_PTR(-EIO); | 428 | return ERR_PTR(-EIO); |
417 | } | 429 | } |
@@ -589,7 +601,7 @@ int reiserfs_xattr_set(struct inode *inode, const char *name, | |||
589 | buffer, buffer_size, flags); | 601 | buffer, buffer_size, flags); |
590 | 602 | ||
591 | reiserfs_write_lock(inode->i_sb); | 603 | reiserfs_write_lock(inode->i_sb); |
592 | error2 = journal_end(&th, inode->i_sb, jbegin_count); | 604 | error2 = journal_end(&th); |
593 | reiserfs_write_unlock(inode->i_sb); | 605 | reiserfs_write_unlock(inode->i_sb); |
594 | if (error == 0) | 606 | if (error == 0) |
595 | error = error2; | 607 | error = error2; |
@@ -615,8 +627,10 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer, | |||
615 | if (name == NULL) | 627 | if (name == NULL) |
616 | return -EINVAL; | 628 | return -EINVAL; |
617 | 629 | ||
618 | /* We can't have xattrs attached to v1 items since they don't have | 630 | /* |
619 | * generation numbers */ | 631 | * We can't have xattrs attached to v1 items since they don't have |
632 | * generation numbers | ||
633 | */ | ||
620 | if (get_inode_sd_version(inode) == STAT_DATA_V1) | 634 | if (get_inode_sd_version(inode) == STAT_DATA_V1) |
621 | return -EOPNOTSUPP; | 635 | return -EOPNOTSUPP; |
622 | 636 | ||
@@ -913,12 +927,16 @@ static const struct xattr_handler *reiserfs_xattr_handlers[] = { | |||
913 | 927 | ||
914 | static int xattr_mount_check(struct super_block *s) | 928 | static int xattr_mount_check(struct super_block *s) |
915 | { | 929 | { |
916 | /* We need generation numbers to ensure that the oid mapping is correct | 930 | /* |
917 | * v3.5 filesystems don't have them. */ | 931 | * We need generation numbers to ensure that the oid mapping is correct; |
932 | * v3.5 filesystems don't have them. | ||
933 | */ | ||
918 | if (old_format_only(s)) { | 934 | if (old_format_only(s)) { |
919 | if (reiserfs_xattrs_optional(s)) { | 935 | if (reiserfs_xattrs_optional(s)) { |
920 | /* Old format filesystem, but optional xattrs have | 936 | /* |
921 | * been enabled. Error out. */ | 937 | * Old format filesystem, but optional xattrs have |
938 | * been enabled. Error out. | ||
939 | */ | ||
922 | reiserfs_warning(s, "jdm-2005", | 940 | reiserfs_warning(s, "jdm-2005", |
923 | "xattrs/ACLs not supported " | 941 | "xattrs/ACLs not supported " |
924 | "on pre-v3.6 format filesystems. " | 942 | "on pre-v3.6 format filesystems. " |
@@ -972,9 +990,11 @@ int reiserfs_lookup_privroot(struct super_block *s) | |||
972 | return err; | 990 | return err; |
973 | } | 991 | } |
974 | 992 | ||
975 | /* We need to take a copy of the mount flags since things like | 993 | /* |
994 | * We need to take a copy of the mount flags since things like | ||
976 | * MS_RDONLY don't get set until *after* we're called. | 995 | * MS_RDONLY don't get set until *after* we're called. |
977 | * mount_flags != mount_options */ | 996 | * mount_flags != mount_options |
997 | */ | ||
978 | int reiserfs_xattr_init(struct super_block *s, int mount_flags) | 998 | int reiserfs_xattr_init(struct super_block *s, int mount_flags) |
979 | { | 999 | { |
980 | int err = 0; | 1000 | int err = 0; |
@@ -1007,8 +1027,8 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) | |||
1007 | 1027 | ||
1008 | error: | 1028 | error: |
1009 | if (err) { | 1029 | if (err) { |
1010 | clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); | 1030 | clear_bit(REISERFS_XATTRS_USER, &REISERFS_SB(s)->s_mount_opt); |
1011 | clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); | 1031 | clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt); |
1012 | } | 1032 | } |
1013 | 1033 | ||
1014 | /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ | 1034 | /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ |
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index f59626c5d33b..857ec7e3016f 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h | |||
@@ -61,7 +61,8 @@ static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) | |||
61 | return ret; | 61 | return ret; |
62 | } | 62 | } |
63 | 63 | ||
64 | /* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. | 64 | /* |
65 | * We may have to create up to 3 objects: xattr root, xattr dir, xattr file. | ||
65 | * Let's try to be smart about it. | 66 | * Let's try to be smart about it. |
66 | * xattr root: We cache it. If it's not cached, we may need to create it. | 67 | * xattr root: We cache it. If it's not cached, we may need to create it. |
67 | * xattr dir: If anything has been loaded for this inode, we can set a flag | 68 | * xattr dir: If anything has been loaded for this inode, we can set a flag |
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index a6ce532402dc..44503e293790 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -25,8 +25,10 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
25 | int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; | 25 | int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; |
26 | 26 | ||
27 | 27 | ||
28 | /* Pessimism: We can't assume that anything from the xattr root up | 28 | /* |
29 | * has been created. */ | 29 | * Pessimism: We can't assume that anything from the xattr root up |
30 | * has been created. | ||
31 | */ | ||
30 | 32 | ||
31 | jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + | 33 | jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + |
32 | reiserfs_xattr_nblocks(inode, size) * 2; | 34 | reiserfs_xattr_nblocks(inode, size) * 2; |
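The jcreate_blocks sum above is the pessimistic budget the new comment refers to. One way to read it (the interpretation of the factor of two is an assumption, not something the hunk states):

	/*
	 * worst case: nothing from the xattr root down exists yet, so budget
	 * for creating root + dir + file, plus twice the blocks needed to
	 * write 'size' bytes of ACL data -- presumably the write itself and
	 * its bookkeeping:
	 */
	jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
			 reiserfs_xattr_nblocks(inode, size) * 2;

	error = journal_begin(&th, inode->i_sb, jcreate_blocks);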
@@ -37,7 +39,7 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
37 | if (error == 0) { | 39 | if (error == 0) { |
38 | error = __reiserfs_set_acl(&th, inode, type, acl); | 40 | error = __reiserfs_set_acl(&th, inode, type, acl); |
39 | reiserfs_write_lock(inode->i_sb); | 41 | reiserfs_write_lock(inode->i_sb); |
40 | error2 = journal_end(&th, inode->i_sb, jcreate_blocks); | 42 | error2 = journal_end(&th); |
41 | reiserfs_write_unlock(inode->i_sb); | 43 | reiserfs_write_unlock(inode->i_sb); |
42 | if (error2) | 44 | if (error2) |
43 | error = error2; | 45 | error = error2; |
@@ -111,7 +113,7 @@ static struct posix_acl *reiserfs_posix_acl_from_disk(const void *value, size_t | |||
111 | goto fail; | 113 | goto fail; |
112 | return acl; | 114 | return acl; |
113 | 115 | ||
114 | fail: | 116 | fail: |
115 | posix_acl_release(acl); | 117 | posix_acl_release(acl); |
116 | return ERR_PTR(-EINVAL); | 118 | return ERR_PTR(-EINVAL); |
117 | } | 119 | } |
@@ -164,7 +166,7 @@ static void *reiserfs_posix_acl_to_disk(const struct posix_acl *acl, size_t * si | |||
164 | } | 166 | } |
165 | return (char *)ext_acl; | 167 | return (char *)ext_acl; |
166 | 168 | ||
167 | fail: | 169 | fail: |
168 | kfree(ext_acl); | 170 | kfree(ext_acl); |
169 | return ERR_PTR(-EINVAL); | 171 | return ERR_PTR(-EINVAL); |
170 | } | 172 | } |
@@ -208,8 +210,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) | |||
208 | 210 | ||
209 | retval = reiserfs_xattr_get(inode, name, value, size); | 211 | retval = reiserfs_xattr_get(inode, name, value, size); |
210 | if (retval == -ENODATA || retval == -ENOSYS) { | 212 | if (retval == -ENODATA || retval == -ENOSYS) { |
211 | /* This shouldn't actually happen as it should have | 213 | /* |
212 | been caught above.. but just in case */ | 214 | * This shouldn't actually happen as it should have |
215 | * been caught above... but just in case |
216 | */ | ||
213 | acl = NULL; | 217 | acl = NULL; |
214 | } else if (retval < 0) { | 218 | } else if (retval < 0) { |
215 | acl = ERR_PTR(retval); | 219 | acl = ERR_PTR(retval); |
@@ -290,8 +294,10 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
290 | return error; | 294 | return error; |
291 | } | 295 | } |
292 | 296 | ||
293 | /* dir->i_mutex: locked, | 297 | /* |
294 | * inode is new and not released into the wild yet */ | 298 | * dir->i_mutex: locked, |
299 | * inode is new and not released into the wild yet | ||
300 | */ | ||
295 | int | 301 | int |
296 | reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | 302 | reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, |
297 | struct inode *dir, struct dentry *dentry, | 303 | struct inode *dir, struct dentry *dentry, |
@@ -304,14 +310,18 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |||
304 | if (S_ISLNK(inode->i_mode)) | 310 | if (S_ISLNK(inode->i_mode)) |
305 | return 0; | 311 | return 0; |
306 | 312 | ||
307 | /* ACLs can only be used on "new" objects, so if it's an old object | 313 | /* |
308 | * there is nothing to inherit from */ | 314 | * ACLs can only be used on "new" objects, so if it's an old object |
315 | * there is nothing to inherit from | ||
316 | */ | ||
309 | if (get_inode_sd_version(dir) == STAT_DATA_V1) | 317 | if (get_inode_sd_version(dir) == STAT_DATA_V1) |
310 | goto apply_umask; | 318 | goto apply_umask; |
311 | 319 | ||
312 | /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This | 320 | /* |
321 | * Don't apply ACLs to objects in the .reiserfs_priv tree. This |
313 | * would be useless since permissions are ignored, and a pain because | 322 | * would be useless since permissions are ignored, and a pain because |
314 | * it introduces locking cycles */ | 323 | * it introduces locking cycles |
324 | */ | ||
315 | if (IS_PRIVATE(dir)) { | 325 | if (IS_PRIVATE(dir)) { |
316 | inode->i_flags |= S_PRIVATE; | 326 | inode->i_flags |= S_PRIVATE; |
317 | goto apply_umask; | 327 | goto apply_umask; |
@@ -335,7 +345,7 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |||
335 | 345 | ||
336 | return err; | 346 | return err; |
337 | 347 | ||
338 | apply_umask: | 348 | apply_umask: |
339 | /* no ACL, apply umask */ | 349 | /* no ACL, apply umask */ |
340 | inode->i_mode &= ~current_umask(); | 350 | inode->i_mode &= ~current_umask(); |
341 | return err; | 351 | return err; |